#!/usr/bin/env python
"""
.. module:: databaseObj
:synopsis: Contains Database class that represents the database of experimental results.
.. moduleauthor:: Veronika Magerl <v.magerl@gmx.at>
.. moduleauthor:: Andre Lessa <lessa.a.p@gmail.com>
.. moduleauthor:: Wolfgang Waltenberger <wolfgang.waltenberger@gmail.com>
"""
import sys
import os
import time
from smodels.experiment import datasetObj
from smodels.experiment.expResultObj import ExpResult
from smodels.experiment.exceptions import DatabaseNotFoundException
from smodels.tools.physicsUnits import fb
try:
import cPickle as serializer
except ImportError as e:
import pickle as serializer
from smodels.tools.smodelsLogging import logger, setLogLevel
[docs]class Database(object):
"""
Database object. Holds a list of ExpResult objects.
:ivar base: path to the database (string)
:ivar force_load: force loading the text database ("txt"),
or binary database ("pcl"), dont force anything if None
:ivar expResultList: list of ExpResult objects
"""
def __init__(self, base=None, force_load = None, verbosity=None ):
"""
:param force_load: force loading the text database ("txt"),
or binary database ("pcl"), dont force anything if None
"""
self.force_load = force_load
self.pclfilename = "database.pcl"
self.hasFastLim = False # True if any ExpResult is from fastlim
self._validateBase(base)
self._verbosity = verbosity
self._databaseVersion = None
self.expResultList = []
self.txt_mtime = None, None
self.pcl_mtime = None, None
self.pcl_db = None
self.sw_format_version = "115" ## what format does the software support?
self.pcl_format_version = None ## what format is in the binary file?
self.binfile = os.path.join ( self._base, self.pclfilename )
setLogLevel ( self._verbosity )
if self.force_load=="txt":
self.loadTextDatabase()
self.printFastlimBanner()
return
if self.force_load=="pcl":
self.loadBinaryFile()
self.printFastlimBanner()
return
if self.force_load in [ None, "none", "None" ]:
self.loadDatabase()
self.printFastlimBanner()
return
logger.error ( "when initialising database: force_load=%s is not " \
"recognized. Valid values are: pcl, txt, None." % force_load )
sys.exit()
[docs] def printFastlimBanner ( self ):
""" check if fastlim appears in data.
If yes, print a statement to stdout. """
if not self.hasFastLim: return
logger.info ( "FastLim v1.1 efficiencies loaded. Please cite: arXiv:1402.0492, EPJC74 (2014) 11" )
def __eq__ ( self, other ):
""" compare two databases
"""
if self.databaseVersion != other.databaseVersion:
return False
if len(self.expResultList ) != len (other.expResultList):
return False
for ( myres, otherres ) in zip ( self.expResultList, other.expResultList ):
if myres != otherres:
return False
return True
[docs] def loadDatabase ( self ):
""" if no binary file is available, then
load the database and create the binary file.
if binary file is available, then check if
it needs update, create new binary file, in
case it does need an update.
"""
if not os.path.exists ( self.binfile ):
# self.loadTextDatabase()
self.createBinaryFile()
else:
if self.needsUpdate():
self.createBinaryFile()
else:
self.loadBinaryFile( lastm_only = False )
[docs] def loadTextDatabase ( self ):
""" simply loads the textdabase """
if self._databaseVersion and len(self.expResultList)>0:
logger.debug ( "Asked to load database, but has already been loaded. Ignore." )
return
logger.info ( "Parsing text database at %s" % self._base )
self._databaseVersion = self._getDatabaseVersion
self.expResultList = self._loadExpResults()
[docs] def lastModifiedDir ( self, dirname, lastm ):
"""
Return the last modified timestamp of dirname, working recursively
:param dirname: directory name that is checked
:param lastm: the most recent timestamp so far
:returns: the most recent timestamp, and the number of files
"""
ret = lastm
ctr=0
for f in os.listdir ( dirname ):
if f in [ "orig", "sms.root", "validation" ]:
continue
if f[-1:]=="~":
continue
if f[0]==".":
continue
if f[-3:]==".py":
continue
lf = os.path.join ( dirname, f )
if os.path.isdir ( lf ):
(ret,tctr) = self.lastModifiedDir ( lf, ret )
ctr+=tctr+1
else:
ctr+=1
tmp = os.stat ( lf ).st_mtime
if tmp > ret:
ret = tmp
return (ret,ctr)
[docs] def lastModifiedAndFileCount( self ):
if self.txt_mtime[0]:
## already evaluated
return
versionfile = os.path.join ( self._base, "version" )
if not os.path.exists ( versionfile ):
logger.error("%s does not exist." % versionfile )
sys.exit()
lastm = os.stat(versionfile).st_mtime
count=1
topdir = os.listdir ( self._base )
for File in topdir:
subdir = os.path.join ( self._base, File )
if not os.path.isdir ( subdir ) or File in [ ".git" ]:
continue
(lastm,tcount) = self.lastModifiedDir ( subdir, lastm )
count+=tcount+1
self.txt_mtime = lastm, count
[docs] def loadBinaryFile ( self, lastm_only = False ):
"""
Load a binary database, returning last modified, file count, database.
:param lastm_only: if true, the database itself is not read.
:returns: database object, or None, if lastm_only == True.
"""
if lastm_only and self.pcl_mtime[0]:
## doesnt need to load database, and mtime is already
## loaded
return None
if self.pcl_db:
return self.pcl_db
if not os.path.exists ( self.binfile ):
return None
try:
with open ( self.binfile, "rb" ) as f:
t0=time.time()
self.pcl_python = serializer.load ( f )
self.pcl_format_version = serializer.load ( f )
self.pcl_mtime = serializer.load ( f )
self._databaseVersion = serializer.load ( f )
if not lastm_only:
if self.pcl_python != sys.version:
logger.warning ( "binary file was written with a different "
"python version. Regenerating." )
self.createBinaryFile()
return self
if self.pcl_format_version != self.sw_format_version:
logger.warning ( "binary file format (%s) and format "
"supported by software (%s) disagree." %
( self.pcl_format_version, self.sw_format_version ) )
logger.warning ( "will recreate binary." )
self.createBinaryFile()
return self
logger.info ( "loading binary db file %s format version %s" %
( self.binfile, self.pcl_format_version ) )
self.hasFastLim = serializer.load ( f )
self.expResultList = serializer.load ( f )
t1=time.time()-t0
logger.info ( "Loaded database from %s in %.1f secs." % \
( self.binfile, t1 ) )
except EOFError as e:
os.unlink ( self.binfile )
if lastm_only:
self.pcl_format_version = -1
self.pcl_mtime = 0
return self
logger.error ( "%s is not a binary database file! recreate it!" % self.binfile )
self.createBinaryFile()
return self
[docs] def checkBinaryFile ( self ):
nu=self.needsUpdate()
logger.debug ( "Checking binary db file." )
logger.debug ( "Binary file dates to %s(%d)" % \
( time.ctime(self.pcl_mtime[0]),self.pcl_mtime[1] ) )
logger.debug ( "Database dates to %s(%d)" % \
( time.ctime(self.txt_mtime[0]),self.txt_mtime[1] ) )
if nu:
logger.info ( "Binary db file needs an update." )
else:
logger.info ( "Binary db file does not need an update." )
return nu
[docs] def needsUpdate ( self ):
""" does the binary db file need an update? """
try:
# logger.debug ( "needsUpdate?" )
self.lastModifiedAndFileCount()
self.loadBinaryFile ( lastm_only = True )
return ( self.txt_mtime[0] > self.pcl_mtime[0] or \
self.txt_mtime[1] != self.pcl_mtime[1] or \
self.sw_format_version != self.pcl_format_version
)
except (IOError,DatabaseNotFoundException,TypeError,ValueError):
# if we encounter a problem, we rebuild the database.
return True
[docs] def createBinaryFile ( self, filename=None ):
""" create a pcl file from the text database,
potentially overwriting an old pcl file. """
t0=time.time()
logger.info ( "Creating binary database " )
logger.info ( "(this may take a few minutes, but it's done only once!)" )
logger.debug ( " * compute last modified timestamp." )
self.lastModifiedAndFileCount()
logger.debug ( " * compute timestamp: %s filecount: %d" % \
( time.ctime ( self.txt_mtime[0] ), self.txt_mtime[1] ) )
binfile = filename
if binfile == None:
binfile = self.binfile
logger.debug ( " * create %s" % self.binfile )
with open ( binfile, "wb" ) as f:
logger.debug ( " * load text database" )
self.loadTextDatabase()
logger.debug ( " * write %s version %s" % ( self.binfile,
self.sw_format_version ) )
ptcl = serializer.HIGHEST_PROTOCOL
self.pcl_python = sys.version
serializer.dump ( self.pcl_python, f, protocol=ptcl )
serializer.dump ( self.sw_format_version, f, protocol=ptcl )
serializer.dump ( self.txt_mtime, f, protocol=ptcl )
serializer.dump ( self._databaseVersion, f, protocol=ptcl )
serializer.dump ( self.hasFastLim, f, protocol=ptcl )
serializer.dump ( self.expResultList, f, protocol=ptcl )
logger.info ( " * done writing %s in %.1f secs." % \
( binfile, time.time()-t0 ) )
@property
def databaseVersion(self):
"""
The version of the database, read from the 'version' file.
"""
return self._databaseVersion
@property
def base(self):
"""
This is the path to the base directory where to find the database.
"""
return self._base
def _validateBase(self, path):
"""
Validates the base directory to locate the database.
Raises an exception if something is wrong with the path.
"""
logger.debug('Try to set the path for the database to: %s', path)
tmp = os.path.realpath(path)
if os.path.isfile ( tmp ):
self._base = os.path.dirname ( tmp )
self.force_load = "pcl"
self.pclfilename = os.path.basename ( tmp )
return
if tmp[-4:]==".pcl":
if not os.path.exists ( tmp ):
logger.error ( "File not found: %s" % tmp )
sys.exit()
logger.error ( "Supplied a pcl filename, but %s is not a file." % tmp )
sys.exit()
path = tmp + '/'
if not os.path.exists(path):
logger.error('%s is no valid path!' % path)
raise DatabaseNotFoundException("Database not found")
self._base = path
def __str__(self):
idList = "Database version: " + self.databaseVersion
idList += "\n"
idList += "-" * len(idList) + "\n"
if self.expResultList == None:
idList += "no experimental results available! "
else:
for expRes in self.expResultList:
idList += expRes.globalInfo.getInfo('id') + ', '
idList = idList[:-2] + '\n'
return idList
@property
def _getDatabaseVersion(self):
"""
Retrieves the version of the database using the version file.
"""
try:
vfile = os.path.join ( self._base, "version" )
versionFile = open( vfile )
content = versionFile.readlines()
versionFile.close()
line = content[0].strip()
logger.debug("Found version file %s with content ``%s''" \
% ( vfile, line) )
return line
except IOError:
logger.error('There is no version file %s', vfile )
return 'unknown version'
@property
def verbosity(self):
"""
Tells the level the logger is set to.
"""
return self._verbosity
@verbosity.setter
def verbosity(self, level):
"""
Set the logger to specified level.
"""
level = self._validateLevel(level)
self._verbosity = level
self._setLogLevel(level)
def _validateLevel(self, level):
"""
Validates given level for Python's logger module.
"""
if not level.lower() in ['debug', 'info', 'warning', 'error']:
logger.error('No valid level for verbosity: %s! Browser will ' +
'use default setting!' % level)
return 'error'
return level.lower()
def _loadExpResults(self):
"""
Checks the database folder and generates a list of ExpResult objects for
each (globalInfo.txt,sms.py) pair.
:returns: list of ExpResult objects
"""
folders=[]
for root, _, files in os.walk(self._base):
folders.append ( (root, files) )
folders.sort()
roots = []
for root,files in folders:
if "/.git/" in root:
continue
if root[-11:] == "/validation":
continue
if root[-5:] == "/orig":
continue
if not 'globalInfo.txt' in files:
# logger.debug("Missing globalInfo.txt in %s", root)
continue
else:
roots.append ( root )
resultsList = []
for root in roots:
expres = ExpResult(root)
if expres:
resultsList.append(expres)
contact = expres.globalInfo.getInfo("contact")
if contact and "fastlim" in contact.lower():
self.hasFastLim = True
if not resultsList:
logger.warning("Zero results loaded.")
return resultsList
[docs] def getExpResults(self, analysisIDs=['all'], datasetIDs=['all'], txnames=['all'],
dataTypes = ['all'], useSuperseded=False, useNonValidated=False):
"""
Returns a list of ExpResult objects.
Each object refers to an analysisID containing one (for UL) or more
(for Efficiency maps) dataset (signal region) and each dataset
containing one or more TxNames. If analysisIDs is defined, returns
only the results matching one of the IDs in the list. If dataTypes is
defined, returns only the results matching a dataType in the list. If
datasetIDs is defined, returns only the results matching one of the IDs
in the list. If txname is defined, returns only the results matching
one of the Tx names in the list.
:param analysisID: list of analysis ids ([CMS-SUS-13-006,...])
:param dataType: dataType of the analysis (all, efficiencyMap or upperLimit)
:param datasetIDs: list of dataset ids ([ANA-CUT0,...])
:param txnames: list of txnames ([TChiWZ,...])
:param useSuperseded: If False, the supersededBy results will not be included
:param useNonValidated: If False, the results with validated = False
will not be included
:returns: list of ExpResult objects or the ExpResult object if the list
contains only one result
"""
expResultList = []
for expResult in self.expResultList:
superseded = None
if hasattr(expResult.globalInfo,'supersededBy'):
superseded = expResult.globalInfo.supersededBy.replace(" ","")
if superseded and (not useSuperseded):
continue
ID = expResult.globalInfo.getInfo('id')
# Skip analysis not containing any of the required ids:
if analysisIDs != ['all']:
if not ID in analysisIDs:
continue
newExpResult = ExpResult()
newExpResult.path = expResult.path
newExpResult.globalInfo = expResult.globalInfo
newExpResult.datasets = []
for dataset in expResult.datasets:
if dataTypes != ['all']:
if not dataset.dataInfo.dataType in dataTypes:
continue
if datasetIDs != ['all']:
if not dataset.dataInfo.dataId in datasetIDs:
continue
newDataSet = datasetObj.DataSet(dataset.path, dataset.globalInfo,False)
newDataSet.dataInfo = dataset.dataInfo
newDataSet.txnameList = []
for txname in dataset.txnameList:
if txname.validated is False and (not useNonValidated):
continue
if txnames != ['all']:
if not txname.txName in txnames:
continue
newDataSet.txnameList.append(txname)
# Skip data set not containing any of the required txnames:
if not newDataSet.txnameList:
continue
newExpResult.datasets.append(newDataSet)
# Skip analysis not containing any of the required txnames:
if not newExpResult.getTxNames():
continue
expResultList.append(newExpResult)
return expResultList
[docs] def updateBinaryFile ( self ):
""" write a binar db file, but only if
necessary. """
if self.needsUpdate():
logger.debug ( "Binary db file needs an update." )
self.createBinaryFile()
else:
logger.debug ( "Binary db file does not need an update." )
[docs]class ExpResultList(object):
"""
Holds a list of ExpResult objects for printout.
:ivar expResultList: list of ExpResult objects
"""
def __init__(self, expResList):
self.expResultList = expResList
if __name__ == "__main__":
import argparse
""" Run as a script, this checks and/or writes database.pcl files """
argparser = argparse.ArgumentParser(description='simple script to check \
and/or write database.pcl files')
argparser.add_argument('-c', '--check', help='check binary db file',
action='store_true')
argparser.add_argument('-t', '--time', help='time reading db',
action='store_true')
argparser.add_argument('-r', '--read', help='read binary db file',
action='store_true')
argparser.add_argument('-w', '--write', help='force writing binary db file',
action='store_true')
argparser.add_argument('-u', '--update', help='update binary db file, if necessary',
action='store_true')
argparser.add_argument('-d', '--debug', help='debug mode',
action='store_true')
argparser.add_argument('-D', '--database', help='directory name of database',
default="../../../smodels-database/" )
args = argparser.parse_args()
logger.setLevel(level=logging.INFO )
if args.debug:
logger.setLevel(level=logging.DEBUG )
if args.write:
db = Database ( args.database, force_load="txt" )
if args.debug:
db.verbosity = "debug"
logger.debug ( "%s" % db )
db.createBinaryFile()
sys.exit()
db = Database ( args.database )
if args.debug:
db.verbosity = "debug"
logger.debug ( "%s" % db )
if args.update:
db.updateBinaryFile()
if args.check:
db.checkBinaryFile()
if args.time:
t0=time.time()
expResult = db.loadBinaryFile ( lastm_only = False )
t1=time.time()
print ( "Time it took reading binary db file: %.1f s." % (t1-t0) )
txtdb = db.loadTextDatabase()
t2=time.time()
print ( "Time it took reading text file: %.1f s." % (t2-t1) )
if args.read:
db = db.loadBinaryFile ( lastm_only = False )
listOfExpRes = db.getExpResults()
for expResult in listOfExpRes:
print (expResult)