Source code for experiment.datasetObj

"""
.. module:: datasetObj
   :synopsis: Holds the classes and methods used to read and store the information in the
              data folders.

.. moduleauthor:: Andre Lessa <lessa.a.p@gmail.com>

"""


import os,glob
from smodels.experiment import txnameObj,infoObj
from smodels.tools import statistics
from smodels.tools.physicsUnits import fb
from smodels.experiment.exceptions import SModelSExperimentError as SModelSError
from smodels.tools.smodelsLogging import logger

class DataSet(object):
    """
    Holds the information to a data set folder (TxName objects, dataInfo,...)

    Attributes set by the constructor:

    * ``path``: the data set folder given at construction (may be None)
    * ``globalInfo``: the ``info`` object given at construction
    * ``txnameList``: list of TxName objects read from the folder
    * ``dataInfo``: Info object built from dataInfo.txt. It is only set when
      the folder is actually read (``path`` given and ``createInfo`` True).
    """

    def __init__(self, path=None, info=None, createInfo=True):
        """
        :param path: path to the data set folder. If None (or empty), no file is read.
        :param info: object stored as globalInfo and handed to every TxName
        :param createInfo: if True (and path is given), read dataInfo.txt and all
                           the *.txt files in the folder

        :raises TypeError: if dataInfo.txt does not exist in path
        """

        self.path = path
        self.globalInfo = info
        self.txnameList = []

        if path and createInfo:
            logger.debug('Creating object based on data folder : %s' % self.path)

            #Get data folder info:
            infoFile = os.path.join(path, "dataInfo.txt")
            if not os.path.isfile(infoFile):
                message = "dataInfo.txt file not found in " + path
                logger.error(message)
                #Same exception type as before, but now carrying the reason
                raise TypeError(message)
            self.dataInfo = infoObj.Info(infoFile)

            #Get list of TxName objects. Files for which TxName raises a
            #TypeError are skipped (presumably *.txt files which are not
            #txname files, e.g. dataInfo.txt itself -- to be confirmed in txnameObj).
            for txtfile in glob.iglob(os.path.join(path, "*.txt")):
                try:
                    txname = txnameObj.TxName(txtfile, self.globalInfo, self.dataInfo)
                except TypeError:
                    continue
                self.txnameList.append(txname)

        self.txnameList.sort()

    def __ne__(self, other):
        return not self.__eq__(other)

    def __str__(self):
        return "Dataset: %s" % ", ".join(map(str, self.txnameList))

    def __eq__(self, other):
        """
        Two data sets are considered equal if their dataInfo objects compare
        equal and they hold the same number of TxName objects.
        Anything which is not a DataSet (e.g. None) is never equal.
        """
        if not isinstance(other, DataSet):
            return False
        if self.dataInfo != other.dataInfo:
            return False
        if len(self.txnameList) != len(other.txnameList):
            return False
        return True

    def getTxName(self, txname):
        """
        get one specific txName object.

        :param txname: value compared against the txName attribute of each entry
        :return: first TxName object in txnameList with a matching txName,
                 None if there is no match
        """
        for tn in self.txnameList:
            if tn.txName == txname:
                return tn
        return None

    def getEfficiencyFor(self, txname, mass):
        """
        convenience function.
        same as self.getTxName(txname).getEfficiencyFor(m)

        :return: result of getEfficiencyFor(mass) of the matching TxName object,
                 None if the data set holds no such txname
        """
        txnameObject = self.getTxName(txname)
        if txnameObject:
            return txnameObject.getEfficiencyFor(mass)
        return None

    def getValuesFor(self, attribute=None):
        """
        Returns a list for the possible values appearing in the DataSet
        for the required attribute.

        Every field of this object is collected. Fields whose string
        representation contains '<smodels.experiment' are not stored themselves;
        instead their own fields (or, for lists, the fields of each entry)
        are collected recursively.

        :param attribute: name of a field in the database (string). If not defined
                          it will return a dictionary with all fields and their respective
                          values
        :return: list of values for attribute; dictionary {field : list of values}
                 if attribute is not defined; False if attribute was not found
        """

        #list(...) is required: in Python 3 dict.items() is a view and can
        #neither be sliced nor extended
        pending = list(self.__dict__.items())
        valuesDict = {}
        #FIFO queue: fields are visited in the order they were found
        while pending:
            field, value = pending.pop(0)
            if '<smodels.experiment' not in str(value):
                valuesDict.setdefault(field, []).append(value)
            elif isinstance(value, list):
                for entry in value:
                    pending.extend(entry.__dict__.items())
            else:
                pending.extend(value.__dict__.items())

        #Try to keep only the set of unique values
        for key, val in list(valuesDict.items()):
            try:
                valuesDict[key] = list(set(val))
            except TypeError:
                #Unhashable values (e.g. lists): keep the full list, duplicates included
                pass

        if not attribute:
            return valuesDict
        if attribute not in valuesDict:
            logger.warning("Could not find field %s in database" % attribute)
            return False
        return valuesDict[attribute]

    def likelihood(self, nsig, deltas=None):
        """
        Computes the likelihood to observe nobs events,
        given a predicted signal "nsig", assuming "deltas"
        error on the signal efficiency.
        The values observedN, expectedBG, and bgError are part of dataInfo.

        :param nsig: predicted signal (float)
        :param deltas: uncertainty on signal (float).  If None, default value (20%) will be used.

        :return: likelihood to observe nobs events (float)
        """

        return statistics.likelihood(nsig, self.dataInfo.observedN,
                                     self.dataInfo.expectedBG,
                                     self.dataInfo.bgError, deltas)

    def chi2(self, nsig, deltas=None):
        """
        Computes the chi2 for a given number of observed events "nobs",
        given number of signal events "nsig", and error on signal "deltas".
        nobs, expectedBG and bgError are part of dataInfo.

        :param nsig: predicted signal (float)
        :param deltas: relative uncertainty in signal (float).  If None, default value (20%) will be used.

        :return: chi2 (float)
        """

        return statistics.chi2(nsig, self.dataInfo.observedN,
                               self.dataInfo.expectedBG,
                               self.dataInfo.bgError, deltas)

    def getAttributes(self, showPrivate=False):
        """
        Checks for all the fields/attributes it contains as well as the
        attributes of its objects if they belong to smodels.experiment.

        :param showPrivate: if True, also returns the protected fields (_field)
        :return: list of field names (strings)
        """

        fields = list(set(self.getValuesFor().keys()))

        if not showPrivate:
            fields = [field for field in fields if not field.startswith("_")]

        return fields

    def getSRUpperLimit(self, alpha=0.05, expected=False, compute=False):
        """
        Computes the 95% upper limit on the signal*efficiency for a given dataset (signal region).
        Only to be used for efficiency map type results.

        :param alpha: Can be used to change the C.L. value. The default value is 0.05 (= 95% C.L.)
        :param expected: Compute expected limit ( i.e. Nobserved = NexpectedBG )
        :param compute: If True, the upper limit will be computed
                        from expected and observed number of events. If False, the value listed
                        in the database will be used instead.

        :return: upper limit value. False if compute is True and the luminosity
                 is defined with the wrong units.
        :raises SModelSError: if the data set is not of efficiencyMap type
        """

        if not self.dataInfo.dataType == 'efficiencyMap':
            logger.error("getSRUpperLimit can only be used for efficiency map results!")
            raise SModelSError()

        if not compute:
            #Use the values stored in the database
            if not expected:
                return self.dataInfo.upperLimit
            try:
                return self.dataInfo.expectedUpperLimit
            except AttributeError:
                logger.info("expectedUpperLimit field not found. Using observed UL instead.")
                return self.dataInfo.upperLimit

        Nexp = self.dataInfo.expectedBG  #Number of expected BG events
        #Number of observed events (for the expected limit: Nobserved = NexpectedBG)
        Nobs = Nexp if expected else self.dataInfo.observedN
        bgError = self.dataInfo.bgError # error on BG
        lumi = self.globalInfo.lumi
        #A unit left over in lumi*fb is treated as a wrongly defined luminosity
        if (lumi*fb).normalize()._unit:
            ID = self.globalInfo.id
            logger.error("Luminosity defined with wrong units for %s" %(ID) )
            return False

        maxSignalXsec = statistics.upperLimit(Nobs, Nexp, bgError, lumi, alpha)

        return maxSignalXsec