from builtins import str
from builtins import object
import os, warnings
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.engine import url
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float
from sqlalchemy import ForeignKey
from sqlalchemy.orm import relationship, backref
from sqlalchemy.exc import DatabaseError
import numpy as np
Base = declarative_base()
__all__ = ['MetricRow', 'DisplayRow', 'PlotRow', 'SummaryStatRow', 'ResultsDb']
[docs]class MetricRow(Base):
"""
Define contents and format of metric list table.
(Table to list all metrics, their metadata, and their output data files).
"""
__tablename__ = "metrics"
# Define columns in metric list table.
metricId = Column(Integer, primary_key=True)
metricName = Column(String)
slicerName = Column(String)
simDataName = Column(String)
sqlConstraint = Column(String)
metricMetadata = Column(String)
metricDataFile = Column(String)
def __repr__(self):
return "<Metric(metricId='%d', metricName='%s', slicerName='%s', simDataName='%s', " \
"sqlConstraint='%s', metadata='%s', metricDataFile='%s')>" \
%(self.metricId, self.metricName, self.slicerName, self.simDataName,
self.sqlConstraint, self.metricMetadata, self.metricDataFile)
[docs]class DisplayRow(Base):
"""
Define contents and format of the displays table.
(Table to list the display properties for each metric.)
"""
__tablename__ = "displays"
displayId = Column(Integer, primary_key=True)
metricId = Column(Integer, ForeignKey('metrics.metricId'))
# Group for displaying metric (in webpages).
displayGroup = Column(String)
# Subgroup for displaying metric.
displaySubgroup = Column(String)
# Order to display metric (within subgroup).
displayOrder = Column(Float)
# The figure caption.
displayCaption = Column(String)
metric = relationship("MetricRow", backref=backref('displays', order_by=displayId))
def __rep__(self):
return "<Display(displayGroup='%s', displaySubgroup='%s', " \
"displayOrder='%.1f', displayCaption='%s')>" \
%(self.displayGroup, self.displaySubgroup, self.displayOrder, self.displayCaption)
[docs]class PlotRow(Base):
"""
Define contents and format of plot list table.
(Table to list all plots, link them to relevant metrics in MetricList, and provide info on filename).
"""
__tablename__ = "plots"
# Define columns in plot list table.
plotId = Column(Integer, primary_key=True)
# Matches metricID in MetricList table.
metricId = Column(Integer, ForeignKey('metrics.metricId'))
plotType = Column(String)
plotFile = Column(String)
metric = relationship("MetricRow", backref=backref('plots', order_by=plotId))
def __repr__(self):
return "<Plot(metricId='%d', plotType='%s', plotFile='%s')>" \
%(self.metricId, self.plotType, self.plotFile)
[docs]class SummaryStatRow(Base):
"""
Define contents and format of the summary statistics table.
(Table to list and link summary stats to relevant metrics in MetricList, and provide summary stat name,
value and potentially a comment).
"""
__tablename__ = "summarystats"
# Define columns in plot list table.
statId = Column(Integer, primary_key=True)
# Matches metricID in MetricList table.
metricId = Column(Integer, ForeignKey('metrics.metricId'))
summaryName = Column(String)
summaryValue = Column(Float)
metric = relationship("MetricRow", backref=backref('summarystats', order_by=statId))
def __repr__(self):
return "<SummaryStat(metricId='%d', summaryName='%s', summaryValue='%f')>" \
%(self.metricId, self.summaryName, self.summaryValue)
[docs]class ResultsDb(object):
"""The ResultsDb is a sqlite database containing information on the metrics run via MAF,
the plots created, the display information (such as captions), and any summary statistics output.
"""
def __init__(self, outDir= None, database=None, verbose=False):
"""
Instantiate the results database, creating metrics, plots and summarystats tables.
"""
# We now require resultsDb to be a sqlite file (for simplicity). Leaving as attribute though.
self.driver = 'sqlite'
# Connect to database
# for sqlite, connecting to non-existent database creates it automatically
if database is None:
# Using default value for database name, should specify directory.
if outDir is None:
outDir = '.'
# Check for output directory, make if needed.
if not os.path.isdir(outDir):
try:
os.makedirs(outDir)
except OSError as msg:
raise OSError(msg, '\n (If this was the database file (not outDir), '
'remember to use kwarg "database")')
self.database = os.path.join(outDir, 'resultsDb_sqlite.db')
else:
# Using non-default database, but may also specify directory root.
if outDir is not None:
database = os.path.join(outDir, database)
self.database = database
dbAddress = url.URL(self.driver, database=self.database)
engine = create_engine(dbAddress, echo=verbose)
self.Session = sessionmaker(bind=engine)
self.session = self.Session()
# Create the tables, if they don't already exist.
try:
Base.metadata.create_all(engine)
except DatabaseError:
raise ValueError("Cannot create a %s database at %s. Check directory exists." %(self.driver,
self.database))
self.slen = 1024
[docs] def close(self):
"""
Close connection to database.
"""
self.session.close()
[docs] def updateMetric(self, metricName, slicerName, simDataName, sqlConstraint,
metricMetadata, metricDataFile):
"""
Add a row to or update a row in the metrics table.
- metricName: the name of the metric
- sliceName: the name of the slicer
- simDataName: the name used to identify the simData
- sqlConstraint: the sql constraint used to select data from the simData
- metricMetadata: the metadata associated with the metric
- metricDatafile: the data file the metric data is stored in
If same metric (same metricName, slicerName, simDataName, sqlConstraint, metadata)
already exists, it does nothing.
Returns metricId: the Id number of this metric in the metrics table.
"""
if simDataName is None:
simDataName = 'NULL'
if sqlConstraint is None:
sqlConstraint = 'NULL'
if metricMetadata is None:
metricMetadata = 'NULL'
if metricDataFile is None:
metricDataFile = 'NULL'
# Check if metric has already been added to database.
prev = self.session.query(MetricRow).filter_by(metricName=metricName,
slicerName=slicerName,
simDataName=simDataName,
metricMetadata=metricMetadata,
sqlConstraint=sqlConstraint).all()
if len(prev) == 0:
metricinfo = MetricRow(metricName=metricName, slicerName=slicerName, simDataName=simDataName,
sqlConstraint=sqlConstraint, metricMetadata=metricMetadata,
metricDataFile=metricDataFile)
self.session.add(metricinfo)
self.session.commit()
else:
metricinfo = prev[0]
return metricinfo.metricId
[docs] def updateDisplay(self, metricId, displayDict, overwrite=True):
"""
Add a row to or update a row in the displays table.
- metricID: the metric Id of this metric in the metrics table
- displayDict: dictionary containing the display info
Replaces existing row with same metricId.
"""
# Because we want to maintain 1-1 relationship between metricId's and displayDict's:
# First check if a display line is present with this metricID.
displayinfo = self.session.query(DisplayRow).filter_by(metricId=metricId).all()
if len(displayinfo) > 0:
if overwrite:
for d in displayinfo:
self.session.delete(d)
else:
return
# Then go ahead and add new displayDict.
for k in displayDict:
if displayDict[k] is None:
displayDict[k] = 'NULL'
keys = ['group', 'subgroup', 'order', 'caption']
for k in keys:
if k not in displayDict:
displayDict[k] = 'NULL'
if displayDict['order'] == 'NULL':
displayDict['order'] = 0
displayGroup = displayDict['group']
displaySubgroup = displayDict['subgroup']
displayOrder = displayDict['order']
displayCaption = displayDict['caption']
if displayCaption.endswith('(auto)'):
displayCaption = displayCaption.replace('(auto)', '', 1)
displayinfo = DisplayRow(metricId=metricId,
displayGroup=displayGroup, displaySubgroup=displaySubgroup,
displayOrder=displayOrder, displayCaption=displayCaption)
self.session.add(displayinfo)
self.session.commit()
[docs] def updatePlot(self, metricId, plotType, plotFile):
"""
Add a row to or update a row in the plot table.
- metricId: the metric Id of this metric in the metrics table
- plotType: the 'type' of this plot
- plotFile: the filename of this plot
Remove older rows with the same metricId, plotType and plotFile.
"""
plotinfo = self.session.query(PlotRow).filter_by(metricId=metricId, plotType=plotType,
plotFile=plotFile).all()
if len(plotinfo) > 0:
for p in plotinfo:
self.session.delete(p)
plotinfo = PlotRow(metricId=metricId, plotType=plotType, plotFile=plotFile)
self.session.add(plotinfo)
self.session.commit()
[docs] def updateSummaryStat(self, metricId, summaryName, summaryValue):
"""
Add a row to or update a row in the summary statistic table.
- metricId: the metric ID of this metric in the metrics table
- summaryName: the name of this summary statistic
- summaryValue: the value for this summary statistic
Most summary statistics will be a simple name (string) + value (float) pair.
For special summary statistics which must return multiple values, the base name
can be provided as 'name', together with a np recarray as 'value', where the
recarray also has 'name' and 'value' columns (and each name/value pair is then saved
as a summary statistic associated with this same metricId).
"""
# Allow for special summary statistics which return data in a np structured array with
# 'name' and 'value' columns. (specificially needed for TableFraction summary statistic).
if isinstance(summaryValue, np.ndarray):
if (('name' in summaryValue.dtype.names) and ('value' in summaryValue.dtype.names)):
for value in summaryValue:
sSuffix = value['name']
if isinstance(sSuffix, bytes):
sSuffix = sSuffix.decode('utf-8')
else:
sSuffix = str(sSuffix)
summarystat = SummaryStatRow(metricId=metricId,
summaryName=summaryName + ' ' + sSuffix,
summaryValue=value['value'])
self.session.add(summarystat)
self.session.commit()
else:
warnings.warn('Warning! Cannot save non-conforming summary statistic.')
# Most summary statistics will be simple floats.
else:
if isinstance(summaryValue, float) or isinstance(summaryValue, int):
summarystat = SummaryStatRow(metricId=metricId, summaryName=summaryName,
summaryValue=summaryValue)
self.session.add(summarystat)
self.session.commit()
else:
warnings.warn('Warning! Cannot save summary statistic that is not a simple float or int')
[docs] def getMetricId(self, metricName, slicerName=None, metricMetadata=None, simDataName=None):
"""
Given a metric name and optional slicerName/metricMetadata/simData information,
Return a list of the matching metricIds.
"""
metricId = []
query = self.session.query(MetricRow.metricId, MetricRow.metricName, MetricRow.slicerName,
MetricRow.metricMetadata,
MetricRow.simDataName).filter(MetricRow.metricName == metricName)
if slicerName is not None:
query = query.filter(MetricRow.slicerName == slicerName)
if metricMetadata is not None:
query = query.filter(MetricRow.metricMetadata == metricMetadata)
if simDataName is not None:
query = query.filter(MetricRow.simDataName == simDataName)
query = query.order_by(MetricRow.slicerName, MetricRow.metricMetadata)
for m in query:
metricId.append(m.metricId)
return metricId
[docs] def getMetricIdLike(self, metricNameLike=None, slicerNameLike=None,
metricMetadataLike=None, simDataName=None):
metricId = []
query = self.session.query(MetricRow.metricId, MetricRow.metricName, MetricRow.slicerName,
MetricRow.metricMetadata,
MetricRow.simDataName)
if metricNameLike is not None:
query = query.filter(MetricRow.metricName.like('%' + str(metricNameLike) + '%'))
if slicerNameLike is not None:
query = query.filter(MetricRow.slicerName.like('%' + str(slicerNameLike) + '%'))
if metricMetadataLike is not None:
query = query.filter(MetricRow.metricMetadata.like('%' + str(metricMetadataLike) + '%'))
if simDataName is not None:
query = query.filter(MetricRow.simDataName == simDataName)
for m in query:
metricId.append(m.metricId)
return metricId
[docs] def getAllMetricIds(self):
"""
Return a list of all metricIds.
"""
metricIds = []
for m in self.session.query(MetricRow.metricId).all():
metricIds.append(m.metricId)
return metricIds
[docs] def getSummaryStats(self, metricId=None, summaryName=None, withSimName=False):
"""
Get the summary stats (optionally for metricId list).
Optionally, also specify the summary metric name.
Returns a numpy array of the metric information + summary statistic information.
"""
if metricId is None:
metricId = self.getAllMetricIds()
if not hasattr(metricId, '__iter__'):
metricId = [metricId,]
summarystats = []
for mid in metricId:
# Join the metric table and the summarystat table, based on the metricID (the second filter)
query = (self.session.query(MetricRow, SummaryStatRow).filter(MetricRow.metricId == mid)
.filter(MetricRow.metricId == SummaryStatRow.metricId))
if summaryName is not None:
query = query.filter(SummaryStatRow.summaryName == summaryName)
for m, s in query:
vals = (m.metricId, m.metricName, m.slicerName, m.metricMetadata,
s.summaryName, s.summaryValue)
if withSimName:
vals += (m.simDataName,)
summarystats.append(vals)
# Convert to numpy array.
dtype_list = [('metricId', int), ('metricName', str, self.slen),
('slicerName', str, self.slen), ('metricMetadata', str, self.slen),
('summaryName', str, self.slen), ('summaryValue', float)]
if withSimName:
dtype_list += [('simDataName', str, self.slen)]
dtype = np.dtype(dtype_list)
summarystats = np.array(summarystats, dtype)
return summarystats
[docs] def getPlotFiles(self, metricId=None):
"""
Return the metricId, name, metadata, and all plot info (optionally for metricId list).
Returns a numpy array of the metric information + plot file names.
"""
if metricId is None:
metricId = self.getAllMetricIds()
if not hasattr(metricId, '__iter__'):
metricId = [metricId,]
plotFiles = []
for mid in metricId:
# Join the metric table and the plot table based on the metricID (the second filter does the join)
query = (self.session.query(MetricRow, PlotRow).filter(MetricRow.metricId == mid)
.filter(MetricRow.metricId == PlotRow.metricId))
for m, p in query:
# The plotFile typically ends with .pdf (but the rest of name can have '.' or '_')
thumbfile = 'thumb.' + '.'.join(p.plotFile.split('.')[:-1]) + '.png'
plotFiles.append((m.metricId, m.metricName, m.metricMetadata,
p.plotType, p.plotFile, thumbfile))
# Convert to numpy array.
dtype = np.dtype([('metricId', int), ('metricName', str, self.slen),
('metricMetadata', str, self.slen),
('plotType', str, self.slen), ('plotFile', str, self.slen),
('thumbFile', str, self.slen)])
plotFiles = np.array(plotFiles, dtype)
return plotFiles
[docs] def getMetricDataFiles(self, metricId=None):
"""
Get the metric data filenames for all or a single metric.
Returns a list.
"""
if metricId is None:
metricId = self.getAllMetricIds()
if not hasattr(metricId, '__iter__'):
metricId = [metricId,]
dataFiles = []
for mid in metricId:
for m in self.session.query(MetricRow).filter(MetricRow.metricId == mid).all():
dataFiles.append(m.metricDataFile)
return dataFiles
[docs] def getMetricInfo(self, metricId=None):
"""Get the simple metric info, without display information.
"""
if metricId is None:
metricId = self.getAllMetricIds()
if not hasattr(metricId, '__iter__'):
metricId = [metricId,]
metricInfo = []
for mId in metricId:
# Query for all rows in metrics and displays that match any of the metricIds.
query = (self.session.query(MetricRow).filter(MetricRow.metricId==mId))
for m in query:
baseMetricName = m.metricName.split('_')[0]
mInfo = (m.metricId, m.metricName, baseMetricName, m.slicerName,
m.sqlConstraint, m.metricMetadata, m.metricDataFile)
metricInfo.append(mInfo)
# Convert to numpy array.
dtype = np.dtype([('metricId', int), ('metricName', str, self.slen),
('baseMetricNames', str, self.slen),
('slicerName', str, self.slen),
('sqlConstraint', str, self.slen),
('metricMetadata', str, self.slen),
('metricDataFile', str, self.slen)])
metricInfo = np.array(metricInfo, dtype)
return metricInfo
[docs] def getMetricDisplayInfo(self, metricId=None):
"""
Get the contents of the metrics and displays table, together with the 'basemetricname'
(optionally, for metricId list).
Returns a numpy array of the metric information + display information.
One underlying assumption here is that all metrics have some display info.
In newer batches, this may not be the case, as the display info gets auto-generated when the
metric is plotted.
"""
if metricId is None:
metricId = self.getAllMetricIds()
if not hasattr(metricId, '__iter__'):
metricId = [metricId,]
metricInfo = []
for mId in metricId:
# Query for all rows in metrics and displays that match any of the metricIds.
query = (self.session.query(MetricRow, DisplayRow).filter(MetricRow.metricId==mId)
.filter(MetricRow.metricId==DisplayRow.metricId))
for m, d in query:
baseMetricName = m.metricName.split('_')[0]
mInfo = (m.metricId, m.metricName, baseMetricName, m.slicerName,
m.sqlConstraint, m.metricMetadata, m.metricDataFile,
d.displayGroup, d.displaySubgroup, d.displayOrder, d.displayCaption)
metricInfo.append(mInfo)
# Convert to numpy array.
dtype = np.dtype([('metricId', int), ('metricName', np.str_, self.slen),
('baseMetricNames', np.str_, self.slen),
('slicerName', np.str_, self.slen),
('sqlConstraint', np.str_, self.slen),
('metricMetadata', np.str_, self.slen),
('metricDataFile', np.str_, self.slen),
('displayGroup', np.str_, self.slen),
('displaySubgroup', np.str_, self.slen),
('displayOrder', float),
('displayCaption', np.str_, self.slen * 10)])
metricInfo = np.array(metricInfo, dtype)
return metricInfo