#!/usr/bin/env sampy
"""
Sample script.

Originally written as a tool for SAM code developers.  The idea is to grab
a bunch of metadata from an existing SAM database, then recreate the
"necessary" pieces in an empty (or, at least, not the same) database.

Requires DB admin privileges (via an oracle username/password) into the
database that is being populated.
"""

import types
import sys
import string
import os
import traceback

from SamUserApi import getSam
from SamAdminApi import getSamAdmin
from SamCorba.DbCorbaClient import DbCorbaClient
import SAM
from SamException import SamExceptions
from SamFile.SamDataFile import SamDataFile
from SamStruct.ApplicationFamily import ApplicationFamily
from SamStruct.DataType import DataType


class DbRetrofitter:
    """
    Read the metadata for a file and make sure that all necessary
    components exist in the database -- that is, "fake" the database so
    that the stated application family, group, user, etc., exist and the
    file can be declared.
    """

    KEYWORD = 'keyword'
    LIST = 'list'
    METHOD = 'method'

    # Maps a short component name to the SAM metadata attribute it is read
    # from.  Not referenced by the methods of this class.
    INSERT_DICT = {
        'applicationFamily':  {KEYWORD: SAM.attrApplicationFamily},
        'group':              {KEYWORD: SAM.attrWorkGroup},
        'dataTier':           {KEYWORD: SAM.attrDataTier},
        'node':               {KEYWORD: SAM.attrNodeName},
        'physicalDataStream': {KEYWORD: SAM.attrPhysicalDatastream},
        'userName':           {KEYWORD: SAM.attrUserName},
        'runInfo':            {KEYWORD: SAM.attrRunDescriptorList},
        'params':             {KEYWORD: SAM.attrParams},
    }

    def __init__(self):
        """Obtain the SAM user and admin API handles used by this object."""
        self.sam = getSam()
        self.samAdmin = getSamAdmin()

    def declare(self, fileMetadata):
        """
        Retrofit the database for `fileMetadata`, then declare the file.

        An InvalidMetadata error raised by the declaration itself is
        reported on stdout and swallowed; exceptions raised by
        retrofitDb() propagate to the caller.
        """
        # first retrofit the db with any missing values:
        self.retrofitDb(fileMetadata)

        # then declare the file:
        print("Declaring file '%s'..." % fileMetadata.getFileName())
        try:
            fileId = self.sam.declareFile(metadata=fileMetadata)
            print("Success, fileId = %s" % fileId)
        except SamExceptions.InvalidMetadata as ex:
            print("FAILURE beyond my skill to heal. You'll need Elvish medicine...\n%s" % str(ex.args))
        print("=================================================================")

    def retrofitDb(self, fileMetadata):
        """
        Add to the database every registered value that `fileMetadata`
        refers to but that the database does not yet know about.

        Covers application family, work group, data tier, node, physical
        datastream, user name, run types and parameter categories/types.
        Values that cannot be derived from the metadata are faked (node
        hardware/os, logical datastream 'generic', string parameter types).

        Raises SamExceptions.DataFileDuplicate if a file with the same
        name is already present.
        """
        sam = self.sam
        samAdmin = self.samAdmin
        fileName = fileMetadata.getFileName()

        # first make sure that the file isn't already there:
        try:
            sam.getMetadata(filename=fileName)
        except SamExceptions.DataFileNotFound:
            # this is A Good Thing.
            pass
        else:
            raise SamExceptions.DataFileDuplicate("File with name '%s' already exists." % fileName)

        print("Retrofitting db with metadata for file '%s'" % fileName)

        applicationFamily = fileMetadata.get(SAM.attrApplicationFamily)
        if applicationFamily is not None:
            knownApplicationFamilies = sam.getRegisteredValues(attribute=SAM.attrApplicationFamily)
            if applicationFamily not in knownApplicationFamilies:
                print("Adding %s to database..." % applicationFamily)
                samAdmin.addApplicationFamily(appName=applicationFamily.getAppName(),
                                              appFamily=applicationFamily.getAppFamily(),
                                              appVersion=applicationFamily.getAppVersion())

        group = fileMetadata.get(SAM.attrWorkGroup)
        if group is not None:
            knownGroupList = sam.getRegisteredValues(attribute=SAM.attrWorkGroup)
            # group is a NameOrId; we must convert to string to see if it is known.
            if str(group) not in knownGroupList:
                print("Adding group %s to database..." % group)
                samAdmin.addWorkingGroup(name=group)

        dataTier = fileMetadata.get(SAM.attrDataTier)
        if dataTier is not None:
            knownDataTiers = sam.getRegisteredValues(attribute=SAM.attrDataTier)
            if dataTier not in knownDataTiers:
                print("Adding dataTier %s to database..." % dataTier)
                samAdmin.addDataTier(name=dataTier)

        nodeName = fileMetadata.get(SAM.attrNodeName)
        if nodeName is not None:
            knownNodeList = sam.getRegisteredValues(attribute=SAM.attrNodeName)
            if nodeName not in knownNodeList:
                print("Adding nodeName %s to database (faking the hardware and os...)" % nodeName)
                samAdmin.addNode(name=nodeName, hw='pc', os='linux')

        datastream = fileMetadata.get(SAM.attrPhysicalDatastream)
        if datastream is not None:
            knownPhysicalDatastreams = sam.getRegisteredValues(attribute=SAM.attrPhysicalDatastream)
            # datastream is a NameOrId; we must convert to string to see if it is known
            if str(datastream) not in knownPhysicalDatastreams:
                # do we need to fake the logical datastream too?
                knownLogicalDatastreams = sam.getRegisteredValues(attribute=SAM.attrLogicalDatastream)
                if 'generic' not in knownLogicalDatastreams:
                    print("Adding logical datastream 'generic' for fake data...")
                    samAdmin.addLogicalDatastream(name='generic')
                print("Adding physical datastream %s to database (faking the logical datastream...)" % datastream)
                samAdmin.addPhysicalDatastream(name=datastream, logicalDatastream='generic')

        userName = fileMetadata.get(SAM.attrUserName)
        if userName is not None:
            knownUserNameList = sam.getRegisteredValues(attribute=SAM.attrUserName)
            if userName not in knownUserNameList:
                print("Adding userName %s to database..." % userName)
                samAdmin.addPerson(userName=userName)

        runDescriptorList = fileMetadata.get(SAM.attrRunDescriptorList)
        if runDescriptorList is not None:
            knownRunTypes = sam.getRegisteredValues(attribute=SAM.attrRunType)
            # knownRunTypes is fetched only once, so remember what we add
            # here to avoid registering the same run type twice when
            # several run descriptors share it.
            addedRunTypes = []
            for runInfo in runDescriptorList:
                runType = runInfo.getRunStruct().getRunType()
                if runType is None:
                    continue
                if runType in knownRunTypes or runType in addedRunTypes:
                    continue
                print("Adding runType %s to the database..." % runType)
                samAdmin.addRunType(name=runType)
                addedRunTypes.append(runType)

        params = fileMetadata.get(SAM.attrParams)
        if params is not None:
            knownParamDict = sam.getRegisteredParameterKeywords()
            knownDataTypes = sam.getRegisteredValues(attribute=SAM.attrDataType)
            if DataType(SAM.SamDataType_StringType) not in knownDataTypes:
                print("Adding dataType string to the database (faking the param value data types)")
                samAdmin.addDataType(dataType=DataType(SAM.SamDataType_StringType))

            for category in params.keys():
                # Evaluated before any insert: knownParamDict is a snapshot
                # and is not refreshed after addParamCategory().
                categoryKnown = category in knownParamDict.keys()
                if not categoryKnown:
                    print("Adding paramCategory %s to the database..." % category)
                    samAdmin.addParamCategory(paramCategory=category)

                theDict = params.get(category, {})
                for paramType in theDict.keys():
                    if (not categoryKnown
                            or paramType not in knownParamDict[category].keys()):
                        print("Adding paramType %s (category %s) to the database (faking data type to string)"
                              % (paramType, category))
                        samAdmin.addParamType(paramCategory=category,
                                              paramType=paramType,
                                              dataType=SAM.SamDataType_StringType)


#
# example of how this would be used:
#
def main(metadataList):
    """
    Retrofit the database for, and declare, every entry in `metadataList`.

    A SamException for one entry is printed and processing continues with
    the next entry; KeyboardInterrupt exits the process with status 1.
    """
    retrofitter = DbRetrofitter()
    for fileMetadata in metadataList:
        try:
            retrofitter.declare(fileMetadata)
        except KeyboardInterrupt:
            sys.exit(1)
        except SamExceptions.SamException as ex:
            print(str(ex.args))


if __name__ == "__main__":
    # ok, let's run the sample:
    from ExampleMetadata import ExampleMetadataList
    main(ExampleMetadataList)