# xce/lib/XChemDB.py
import csv
import getpass
import sqlite3
from datetime import datetime
from xce.lib import XChemLog
# from XChemUtils import parse
class data_source:
def __init__(self, data_source_file):
self.data_source_file = data_source_file
# [column_name in DB, column_name shown in XCE, SQLite column type]
self.column_list = [
# SQLite column name | XCE column name | SQLite type | in overview tab
# from Lab36
["ID", "ID", "INTEGER PRIMARY KEY", 0],
["LabVisit", "LabVisit", "TEXT", 1],
["LibraryPlate", "LibraryPlate", "TEXT", 0],
["SourceWell", "SourceWell", "TEXT", 0],
["LibraryName", "LibraryName", "TEXT", 1],
["CompoundSMILES", "Smiles", "TEXT", 1],
["CompoundSMILESproduct", "Smiles - Product", "TEXT", 1],
["CompoundCode", "Compound ID", "TEXT", 1],
["CompoundStereo", "Compound Stereo", "TEXT", 1],
["CompoundStereoSMILES", "Stereo SMILES", "TEXT", 1],
["CompoundStereoCIFprogram", "Stereo CIF program", "TEXT", 1],
["CompoundStereoCIFs", "Compound Stereo IDs", "TEXT", 1],
["CompoundAutofitCC", "autofit CC", "TEXT", 1],
["CompoundAutofitprogram", "autofit program", "TEXT", 1],
["CrystalPlate", "CrystalPlate", "TEXT", 1],
["CrystalWell", "CrystalWell", "TEXT", 1],
["EchoX", "EchoX", "TEXT", 0],
["EchoY", "EchoY", "TEXT", 0],
["DropVolume", "DropVolume", "TEXT", 0],
["ProteinName", "ProteinName", "TEXT", 1],
["BatchNumber", "BatchNumber", "TEXT", 0],
["CompoundStockConcentration", "CompoundStockConcentration", "TEXT", 0],
["CompoundConcentration", "CompoundConcentration", "TEXT", 1],
["SolventFraction", "SolventFraction", "TEXT", 1],
["SoakTransferVol", "SoakTransferVol", "TEXT", 0],
["SoakStatus", "SoakStatus", "TEXT", 0],
["SoakTimestamp", "SoakTimestamp", "TEXT", 0],
["CryoStockFraction", "CryoStockFraction", "TEXT", 0],
["CryoFraction", "CryoFraction", "TEXT", 0],
["CryoWell", "CryoWell", "TEXT", 0],
["CryoTransferVolume", "CryoTransferVolume", "TEXT", 0],
["CryoStatus", "CryoStatus", "TEXT", 0],
["CryoTimestamp", "CryoTimestamp", "TEXT", 0],
["SoakingTime", "SoakingTime", "TEXT", 1],
["HarvestStatus", "HarvestStatus", "TEXT", 0],
["CrystalName", "Sample ID", "TEXT", 0],
["Puck", "Puck", "TEXT", 1],
["PuckPosition", "PuckPosition", "TEXT", 1],
["PinBarcode", "SoakDB\nBarcode", "TEXT", 1],
["MountingResult", "MountingResult", "TEXT", 0],
["MountingArrivalTime", "MountingArrivalTime", "TEXT", 0],
["MountedTimestamp", "MountedTimestamp", "TEXT", 0],
["MountingTime", "MountingTime", "TEXT", 0],
["ispybStatus", "ispybStatus", "TEXT", 0],
["DataCollectionVisit", "Visit", "TEXT", 1],
# from XChemExplorer
["ProjectDirectory", "ProjectDirectory", "TEXT", 0],
["CrystalTag", "Tag", "TEXT", 0],
["CrystalFormName", "Crystal Form\nName", "TEXT", 0],
["CrystalFormSpaceGroup", "Space\nGroup", "TEXT", 0],
["CrystalFormPointGroup", "Point\nGroup", "TEXT", 0],
["CrystalFormA", "a", "TEXT", 0],
["CrystalFormB", "b", "TEXT", 0],
["CrystalFormC", "c", "TEXT", 0],
["CrystalFormAlpha", "alpha", "TEXT", 0],
["CrystalFormBeta", "beta", "TEXT", 0],
["CrystalFormGamma", "gamma", "TEXT", 0],
["CrystalFormVolume", "Crystal Form\nVolume", "TEXT", 0],
["DataCollectionBeamline", "Beamline", "TEXT", 0],
["DataCollectionDate", "Data Collection\nDate", "TEXT", 1],
["DataCollectionOutcome", "DataCollection\nOutcome", "TEXT", 1],
["DataCollectionRun", "Run", "TEXT", 0],
["DataCollectionSubdir", "SubDir", "TEXT", 0],
["DataCollectionComment", "DataCollection\nComment", "TEXT", 0],
["DataCollectionWavelength", "Wavelength", "TEXT", 0],
["DataCollectionPinBarcode", "GDA\nBarcode", "TEXT", 1],
["DataCollectionCrystalImage1", "img1", "TEXT", 1],
["DataCollectionCrystalImage2", "img2", "TEXT", 1],
["DataCollectionCrystalImage3", "img3", "TEXT", 1],
["DataCollectionCrystalImage4", "img4", "TEXT", 1],
[
"DataProcessingPathToImageFiles",
"Path to diffraction\nimage files",
"TEXT",
1,
],
["DataProcessingProgram", "Program", "TEXT", 1],
["DataProcessingSpaceGroup", "DataProcessing\nSpaceGroup", "TEXT", 1],
["DataProcessingUnitCell", "DataProcessing\nUnitCell", "TEXT", 0],
["DataProcessingAutoAssigned", "auto-assigned", "TEXT", 0],
["DataProcessingA", "DataProcessing\nA", "TEXT", 0],
["DataProcessingB", "DataProcessing\nB", "TEXT", 0],
["DataProcessingC", "DataProcessing\nC", "TEXT", 0],
["DataProcessingAlpha", "DataProcessing\nAlpha", "TEXT", 0],
["DataProcessingBeta", "DataProcessing\nBeta", "TEXT", 0],
["DataProcessingGamma", "DataProcessing\nGamma", "TEXT", 0],
["DataProcessingResolutionOverall", "Resolution\nOverall", "TEXT", 0],
["DataProcessingResolutionLow", "Resolution\nLow", "TEXT", 0],
[
"DataProcessingResolutionLowInnerShell",
"Resolution\nLow (Inner Shell)",
"TEXT",
0,
],
["DataProcessingResolutionHigh", "Resolution\nHigh", "TEXT", 1],
[
"DataProcessingResolutionHigh15sigma",
"Resolution\n[Mn<I/sig(I)> = 1.5]",
"TEXT",
1,
],
[
"DataProcessingResolutionHigh20sigma",
"Resolution\n[Mn<I/sig(I)> = 2.0]",
"TEXT",
1,
],
[
"DataProcessingResolutionHighOuterShell",
"Resolution\nHigh (Outer Shell)",
"TEXT",
0,
],
["DataProcessingRmergeOverall", "Rmerge\nOverall", "TEXT", 1],
["DataProcessingRmergeLow", "Rmerge\nLow", "TEXT", 1],
["DataProcessingRmergeHigh", "Rmerge\nHigh", "TEXT", 1],
["DataProcessingIsigOverall", "Mn<I/sig(I)>\nOverall", "TEXT", 1],
["DataProcessingIsigLow", "Mn<I/sig(I)>\nLow", "TEXT", 1],
["DataProcessingIsigHigh", "Mn<I/sig(I)>\nHigh", "TEXT", 1],
["DataProcessingCompletenessOverall", "Completeness\nOverall", "TEXT", 1],
["DataProcessingCompletenessLow", "Completeness\nLow", "TEXT", 1],
["DataProcessingCompletenessHigh", "Completeness\nHigh", "TEXT", 1],
["DataProcessingMultiplicityOverall", "Multiplicity\nOverall", "TEXT", 1],
["DataProcessingMultiplicityLow", "Multiplicity\nLow", "TEXT", 1],
["DataProcessingMultiplicityHigh", "Multiplicity\nHigh", "TEXT", 1],
["DataProcessingCChalfOverall", "CC(1/2)\nOverall", "TEXT", 1],
["DataProcessingCChalfLow", "CC(1/2)\nLow", "TEXT", 1],
["DataProcessingCChalfHigh", "CC(1/2)\nHigh", "TEXT", 1],
# the data source is a bit exploding with entries like the ones below,
# but the many different filenames and folder structures of Diamond
# autoprocessing makes it necessary
["DataProcessingPathToLogfile", "DataProcessingPathToLogfile", "TEXT", 1],
["DataProcessingPathToMTZfile", "DataProcessingPathToMTZfile", "TEXT", 1],
["DataProcessingLOGfileName", "DataProcessingLOGfileName", "TEXT", 0],
["DataProcessingMTZfileName", "DataProcessingMTZfileName", "TEXT", 0],
[
"DataProcessingDirectoryOriginal",
"DataProcessingDirectoryOriginal",
"TEXT",
0,
],
[
"DataProcessingUniqueReflectionsOverall",
"Unique Reflections\nOverall",
"TEXT",
1,
],
[
"DataProcessingUniqueReflectionsLow",
"Unique Reflections\nlow",
"TEXT",
1,
],
[
"DataProcessingUniqueReflectionsHigh",
"Unique Reflections\nhigh",
"TEXT",
1,
],
["DataProcessingLattice", "DataProcessing\nLattice", "TEXT", 0],
["DataProcessingPointGroup", "DataProcessing\nPointGroup", "TEXT", 0],
[
"DataProcessingUnitCellVolume",
"DataProcessing\nUnit Cell Volume",
"TEXT",
0,
],
["DataProcessingAlert", "DataProcessing\nAlert", "TEXT", 0],
["DataProcessingScore", "DataProcessing\nScore", "TEXT", 1],
["DataProcessingStatus", "DataProcessing\nStatus", "TEXT", 1],
["DataProcessingRcryst", "DataProcessing\nRcryst", "TEXT", 0],
["DataProcessingRfree", "DataProcessing\nRfree", "TEXT", 0],
[
"DataProcessingPathToDimplePDBfile",
"DataProcessingPathToDimplePDBfile",
"TEXT",
0,
],
[
"DataProcessingPathToDimpleMTZfile",
"DataProcessingPathToDimpleMTZfile",
"TEXT",
0,
],
[
"DataProcessingDimpleSuccessful",
"DataProcessingDimpleSuccessful",
"TEXT",
0,
],
["DimpleResolutionHigh", "Dimple\nResolution High", "TEXT", 1],
["DimpleRcryst", "Dimple\nRcryst", "TEXT", 1],
["DimpleRfree", "Dimple\nRfree", "TEXT", 1],
["DimplePathToPDB", "Dimple\nPath to PDB file", "TEXT", 1],
["DimplePathToMTZ", "Dimple\nPath to MTZ file", "TEXT", 1],
["DimpleReferencePDB", "Dimple\nReference PDB", "TEXT", 1],
["DimpleStatus", "Dimple\nStatus", "TEXT", 1],
["DimpleTwinResolutionHigh", "Dimple - twin\nResolution High", "TEXT", 1],
["DimpleTwinRcryst", "Dimple - twin\nRcryst", "TEXT", 1],
["DimpleTwinRfree", "Dimple - twin\nRfree", "TEXT", 1],
["DimpleTwinPathToPDB", "Dimple - twin\nPath to PDB file", "TEXT", 1],
["DimpleTwinPathToMTZ", "Dimple - twin\nPath to MTZ file", "TEXT", 1],
["DimpleTwinReferencePDB", "Dimple - twin\nReference PDB", "TEXT", 1],
["DimpleTwinStatus", "Dimple - twin\nStatus", "TEXT", 1],
["DimpleTwinFraction", "Dimple - twin\nFraction", "TEXT", 1],
[
"DataProcessingDimpleTwinSuccessful",
"DataProcessingDimpleTwinSuccessful",
"TEXT",
0,
],
["DimplePANDDAwasRun", "PanDDA\nlaunched?", "TEXT", 1],
["DimplePANDDAhit", "PanDDA\nhit?", "TEXT", 1],
["DimplePANDDAreject", "PanDDA\nreject?", "TEXT", 1],
["DimplePANDDApath", "PanDDA\npath?", "TEXT", 1],
["PANDDAStatus", "PanDDA\nStatus", "TEXT", 1],
["DatePanDDAModelCreated", "DatePanDDAModelCreated", "TEXT", 0],
["RefinementResolution", "Refinement\nResolution", "TEXT", 1],
["RefinementResolutionTL", "RefinementResolutionTL", "TEXT", 0],
["RefinementRcryst", "Refinement\nRcryst", "TEXT", 1],
["RefinementRcrystTraficLight", "RefinementRcrystTraficLight", "TEXT", 0],
["RefinementRfree", "Refinement\nRfree", "TEXT", 1],
["RefinementRfreeTraficLight", "RefinementRfreeTraficLight", "TEXT", 0],
["RefinementSpaceGroup", "Refinement\nSpace Group", "TEXT", 1],
["RefinementLigandCC", "Ligand CC", "TEXT", 0],
["RefinementRmsdBonds", "RefinementRmsdBonds", "TEXT", 1],
["RefinementRmsdBondsTL", "RefinementRmsdBondsTL", "TEXT", 0],
["RefinementRmsdAngles", "RefinementRmsdAngles", "TEXT", 1],
["RefinementRmsdAnglesTL", "RefinementRmsdAnglesTL", "TEXT", 0],
["RefinementOutcome", "Refinement\nOutcome", "TEXT", 1],
["RefinementOutcomePerson", "RefinementOutcomePerson", "TEXT", 1],
["RefinementOutcomeDate", "RefinementOutcomeDate", "TEXT", 1],
["RefinementMTZfree", "RefinementMTZfree", "TEXT", 1],
["RefinementTwinMTZfree", "RefinementTwinMTZfree", "TEXT", 1],
["RefinementCIF", "RefinementCIF", "TEXT", 1],
["RefinementCIFStatus", "Compound\nStatus", "TEXT", 1],
["RefinementCIFprogram", "RefinementCIFprogram", "TEXT", 1],
["RefinementPDB_latest", "RefinementPDB_latest", "TEXT", 1],
["RefinementMTZ_latest", "RefinementMTZ_latest", "TEXT", 1],
["RefinementMMCIFmodel_latest", "RefinementMMCIFmodel_latest", "TEXT", 1],
[
"RefinementMMCIFreflections_latest",
"RefinementMMCIFreflections_latest",
"TEXT",
1,
],
["RefinementMatrixWeight", "RefinementMatrixWeight", "TEXT", 0],
["RefinementComment", "RefinementComment", "TEXT", 0],
[
"RefinementPathToRefinementFolder",
"RefinementPathToRefinementFolder",
"TEXT",
0,
],
["RefinementLigandConfidence", "Ligand\nConfidence", "TEXT", 0],
[
"RefinementLigandBoundConformation",
"RefinementLigandBoundConformation",
"TEXT",
0,
],
["RefinementBoundConformation", "RefinementBoundConformation", "TEXT", 0],
["RefinementMolProbityScore", "MolProbity Score", "TEXT", 1],
["RefinementMolProbityScoreTL", "RefinementMolProbityScoreTL", "TEXT", 0],
["RefinementRamachandranOutliers", "Ramachandran\nOutliers", "TEXT", 1],
[
"RefinementRamachandranOutliersTL",
"RefinementRamachandranOutliersTL",
"TEXT",
0,
],
["RefinementRamachandranFavored", "Ramachandran\nFavored", "TEXT", 1],
[
"RefinementRamachandranFavoredTL",
"RefinementRamachandranFavoredTL",
"TEXT",
0,
],
["RefinementProgram", "RefinementProgram", "TEXT", 1],
["RefinementStatus", "Refinement\nStatus", "TEXT", 1],
["RefinementBusterReportHTML", "buster-reports", "TEXT", 1],
["RefinementRefiner", "RefinementRefiner", "TEXT", 1],
["RefinementDate", "RefinementDate", "TEXT", 1],
["Deposition_PDB_ID", "Deposition_PDB_ID", "TEXT", 1],
["Deposition_PDB_file", "Deposition_PDB_file", "TEXT", 0],
["Deposition_Date", "Deposition_Date", "TEXT", 1],
["Deposition_mmCIF_model_file", "Deposition_mmCIF_model_file", "TEXT", 0],
["Deposition_mmCIF_SF_file", "Deposition_mmCIF_SF_file", "TEXT", 0],
["Label", "Label", "TEXT", 0],
["table_one", "table_one", "TEXT", 0],
["AssayIC50", "AssayIC50", "TEXT", 0],
["LastUpdated", "LastUpdated", "TEXT", 0],
["LastUpdated_by", "LastUpdated_by", "TEXT", 0],
]
self.pandda_table_columns = [
["ID", "ID", "INTEGER PRIMARY KEY"],
["CrystalName", "Sample ID", "TEXT"],
["PANDDApath", "PANDDApath", "TEXT"],
["PANDDA_site_index", "PANDDA_site_index", "TEXT"],
["PANDDA_site_name", "PANDDA_site_name", "TEXT"],
["PANDDA_site_comment", "PANDDA_site_comment", "TEXT"],
["PANDDA_site_event_index", "PANDDA_site_event_index", "TEXT"],
["PANDDA_site_event_comment", "PANDDA_site_event_comment", "TEXT"],
["PANDDA_site_confidence", "PANDDA_site_confidence", "TEXT"],
["PANDDA_site_InspectConfidence", "PANDDA_site_InspectConfidence", "TEXT"],
["PANDDA_site_ligand_placed", "PANDDA_site_ligand_placed", "TEXT"],
["PANDDA_site_viewed", "PANDDA_site_viewed", "TEXT"],
["PANDDA_site_interesting", "PANDDA_site_interesting", "TEXT"],
["PANDDA_site_z_peak", "PANDDA_site_z_peak", "TEXT"],
["PANDDA_site_x", "PANDDA_site_x", "TEXT"],
["PANDDA_site_y", "PANDDA_site_y", "TEXT"],
["PANDDA_site_z", "PANDDA_site_z", "TEXT"],
["PANDDA_site_ligand_id", "PANDDA_site_ligand_id", "TEXT"],
["PANDDA_site_ligand_resname", "PANDDA_site_ligand_resname", "TEXT"],
["PANDDA_site_ligand_chain", "PANDDA_site_ligand_chain", "TEXT"],
[
"PANDDA_site_ligand_sequence_number",
"PANDDA_site_ligand_sequence_number",
"TEXT",
],
["PANDDA_site_ligand_altLoc", "PANDDA_site_ligand_altLoc", "TEXT"],
["PANDDA_site_event_map", "PANDDA_site_event_map", "TEXT"],
["PANDDA_site_event_map_mtz", "PANDDA_site_event_map_mtz", "TEXT"],
["PANDDA_site_initial_model", "PANDDA_site_initial_model", "TEXT"],
["PANDDA_site_initial_mtz", "PANDDA_site_initial_mtz", "TEXT"],
["PANDDA_site_spider_plot", "PANDDA_site_spider_plot", "TEXT"],
["PANDDA_site_occupancy", "PANDDA_site_occupancy", "TEXT"],
["PANDDA_site_B_average", "PANDDA_site_B_average", "TEXT"],
[
"PANDDA_site_B_ratio_residue_surroundings",
"PANDDA_site_B_ratio_residue_surroundings",
"TEXT",
],
["PANDDA_site_RSCC", "PANDDA_site_RSCC", "TEXT"],
["PANDDA_site_RSR", "PANDDA_site_RSR", "TEXT"],
["PANDDA_site_RSZD", "PANDDA_site_RSZD", "TEXT"],
["PANDDA_site_rmsd", "PANDDA_site_rmsd", "TEXT"],
["RefinementOutcome", "RefinementOutcome", "TEXT"],
["ApoStructures", "ApoStructures", "TEXT"],
["LastUpdated", "LastUpdated", "TEXT"],
["LastUpdated_by", "LastUpdated_by", "TEXT"],
]
self.deposition_table_columns = [
["ID", "ID", "INTEGER PRIMARY KEY"],
["CrystalName", "Sample ID", "TEXT"],
["StructureType", "StructureType", "TEXT"], # apo/model
["PDB_file", "PDB_file", "TEXT"],
["MTZ_file", "MTZ_file", "TEXT"],
["mmCIF_model_file", "mmCIF_model_file", "TEXT"],
["mmCIF_SF_file", "mmCIF_SF_file", "TEXT"],
["label", "label", "TEXT"], # for index.txt
["description", "description", "TEXT"], # for index.txt
["DimplePANDDApath", "DimplePANDDApath", "TEXT"],
["contact_author_PI_salutation", "contact_author_PI_salutation", "TEXT"],
["contact_author_PI_first_name", "contact_author_PI_first_name", "TEXT"],
["contact_author_PI_last_name", "contact_author_PI_last_name", "TEXT"],
["contact_author_PI_middle_name", "contact_author_PI_middle_name", "TEXT"],
["contact_author_PI_role", "contact_author_PI_role", "TEXT"],
[
"contact_author_PI_organization_type",
"contact_author_PI_organization_type",
"TEXT",
],
[
"contact_author_PI_organization_name",
"contact_author_PI_organization_name",
"TEXT",
],
["contact_author_PI_email", "contact_author_PI_email", "TEXT"],
["contact_author_PI_address", "contact_author_PI_address", "TEXT"],
["contact_author_PI_city", "contact_author_PI_city", "TEXT"],
[
"contact_author_PI_State_or_Province",
"contact_author_PI_State_or_Province",
"TEXT",
],
["contact_author_PI_Zip_Code", "contact_author_PI_Zip_Code", "TEXT"],
["contact_author_PI_Country", "contact_author_PI_Country", "TEXT"],
["contact_author_PI_fax_number", "contact_author_PI_fax_number", "TEXT"],
[
"contact_author_PI_phone_number",
"contact_author_PI_phone_number",
"TEXT",
],
["contact_author_PI_ORCID", "contact_author_PI_ORCID", "TEXT"],
["contact_author_salutation", "contact_author_salutation", "TEXT"],
["contact_author_first_name", "contact_author_first_name", "TEXT"],
["contact_author_last_name", "contact_author_last_name", "TEXT"],
["contact_author_middle_name", "contact_author_middle_name", "TEXT"],
["contact_author_role", "contact_author_role", "TEXT"],
[
"contact_author_organization_type",
"contact_author_organization_type",
"TEXT",
],
[
"contact_author_organization_name",
"contact_author_organization_name",
"TEXT",
],
["contact_author_email", "contact_author_email", "TEXT"],
["contact_author_address", "contact_author_address", "TEXT"],
["contact_author_city", "contact_author_city", "TEXT"],
[
"contact_author_State_or_Province",
"contact_author_State_or_Province",
"TEXT",
],
["contact_author_Zip_Code", "contact_author_Zip_Code", "TEXT"],
["contact_author_Country", "contact_author_Country", "TEXT"],
["contact_author_fax_number", "contact_author_fax_number", "TEXT"],
["contact_author_phone_number", "contact_author_phone_number", "TEXT"],
["contact_author_ORCID", "contact_author_ORCID", "TEXT"],
[
"Release_status_for_coordinates",
"Release_status_for_coordinates",
"TEXT",
],
[
"Release_status_for_structure_factor",
"Release_status_for_structure_factor",
"TEXT",
],
["Release_status_for_sequence", "Release_status_for_sequence", "TEXT"],
["group_deposition_title", "group_deposition_title", "TEXT"],
["group_description", "group_description", "TEXT"],
["structure_title", "structure_title", "TEXT"],
["structure_details", "structure_details", "TEXT"],
["group_deposition_title_apo", "group_deposition_title_apo", "TEXT"],
["structure_title_apo", "structure_title_apo", "TEXT"],
["structure_author_name", "structure_author_name", "TEXT"],
["primary_citation_author_name", "primary_citation_author_name", "TEXT"],
["primary_citation_id", "primary_citation_id", "TEXT"],
[
"primary_citation_journal_abbrev",
"primary_citation_journal_abbrev",
"TEXT",
],
["primary_citation_title", "primary_citation_title", "TEXT"],
["primary_citation_year", "primary_citation_year", "TEXT"],
[
"primary_citation_journal_volume",
"primary_citation_journal_volume",
"TEXT",
],
["primary_citation_page_first", "primary_citation_page_first", "TEXT"],
["primary_citation_page_last", "primary_citation_page_last", "TEXT"],
["molecule_name", "molecule_name", "TEXT"],
["Fragment_name", "Fragment_name", "TEXT"],
["Specific_mutation", "Specific_mutation", "TEXT"],
["Enzyme_Comission_number", "Enzyme_Comission_number", "TEXT"],
[
"Source_organism_scientific_name",
"Source_organism_scientific_name",
"TEXT",
],
["Source_organism_gene", "Source_organism_gene", "TEXT"],
["Source_organism_strain", "Source_organism_strain", "TEXT"],
[
"Expression_system_scientific_name",
"Expression_system_scientific_name",
"TEXT",
],
["Expression_system_strain", "Expression_system_strain", "TEXT"],
["Expression_system_vector_type", "Expression_system_vector_type", "TEXT"],
[
"Expression_system_plasmid_name",
"Expression_system_plasmid_name",
"TEXT",
],
["Manipulated_source_details", "Manipulated_source_details", "TEXT"],
[
"fragment_name_one_specific_mutation",
"fragment_name_one_specific_mutation",
"TEXT",
],
["molecule_chain_one", "molecule_chain_one", "TEXT"],
["molecule_name_two", "molecule_name_two", "TEXT"],
["Fragment_name_two", "Fragment_name_two", "TEXT"],
["Specific_mutation_two", "Specific_mutation_two", "TEXT"],
["Enzyme_Comission_number_two", "Enzyme_Comission_number_two", "TEXT"],
[
"Source_organism_scientific_name_two",
"Source_organism_scientific_name_two",
"TEXT",
],
["Source_organism_gene_two", "Source_organism_gene_two", "TEXT"],
["Source_organism_strain_two", "Source_organism_strain_two", "TEXT"],
[
"Expression_system_scientific_name_two",
"Expression_system_scientific_name_two",
"TEXT",
],
["Expression_system_strain_two", "Expression_system_strain_two", "TEXT"],
[
"Expression_system_vector_type_two",
"Expression_system_vector_type_two",
"TEXT",
],
[
"Expression_system_plasmid_name_two",
"Expression_system_plasmid_name_two",
"TEXT",
],
[
"Manipulated_source_details_two",
"Manipulated_source_details_two",
"TEXT",
],
[
"fragment_name_two_specific_mutation",
"fragment_name_one_specific_mutation_two",
"TEXT",
],
["molecule_chain_two", "molecule_chain_two", "TEXT"],
["structure_keywords", "structure_keywords", "TEXT"],
[
"biological_assembly_chain_number",
"biological_assembly_chain_number",
"TEXT",
],
["crystallization_id", "crystallization_id", "TEXT"],
["crystallization_method", "crystallization_method", "TEXT"],
["crystallization_pH", "crystallization_pH", "TEXT"],
["crystallization_temperature", "crystallization_temperature", "TEXT"],
["crystallization_details", "crystallization_details", "TEXT"],
["radiation_source", "radiation_source", "TEXT"],
["radiation_source_type", "radiation_source_type", "TEXT"],
["radiation_wavelengths", "radiation_wavelengths", "TEXT"],
["radiation_detector", "radiation_detector", "TEXT"],
["radiation_detector_type", "radiation_detector_type", "TEXT"],
["data_collection_date", "data_collection_date", "TEXT"],
["data_collection_temperature", "data_collection_temperature", "TEXT"],
["data_collection_protocol", "data_collection_protocol", "TEXT"],
["SG_project_name", "SG_project_name", "TEXT"],
["full_name_of_SG_center", "full_name_of_SG_center", "TEXT"],
["molecule_one_letter_sequence", "molecule_one_letter_sequence", "TEXT"],
[
"molecule_one_letter_sequence_uniprot_id",
"molecule_one_letter_sequence_uniprot_id",
"TEXT",
],
["molecule_two_letter_sequence", "molecule_two_letter_sequence", "TEXT"],
[
"molecule_two_letter_sequence_uniprot_id",
"molecule_two_letter_sequence_uniprot_id",
"TEXT",
],
["CrystalName_of_pandda_input", "CrystalName_of_pandda_input", "TEXT"],
["pdbx_starting_model", "pdbx_starting_model", "TEXT"],
["data_integration_software", "data_integration_software", "TEXT"],
["phasing_software", "phasing_software", "TEXT"],
["pdbx_funding_ordinal_one", "pdbx_funding_ordinal_one", "TEXT"],
["pdbx_funding_organization_one", "pdbx_funding_organization_one", "TEXT"],
["pdbx_grant_number_one", "pdbx_grant_number_one", "TEXT"],
["pdbx_grant_country_one", "pdbx_grant_country_one", "TEXT"],
["pdbx_funding_ordinal_two", "pdbx_funding_ordinal_two", "TEXT"],
["pdbx_funding_organization_two", "pdbx_funding_organization_two", "TEXT"],
["pdbx_grant_number_two", "pdbx_grant_number_two", "TEXT"],
["pdbx_grant_country_two", "pdbx_grant_country_two", "TEXT"],
["pdbx_funding_ordinal_three", "pdbx_funding_ordinal_three", "TEXT"],
[
"pdbx_funding_organization_three",
"pdbx_funding_organization_three",
"TEXT",
],
["pdbx_grant_number_three", "pdbx_grant_number_three", "TEXT"],
["pdbx_grant_country_three", "pdbx_grant_country_three", "TEXT"],
["LastUpdated", "LastUpdated", "TEXT"],
["LastUpdated_by", "LastUpdated_by", "TEXT"],
]
self.data_collection_columns = [
["ID", "ID", "INTEGER PRIMARY KEY"],
["CrystalName", "Sample ID", "TEXT", 0],
["ProteinName", "ProteinName", "TEXT", 1],
["DataCollectionVisit", "Visit", "TEXT", 0],
["DataCollectionRun", "Run", "TEXT", 0],
["DataCollectionSubdir", "SubDir", "TEXT", 0],
["DataCollectionBeamline", "Beamline", "TEXT", 0],
["DataCollectionOutcome", "DataCollection\nOutcome", "TEXT", 1],
["DataCollectionDate", "Data Collection\nDate", "TEXT", 1],
["DataCollectionWavelength", "Wavelength", "TEXT", 0],
["DataCollectionPinBarcode", "GDA\nBarcode", "TEXT", 1],
["DataCollectionCrystalImage1", "img1", "TEXT", 1],
["DataCollectionCrystalImage2", "img2", "TEXT", 1],
["DataCollectionCrystalImage3", "img3", "TEXT", 1],
["DataCollectionCrystalImage4", "img4", "TEXT", 1],
[
"DataProcessingPathToImageFiles",
"Path to diffraction\nimage files",
"TEXT",
1,
],
["DataProcessingProgram", "Program", "TEXT", 1],
["DataProcessingSpaceGroup", "DataProcessing\nSpaceGroup", "TEXT", 1],
["DataProcessingUnitCell", "DataProcessing\nUnitCell", "TEXT", 0],
["DataProcessingAutoAssigned", "auto-assigned", "TEXT", 0],
["DataProcessingA", "DataProcessing\nA", "TEXT", 0],
["DataProcessingB", "DataProcessing\nB", "TEXT", 0],
["DataProcessingC", "DataProcessing\nC", "TEXT", 0],
["DataProcessingAlpha", "DataProcessing\nAlpha", "TEXT", 0],
["DataProcessingBeta", "DataProcessing\nBeta", "TEXT", 0],
["DataProcessingGamma", "DataProcessing\nGamma", "TEXT", 0],
["DataProcessingResolutionOverall", "Resolution\nOverall", "TEXT", 0],
["DataProcessingResolutionLow", "Resolution\nLow", "TEXT", 0],
[
"DataProcessingResolutionLowInnerShell",
"Resolution\nLow (Inner Shell)",
"TEXT",
0,
],
["DataProcessingResolutionHigh", "Resolution\nHigh", "TEXT", 1],
[
"DataProcessingResolutionHigh15sigma",
"Resolution\n[Mn<I/sig(I)> = 1.5]",
"TEXT",
1,
],
[
"DataProcessingResolutionHigh20sigma",
"Resolution\n[Mn<I/sig(I)> = 2.0]",
"TEXT",
1,
],
[
"DataProcessingResolutionHighOuterShell",
"Resolution\nHigh (Outer Shell)",
"TEXT",
0,
],
["DataProcessingRmergeOverall", "Rmerge\nOverall", "TEXT", 1],
["DataProcessingRmergeLow", "Rmerge\nLow", "TEXT", 1],
["DataProcessingRmergeHigh", "Rmerge\nHigh", "TEXT", 1],
["DataProcessingIsigOverall", "Mn<I/sig(I)>\nOverall", "TEXT", 1],
["DataProcessingIsigLow", "Mn<I/sig(I)>\nLow", "TEXT", 1],
["DataProcessingIsigHigh", "Mn<I/sig(I)>\nHigh", "TEXT", 1],
["DataProcessingCompletenessOverall", "Completeness\nOverall", "TEXT", 1],
["DataProcessingCompletenessLow", "Completeness\nLow", "TEXT", 1],
["DataProcessingCompletenessHigh", "Completeness\nHigh", "TEXT", 1],
["DataProcessingMultiplicityOverall", "Multiplicity\nOverall", "TEXT", 1],
["DataProcessingMultiplicityLow", "Multiplicity\nLow", "TEXT", 1],
["DataProcessingMultiplicityHigh", "Multiplicity\nHigh", "TEXT", 1],
["DataProcessingCChalfOverall", "CC(1/2)\nOverall", "TEXT", 1],
["DataProcessingCChalfLow", "CC(1/2)\nLow", "TEXT", 1],
["DataProcessingCChalfHigh", "CC(1/2)\nHigh", "TEXT", 1],
["DataProcessingPathToLogfile", "DataProcessingPathToLogfile", "TEXT", 1],
["DataProcessingPathToMTZfile", "DataProcessingPathToMTZfile", "TEXT", 1],
["DataProcessingLOGfileName", "DataProcessingLOGfileName", "TEXT", 0],
["DataProcessingMTZfileName", "DataProcessingMTZfileName", "TEXT", 0],
[
"DataProcessingDirectoryOriginal",
"DataProcessingDirectoryOriginal",
"TEXT",
0,
],
[
"DataProcessingUniqueReflectionsOverall",
"Unique Reflections\nOverall",
"TEXT",
1,
],
[
"DataProcessingUniqueReflectionsLow",
"Unique Reflections\nlow",
"TEXT",
1,
],
[
"DataProcessingUniqueReflectionsHigh",
"Unique Reflections\nhigh",
"TEXT",
1,
],
["DataProcessingLattice", "DataProcessing\nLattice", "TEXT", 0],
["DataProcessingPointGroup", "DataProcessing\nPointGroup", "TEXT", 0],
[
"DataProcessingUnitCellVolume",
"DataProcessing\nUnit Cell Volume",
"TEXT",
0,
],
["DataProcessingAlert", "DataProcessing\nAlert", "TEXT", 0],
["DataProcessingScore", "DataProcessing\nScore", "TEXT", 1],
["DataProcessingStatus", "DataProcessing\nStatus", "TEXT", 1],
["LastUpdated", "LastUpdated", "TEXT", 0],
["LastUpdated_by", "LastUpdated_by", "TEXT", 0],
]
self.zenodo_table_columns = [
["ID", "ID", "INTEGER PRIMARY KEY"],
["DimplePANDDApath", "DimplePANDDApath", "TEXT"],
["ZenodoTitle", "ZenodoTitle", "TEXT"],
["ZenodoHTTPS", "ZenodoHTTPS", "TEXT"],
["ZenodoDOI", "ZenodoDOI", "TEXT"],
["LastUpdated", "LastUpdated", "TEXT"],
["LastUpdated_by", "LastUpdated_by", "TEXT"],
]
self.label_table_columns = [
["ID", "ID", "INTEGER PRIMARY KEY"],
["Label", "Label", "TEXT"],
["Description", "Description", "TEXT"],
]
def columns_not_to_display(self):
do_not_display = []
for column in self.column_list:
if column[3] == 0:
do_not_display.append(column[1])
return do_not_display
def create_missing_columns(self):
existing_columns = []
connect = sqlite3.connect(self.data_source_file)
connect.row_factory = sqlite3.Row
cursor = connect.cursor()
tableDict = {
"mainTable": self.column_list,
"panddaTable": self.pandda_table_columns,
"depositTable": self.deposition_table_columns,
"collectionTable": self.data_collection_columns,
"zenodoTable": self.zenodo_table_columns,
"labelTable": self.label_table_columns,
}
for table in tableDict:
cursor.execute("create table if not exists " + table + " (ID INTEGER);")
existing_columns = []
cursor.execute("SELECT * FROM " + table)
for column in cursor.description:
existing_columns.append(column[0])
for column in tableDict[table]:
if column[0] not in existing_columns:
cursor.execute(
"alter table "
+ table
+ " add column '"
+ column[0]
+ "' '"
+ column[2]
+ "'"
)
connect.commit()
if table == "labelTable":
cursor.execute("select ID from labelTable")
id = cursor.fetchall()
if id == []:
for idx in range(5):
cursor.execute(
"insert into labelTable (ID) Values (%s)" % str(idx + 1)
)
def return_column_list(self):
    """Return the mainTable schema list
    ([db name, XCE name, SQLite type, overview flag] per entry)."""
    return self.column_list
def create_empty_data_source_file(self):
# creates sqlite file if non existent
connect = sqlite3.connect(self.data_source_file)
with connect:
cursor = connect.cursor()
cursor.execute(
"CREATE TABLE mainTable("
+ self.column_list[0][0]
+ " "
+ self.column_list[0][2]
+ ")"
)
for i in range(1, len(self.column_list)):
cursor.execute(
"alter table mainTable add column '"
+ self.column_list[i][0]
+ "' '"
+ self.column_list[i][2]
+ "'"
)
connect.commit()
# Don't need to create panddaTable at this point, because table will be created
# by create_missing_columns which is called the first time a data source in
# specified in XCE
def get_all_samples_in_data_source_as_list(self):
# creates sqlite file if non existent
connect = sqlite3.connect(self.data_source_file)
cursor = connect.cursor()
cursor.execute("SELECT CrystalName FROM mainTable")
existing_samples_in_db = []
samples = cursor.fetchall()
for sample in samples:
existing_samples_in_db.append(str(sample[0]))
return existing_samples_in_db
def execute_statement(self, cmd):
# creates sqlite file if non existent
connect = sqlite3.connect(self.data_source_file)
cursor = connect.cursor()
cursor.execute(cmd)
output = cursor.fetchall()
connect.commit()
return output
def get_db_dict_for_sample(self, sampleID):
db_dict = {}
header = []
data = []
# creates sqlite file if non existent
connect = sqlite3.connect(self.data_source_file)
cursor = connect.cursor()
cursor.execute(
"select * from mainTable where CrystalName='{0!s}';".format(sampleID)
)
print(
(
"SQLITE: select * from mainTable where CrystalName='{0!s}';".format(
sampleID
)
)
)
for column in cursor.description:
header.append(column[0])
data = cursor.fetchall()
print(("DATA: {0!s}".format(data)))
for n, item in enumerate(data[0]):
db_dict[header[n]] = str(item)
return db_dict
def get_deposit_dict_for_sample(self, sampleID):
db_dict = {}
header = []
data = []
# creates sqlite file if non existent
connect = sqlite3.connect(self.data_source_file)
cursor = connect.cursor()
cursor.execute(
"select * from depositTable where CrystalName='{0!s}';".format(sampleID)
)
for column in cursor.description:
header.append(column[0])
data = cursor.fetchall()
try:
for n, item in enumerate(data[0]):
db_dict[header[n]] = str(item)
except IndexError:
pass
return db_dict
def get_db_pandda_dict_for_sample_and_site_and_event(
self, sampleID, site_index, event_index
):
db_dict = {}
header = []
data = []
# creates sqlite file if non existent
connect = sqlite3.connect(self.data_source_file)
cursor = connect.cursor()
cursor.execute(
"select * from panddaTable where CrystalName='{0!s}'"
" and PANDDA_site_index='{1!s}'"
" and PANDDA_site_event_index='{2!s}'"
" and PANDDA_site_ligand_placed='True';".format(
sampleID, site_index, event_index
)
)
for column in cursor.description:
header.append(column[0])
data = cursor.fetchall()
for n, item in enumerate(data[0]):
db_dict[header[n]] = str(item)
return db_dict
def check_if_sample_exists_in_data_source(self, sampleID):
sample_exists = False
existing_samples_in_db = self.get_all_samples_in_data_source_as_list()
if sampleID in existing_samples_in_db:
sample_exists = True
return sample_exists
def import_csv_file(self, csv_file):
    """Import *csv_file* into mainTable.

    Rows whose CrystalName already exists are UPDATEd (empty csv fields
    overwrite the stored value, so users can blank out fields); new
    CrystalNames are INSERTed (empty fields are skipped). Only columns
    that exist in mainTable are written; the ID column is never touched.

    Fixes: the file is now opened in text mode with newline='' —
    csv.DictReader cannot read a binary ('rb') handle under Python 3 —
    and values are bound as SQL parameters so quotes cannot break the
    statement.
    """
    # creates sqlite file if non existent
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute("SELECT * FROM mainTable")
    # only update existing columns in data source
    available_columns = [column[0] for column in cursor.description]
    with open(csv_file, "r", newline="") as csv_import:
        # csv.DictReader uses first line in file for column headings by default
        for line in csv.DictReader(csv_import):
            sampleID = line["CrystalName"]
            if str(sampleID).replace(" ", "") == "":
                continue
            if self.check_if_sample_exists_in_data_source(sampleID):
                assignments = []
                params = []
                for key, value in line.items():
                    if key == "ID" or key == "CrystalName":
                        continue
                    if key not in available_columns:
                        continue
                    # empty fields are written as well so users can clear
                    # values in the data source via the csv file
                    assignments.append(str(key) + "=?")
                    params.append(str(value))
                cursor.execute(
                    "UPDATE mainTable SET "
                    + ",".join(assignments)
                    + " WHERE CrystalName=?;",
                    params + [sampleID],
                )
            else:
                columns = []
                params = []
                for key, value in line.items():
                    if key == "ID" or key not in available_columns:
                        continue
                    # ignore if nothing in csv field
                    if not str(value).replace(" ", "") == "":
                        columns.append(key)
                        params.append(value)
                cursor.execute(
                    "INSERT INTO mainTable ("
                    + ",".join(columns)
                    + ") VALUES ("
                    + ",".join("?" * len(columns))
                    + ");",
                    params,
                )
    connect.commit()
def update_data_source(self, sampleID, data_dict):
    """Update the mainTable row of *sampleID* with the values in *data_dict*.

    Empty values are written as SQL NULL; the ID and CrystalName keys are
    ignored. LastUpdated / LastUpdated_by are stamped automatically.

    Fixes: removed a stray debug print; non-empty values are bound as SQL
    parameters so quotes in a value cannot break the statement.
    """
    data_dict["LastUpdated"] = str(datetime.now().strftime("%Y-%m-%d %H:%M"))
    data_dict["LastUpdated_by"] = getpass.getuser()
    # need to do this since some older sqlite files contain a column called
    # DataProcessingResolutionHigh1.5sigma
    # and this does not go down well with the SQLite statement below
    for key in list(data_dict):
        if ".5" in key:
            del data_dict[key]
            break
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    assignments = []
    params = []
    for key, value in data_dict.items():
        if key == "ID" or key == "CrystalName":
            continue
        if not str(value).replace(" ", "") == "":
            assignments.append(str(key) + "=?")
            params.append(str(value))
        else:
            # empty fields are cleared in the data source
            assignments.append(str(key) + " = null")
    if assignments:
        cursor.execute(
            "UPDATE mainTable SET "
            + ",".join(assignments)
            + " WHERE CrystalName=?",
            params + [str(sampleID)],
        )
    connect.commit()
def update_panddaTable(self, sampleID, site_index, data_dict):
    """Write *data_dict* into the panddaTable row identified by *sampleID*
    and *site_index*; empty values are stored as NULL. The key columns
    (ID, CrystalName, PANDDA_site_index) are never modified."""
    data_dict["LastUpdated"] = str(datetime.now().strftime("%Y-%m-%d %H:%M"))
    data_dict["LastUpdated_by"] = getpass.getuser()
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    fragments = []
    for key, value in data_dict.items():
        if key in ("ID", "CrystalName", "PANDDA_site_index"):
            continue
        if str(value).replace(" ", "") == "":
            fragments.append(str(key) + " = null")
        else:
            fragments.append("{0!s}='{1!s}'".format(key, value))
    if fragments:
        cursor.execute(
            "UPDATE panddaTable SET "
            + ",".join(fragments)
            + " WHERE CrystalName='{0!s}' and PANDDA_site_index='{1!s}'".format(
                sampleID, site_index
            )
        )
    connect.commit()
def update_site_event_panddaTable(
    self, sampleID, site_index, event_index, data_dict
):
    """Write *data_dict* into the panddaTable row identified by *sampleID*,
    *site_index* and *event_index*; empty values become NULL. The key
    columns themselves are never modified."""
    data_dict["LastUpdated"] = str(datetime.now().strftime("%Y-%m-%d %H:%M"))
    data_dict["LastUpdated_by"] = getpass.getuser()
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    protected = ("ID", "CrystalName", "PANDDA_site_index", "PANDDA_site_event_index")
    fragments = []
    for key, value in data_dict.items():
        if key in protected:
            continue
        if str(value).replace(" ", "") == "":
            fragments.append(str(key) + " = null")
        else:
            fragments.append("{0!s}='{1!s}'".format(key, value))
    if fragments:
        cursor.execute(
            "UPDATE panddaTable SET "
            + ",".join(fragments)
            + " WHERE CrystalName='{0!s}' and PANDDA_site_index='{1!s}'"
            " and PANDDA_site_event_index='{2!s}'".format(
                sampleID, site_index, event_index
            )
        )
    connect.commit()
def update_depositTable(self, sampleID, structure_type, data_dict):
    """Update the depositTable row for (*sampleID*, *structure_type*).

    Values are double-quoted (they may contain single quotes) and any
    carriage returns / newlines are stripped; empty values become NULL.
    ID, CrystalName and StructureType are never modified.
    """
    data_dict["LastUpdated"] = str(datetime.now().strftime("%Y-%m-%d %H:%M"))
    data_dict["LastUpdated_by"] = getpass.getuser()
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    fragments = []
    for key, value in data_dict.items():
        if key in ("ID", "CrystalName", "StructureType"):
            continue
        if str(value).replace(" ", "") == "":
            fragments.append(str(key) + " = null")
        else:
            cleaned = str(value).replace("\r", "").replace("\n", "")
            fragments.append('{0!s}="{1!s}"'.format(key, cleaned))
    if fragments:
        cursor.execute(
            "UPDATE depositTable SET "
            + ",".join(fragments)
            + ' WHERE CrystalName="{0!s}" and StructureType="{1!s}"'.format(
                sampleID, structure_type
            )
        )
    connect.commit()
def update_specified_table(self, sampleID, data_dict, table):
    """Update the *table* row of *sampleID* with *data_dict*.

    Unlike update_data_source, empty values are simply skipped (existing
    values are kept, not set to NULL). ID and CrystalName are ignored.
    """
    data_dict["LastUpdated"] = str(datetime.now().strftime("%Y-%m-%d %H:%M"))
    data_dict["LastUpdated_by"] = getpass.getuser()
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    fragments = [
        "{0!s}='{1!s}'".format(key, value)
        for key, value in data_dict.items()
        if key not in ("ID", "CrystalName")
        and str(value).replace(" ", "") != ""  # ignore empty fields
    ]
    if fragments:
        cursor.execute(
            "UPDATE "
            + table
            + " SET "
            + ",".join(fragments)
            + " WHERE CrystalName='"
            + sampleID
            + "'"
        )
    connect.commit()
def update_insert_data_source(self, sampleID, data_dict):
    """UPDATE the mainTable row of *sampleID* if it exists, otherwise INSERT
    a new row.

    Empty values are skipped in both paths; INSERTs are restricted to
    columns that actually exist in mainTable. LastUpdated / LastUpdated_by
    are stamped automatically.

    Fixes: removed a dead 'Select CrystalName FROM mainTable' query whose
    result was never read; values are bound as SQL parameters so quotes
    cannot break the statements.
    """
    data_dict["LastUpdated"] = str(datetime.now().strftime("%Y-%m-%d %H:%M"))
    data_dict["LastUpdated_by"] = getpass.getuser()
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute("SELECT * FROM mainTable")
    # only update existing columns in data source
    available_columns = [column[0] for column in cursor.description]
    if self.check_if_sample_exists_in_data_source(sampleID):
        # one UPDATE per key, as before
        for key, value in data_dict.items():
            if key == "ID" or key == "CrystalName":
                continue
            if str(value).replace(" ", "") == "":  # ignore empty fields
                continue
            cursor.execute(
                "UPDATE mainTable SET " + str(key) + "=? WHERE CrystalName=?;",
                (str(value), sampleID),
            )
    else:
        columns = ["CrystalName"]
        params = [sampleID]
        for key, value in data_dict.items():
            if key == "ID" or key not in available_columns:
                continue
            if not str(value).replace(" ", "") == "":  # ignore empty fields
                columns.append(key)
                params.append(str(value))
        cursor.execute(
            "INSERT INTO mainTable ("
            + ",".join(columns)
            + ") VALUES ("
            + ",".join("?" * len(columns))
            + ");",
            params,
        )
    connect.commit()
def update_insert_any_table(self, table, data_dict, condition_dict):
    """Update the row of *table* matching all of *condition_dict*; if no
    such row exists, insert one (condition values included as columns).
    LastUpdated / LastUpdated_by are stamped automatically."""
    data_dict["LastUpdated"] = str(datetime.now().strftime("%Y-%m-%d %H:%M"))
    data_dict["LastUpdated_by"] = getpass.getuser()
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    # columns + WHERE clause derived from the condition dict
    columns = ",".join(condition_dict)
    condition_string = " and ".join(
        "{0!s}='{1!s}'".format(key, condition)
        for key, condition in condition_dict.items()
    )
    cursor.execute(
        "Select %s FROM %s where %s" % (columns, table, condition_string)
    )
    if cursor.fetchall():
        assignments = ",".join(
            "{0!s}='{1!s}'".format(key, value) for key, value in data_dict.items()
        )
        cursor.execute(
            "UPDATE "
            + table
            + " SET "
            + assignments
            + " WHERE "
            + condition_string
            + ";"
        )
    else:
        data_dict.update(condition_dict)
        cols = ",".join(data_dict)
        vals = ",".join("'" + str(value) + "'" for value in data_dict.values())
        cursor.execute(
            "INSERT INTO " + table + " (" + cols + ") VALUES (" + vals + ");"
        )
    connect.commit()
def update_insert_site_event_panddaTable(self, sampleID, data_dict):
    """Update the panddaTable row identified by *sampleID* and the
    PANDDA_site_index / PANDDA_site_event_index values of *data_dict*,
    inserting a new row if that (sample, site, event) triple is unknown.

    Fix: the existence check previously encoded fetched values with
    .encode('UTF8') (Python 2 leftover); under Python 3 that produced
    bytes which never compared equal to the str arguments, so existing
    rows were not recognized and duplicates were inserted. It also
    crashed on NULL values. Values are now compared as str and bound as
    SQL parameters.
    """
    data_dict["LastUpdated"] = str(datetime.now().strftime("%Y-%m-%d %H:%M"))
    data_dict["LastUpdated_by"] = getpass.getuser()
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute(
        "Select CrystalName,PANDDA_site_index,PANDDA_site_event_index"
        " FROM panddaTable"
    )
    found_sample_site = False
    for item in cursor.fetchall():
        if (
            str(item[0]) == str(sampleID)
            and str(item[1]) == str(data_dict["PANDDA_site_index"])
            and str(item[2]) == str(data_dict["PANDDA_site_event_index"])
        ):
            found_sample_site = True
            break
    if found_sample_site:
        # one UPDATE per key; the key columns themselves are never touched
        for key, value in data_dict.items():
            if key in (
                "ID",
                "CrystalName",
                "PANDDA_site_index",
                "PANDDA_site_event_index",
            ):
                continue
            if not str(value).replace(" ", "") == "":  # ignore empty fields
                cursor.execute(
                    "UPDATE panddaTable SET "
                    + str(key)
                    + "=? WHERE CrystalName=?"
                    " and PANDDA_site_index=?"
                    " and PANDDA_site_event_index=?;",
                    (
                        str(value),
                        str(sampleID),
                        str(data_dict["PANDDA_site_index"]),
                        str(data_dict["PANDDA_site_event_index"]),
                    ),
                )
    else:
        columns = []
        params = []
        for key, value in data_dict.items():
            if key == "ID":
                continue
            if not str(value).replace(" ", "") == "":  # ignore empty fields
                columns.append(key)
                params.append(str(value))
        cursor.execute(
            "INSERT INTO panddaTable ("
            + ",".join(columns)
            + ") VALUES ("
            + ",".join("?" * len(columns))
            + ");",
            params,
        )
    connect.commit()
def update_insert_depositTable(self, sampleID, data_dict):
    """Update the depositTable entry of *sampleID*, inserting it first if
    the sample is not yet listed. Empty values are skipped; INSERTs are
    restricted to columns that exist in depositTable.

    Fix: the existence check previously encoded fetched CrystalNames with
    .encode('UTF8') (Python 2 leftover); under Python 3 str() of the
    resulting bytes is "b'name'", which never equals *sampleID*, so every
    call inserted a duplicate row. Values are now compared as str and
    bound as SQL parameters; the debug prints were dropped.
    """
    data_dict["LastUpdated"] = str(datetime.now().strftime("%Y-%m-%d %H:%M"))
    data_dict["LastUpdated_by"] = getpass.getuser()
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute("SELECT * FROM depositTable")
    # only update existing columns in data source
    available_columns = [column[0] for column in cursor.description]
    cursor.execute("Select CrystalName FROM depositTable")
    samples_in_table = [str(item[0]) for item in cursor.fetchall()]
    if sampleID in samples_in_table:
        for key, value in data_dict.items():
            if key == "ID" or key == "CrystalName":
                continue
            if not str(value).replace(" ", "") == "":  # ignore empty fields
                cursor.execute(
                    "UPDATE depositTable SET "
                    + str(key)
                    + "=? WHERE CrystalName=?;",
                    (str(value), sampleID),
                )
    else:
        columns = ["CrystalName"]
        params = [sampleID]
        for key, value in data_dict.items():
            if key == "ID" or key not in available_columns:
                continue
            if not str(value).replace(" ", "") == "":  # ignore empty fields
                columns.append(key)
                params.append(str(value))
        cursor.execute(
            "INSERT INTO depositTable ("
            + ",".join(columns)
            + ") VALUES ("
            + ",".join("?" * len(columns))
            + ");",
            params,
        )
    connect.commit()
def export_to_csv_file(self, csv_file):
    """Dump the entire mainTable (header row plus all data rows) to
    *csv_file*.

    Fix: the output file is now opened via 'with' so the handle is closed
    deterministically, and with newline='' as the csv module requires
    (avoids blank lines on Windows).
    """
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute("SELECT * FROM mainTable")
    header = tuple(column[0] for column in cursor.description)
    rows = cursor.fetchall()
    with open(csv_file, "w", newline="") as handle:
        csv.writer(handle).writerows([header] + rows)
def load_samples_from_data_source(self):
    """Return (header, rows) for the whole mainTable: the list of column
    names and the list of row tuples."""
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute("SELECT * FROM mainTable")
    header = [column[0] for column in cursor.description]
    return header, cursor.fetchall()
def get_todoList_for_coot(self, RefinementOutcome):
    """Collect the samples coot should present for the given refinement
    outcome ('0 - All Datasets' matches any non-NULL outcome).

    Returns (sample_list_for_coot, crystalDict): the list holds, per
    sample, [CrystalName, CompoundCode, RefinementCIF, RefinementMTZfree,
    RefinementPathToRefinementFolder, RefinementOutcome,
    RefinementLigandConfidence]; crystalDict maps CrystalName to a list
    of 12-field PanDDA site/event detail lists.

    Fix: values are kept as str. The previous .encode('UTF8') produced
    bytes under Python 3, so formatting entry[0] into the panddaTable
    query yielded "b'name'" and crystalDict stayed empty.
    """
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    if RefinementOutcome == "0 - All Datasets":
        outcome = " not null "
    else:
        outcome = " '{0!s}' ".format(RefinementOutcome)
    sqlite = (
        "select"
        " CrystalName,"
        " CompoundCode,"
        " RefinementCIF,"
        " RefinementMTZfree,"
        " RefinementPathToRefinementFolder,"
        " RefinementOutcome,"
        " RefinementLigandConfidence "
        "from mainTable "
        "where RefinementOutcome is %s"
        " and (DimpleRfree is not Null or RefinementRfree is not Null)"
        " order by CrystalName ASC;" % outcome
    )
    cursor.execute(sqlite)
    sample_list_for_coot = [
        ["None" if field is None else str(field) for field in item]
        for item in cursor.fetchall()
    ]
    crystalDict = {}
    for entry in sample_list_for_coot:
        xtal = entry[0]
        if xtal in crystalDict:
            continue
        cursor.execute(
            "select"
            " PANDDA_site_event_map,"
            " PANDDA_site_x,"
            " PANDDA_site_y,"
            " PANDDA_site_z,"
            " PANDDA_site_spider_plot,"
            " PANDDA_site_index,"
            " PANDDA_site_event_index,"
            " PANDDA_site_confidence,"
            " PANDDA_site_name,"
            " PANDDA_site_InspectConfidence,"
            " PANDDA_site_interesting,"
            " PANDDA_site_event_comment "
            "from panddaTable "
            "where CrystalName = ?;",
            (xtal,),
        )
        events = cursor.fetchall()
        if events:
            crystalDict[xtal] = [
                [str(field) for field in event] for event in events
            ]
    return sample_list_for_coot, crystalDict
def translate_xce_column_list_to_sqlite(self, column_list):
out_list = []
for item in column_list:
if item.startswith("Exclude"):
out_list.append(["Exclude"])
if item.startswith("Ignore"):
out_list.append(["Ignore"])
if item.startswith("Export"):
out_list.append(["Export", item])
continue
if item.startswith("Show"):
out_list.append([item, item])
continue
if item.startswith("Run\nDimple"):
out_list.append([item, item])
continue
if item.startswith("Select"):
out_list.append([item, item])
continue
if item.startswith("Run\nxia2"):
out_list.append([item, item])
continue
if item.startswith("Dataset ID"):
out_list.append([item, item])
continue
if item.startswith("Reference\nSpaceGroup"):
out_list.append([item, item])
continue
if item.startswith("Difference\nUC Volume (%)"):
out_list.append([item, item])
continue
if item.startswith("Reference File"):
out_list.append([item, item])
continue
if item.startswith("PanDDA site details"):
out_list.append([item, item])
continue
for entry in self.column_list:
if entry[1] == item:
out_list.append([item, entry[0]])
break
return out_list
def export_csv_for_WONKA(self):
    """Write 'for_wonka.csv' (in the current working directory) with the
    joined mainTable/panddaTable ligand details of all samples whose
    panddaTable RefinementOutcome starts with 3, 4 or 5.

    Fix: the output file is opened via 'with' so the handle is closed,
    and with newline='' as required by the csv module.
    """
    SQLite = (
        "select"
        " panddaTable.PANDDA_site_ligand_resname,"
        " panddaTable.PANDDA_site_ligand_sequence_number,"
        " panddaTable.PANDDA_site_ligand_chain,"
        " panddaTable.RefinementOutcome,"
        " mainTable.CompoundSMILES,"
        " mainTable.RefinementBoundConformation,"
        " panddaTable.PANDDA_site_initial_mtz,"
        " panddaTable.PANDDA_site_event_map,"
        " panddaTable.CrystalName,"
        " mainTable.CompoundCode "
        "from"
        " mainTable inner join panddaTable"
        " on mainTable.CrystalName = panddaTable.CrystalName "
        "where (panddaTable.RefinementOutcome like '3%'"
        " or panddaTable.RefinementOutcome like '4%'"
        " or panddaTable.RefinementOutcome like '5%')"
    )
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute(SQLite)
    header = tuple(column[0] for column in cursor.description)
    rows = cursor.fetchall()
    with open("for_wonka.csv", "w", newline="") as handle:
        csv.writer(handle).writerows([header] + rows)
def create_or_remove_missing_records_in_depositTable(
    self, xce_logfile, xtal, type, db_dict
):
    """Keep depositTable in sync with the refinement stage of *xtal*.

    For type == 'ligand_bound': stores db_dict['RefinementOutcome'] via
    update_insert_data_source, then creates the depositTable entry once
    the refinement stage reaches 5 and deletes it when the stage is no
    longer 5. For type == 'ground_state': creates a depositTable entry
    keyed on db_dict['DimplePANDDApath'], or refreshes the PDB/MTZ/path
    of an already existing one.

    NOTE(review): the parameter name 'type' shadows the builtin; kept for
    interface compatibility. SQL is assembled by string formatting, so
    values containing quotes would break the statements.
    """
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    Logfile = XChemLog.updateLog(xce_logfile)
    oldRefiStage = ""
    if type == "ligand_bound":
        # remember the previous refinement stage before overwriting it,
        # so we can tell below whether the sample just left stage 5
        cursor.execute(
            "select RefinementOutcome"
            " from mainTable where CrystalName is '{0!s}'".format(xtal)
        )
        tmp = cursor.fetchall()
        oldRefiStage = str(tmp[0][0])
        Logfile.insert(
            'setting RefinementOutcome field to "'
            + db_dict["RefinementOutcome"]
            + '" for '
            + xtal
        )
        self.update_insert_data_source(xtal, db_dict)
    elif type == "ground_state":
        # ground-state entries are identified by their PanDDA path,
        # not by the crystal name
        cursor.execute(
            "select DimplePANDDApath from depositTable"
            " where StructureType is '{0!s}'"
            " and DimplePANDDApath is '{1!s}'".format(
                type, db_dict["DimplePANDDApath"]
            )
        )
        tmp = cursor.fetchall()
        if not tmp:
            Logfile.insert(
                "entry for ground-state model in depositTable does not exist"
            )
        else:
            # entry already exists: refresh the file references and stop here
            Logfile.warning(
                "entry for ground-state model in depositTable does already exist"
            )
            Logfile.warning(
                "updating PDB, MTZ and DimplePANDDApath for ground-state entry"
            )
            cursor.execute(
                "update depositTable set PDB_file='%s', MTZ_file='%s',"
                " DimplePANDDApath='%s' where StructureType='ground_state'"
                % (
                    db_dict["PDB_file"],
                    db_dict["MTZ_file"],
                    db_dict["DimplePANDDApath"],
                )
            )
            connect.commit()
            return
    # does an entry for this (crystal, structure type) already exist?
    cursor.execute(
        "select CrystalName,StructureType from depositTable"
        " where CrystalName is '{0!s}' and StructureType is '{1!s}'".format(
            xtal, type
        )
    )
    tmp = cursor.fetchall()
    if type == "ligand_bound":
        # create the entry once the sample reaches refinement stage 5
        if not tmp and int(db_dict["RefinementOutcome"].split()[0]) == 5:
            sqlite = (
                "insert into depositTable (CrystalName,StructureType)"
                " values ('{0!s}','{1!s}');".format(xtal, type)
            )
            Logfile.insert(
                "creating new entry for "
                + str(type)
                + " structure of "
                + xtal
                + " in depositTable"
            )
            cursor.execute(sqlite)
            connect.commit()
        else:
            # entry exists (or stage != 5): remove it if the sample
            # dropped out of stage 5
            if int(db_dict["RefinementOutcome"].split()[0]) != 5:
                sqlite = (
                    "delete from depositTable where CrystalName is '{0!s}'"
                    " and StructureType is '{1!s}'".format(xtal, type)
                )
                # only log the removal if there actually was a stage-5 entry
                if oldRefiStage.startswith("5"):
                    Logfile.insert(
                        "removing entry for "
                        + str(type)
                        + " structure of "
                        + xtal
                        + " from depositTable"
                    )
                cursor.execute(sqlite)
                connect.commit()
    elif type == "ground_state":
        # no existing entry for this PanDDA path: create one
        sqlite = (
            "insert into depositTable"
            " (CrystalName,StructureType,DimplePANDDApath,PDB_file,MTZ_file)"
            " values ('{0!s}','{1!s}','{2!s}','{3!s}','{4!s}');".format(
                xtal,
                type,
                db_dict["DimplePANDDApath"],
                db_dict["PDB_file"],
                db_dict["MTZ_file"],
            )
        )
        Logfile.insert(
            "creating new entry for "
            + str(type)
            + " structure of "
            + xtal
            + " in depositTable"
        )
        cursor.execute(sqlite)
        connect.commit()
def collected_xtals_during_visit(self, visitID):
    """Return the unique CrystalNames recorded in collectionTable for
    *visitID*, preserving first-seen order. The query is parameterized."""
    # creates sqlite file if non existent
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute(
        "select CrystalName from collectionTable"
        " where DataCollectionVisit = ?",
        (str(visitID),),
    )
    collectedXtals = []
    for sample in cursor.fetchall():
        if str(sample[0]) not in collectedXtals:
            collectedXtals.append(str(sample[0]))
    return collectedXtals
def collected_xtals_during_visit_for_scoring(self, visit):
    """Return the distinct CrystalNames collected during *visit*
    (parameterized query; 'select distinct' already deduplicates)."""
    # creates sqlite file if non existent
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute(
        "select distinct CrystalName from collectionTable"
        " where DataCollectionVisit = ?",
        (str(visit),),
    )
    xtalList = []
    for sample in cursor.fetchall():
        if str(sample[0]) not in xtalList:
            xtalList.append(str(sample[0]))
    return xtalList
def autoprocessing_result_user_assigned(self, sample):
    """Return True if the user (rather than XCE) selected the
    autoprocessing result for *sample*.

    A bit counterintuitive: the DB records DataProcessingAutoAssigned, so
    'False' or an empty string means user-assigned. Returns False when
    the sample is not yet in mainTable.

    Fix: the old emptiness test compared bytes
    (str(...).encode('ascii','ignore')) to the str "" — always False
    under Python 3; the value itself is now inspected.
    """
    userassigned = False
    # creates sqlite file if non existent
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute(
        "select DataProcessingAutoAssigned from mainTable where CrystalName = ?",
        (str(sample),),
    )
    outcome = cursor.fetchall()
    try:
        value = outcome[0][0]
        if value is not None and (
            "false" in str(value).lower() or str(value).strip() == ""
        ):
            userassigned = True
    except IndexError:
        # sample encountered the first time and not yet in mainTable
        pass
    return userassigned
def all_autoprocessing_results_for_xtal_as_dict(self, xtal):
    """Return every collectionTable row of *xtal* as a list of
    {column: str(value)} dicts — one dict per autoprocessing result.
    The query is parameterized."""
    dbList = []
    # creates sqlite file if non existent
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute(
        "select * from collectionTable where CrystalName=?;", (str(xtal),)
    )
    header = [column[0] for column in cursor.description]
    for result in cursor.fetchall():
        dbList.append({header[n]: str(item) for n, item in enumerate(result)})
    return dbList
def get_db_dict_for_visit_run_autoproc_score(
    self, xtal, visit, run, autoproc, score
):
    """Return the collectionTable row identified by (xtal, visit, run,
    autoproc, score) as a {column: str(value)} dict.

    Raises IndexError if no such row exists (unchanged behaviour). The
    query is parameterized; values are bound as str to match the TEXT
    columns.
    """
    db_dict = {}
    # creates sqlite file if non existent
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    sqlite = (
        "select * "
        "from collectionTable "
        "where CrystalName = ? and"
        " DataCollectionVisit = ? and"
        " DataCollectionRun = ? and"
        " DataProcessingProgram = ? and"
        " DataProcessingScore = ?"
    )
    cursor.execute(
        sqlite, (str(xtal), str(visit), str(run), str(autoproc), str(score))
    )
    header = [column[0] for column in cursor.description]
    data = cursor.fetchall()
    print(("SQLITE: {0!s}".format(sqlite)))
    print(("DATA: {0!s}".format(data)))
    for n, item in enumerate(data[0]):
        db_dict[header[n]] = str(item)
    return db_dict
def xtals_collected_during_visit_as_dict(self, visitID):
    """Return {CrystalName: mainTable-dict} for every crystal collected
    during *visitID* — a single visit string or, for the Agamemnon data
    structure, a list of visits. Keys are processed in sorted order.

    Fix: removed a stray 'sqlite3.connect(...).cursor()' call that opened
    a connection which was never used or closed.
    """
    # first get all collected xtals as list
    if isinstance(visitID, list):  # for Agamemnon data structure
        collectedXtals = []
        for visit in visitID:
            collectedXtals.extend(self.collected_xtals_during_visit(visit))
    else:
        collectedXtals = self.collected_xtals_during_visit(visitID)
    xtalDict = {}
    for xtal in sorted(collectedXtals):
        xtalDict[xtal] = self.get_db_dict_for_sample(xtal)
    return xtalDict
def samples_for_html_summary(self, whichSamples):
    """Return the CrystalNames to include in the HTML summary.

    *whichSamples* starting with '4' selects only RefinementOutcome 4
    samples, '5' only outcome 5; anything else selects outcomes 4, 5 and 6.
    """
    # creates sqlite file if non existent
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    if whichSamples.startswith("4"):
        where_clause = " where RefinementOutcome like '4%'"
    elif whichSamples.startswith("5"):
        where_clause = " where RefinementOutcome like '5%'"
    else:
        where_clause = (
            " where RefinementOutcome like '4%'"
            " or RefinementOutcome like '5%'"
            " or RefinementOutcome like '6%'"
        )
    cursor.execute(
        "select CrystalName from mainTable"
        + where_clause
        + " order by CrystalName ASC"
    )
    xtalList = []
    for sample in cursor.fetchall():
        name = str(sample[0])
        if name not in xtalList:
            xtalList.append(name)
    return xtalList
def get_ligand_confidence_for_ligand(self, xtal, ligChain, ligNumber, ligName):
    """Return the PANDDA_site_confidence recorded for the ligand of *xtal*
    identified by chain, residue number and residue name, or
    'not assigned' when the ligand is not in panddaTable. If several rows
    match, the last one wins (unchanged behaviour).

    The query is parameterized; arguments are bound as str to match the
    TEXT columns.
    """
    # creates sqlite file if non existent
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute(
        "select PANDDA_site_confidence from panddaTable"
        " where CrystalName = ? and"
        " PANDDA_site_ligand_chain = ? and"
        " PANDDA_site_ligand_sequence_number = ? and"
        " PANDDA_site_ligand_resname = ?",
        (str(xtal), str(ligChain), str(ligNumber), str(ligName)),
    )
    ligConfidence = "not assigned"
    for lig in cursor.fetchall():
        ligConfidence = lig[0]
    return ligConfidence
def get_label_info_from_db(self):
    """Return all labelTable entries as a list of [Label, Description]
    string pairs."""
    # creates sqlite file if non existent
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute("select Label,Description from labelTable")
    return [[str(label), str(description)] for label, description in cursor.fetchall()]
def get_label_of_sample(self, xtalID):
    """Return the label of *xtalID* from mainTable, or None if the sample
    is unknown. Parameterized query; loop-and-break replaced by fetchone."""
    # creates sqlite file if non existent
    connect = sqlite3.connect(self.data_source_file)
    cursor = connect.cursor()
    cursor.execute(
        "select label from mainTable where CrystalName = ?", (str(xtalID),)
    )
    row = cursor.fetchone()
    return row[0] if row is not None else None