xce/lib/XChemDeposit.py from xchem/XChemExplorer

xce/lib/XChemDeposit.py
Summary

Maintainability

1 mo
Test Coverage

Issues
import fileinput
import glob
import os
import sys

from PyQt4 import QtCore

from xce.lib import XChemDB
from xce.lib import XChemLog
from xce.lib import XChemMain
from xce.lib import XChemUtils


class templates:
    """Builds the mmCIF metadata text that accompanies a group deposition."""

    def _quote_author_names(self, names, prefix=""):
        """Return one quoted mmCIF row per author in a ';'-separated string.

        Blank entries are skipped. If the first comma of a name is not followed
        by a space, every comma in that name is replaced by ", " so that last
        name and initial are separated by comma and space. Each row is
        ``prefix`` followed by the single-quoted name and a newline.
        """
        rows = []
        for name in names.split(";"):
            if name.replace(" ", "") == "":
                continue
            comma = name.find(",")
            if name[comma + 1 : comma + 2] != " ":
                name = name.replace(",", ", ")
            rows.append("{0!s}'{1!s}'\n".format(prefix, name))
        return "".join(rows)

    def _wrap_sequence(self, sequence):
        """Return ``sequence`` as the opening of an mmCIF text field.

        The result starts with ';'. The first line carries 69 residues (70
        characters including the ';'), every following line carries 70.
        """
        lines = [sequence[:69]]
        lines.extend(sequence[i : i + 70] for i in range(69, len(sequence), 70))
        return ";" + "\n".join(lines)

    def data_template_cif(self, depositDict):
        """Return the data-template mmCIF text for one deposition.

        depositDict is indexed directly for every field used below, so a plain
        dict raises KeyError for a missing field. One polymer entity is written
        when "molecule_name_two" is blank or "none" (case-insensitive), two
        otherwise.

        The NCBI taxonomy IDs are looked up by scientific name in
        XChemMain.NCBI_taxonomy_ID(). If a name has no match, an error is
        printed and the mmCIF unknown marker '?' is written in place of the ID
        instead of failing with UnboundLocalError.
        """
        taxonomy_dict = XChemMain.NCBI_taxonomy_ID()
        pdbx_gene_src_ncbi_taxonomy_id = None
        pdbx_host_org_ncbi_taxonomy_id = None
        for key in taxonomy_dict:
            if taxonomy_dict[key] == depositDict["Source_organism_scientific_name"]:
                pdbx_gene_src_ncbi_taxonomy_id = key
            if taxonomy_dict[key] == depositDict["Expression_system_scientific_name"]:
                pdbx_host_org_ncbi_taxonomy_id = key

        if (
            pdbx_gene_src_ncbi_taxonomy_id is None
            or pdbx_host_org_ncbi_taxonomy_id is None
        ):
            print(
                "Error: Something went wrong!"
                " Please check if you have saved the .deposit file to the database:"
                " Menu Deposition -> Edit Information"
            )
            # fall back to the mmCIF "unknown" marker so the template can still
            # be assembled
            if pdbx_gene_src_ncbi_taxonomy_id is None:
                pdbx_gene_src_ncbi_taxonomy_id = "?"
            if pdbx_host_org_ncbi_taxonomy_id is None:
                pdbx_host_org_ncbi_taxonomy_id = "?"

        # one name must be within quotation, last name and first initial must be
        # separated by comma and space
        audit_author_name = self._quote_author_names(
            depositDict["structure_author_name"]
        )
        primary_citation_author_name = self._quote_author_names(
            depositDict["primary_citation_author_name"], "primary "
        )

        molecule_one_letter_sequence = self._wrap_sequence(
            depositDict["molecule_one_letter_sequence"]
        )

        if (
            depositDict["molecule_name_two"].replace(" ", "") == ""
            or depositDict["molecule_name_two"].replace(" ", "").lower() == "none"
        ):
            entity = (
                "loop_\n"
                "_entity.id\n"
                "_entity.type\n"
                "_entity.src_method\n"
                "_entity.pdbx_description\n"
                "_entity.pdbx_mutation\n"
                '1 polymer     man "%s" %s\n'
                % (
                    depositDict["Source_organism_gene"],
                    depositDict["fragment_name_one_specific_mutation"],
                )
                + "#\n"
                "loop_\n"
                "_entity_poly.entity_id\n"
                "_entity_poly.type\n"
                "_entity_poly.pdbx_seq_one_letter_code\n"
                "_entity_poly.pdbx_strand_id\n"
                "_entity_poly.pdbx_seq_db_id\n"
                "_entity_poly.pdbx_seq_db_name\n"
                '1 "polypeptide(L)"\n' + molecule_one_letter_sequence + "\n"
                ";\n"
                "%s %s UNP\n"
                % (
                    depositDict["protein_chains"],
                    depositDict["molecule_one_letter_sequence_uniprot_id"],
                )
                + "#\n"
                "loop_\n"
                "_entity_src_gen.entity_id\n"
                "_entity_src_gen.gene_src_strain\n"
                "_entity_src_gen.pdbx_gene_src_scientific_name\n"
                "_entity_src_gen.pdbx_gene_src_ncbi_taxonomy_id\n"
                "_entity_src_gen.pdbx_host_org_scientific_name\n"
                "_entity_src_gen.pdbx_host_org_ncbi_taxonomy_id\n"
                '1 ? "%s" %s  "%s" %s\n'
                % (
                    depositDict["Source_organism_scientific_name"],
                    pdbx_gene_src_ncbi_taxonomy_id,
                    depositDict["Expression_system_scientific_name"],
                    pdbx_host_org_ncbi_taxonomy_id,
                )
                + "#\n"
            )
        else:
            molecule_two_letter_sequence = self._wrap_sequence(
                depositDict["molecule_two_letter_sequence"]
            )

            entity = (
                "loop_\n"
                "_entity.id\n"
                "_entity.type\n"
                "_entity.src_method\n"
                "_entity.pdbx_description\n"
                "_entity.pdbx_mutation\n"
                '1 polymer     man "%s" %s\n'
                % (
                    depositDict["Source_organism_gene"],
                    depositDict["fragment_name_one_specific_mutation"],
                )
                + '2 polymer     man "%s" %s\n'
                % (
                    depositDict["Source_organism_gene_two"],
                    depositDict["fragment_name_two_specific_mutation"],
                )
                + "#\n"
                "loop_\n"
                "_entity_poly.entity_id\n"
                "_entity_poly.type\n"
                "_entity_poly.pdbx_seq_one_letter_code\n"
                "_entity_poly.pdbx_strand_id\n"
                "_entity_poly.pdbx_seq_db_id\n"
                "_entity_poly.pdbx_seq_db_name\n"
                '1 "polypeptide(L)"\n' + molecule_one_letter_sequence + "\n"
                ";\n"
                "%s %s UNP\n"
                % (
                    depositDict["molecule_chain_one"],
                    depositDict["molecule_one_letter_sequence_uniprot_id"],
                )
                + '2 "polypeptide(L)"\n'
                + molecule_two_letter_sequence
                + "\n"
                ";\n"
                "%s %s UNP\n"
                % (
                    depositDict["molecule_chain_two"],
                    depositDict["molecule_two_letter_sequence_uniprot_id"],
                )
                + "#\n"
                "loop_\n"
                "_entity_src_gen.entity_id\n"
                "_entity_src_gen.gene_src_strain\n"
                "_entity_src_gen.pdbx_gene_src_scientific_name\n"
                "_entity_src_gen.pdbx_gene_src_ncbi_taxonomy_id\n"
                "_entity_src_gen.pdbx_host_org_scientific_name\n"
                "_entity_src_gen.pdbx_host_org_ncbi_taxonomy_id\n"
                '1 ? "%s" %s  "%s" %s\n'
                % (
                    depositDict["Source_organism_scientific_name"],
                    pdbx_gene_src_ncbi_taxonomy_id,
                    depositDict["Expression_system_scientific_name"],
                    pdbx_host_org_ncbi_taxonomy_id,
                )
                + '2 ? "%s" %s  "%s" %s\n'
                % (
                    depositDict["Source_organism_scientific_name"],
                    pdbx_gene_src_ncbi_taxonomy_id,
                    depositDict["Expression_system_scientific_name"],
                    pdbx_host_org_ncbi_taxonomy_id,
                )
                + "#\n"
            )

        # NOTE: "%" binds tighter than "+", and adjacent literals are joined at
        # compile time, so each "%" below formats the run of literals since the
        # preceding "+".
        data_template_cif = (
            "data_UNNAMED\n"
            "#\n"
            "_pdbx_database_status.entry_id                       UNNAMED\n"
            "_pdbx_database_status.dep_release_code_coordinates   '%s'\n"
            % depositDict["Release_status_for_coordinates"]
            + "_pdbx_database_status.dep_release_code_sequence      '{0!s}'\n".format(
                depositDict["Release_status_for_sequence"]
            )
            + "#\n"
            "_pdbx_deposit_group.group_id       UNNAMED\n"
            '_pdbx_deposit_group.group_description  "%s"\n'
            % depositDict["group_description"]
            + '_pdbx_deposit_group.group_title        "{0!s}"\n'.format(
                depositDict["group_title"]
            )
            + '_pdbx_deposit_group.group_type         "{0!s}"\n'.format(
                depositDict["group_type"]
            )
            + "#\n"
            "_exptl_crystal_grow.crystal_id      1\n"
            "_exptl_crystal_grow.method          '%s'\n"
            % depositDict["crystallization_method"]
            + "_exptl_crystal_grow.pH              {0!s}\n".format(
                depositDict["crystallization_pH"]
            )
            + "_exptl_crystal_grow.temp            {0!s}\n".format(
                depositDict["crystallization_temperature"]
            )
            + '_exptl_crystal_grow.pdbx_details    "{0!s}"\n'.format(
                depositDict["crystallization_details"]
            )
            + "#\n"
            "_diffrn.id                     1\n"
            "_diffrn.ambient_temp           %s\n"
            % depositDict["data_collection_temperature"]
            + "_diffrn.crystal_id             1\n"
            "#\n"
            "_diffrn_source.diffrn_id                       1\n"
            "_diffrn_source.source                          %s\n"
            % depositDict["radiation_source"]
            + '_diffrn_source.type                            "{0!s}"\n'.format(
                depositDict["radiation_source_type"]
            )
            + "_diffrn_source.pdbx_wavelength_list            {0!s}\n".format(
                depositDict["radiation_wavelengths"]
            )
            + "#\n"
            "_diffrn_detector.detector               %s\n"
            % depositDict["radiation_detector"]
            + "_diffrn_detector.type                   '{0!s}'\n".format(
                depositDict["radiation_detector_type"]
            )
            + "_diffrn_detector.pdbx_collection_date   {0!s}\n".format(
                depositDict["data_collection_date"]
            )
            + "_diffrn_detector.diffrn_id              1\n"
            "#\n"
            "_diffrn_radiation.diffrn_id                        1\n"
            "_diffrn_radiation.wavelength_id                    1\n"
            "_diffrn_radiation.pdbx_diffrn_protocol             'SINGLE WAVELENGTH'\n"
            "#\n"
            "_diffrn_radiation_wavelength.id           1\n"
            "_diffrn_radiation_wavelength.wavelength   %s\n"
            % depositDict["radiation_wavelengths"]
            + "#\n"
            "#\n" + entity + "loop_\n"
            "_pdbx_contact_author.id                  \n"
            "_pdbx_contact_author.address_1           \n"
            "_pdbx_contact_author.address_2           \n"
            "_pdbx_contact_author.city                \n"
            "_pdbx_contact_author.state_province      \n"
            "_pdbx_contact_author.postal_code         \n"
            "_pdbx_contact_author.email               \n"
            "_pdbx_contact_author.name_first          \n"
            "_pdbx_contact_author.name_last           \n"
            "_pdbx_contact_author.country             \n"
            "_pdbx_contact_author.phone               \n"
            "_pdbx_contact_author.role                \n"
            "_pdbx_contact_author.organization_type   \n"
            "_pdbx_contact_author.identifier_ORCID    \n"
            "1 '%s' '%s' '%s' '?' '%s' %s %s '%s' '%s' '%s' '%s' %s %s\n"
            % (
                depositDict["contact_author_PI_address"],
                depositDict["contact_author_PI_organization_name"],
                depositDict["contact_author_PI_city"],
                depositDict["contact_author_PI_Zip_Code"],
                depositDict["contact_author_PI_email"],
                depositDict["contact_author_PI_first_name"],
                depositDict["contact_author_PI_last_name"],
                depositDict["contact_author_PI_Country"],
                depositDict["contact_author_PI_phone_number"],
                depositDict["contact_author_PI_role"],
                depositDict["contact_author_PI_organization_type"],
                depositDict["contact_author_PI_ORCID"],
            )
            + "2 '{0!s}' '{1!s}' '{2!s}' '?' '{3!s}' '{4!s}' {5!s}"
            " {6!s} '{7!s}' '{8!s}' '{9!s}' '{10!s}' {11!s}\n".format(
                depositDict["contact_author_address"],
                depositDict["contact_author_organization_name"],
                depositDict["contact_author_city"],
                depositDict["contact_author_Zip_Code"].replace(" ", ""),
                depositDict["contact_author_email"],
                depositDict["contact_author_first_name"],
                depositDict["contact_author_last_name"],
                depositDict["contact_author_Country"],
                depositDict["contact_author_phone_number"],
                depositDict["contact_author_role"],
                depositDict["contact_author_organization_type"],
                depositDict["contact_author_ORCID"],
            )
            + "#\n"
            "loop_\n"
            "_audit_author.name\n" + audit_author_name + "#\n"
            "_citation.id                        primary\n"
            "_citation.title                     '%s'\n" % depositDict["group_title"]
            + "_citation.journal_abbrev            'To Be Published'\n"
            "#\n"
            "loop_\n"
            "_citation_author.citation_id\n"
            "_citation_author.name\n" + primary_citation_author_name + "#\n"
            "_struct.entry_id                     UNNAMED\n"
            "_struct.title\n"
            ";%s\n" % depositDict["title"] + ";\n"
            "#\n"
            "_struct_keywords.entry_id        UNNAMED\n"
            '_struct_keywords.text            "%s"\n'
            % depositDict["structure_keywords"]
            + "#\n"
            "_pdbx_struct_assembly_depositor_info.id                   1\n"
            "_pdbx_struct_assembly_depositor_info.method_details       PISA\n"
            "_pdbx_struct_assembly_depositor_info.oligomeric_count     %s\n"
            % depositDict["biological_assembly_chain_number"]
            + "#\n"
        )

        return data_template_cif


class update_depositTable(QtCore.QThread):
    """Thread that writes one set of deposition values to every depositTable row."""

    def __init__(self, deposit_dict, database, xce_logfile):
        """Store the values to write and open the log file and data source."""
        QtCore.QThread.__init__(self)
        self.deposit_dict = deposit_dict
        self.Logfile = XChemLog.updateLog(xce_logfile)
        self.db = XChemDB.data_source(database)

    def run(self):
        """Update every (CrystalName, StructureType) row of depositTable.

        Each row receives its own copy of self.deposit_dict. The copy is
        adjusted with the beamline details and data collection date found for
        that crystal in mainTable before it is passed to
        self.db.update_depositTable.
        """
        self.Logfile.insert(
            "all entries in the depositTable will be updated with the following values:"
        )
        for key in self.deposit_dict:
            # format instead of "+" so non-string values do not raise TypeError
            self.Logfile.insert("{0!s}: {1!s}".format(key, self.deposit_dict[key]))
        dbEntries = self.db.execute_statement(
            "select CrystalName,StructureType from depositTable;"
        )
        for item in dbEntries:
            xtal = str(item[0])
            structure_type = str(item[1])
            # individual fields might need updating for some xtals, so work on a
            # copy; otherwise the tweaks made for one xtal leak into the next
            db_dict = dict(self.deposit_dict)

            # try to get information about the diffraction experiment
            try:
                diffractionExperiment = self.db.execute_statement(
                    "select DataCollectionBeamline,DataCollectionDate"
                    " from mainTable where CrystalName is '{0!s}'".format(xtal)
                )
                beamline = str(diffractionExperiment[0][0])
                date = str(diffractionExperiment[0][1])
            except (UnboundLocalError, IndexError):
                self.Logfile.warning(
                    "%s: cannot find details about diffraction experiment in mainTable"
                    % xtal
                )
                beamline = db_dict["radiation_source"]
                date = db_dict["data_collection_date"]
                self.Logfile.warning(
                    "%s: using values provided in depositTable for beamline"
                    " and data collection date" % xtal
                )
            if beamline.lower() != "none":
                db_dict = self.tweak_deposit_dict(xtal, db_dict)
            # keep only the first whitespace-separated field of the date; an
            # empty date has none and is left untouched
            date_fields = date.split()
            if date.lower() != "none" and date_fields:
                db_dict["data_collection_date"] = date_fields[0]

            self.Logfile.insert(
                "updating depositTable for " + xtal + " @ " + structure_type
            )
            self.db.update_depositTable(xtal, structure_type, db_dict)
        self.Logfile.insert("Note: use DBbrowser to edit individual entries")

    def tweak_deposit_dict(self, xtal, db_dict):
        """Expand a short beamline name in db_dict into full source/detector fields.

        If db_dict["radiation_source_type"] is one of the short beamline names
        listed below, it is replaced by the full source name and the detector
        type, detector and source fields are set accordingly. db_dict is
        modified in place and returned. xtal is not used.
        """
        # short beamline name -> [radiation source type, detector type]
        dls_beamline_dict = {
            "i02": ["DIAMOND BEAMLINE I02", "DECTRIS PILATUS 6M"],
            "i03": ["DIAMOND BEAMLINE I03", "DECTRIS EIGER2 XE 16M"],
            "i04": ["DIAMOND BEAMLINE I04", "DECTRIS EIGER2 XE 16M"],
            "i04-1": ["DIAMOND BEAMLINE I04-1", "DECTRIS EIGER2 XE 9M"],
            "i23": ["DIAMOND BEAMLINE I23", "DECTRIS PILATUS 12M"],
            "i24": ["DIAMOND BEAMLINE I24", "DECTRIS PILATUS 6M"],
        }

        # read the short name once: radiation_source_type is overwritten below
        # and must not be reused as the lookup key
        beamline = db_dict["radiation_source_type"]
        if beamline in dls_beamline_dict:
            source_type, detector_type = dls_beamline_dict[beamline]
            db_dict["radiation_source_type"] = source_type
            db_dict["radiation_detector_type"] = detector_type
            db_dict["radiation_detector"] = "PIXEL"
            db_dict["radiation_source"] = "SYNCHROTRON"

        return db_dict


class prepare_mmcif_files_for_deposition(QtCore.QThread):
    def __init__(
        self,
        database,
        xce_logfile,
        overwrite_existing_mmcif,
        projectDir,
        ground_state,
        ignore_event_map,
    ):
        """Set up logging, the data source and the per-run state.

        If ground_state is truthy it is indexed as (pdb file, mtz file, PanDDA
        directory). In that mode the PanDDA directory becomes self.projectDir,
        the original projectDir is kept in self.logDir, and the ground-state
        pdb/mtz are loaded immediately.
        """
        QtCore.QThread.__init__(self)
        self.xce_logfile = xce_logfile
        self.Logfile = XChemLog.updateLog(xce_logfile)
        self.db = XChemDB.data_source(database)
        self.overwrite_existing_mmcif = overwrite_existing_mmcif
        self.projectDir = projectDir
        self.ignore_event_map = ignore_event_map

        self.errorList = []
        self.eventList = []
        self.db_dict = None
        self.data_template_dict = None
        self.pdb = None
        self.mtz = None
        self.logDir = None

        self.ground_state = bool(ground_state)
        self.ground_state_pdb = ""
        # defined in both modes so the attribute always exists
        self.ground_state_mtz = ""
        self.panddaDir = ""
        if self.ground_state:
            self.ground_state_pdb = ground_state[0]
            self.ground_state_mtz = ground_state[1]
            self.panddaDir = ground_state[2]
            self.logDir = self.projectDir
            self.projectDir = self.panddaDir
            self.pdb = XChemUtils.pdbtools(self.ground_state_pdb)
            self.mtz = XChemUtils.mtztools(self.ground_state_mtz)

    def run(self):
        """Prepare the mmcif files for every sample selected from the database.

        Samples come from depositTable (ground-state mode, matched on the PanDDA
        directory) or from mainTable (RefinementOutcome starting with '5'). For
        each sample the working directory is changed and a fixed sequence of
        checks and preparation steps is run; the first step returning a false
        value ends the work on that sample.
        """
        self.Logfile.insert(
            "======= preparing mmcif files for wwPDB deposition ======="
        )
        self.Logfile.insert("checking DB for structures to deposit...")
        if self.ground_state:
            query = (
                "select CrystalName from depositTable where DimplePANDDApath = '%s';"
                % self.panddaDir
            )
        else:
            query = (
                "select CrystalName from mainTable where RefinementOutcome like '5%';"
            )
        toDeposit = self.db.execute_statement(query)
        self.Logfile.insert(
            "found " + str(len(toDeposit)) + " samples ready for deposition"
        )

        self.emit(QtCore.SIGNAL("update_progress_bar"), 0)

        for row in sorted(toDeposit):
            xtal = str(row[0])
            if self.ground_state:
                workdir = self.projectDir
            else:
                workdir = os.path.join(self.projectDir, xtal)
            os.chdir(workdir)
            self.Logfile.insert("%s: ----- preparing files for deposition -----" % xtal)

            if self.ground_state:
                # "and" short-circuits, so a failing step stops the chain just
                # like an early "continue"
                if not (
                    self.data_template_dict_exists(xtal)
                    and self.save_data_template_dict(xtal)
                    and self.create_model_mmcif(xtal)
                ):
                    continue

                self.add_funding_information(xtal)

                if not (
                    self.apo_mmcif_exists()
                    and self.add_apo_sf_mmcif_to_ground_state_mmcif()
                    and self.add_data_increment_to_apo_mmcif(xtal)
                ):
                    continue
            else:
                if not (
                    self.mmcif_files_can_be_replaced(xtal)
                    and self.data_template_dict_exists(xtal)
                    and self.db_dict_exists(xtal)
                    and self.refine_bound_exists(xtal)
                    and self.refine_mtz_exists(xtal)
                    and self.mtzFree_exists(xtal)
                    and self.aimless_logfile_exists(xtal)
                    and self.ligand_in_pdb_file(xtal)
                    and self.eventMTZ_exists(xtal)
                    and self.find_matching_event_map(xtal)
                    and self.save_data_template_dict(xtal)
                    and self.create_model_mmcif(xtal)
                ):
                    continue

                self.add_funding_information(xtal)

                if not (
                    self.add_ligand_cif_to_model_mmcif(xtal)
                    and self.create_sf_mmcif(xtal)
                    and self.event_maps_exist_in_sf_mmcif(xtal)
                ):
                    continue

                self.make_table_one(xtal)

        self.print_errorlist()
        self.Logfile.insert(
            "======= finished preparing mmcif files for wwPDB deposition ======="
        )

    def data_template_dict_exists(self, xtal):
        """Load the depositTable entry for xtal into self.data_template_dict.

        Returns False, and adds xtal to the error list, when the data source
        returns an empty dict; returns True otherwise.
        """
        self.data_template_dict = None
        self.Logfile.insert(
            "%s: reading information from depositTable for sample" % xtal
        )
        self.data_template_dict = self.db.get_deposit_dict_for_sample(xtal)

        if self.data_template_dict == {}:
            self.Logfile.error(
                "%s: cannot find data_template dictionary in depositTable;"
                " moving to next dataset..." % xtal
            )
            self.add_to_errorList(xtal)
            return False

        self.Logfile.insert(
            "%s: found data_template dictionary in depositTable" % xtal
        )
        return True

    def update_beamline_info_data_template_dict(self, xtal):
        """Fill the radiation fields of self.data_template_dict from the beamline.

        Does nothing unless self.db_dict["DataCollectionBeamline"] is one of the
        short beamline names listed below.
        """
        # short beamline name -> [radiation source type, detector type]
        beamline_details = {
            "i02": ["DIAMOND BEAMLINE I02", "DECTRIS PILATUS 6M"],
            "i03": ["DIAMOND BEAMLINE I03", "DECTRIS EIGER2 XE 16M"],
            "i04": ["DIAMOND BEAMLINE I04", "DECTRIS EIGER2 XE 16M"],
            "i04-1": ["DIAMOND BEAMLINE I04-1", "DECTRIS EIGER2 XE 9M"],
            "i23": ["DIAMOND BEAMLINE I23", "DECTRIS PILATUS 12M"],
            "i24": ["DIAMOND BEAMLINE I24", "DECTRIS PILATUS 6M"],
        }

        beamline = self.db_dict["DataCollectionBeamline"]
        if beamline not in beamline_details:
            return

        source_type, detector_type = beamline_details[beamline]
        template = self.data_template_dict
        template["radiation_source_type"] = source_type
        template["radiation_detector_type"] = detector_type
        template["radiation_detector"] = "PIXEL"
        template["radiation_source"] = "SYNCHROTRON"
        self.Logfile.insert(
            "%s: setting data collection beamline to %s"
            % (xtal, template["radiation_source_type"])
        )

    def db_dict_exists(self, xtal):
        """Load the mainTable entry for xtal into self.db_dict.

        Returns False, and adds xtal to the error list, when the data source
        returns an empty dict. Otherwise the beamline fields of the data
        template are refreshed and True is returned.
        """
        self.db_dict = None
        self.Logfile.insert("%s: reading information from mainTable for sample" % xtal)
        self.db_dict = self.db.get_db_dict_for_sample(xtal)

        if self.db_dict == {}:
            self.Logfile.error(
                "%s: cannot find db_dict dictionary in mainTable;"
                " moving to next dataset..." % xtal
            )
            self.add_to_errorList(xtal)
            return False

        self.Logfile.insert("%s: found db_dict dictionary in mainTable" % xtal)
        self.update_beamline_info_data_template_dict(xtal)
        return True

    def mmcif_files_can_be_replaced(self, xtal):
        """Decide whether new mmcif files may be written in the current directory.

        With self.overwrite_existing_mmcif set, the mmcif file names stored in
        depositTable for xtal are cleared, every *.mmcif file in the working
        directory is removed, and True is returned. Otherwise True is returned
        only if no *.mmcif file exists.
        """
        if not self.overwrite_existing_mmcif:
            status = True
            for mmcif in glob.glob("*.mmcif"):
                self.Logfile.warning("%s: %s exists; skipping..." % (xtal, mmcif))
                status = False
            return status

        self.Logfile.insert(
            "%s: removing existing mmcif files as chosen by user" % xtal
        )
        self.db.execute_statement(
            "update depositTable set mmCIF_model_file='',mmCIF_SF_file=''"
            " where CrystalName is '{0!s}'".format(xtal)
        )
        for mmcif in glob.glob("*.mmcif"):
            self.Logfile.warning("%s: removing %s" % (xtal, mmcif))
            # os.remove instead of a shell "rm": no quoting problems with
            # unusual file names; a failed removal is logged, not raised
            try:
                os.remove(mmcif)
            except OSError as error:
                self.Logfile.warning(
                    "%s: could not remove %s (%s)" % (xtal, mmcif, error)
                )
        return True

    def refine_bound_exists(self, xtal):
        """Load refine.split.bound-state.pdb from the working directory.

        On success self.pdb holds the XChemUtils.pdbtools object and True is
        returned; otherwise xtal is added to the error list and False is
        returned.
        """
        self.pdb = None
        self.Logfile.insert(
            "%s: checking if refine.split.bound-state.pdb exists" % xtal
        )

        if not os.path.isfile("refine.split.bound-state.pdb"):
            self.Logfile.error(
                "%s: cannot find refine.split.bound-state.pdb;"
                " moving to next dataset..." % xtal
            )
            self.add_to_errorList(xtal)
            return False

        self.Logfile.insert("%s: found refine.split.bound-state.pdb" % xtal)
        self.pdb = XChemUtils.pdbtools("refine.split.bound-state.pdb")
        return True

    def refine_mtz_exists(self, xtal):
        """Load refine.mtz from the working directory.

        On success self.mtz holds the XChemUtils.mtztools object and True is
        returned; otherwise xtal is added to the error list and False is
        returned.
        """
        self.mtz = None
        self.Logfile.insert("%s: checking if refine.mtz exists" % xtal)

        if not os.path.isfile("refine.mtz"):
            self.Logfile.error(
                "%s: cannot find refine.mtz; moving to next dataset..." % xtal
            )
            self.add_to_errorList(xtal)
            return False

        self.Logfile.insert("%s: found refine.mtz" % xtal)
        self.mtz = XChemUtils.mtztools("refine.mtz")
        return True

    def run_aimless_merge_only(self, xtal, unmerged, APpath):
        """Run AIMLESS in onlymerge mode and link its log file as <xtal>.log.

        AIMLESS is started in the current working directory with ``unmerged``
        as input; its output is redirected to a log file named after
        ``unmerged``. The working directory is then changed to the sample
        directory, where <xtal>.log is replaced by a symlink to
        <APpath>/<that log file>.
        """
        merge_log = unmerged.replace(".mtz", ".log")
        cmd = (
            "aimless hklin %s hklout mergeonly.mtz << eof > %s\n"
            % (unmerged, merge_log)
            + " onlymerge\n"
            "eof"
        )
        self.Logfile.insert("%s: running AIMLESS in onlymerge mode..." % xtal)
        os.system(cmd)
        os.chdir(os.path.join(self.projectDir, xtal))

        link_name = "%s.log" % xtal
        # plain os calls instead of shell "rm"/"ln -s": no quoting problems, and
        # failures are logged; lexists also catches a dangling symlink
        try:
            if os.path.lexists(link_name):
                os.remove(link_name)
            os.symlink("%s/%s" % (APpath, merge_log), link_name)
        except OSError as error:
            self.Logfile.error(
                "%s: could not link AIMLESS logfile (%s)" % (xtal, error)
            )

        self.Logfile.insert("%s: finished running AIMLESS" % xtal)
        if os.path.isfile(xtal + ".log"):
            self.Logfile.insert("%s: AIMLESS logfile successfully created" % xtal)
        else:
            self.Logfile.error("%s: cannot find AIMLESS logfile..." % xtal)

    def prepare_aimless_log(self, xtal):
        """Ask XChemUtils.parse to build a pseudo-AIMLESS log from <xtal>.log."""
        logfile = xtal + ".log"
        parser = XChemUtils.parse()
        parser.make_pseudo_aimless_log_from_json(logfile)

    def aimless_logfile_exists(self, xtal):
        """Check that a usable AIMLESS-style log is available for xtal.

        <xtal>.log is accepted if it contains the text "AIMLESS", or if it
        resolves to a file ending in ".table1". Otherwise two fallbacks are
        tried in order: running AIMLESS in onlymerge mode on the first
        *_scaled_unmerged.mtz found next to the real log file, then building a
        pseudo-AIMLESS log (accepted if aimless_dials.log exists afterwards).
        Returns True if any of these succeeded. May change the working
        directory while trying the fallbacks.
        """
        self.Logfile.insert(
            "%s: checking if aimless logfile, i.e. %s.log, exists" % (xtal, xtal)
        )
        fileStatus = False
        if os.path.isfile("%s.log" % xtal):
            self.Logfile.insert("%s: found %s.log" % (xtal, xtal))
            # context manager so the handle is closed; any() stops at the
            # first matching line
            with open("%s.log" % xtal) as logfile:
                fileStatus = any("AIMLESS" in line for line in logfile)
            if not fileStatus:
                if os.path.realpath("%s.log" % xtal).endswith(".table1"):
                    self.Logfile.warning(
                        "{0!s}: {1!s}.log seems to be a staraniso .table1 file".format(
                            xtal, xtal
                        )
                    )
                    fileStatus = True
            if not fileStatus:
                self.Logfile.warning(
                    "%s: this does not seem to be an AIMLESS logfile" % xtal
                )
                Filepath = os.path.relpath(os.path.realpath("%s.log" % xtal))
                APpath = Filepath[: Filepath.rfind("/")]
                self.Logfile.insert("%s: relative path to logfile %s" % (xtal, APpath))
                self.Logfile.insert("%s: file path to logfile %s" % (xtal, Filepath))
                if os.path.isdir(APpath):
                    os.chdir(APpath)
                    # only the first unmerged file found is used
                    for unmerged in glob.glob("*_scaled_unmerged.mtz"):
                        self.Logfile.insert(
                            "%s: found %s in %s" % (xtal, unmerged, APpath)
                        )
                        self.run_aimless_merge_only(xtal, unmerged, APpath)
                        fileStatus = True
                        break
                else:
                    self.Logfile.error("%s: %s is not a directory" % (xtal, APpath))
                    self.add_to_errorList(xtal)
                if not fileStatus:
                    # return to the sample directory before the next fallback
                    os.chdir(os.path.join(self.projectDir, xtal))
                    self.Logfile.insert(
                        "%s: trying to prepare a pseudo-aimless file from json file..."
                        % xtal
                    )
                    self.prepare_aimless_log(xtal)
                    if os.path.isfile("aimless_dials.log"):
                        self.Logfile.insert("%s: found aimless_dials.log" % xtal)
                        fileStatus = True
                if not fileStatus:
                    self.Logfile.error(
                        "%s: cannot find a suitable AIMLESS logfile" % xtal
                    )
                    self.add_to_errorList(xtal)
        else:
            self.Logfile.error(
                "%s: cannot find %s.log; moving to next dataset..." % (xtal, xtal)
            )
            self.add_to_errorList(xtal)
        return fileStatus

    def mtzFree_exists(self, xtal):
        """Return True if <xtal>.free.mtz exists in the working directory.

        When the file is missing, xtal is added to the error list and False is
        returned.
        """
        free_mtz = "%s.free.mtz" % xtal
        self.Logfile.insert("%s: checking if %s.free.mtz exists" % (xtal, xtal))

        if not os.path.isfile(free_mtz):
            self.Logfile.error(
                "%s: cannot find %s.free.mtz; moving to next dataset..." % (xtal, xtal)
            )
            self.add_to_errorList(xtal)
            return False

        self.Logfile.insert("%s: found %s.free.mtz" % (xtal, xtal))
        return True

    def ligand_in_pdb_file(self, xtal):
        """Return True if refine.split.bound-state.pdb has residues named LIG.

        When no such residue is found, xtal is added to the error list and
        False is returned.
        """
        self.Logfile.insert(
            "%s: checking if refine.split.bound-state.pdb contains ligands of type LIG"
            % xtal
        )
        ligList = XChemUtils.pdbtools(
            "refine.split.bound-state.pdb"
        ).get_residues_with_resname("LIG")

        # test for emptiness; the identity test "is []" is never true, so the
        # missing-ligand case could not be detected
        if not ligList:
            self.Logfile.error(
                "%s: refine.split.bound-state.pdb does not contain any modelled ligands"
                " of type LIG" % xtal
            )
            self.add_to_errorList(xtal)
            return False

        self.Logfile.insert(
            xtal + ": found " + str(len(ligList)) + " ligands of type LIG"
        )
        return True

    def eventMTZ_exists(self, xtal):
        """Check that event-map MTZ files are available in the working directory.

        Event maps are considered not required (and the check passes) when
        either an empty marker file ``no_pandda_analysis_performed`` exists
        or ``self.ignore_event_map`` is set.  Otherwise at least one file
        matching ``*event*.native*.mtz`` must be present; if none is found
        an error is logged and the crystal is added to the error list.

        Returns True if the check passes, False otherwise.
        """
        self.Logfile.insert("%s: checking if mtz of event maps exists" % xtal)
        # The two "skip" reasons are tested separately so that the log states
        # the real reason; previously the first test also covered
        # ignore_event_map, which made the second branch unreachable.
        if os.path.isfile("no_pandda_analysis_performed"):
            self.Logfile.warning(
                '%s: found empty file named "no_pandda_analysis_performed"'
                " which suggests we will ignore event maps for this sample" % xtal
            )
            return True
        if self.ignore_event_map:
            self.Logfile.warning(
                "%s: user selected to not include event map in SF mmcif file" % xtal
            )
            return True

        eventMTZlist = [
            os.path.basename(mtz) for mtz in glob.glob("*event*.native*.mtz")
        ]
        # Emptiness test; `eventMTZlist is []` was always False, so missing
        # event maps were never reported.
        if not eventMTZlist:
            self.Logfile.error(
                "%s: MTZ files of event maps do not exists!"
                ' Go to PANDDA tab and run "Event Map -> SF"' % xtal
            )
            self.add_to_errorList(xtal)
            return False
        self.Logfile.insert(
            xtal + ": found " + str(len(eventMTZlist)) + " MTZ files of event maps"
        )
        return True

    def find_matching_event_map(self, xtal):
        """Pick one event-map MTZ per modelled LIG residue and fill self.eventList.

        Works on files in the current working directory.  For every ligand
        PDB returned by ``self.pdb.save_residues_with_resname`` the method

        1. takes an already linked map ``<xtal>-event_*.native_<ligID>.mtz``
           if one exists, otherwise
        2. computes the ligand CC against every ``*event*.native.mtz`` and
           ``*event*.native*P1.mtz`` file (via ``get_lig_cc`` /
           ``check_lig_cc``), selects the map with the highest CC and
           symlinks it to ``<map>_<ligID>.mtz``.

        A ligand for which no candidate map exists is logged as an error and
        the crystal is added to the error list.

        Returns:
            True if event maps are ignored for this sample or every ligand
            got a map, False if at least one ligand has no map, None if no
            ligand was processed at all.
        """
        self.eventList = []
        self.Logfile.insert(
            "%s: trying to find fitting event maps for modelled ligands" % xtal
        )
        ligList = self.pdb.save_residues_with_resname(
            os.path.join(self.projectDir, xtal), "LIG"
        )
        foundMatchingMap = None
        if os.path.isfile("no_pandda_analysis_performed"):
            self.Logfile.warning(
                '%s: found empty file named "no_pandda_analysis_performed"'
                " which suggests we will ignore event maps for this sample" % xtal
            )
            foundMatchingMap = True
            ligList = []
        elif self.ignore_event_map:
            self.Logfile.warning(
                "%s: user selected to not include event map in SF mmcif file" % xtal
            )
            foundMatchingMap = True
            ligList = []

        self.Logfile.insert(
            "%s: looking for event maps for the following ligands -> %s"
            % (xtal, str(ligList))
        )
        for lig in sorted(ligList):
            ligID = lig.replace(".pdb", "")

            # 1) a map that was linked to this ligand in an earlier run
            linked = glob.glob("%s-event_*.native_%s.mtz" % (xtal, ligID))
            if linked:
                mtz = linked[0]
                self.Logfile.insert(xtal + ": found " + mtz)
                self.eventList.append(mtz)
                # Decide per ligand: the former shared flag made every ligand
                # after the first match skip the search entirely, and a match
                # here could overwrite an earlier failure.
                if foundMatchingMap is None:
                    foundMatchingMap = True
                continue

            # 2) score every candidate event map against this ligand
            ligCC = []
            for pattern in ("*event*.native.mtz", "*event*.native*P1.mtz"):
                for mtz in sorted(glob.glob(pattern)):
                    self.get_lig_cc(xtal, mtz, lig)
                    cc = self.check_lig_cc(
                        mtz.replace(".mtz", "_CC" + ligID + ".log")
                    )
                    self.Logfile.insert(
                        "%s: %s -> CC = %s for %s" % (xtal, ligID, cc, mtz)
                    )
                    try:
                        ligCC.append([mtz, float(cc)])
                    except ValueError:
                        # check_lig_cc returns "n/a" when no CC could be read
                        ligCC.append([mtz, 0.00])

            for cm in ligCC:
                self.Logfile.insert("%s: cc = %s - %s" % (xtal, cm[1], cm[0]))

            if not ligCC:
                self.Logfile.error(
                    "%s: best CC of ligand %s for any event map is 0!" % (xtal, lig)
                )
                self.add_to_errorList(xtal)
                foundMatchingMap = False
                continue

            # NOTE(review): a map is also selected when every CC is 0.0;
            # confirm whether that case should be treated as an error too.
            highestCCeventmap = max(ligCC, key=lambda x: x[1])[0]
            self.Logfile.insert(
                "%s: selected event map for ligand %s is %s"
                % (xtal, lig, highestCCeventmap)
            )
            linkName = highestCCeventmap.replace(".mtz", "_" + ligID + ".mtz")
            if os.path.isfile(linkName):
                self.Logfile.warning("%s: symlink exists %s" % (xtal, linkName))
            else:
                self.Logfile.insert("%s: making symlink %s" % (xtal, linkName))
                os.system("ln -s %s %s" % (highestCCeventmap, linkName))
            if highestCCeventmap not in self.eventList:
                self.eventList.append(highestCCeventmap)
            if foundMatchingMap is None:
                foundMatchingMap = True
        return foundMatchingMap

    def get_lig_cc(self, xtal, mtz, lig):
        """Run phenix.get_cc_mtz_pdb for one ligand PDB against one event-map MTZ.

        The program output is redirected to ``<mtz stem>_CC<ligID>.log``.
        Nothing is run when that log file already exists.  Returns None.
        """
        cc_log = mtz.replace(".mtz", "_CC" + lig.replace(".pdb", "") + ".log")
        self.Logfile.insert("%s: calculating CC for %s in %s" % (xtal, lig, mtz))
        if not os.path.isfile(cc_log):
            shell_script = (
                "module load phenix/1.20\n"
                "phenix.get_cc_mtz_pdb %s %s > %s" % (mtz, lig, cc_log)
            )
            os.system(shell_script)
            return
        # a previous run already produced the log; do not recompute
        self.Logfile.warning("logfile of CC analysis exists; skipping...")

    def check_lig_cc(self, log):
        """Read the correlation coefficient from a CC log file.

        The value is the last whitespace-separated token of the last line
        that starts with ``local``.  It is returned as a string; ``"n/a"`` is
        returned when the file does not exist (an error is logged) or when
        it contains no such line.
        """
        cc = "n/a"
        if not os.path.isfile(log):
            self.Logfile.error("logfile does not exist: %s" % log)
            return cc
        # context manager so the handle is closed deterministically
        with open(log) as handle:
            for line in handle:
                if line.startswith("local"):
                    cc = line.split()[-1]
        return cc

    def add_to_errorList(self, xtal):
        """Remember *xtal* as a problematic sample (once).

        Names that are empty or consist of spaces only are not stored; a
        warning is logged instead.  Names already in ``self.errorList`` are
        not added again.
        """
        if not xtal.replace(" ", ""):
            self.Logfile.warning(
                "trying to add xtal to error list, but xtal string is empty"
            )
            return
        if xtal in self.errorList:
            return
        self.errorList.append(xtal)

    def print_errorlist(self):
        """Write a summary of all samples in ``self.errorList`` to the log.

        With an empty list a single informational line is written; otherwise
        a warning header followed by one error line per sample.
        """
        if self.errorList:
            self.Logfile.warning(
                "The following samples had problems during mmcif creation. "
                "Please check the logfile for details!"
            )
            for failed_xtal in self.errorList:
                self.Logfile.error(failed_xtal)
            return
        self.Logfile.insert(
            "XCE did not detect any problems during mmcif file preparation. "
            "It is however recommended to check the logfile."
        )

    def save_data_template_dict(self, xtal):
        """Fill in ``self.data_template_dict`` and write data_template.cif.

        Nothing but a log line is produced unless
        ``self.overwrite_existing_mmcif`` is set.  Otherwise the method

        * stores the wavelength reported by ``self.mtz`` (falling back to
          ``<xtal>.free.mtz`` when the value starts with "0.0"),
        * changes into the project directory (ground state) or the crystal
          directory and expands the ``$ProteinName`` / ``$CompoundName`` /
          ``($SampleID)`` placeholders of the title fields,
        * normalises the mutation field and collects the protein chain IDs,
        * renders the template with ``templates().data_template_cif`` and
          writes it to ``data_template.cif``.

        Returns:
            False if the assembled title still contains an unexpanded
            placeholder (the file is written nevertheless), True otherwise.
        """
        noError = True
        self.Logfile.insert("%s: preparing data_template.cif file" % xtal)
        if not self.overwrite_existing_mmcif:
            return noError

        template = self.data_template_dict

        template["radiation_wavelengths"] = self.mtz.get_wavelength()
        if str(template["radiation_wavelengths"]).startswith("0.0"):
            self.Logfile.error(
                "%s: this does not seem to be the true experimental wavelength: %s"
                % (xtal, str(template["radiation_wavelengths"]))
            )
            self.Logfile.insert(
                "%s: trying to find it from %s.free.mtz..." % (xtal, xtal)
            )
            if os.path.isfile("%s.free.mtz" % xtal):
                template["radiation_wavelengths"] = XChemUtils.mtztools(
                    xtal + ".free.mtz"
                ).get_wavelength()
                self.Logfile.warning(
                    "%s: found the following wavelength -> %s"
                    % (xtal, str(template["radiation_wavelengths"]))
                )
        self.Logfile.insert(
            "%s: experimental wavelength according to %s is %s"
            % (xtal, self.mtz, template["radiation_wavelengths"])
        )

        if self.ground_state:
            os.chdir(self.projectDir)
            proteinName = template["Source_organism_gene"]
            template["group_type"] = "ground state"
            template[
                "group_title"
            ] = "PanDDA analysis group deposition of ground-state model"
            template["group_description"] = template["group_description"].replace(
                "$ProteinName", proteinName
            )
            template["title"] = template["structure_title_apo"].replace(
                "$ProteinName", proteinName
            )
        else:
            os.chdir(os.path.join(self.projectDir, xtal))
            proteinName = template["Source_organism_gene"]
            compoundName = self.db_dict["CompoundCode"]

            title = (
                template["structure_title"]
                .replace("$ProteinName", proteinName)
                .replace("$CompoundName", compoundName)
                .replace("($SampleID)", "(" + xtal + ")")
            )

            template["group_type"] = "changed state"
            template["group_title"] = (
                template["group_deposition_title"]
                .replace("$ProteinName", proteinName)
                .replace("$CompoundName", compoundName)
            )
            template["group_description"] = template["group_description"].replace(
                "$ProteinName", proteinName
            )
            template["title"] = template["group_title"] + " -- " + title

            # Both placeholders must be tested individually; the former
            # `("$ProteinName" or "$CompoundName") in title` evaluated to
            # `"$ProteinName" in title` only.
            if any(
                placeholder in template["title"]
                for placeholder in ("$ProteinName", "$CompoundName")
            ):
                self.Logfile.error(
                    "%s: data_template - title not correctly formatted" % xtal
                )
                self.add_to_errorList(xtal)
                noError = False

        # mutations: empty / "none" / "null" become the CIF unknown marker
        mutations = template["fragment_name_one_specific_mutation"]
        if (
            mutations.lower()
            .replace(" ", "")
            .replace("none", "")
            .replace("null", "")
            == ""
        ):
            template["fragment_name_one_specific_mutation"] = "?"
        else:
            template["fragment_name_one_specific_mutation"] = (
                '"' + mutations.replace(" ", "") + '"'
            )

        # protein chains as a comma separated list
        template["protein_chains"] = ",".join(self.pdb.GetProteinChains())

        data_template = templates().data_template_cif(template)
        if self.ground_state:
            cifPath = os.path.join(self.projectDir, "data_template.cif")
        else:
            cifPath = os.path.join(self.projectDir, xtal, "data_template.cif")
        with open(cifPath, "w") as f:
            f.write(data_template)

        return noError

    def create_model_mmcif(self, xtal):
        """Run pdb_extract to create ``<xtal>.mmcif`` and register it in the DB.

        The method changes into the project directory (ground state) or the
        crystal directory, decides which data-processing log to hand to
        pdb_extract, runs the program through the shell, post-processes the
        header with ``update_model_mmcif_header`` and finally checks that
        the resulting file exists and is larger than 20000 bytes.

        The processing log is ``<xtal>.log`` (or the log of the ground-state
        reference dataset).  If it does not contain the word AIMLESS and does
        not resolve to a ``.table1`` file, ``aimless_dials.log`` is used
        instead, created from the json file when missing.

        Returns True if the mmcif file was created, False otherwise (the
        crystal is then added to the error list).
        """
        fileStatus = False
        if self.ground_state:
            os.chdir(os.path.join(self.projectDir))
        else:
            os.chdir(os.path.join(self.projectDir, xtal))
        refSoft = self.pdb.get_refinement_program()

        pdb_extract_init = (
            "source /dls/science/groups/i04-1/software/pdb-extract-prod/setup.sh\n"
            "pdb_extract"
        )

        if self.ground_state:
            # the reference crystal is the directory containing the PDB file
            pathParts = self.ground_state_pdb.split("/")
            refXtal = pathParts[len(pathParts) - 2]
            self.Logfile.insert(
                "ground_state deposition reference dataset: {0!s}".format(refXtal)
            )
            aimless = os.path.join(self.logDir, refXtal, refXtal + ".log")
        else:
            aimless = "%s.log" % xtal

        with open(aimless) as logHandle:
            isAimlessFile = any("AIMLESS" in line for line in logHandle)
        if not isAimlessFile:
            self.Logfile.warning(
                "processing log file does not seem to be an aimless file: {0!s}".format(
                    aimless
                )
            )
            self.Logfile.insert(
                "realpath of processing logfile: {0!s}".format(
                    os.path.realpath(aimless)
                )
            )
            if os.path.realpath(aimless).endswith(".table1"):
                self.Logfile.warning(
                    "{0!s}: {1!s}.log seems to be a staraniso .table1 file".format(
                        xtal, xtal
                    )
                )
                isAimlessFile = True
            else:
                # only reported when the staraniso test failed as well
                self.Logfile.warning(
                    "it does not seem to originate from staraniso either"
                )
        if not isAimlessFile:
            if not os.path.isfile("aimless_dials.log"):
                XChemUtils.parse().make_pseudo_aimless_log_from_json(aimless)
            aimless = "aimless_dials.log"

        if self.ground_state:
            self.Logfile.insert("aimless.log file: " + aimless)
            modelPDB = self.ground_state_pdb
        else:
            modelPDB = "refine.split.bound-state.pdb"

        Cmd = (
            pdb_extract_init
            + " -r {0!s}".format(refSoft)
            + " -iPDB {0!s}".format(modelPDB)
            + " -e MR"
            " -s AIMLESS"
            " -iLOG {0!s}".format(aimless) + " -iENT data_template.cif"
            " -o {0!s}.mmcif > {1!s}.mmcif.log".format(xtal, xtal)
        )

        self.Logfile.insert(xtal + ": running pdb_extract: " + Cmd)
        os.system(Cmd)

        self.update_model_mmcif_header(xtal)

        if os.path.isfile(xtal + ".mmcif") and os.path.getsize(xtal + ".mmcif") > 20000:
            self.Logfile.insert("%s: model mmcif file successfully created" % xtal)
            if self.ground_state:
                self.db.execute_statement(
                    "update depositTable set mmCIF_model_file='{0!s}.mmcif'"
                    " where CrystalName is '{1!s}'"
                    " and DimplePANDDApath is '{2!s}'".format(
                        xtal, xtal, self.panddaDir
                    )
                )
            else:
                self.db.execute_statement(
                    "update depositTable set mmCIF_model_file='{0!s}.mmcif'"
                    " where CrystalName is '{1!s}'".format(xtal, xtal)
                )
            fileStatus = True
        else:
            self.Logfile.error(
                "%s: model mmcif file was not created successfully" % xtal
            )
            self.add_to_errorList(xtal)

        return fileStatus

    def update_model_mmcif_header(self, xtal):
        """Patch selected header items of ``<xtal>.mmcif`` in place.

        The file is rewritten line by line through ``fileinput`` in in-place
        mode, i.e. everything written to ``sys.stdout`` inside the loop ends
        up in the file.  Lines are copied unchanged except for:

        * ``_refine.pdbx_ls_cross_valid_method``      -> THROUGHOUT
        * ``_refine.pdbx_starting_model``             -> value taken from
          ``self.data_template_dict["pdbx_starting_model"]``
        * ``_refine.pdbx_method_to_determine_struct`` -> 'FOURIER SYNTHESIS'
        * a ``_struct.title      ---`` line            -> the title block
          copied from ``data_template.cif`` in the working directory
        * the line containing ``#`` that follows the ``_software`` entries ->
          two extra software rows (data reduction and phasing programs)
        """
        self.Logfile.insert("%s: updating header of model mmcif file" % xtal)
        # True while the loop is inside the _software category
        foundSoftwareBlock = False
        # True once the end of the _software category was seen and the extra
        # rows still have to be written
        amendSoftwareBlock = False
        # leading integers of the software rows seen so far
        softwareEntry = []
        # inplace=1: sys.stdout is redirected into the file being read
        for i, line in enumerate(fileinput.input(xtal + ".mmcif", inplace=1)):
            if "_software.pdbx_ordinal" in line:
                foundSoftwareBlock = True
            if foundSoftwareBlock:
                if not line.startswith("_"):
                    # data row: remember its first token if it is an integer
                    try:
                        softwareEntry.append(int(line.split()[0]))
                    except (ValueError, IndexError):
                        pass
                if "#" in line:
                    # a line containing '#' is taken as the end of the category
                    amendSoftwareBlock = True
                    foundSoftwareBlock = False
            if "_refine.pdbx_ls_cross_valid_method" in line:
                sys.stdout.write(
                    "_refine.pdbx_ls_cross_valid_method               THROUGHOUT \n"
                )

            elif "_refine.pdbx_starting_model" in line:
                sys.stdout.write(
                    "_refine.pdbx_starting_model                      {0!s} \n".format(
                        self.data_template_dict["pdbx_starting_model"]
                    )
                )

            elif "_refine.pdbx_method_to_determine_struct" in line:
                sys.stdout.write(
                    "_refine.pdbx_method_to_determine_struct"
                    "          'FOURIER SYNTHESIS'\n"
                )
            elif "_struct.title      ---" in line:
                # copy the title block from data_template.cif: everything from
                # the _struct.title line up to and including the first line
                # that consists of a single ';'
                Title = ""
                foundTitle = False
                for li in open("data_template.cif"):
                    if li.startswith("_struct.title"):
                        foundTitle = True
                    if foundTitle:
                        if (
                            li.replace(" ", "").replace("\n", "").replace("\r", "")
                            == ";"
                        ):
                            Title += li
                            break
                        Title += li
                sys.stdout.write(Title)
            elif amendSoftwareBlock:
                # NOTE(review): the current line is not written in this
                # branch, so the line that closed the software category is
                # replaced by the two rows below - confirm this is intended.
                # max() raises ValueError if no numbered row was collected.
                cifItem = "{0!s} {1!s} ? ? program ? ? 'data reduction' ? ?\n".format(
                    str(max(softwareEntry) + 1),
                    self.data_template_dict["data_integration_software"],
                ) + "{0!s} {1!s} ? ? program ? ? phasing ? ?\n".format(
                    str(max(softwareEntry) + 2),
                    self.data_template_dict["phasing_software"],
                )

                sys.stdout.write(cifItem)
                amendSoftwareBlock = False

            else:
                sys.stdout.write(line)

    def add_funding_information(self, xtal):
        """Append a ``_pdbx_audit_support`` loop to ``<xtal>.mmcif``.

        Up to three funding records are read from ``self.data_template_dict``
        (key suffixes ``_one``, ``_two``, ``_three``).  A record whose
        ordinal is empty, "none" or "null" (case-insensitive, spaces
        ignored) is skipped and its four dictionary fields are reset to
        empty strings.  The file is opened in append mode relative to the
        current working directory.
        """
        template = self.data_template_dict
        funding_rows = []
        for suffix in ("one", "two", "three"):
            ordinal_key = "pdbx_funding_ordinal_" + suffix
            organization_key = "pdbx_funding_organization_" + suffix
            number_key = "pdbx_grant_number_" + suffix
            country_key = "pdbx_grant_country_" + suffix

            ordinal = template[ordinal_key]
            if (
                ordinal.lower().replace(" ", "").replace("none", "").replace("null", "")
                == ""
            ):
                # no such funding record: blank all of its fields
                for key in (ordinal_key, organization_key, number_key, country_key):
                    template[key] = ""
                continue
            funding_rows.append(
                "%s '%s' '%s' '%s'\n"
                % (
                    template[ordinal_key],
                    template[organization_key],
                    template[number_key],
                    template[country_key],
                )
            )

        # NOTE(review): the loop header is written even when there is no
        # funding record at all - confirm downstream tools accept that.
        funding_info = (
            "#\n"
            "loop_\n"
            "_pdbx_audit_support.ordinal                   \n"
            "_pdbx_audit_support.funding_organization      \n"
            "_pdbx_audit_support.grant_number              \n"
            "_pdbx_audit_support.country                   \n"
            + "".join(funding_rows)
            + "#\n"
        )

        with open(xtal + ".mmcif", "a") as f:
            f.write(funding_info)

    def add_ligand_cif_to_model_mmcif(self, xtal):
        """Append the ligand restraints dictionary to ``<xtal>.mmcif``.

        Works inside ``<projectDir>/<xtal>``.  If the file named by
        ``self.db_dict["RefinementMMCIFmodel_latest"]`` exists, the ligand
        dictionary is assumed to be part of it already and nothing is
        appended.  Otherwise ``<CompoundCode>.cif`` is appended to the model
        mmcif file.

        Returns True in both of these cases, False (with a warning) when the
        restraints file cannot be found.
        """
        self.Logfile.insert("%s: looking for ligand restraints file..." % xtal)
        os.chdir(os.path.join(self.projectDir, xtal))
        if os.path.isfile(self.db_dict["RefinementMMCIFmodel_latest"]):
            self.Logfile.insert(
                "%s: found %s; assuming that ligand cif dictionary"
                " is already included..."
                % (xtal, self.db_dict["RefinementMMCIFmodel_latest"])
            )
            return True

        restraints = self.db_dict["CompoundCode"] + ".cif"
        if not os.path.isfile(restraints):
            self.Logfile.warning("%s: could not find %s" % (xtal, restraints))
            return False

        self.Logfile.insert(
            "%s: found ligand restraints file -> %s" % (xtal, restraints)
        )
        self.Logfile.insert("%s: adding ligand restraints file to model mmcif" % xtal)
        # both handles are closed deterministically, also on errors
        with open(restraints) as source:
            cif = source.read()
        with open(xtal + ".mmcif", "a") as target:
            target.write(cif)
        return True

    def make_table_one(self, xtal):
        """Create ``<xtal>-table-1.txt`` from the model mmcif file.

        Changes into ``<projectDir>/<xtal>``.  Nothing else happens unless
        ``<xtal>.mmcif`` exists and is larger than 20000 bytes.  In that case
        ``extract_table`` is run through the shell; if it produced
        ``cryst-table-1.out`` the file is renamed and its name is stored in
        the ``table_one`` column of mainTable, otherwise a warning is logged.
        """
        os.chdir(os.path.join(self.projectDir, xtal))
        model_mmcif = xtal + ".mmcif"
        if not os.path.isfile(model_mmcif) or os.path.getsize(model_mmcif) <= 20000:
            return

        self.Logfile.insert("making table_1 for %s.mmcif" % xtal)
        Cmd = (
            "source /dls/science/groups/i04-1/software/pdb-extract-prod/setup.sh\n"
            "extract_table " + model_mmcif
        )
        self.Logfile.insert(xtal + ": running sf_convert: " + Cmd)
        os.system(Cmd)

        if not os.path.isfile("cryst-table-1.out"):
            self.Logfile.warning("%s: could not create table_1" % xtal)
            return

        os.system("/bin/mv cryst-table-1.out %s-table-1.txt" % xtal)
        self.Logfile.insert(
            "%s: table_1 successfully created; updating database..." % xtal
        )
        self.db.execute_statement(
            "update mainTable set table_one='{0!s}-table-1.txt'"
            " where CrystalName is '{1!s}'".format(xtal, xtal)
        )

    def create_sf_mmcif(self, xtal):
        """Run sf_convert to create ``<xtal>_sf.mmcif`` and register it in the DB.

        Works in the project directory (ground state) or the crystal
        directory.  The structure-factor input is ``refine.mtz`` only when
        event maps are ignored (marker file ``no_pandda_analysis_performed``
        or ``self.ignore_event_map``); otherwise ``refine.mtz``,
        ``<xtal>.free.mtz`` and every file in ``self.eventList``.  After the
        conversion the scratch files of sf_convert are removed, the result
        is post-processed by ``update_sf_mmcif_file`` and must be larger
        than 20000 bytes to count as success.

        Returns True on success, False otherwise (the crystal is then added
        to the error list).
        """
        fileStatus = False
        if self.ground_state:
            os.chdir(self.projectDir)
        else:
            os.chdir(os.path.join(self.projectDir, xtal))

        if os.path.isfile("no_pandda_analysis_performed") or self.ignore_event_map:
            mtzin = "refine.mtz "
        else:
            mtzin = "refine.mtz " + xtal + ".free.mtz "
            mtzin += "".join(event + " " for event in self.eventList)

        pdb_extract_init = (
            "source /dls/science/groups/i04-1/software/pdb-extract-prod/setup.sh\n"
            "sf_convert"
        )

        Cmd = (
            pdb_extract_init + " -o mmcif"
            " -sf %s" % mtzin
            + " -out {0!s}_sf.mmcif  > {1!s}.sf_mmcif.log".format(xtal, xtal)
        )

        self.Logfile.insert(xtal + ": running sf_convert: " + Cmd)
        os.system(Cmd)

        # best-effort removal of sf_convert scratch files; a file that was
        # never written is not an error
        for scratch in (
            "sf_format_guess.text",
            "mtzdmp.log",
            "SF_4_validate.cif",
            "sf_information.cif",
        ):
            try:
                os.remove(scratch)
            except OSError:
                pass

        self.update_sf_mmcif_file(xtal)

        sf_mmcif = xtal + "_sf.mmcif"
        if os.path.isfile(sf_mmcif) and os.path.getsize(sf_mmcif) > 20000:
            self.Logfile.insert("%s: SF mmcif file successfully created" % xtal)
            if self.ground_state:
                self.db.execute_statement(
                    "update depositTable set mmCIF_SF_file='{0!s}_sf.mmcif'"
                    " where CrystalName is '{1!s}'"
                    " and DimplePANDDApath is '{2!s}'".format(
                        xtal, xtal, self.panddaDir
                    )
                )
            else:
                self.db.execute_statement(
                    "update depositTable set mmCIF_SF_file='{0!s}_sf.mmcif'"
                    " where CrystalName is '{1!s}'".format(xtal, xtal)
                )
            fileStatus = True
        else:
            self.Logfile.error("%s: SF mmcif file was not created successfully" % xtal)
            self.add_to_errorList(xtal)

        return fileStatus

    def apo_mmcif_exists(self):
        """Check that enough apo mmcif files exist below the PanDDA directory.

        Counts the files matching
        ``<panddaDir>/processed_datasets/*/*.mmcif``.  Fewer than 40 files
        are reported as an error together with a hint on how to create
        them.

        Returns True if at least 40 files were found, False otherwise.
        """
        self.Logfile.insert("checking if mmcif files of apo structures exist")
        pattern = os.path.join(self.panddaDir, "processed_datasets", "*", "*.mmcif")
        n_apo = sum(1 for candidate in glob.glob(pattern) if os.path.isfile(candidate))
        enough = n_apo >= 40
        if enough:
            self.Logfile.insert("found %s apo mmcif files; seems OK!" % str(n_apo))
        else:
            self.Logfile.error("found only %s apo mmcif files" % str(n_apo))
            self.Logfile.warning('you may need to run "PanDDA tab"/"apo -> mmcif"')
        return enough

    def add_apo_sf_mmcif_to_ground_state_mmcif(self):
        """Concatenate all apo SF mmcif files into ground_state_sf.mmcif.

        The output is written to ``<projectDir>/ground_state_sf.mmcif``
        (the working directory is changed to ``projectDir``).  Every
        directory ``<panddaDir>/processed_datasets/<xtal>`` that contains
        ``<xtal>_sf.mmcif`` contributes its file; the dataset that the
        ground-state PDB file belongs to comes first.  After each
        ``_cell.angle_gamma`` line a ``_diffrn`` record is inserted which
        carries a running crystal number and the soaked compound SMILES
        from the database ("none" if empty, "none" or "null").

        Always returns True.
        """
        os.chdir(self.projectDir)
        self.Logfile.insert(
            "checking pandda directory for apo mmcif files: " + self.panddaDir
        )

        # the reference crystal is the directory containing the PDB file
        pathParts = self.ground_state_pdb.split("/")
        refXtal = pathParts[len(pathParts) - 2]

        processedDir = os.path.join(self.panddaDir, "processed_datasets")
        # make sure that mmcif belonging to ref PDB file is first
        xtalList = [refXtal]
        for dirs in glob.glob(os.path.join(processedDir, "*")):
            xtal = dirs[dirs.rfind("/") + 1 :]
            if xtal not in xtalList:
                xtalList.append(xtal)

        # number given to the next inserted _diffrn record (1-based)
        counter = 1
        with open("ground_state_sf.mmcif", "w") as f:
            for xtal in xtalList:
                dirs = os.path.join(processedDir, xtal)
                # this is needed in case single files are in processed_datasets
                if not os.path.isdir(dirs):
                    continue
                self.Logfile.insert(
                    "%s: reading saoked compound information from database" % xtal
                )
                xtalDict = self.db.get_db_dict_for_sample(xtal)
                compactSmiles = xtalDict["CompoundSMILES"].replace(" ", "")
                lowered = compactSmiles.lower()
                if lowered == "" or "none" in lowered or "null" in lowered:
                    smiles = "none"
                else:
                    smiles = compactSmiles
                self.Logfile.insert("%s: compound SMILES -> %s" % (xtal, smiles))

                sfFile = os.path.join(dirs, xtal + "_sf.mmcif")
                if not os.path.isfile(sfFile):
                    continue
                self.Logfile.insert(
                    "adding %s_sf.mmcif to ground-state_sf.mmcif" % xtal
                )
                with open(sfFile) as source:
                    for line in source:
                        f.write(line)
                        if line.startswith("_cell.angle_gamma"):
                            f.write(
                                "#\n"
                                "_diffrn.id                  1\n"
                                '_diffrn.details             "diffraction data'
                                ' from crystal %s; soaked compound: %s"\n'
                                % (
                                    str(counter),
                                    smiles.replace("\n", "").replace("\r", ""),
                                )
                            )
                            counter += 1
        # counter is one ahead of the number of records written; the former
        # message reported counter itself and was therefore off by one.
        # NOTE(review): this equals the number of files only if each file
        # has exactly one _cell.angle_gamma line.
        self.Logfile.insert(
            "added %s apo mmcif files to ground-state mmcif" % str(counter - 1)
        )
        return True

    def add_data_increment_to_apo_mmcif(self, xtal):
        """Give every data block in ground_state_sf.mmcif a unique name.

        Operates on ``<panddaDir>/ground_state_sf.mmcif``.  The first
        ``data_rxxxxsf`` line is kept; in every later one ``xsf`` is
        replaced by ``s<suffix>sf`` where the suffix runs through
        A..Z, AA..ZA, AB..ZB, ...  The working directory is changed to
        ``panddaDir``.  A resulting file larger than 20000 bytes is
        registered in depositTable; otherwise an error is logged and *xtal*
        is added to the error list.

        Always returns True.
        """
        self.Logfile.insert("inrementing data_rxxxxsf in ground-state_sf.mmcif")
        # index 0 is the empty string so that unused positions add nothing
        x = [""] + list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
        a = 0
        b = 0
        c = 0

        foundFirstLine = False
        datasetCounter = 0
        sfMmcif = os.path.join(self.panddaDir, "ground_state_sf.mmcif")
        # The temporary file is created next to the file it replaces.  It
        # used to be written to the current directory but moved from
        # panddaDir, which only worked when both were the same directory.
        tmpMmcif = os.path.join(self.panddaDir, "ground_state_sf_tmp.mmcif")
        if os.path.isfile(sfMmcif):
            with open(sfMmcif) as source, open(tmpMmcif, "w") as f:
                for line in source:
                    if not line.startswith("data_rxxxxsf"):
                        f.write(line)
                        continue
                    if not foundFirstLine:
                        foundFirstLine = True
                        a += 1
                        f.write(line)
                        continue
                    # NOTE(review): c is never wrapped, so x[c] raises
                    # IndexError once all three positions are exhausted.
                    if a == len(x):
                        a = 1
                        b += 1
                    if b == len(x):
                        a = 1
                        b = 1
                        c += 1
                    newLine = line.replace("xsf", "s%ssf" % str(x[a] + x[b] + x[c]))
                    datasetCounter += 1
                    f.write(newLine)
                    a += 1
                    self.Logfile.insert(
                        "new dataset block: %s -> %s"
                        % (
                            str(datasetCounter),
                            newLine.replace("\n", "").replace("\r", ""),
                        )
                    )
            os.rename(tmpMmcif, sfMmcif)
        os.chdir(self.panddaDir)

        if (
            os.path.isfile("ground_state_sf.mmcif")
            and os.path.getsize("ground_state_sf.mmcif") > 20000
        ):
            self.Logfile.insert("ground_state: SF mmcif file successfully created")
            self.db.execute_statement(
                "update depositTable set mmCIF_SF_file='ground_state_sf.mmcif'"
                " where CrystalName is 'ground_state'"
                " and DimplePANDDApath is '{0!s}'".format(self.panddaDir)
            )
        else:
            self.Logfile.error("%s: SF mmcif file was not created successfully" % xtal)
            self.add_to_errorList(xtal)

        return True

    def event_maps_exist_in_sf_mmcif(self, xtal):
        """Check that <xtal>_sf.mmcif contains one data block per event map.

        The file is read from the current working directory.  Every line
        starting with ``_refln.crystal_id`` counts as one data block; the
        first two blocks are initial/final.mtz and data.mtz, so the number of
        event map blocks is the count minus two.  That number must equal
        ``len(self.eventList)``.

        The check is skipped (and passes) when a file called
        ``no_pandda_analysis_performed`` exists in the current directory or
        ``self.ignore_event_map`` is set.  On a mismatch ``xtal`` is added to
        the error list.

        :param xtal: crystal name
        :return: True if the check passed or was skipped, False otherwise
        """
        if os.path.isfile("no_pandda_analysis_performed") or self.ignore_event_map:
            self.Logfile.warning(
                "%s: no pandda analysis performed; skipping this step..." % xtal
            )
            return True

        # read inside a with-block so the file handle is always released
        with open(xtal + "_sf.mmcif") as sf_file:
            n_blocks = sum(
                1 for line in sf_file if line.startswith("_refln.crystal_id")
            )
        # first two data blocks are initial/final.mtz and data.mtz
        n_eventMTZ_found = n_blocks - 2

        if n_eventMTZ_found == len(self.eventList):
            self.Logfile.insert(
                "%s: %s_sf.mmcif should contains %s of %s event maps"
                % (xtal, xtal, n_eventMTZ_found, len(self.eventList))
            )
            return True

        self.Logfile.error(
            "%s: %s_sf.mmcif should contains only %s of %s event maps"
            % (xtal, xtal, n_eventMTZ_found, len(self.eventList))
        )
        self.add_to_errorList(xtal)
        return False

    def update_sf_mmcif_file(self, xtal):
        """Annotate the data blocks of <xtal>_sf.mmcif in place.

        The file is read from the current working directory.  A new data block
        is counted at every line starting with ``_cell.length_a``.  After each
        ``_cell.angle_gamma`` line a ``_diffrn.id`` / ``_diffrn.details`` pair
        is inserted whose text describes the block: final.mtz, data.mtz, and
        for the third and later blocks an event map numbered from 1.  Every
        ``_diffrn_radiation_wavelength.wavelength`` line is replaced with the
        value of ``self.data_template_dict["radiation_wavelengths"]``.

        Nothing is rewritten when a file called
        ``no_pandda_analysis_performed`` exists in the current directory or
        ``self.ignore_event_map`` is set.

        :param xtal: crystal name
        :return: None
        """
        self.Logfile.insert("%s: updating %s_sf.mmcif" % (xtal, xtal))

        if self.ground_state:
            bound = ["data for PanDDA ground-state-mean-map"]
        else:
            bound = [
                "data from final refinement with ligand, final.mtz",
                "data from original reflections, data.mtz",
                "data for ligand evidence map (PanDDA event map), event_map_$.mtz",
            ]

        wavelength = str(self.data_template_dict["radiation_wavelengths"])
        self.Logfile.insert(
            "%s: reading wavelength from mtz file; lambda = %s" % (xtal, wavelength)
        )

        if os.path.isfile("no_pandda_analysis_performed") or self.ignore_event_map:
            self.Logfile.warning(
                "%s: apparently not a pandda deposition; will skip this step..." % xtal
            )
            return None

        block = -1
        # with inplace=1 fileinput redirects sys.stdout into the file; the
        # finally clause guarantees that stdout is restored and the module
        # level fileinput state is released even if the loop raises
        try:
            for line in fileinput.input(xtal + "_sf.mmcif", inplace=1):
                if line.startswith("_cell.length_a"):
                    block += 1

                if line.startswith("_cell.angle_gamma"):
                    # third and all later blocks share the event map text
                    n = min(block, 2)
                    sys.stdout.write(line)
                    newLines = (
                        "#\n"
                        "_diffrn.id                  1\n"
                        '_diffrn.details             "%s"\n' % bound[n]
                    ).replace("$", str(block - 1))
                    sys.stdout.write(newLines)
                elif line.startswith("_diffrn_radiation_wavelength.wavelength"):
                    sys.stdout.write(
                        "_diffrn_radiation_wavelength.wavelength   {0!s}\n".format(
                            wavelength
                        )
                    )
                else:
                    sys.stdout.write(line)
        finally:
            fileinput.close()


class prepare_for_group_deposition_upload(QtCore.QThread):
    """Thread that bundles deposition mmCIF files into <type>_structures.tar.bz2.

    The model and structure factor mmCIF files listed in depositTable are
    copied into ``depositDir`` together with an ``index.txt`` describing them,
    packed into a bzip2 compressed tar archive and removed again.

    :param database: passed to XChemDB.data_source
    :param xce_logfile: passed to XChemLog.updateLog
    :param depositDir: directory in which the archive is created
    :param projectDir: directory containing one sub-directory per crystal
    :param type: "ligand_bound" or "ground_state"; anything else makes run()
        return without doing any work
    """

    def __init__(self, database, xce_logfile, depositDir, projectDir, type):
        QtCore.QThread.__init__(self)
        self.Logfile = XChemLog.updateLog(xce_logfile)
        self.db = XChemDB.data_source(database)
        self.depositDir = depositDir
        self.projectDir = projectDir
        # 'type' shadows the builtin, but the name is part of the signature
        self.type = type

    def run(self):
        """Stage the mmCIF files, write index.txt and build the archive.

        Changes the working directory of the process to ``self.depositDir``.
        """
        os.chdir(self.depositDir)

        toDeposit = self._select_entries()
        if toDeposit is None:
            return

        TextIndex = ""
        for n, item in enumerate(sorted(toDeposit)):
            TextIndex += self._stage_entry(n, item)

        with open("index.txt", "w") as f:
            f.write(TextIndex)

        self._move_existing_archive()

        # self.type is one of two fixed strings at this point, so the shell
        # commands below contain no externally supplied text
        self.Logfile.insert("preparing tar archive...")
        os.system("tar -cvf {0!s}_structures.tar *mmcif index.txt".format(self.type))
        self.Logfile.insert("bzipping archive...")
        os.system("bzip2 {0!s}_structures.tar".format(self.type))
        self.Logfile.insert(
            "removing all bound mmcif files and index.txt file from " + self.depositDir
        )
        os.system("/bin/rm -f *mmcif index.txt")
        self.Logfile.insert("done!")

    def _select_entries(self):
        """Return the depositTable rows to pack, or None for an unknown type."""
        if self.type == "ligand_bound":
            self.Logfile.insert(
                "checking depositionTable for mmcif files of ligand-bound structures"
            )
            depositList = self.db.execute_statement(
                "select CrystalName from mainTable where RefinementOutcome like '5%';"
            )
            conditions = []
            for item in depositList:
                xtal = str(item[0])
                self.Logfile.insert(
                    "%s: adding mmcif files to final tar.bz2 file" % xtal
                )
                conditions.append("CrystalName = '" + xtal + "'")
            if not conditions:
                # an empty condition list would produce the invalid SQL "and )"
                self.Logfile.warning(
                    "no crystals found with RefinementOutcome like '5%'"
                )
                return []
            return self.db.execute_statement(
                "select CrystalName,mmCIF_model_file,mmCIF_SF_file,DimplePANDDApath"
                " from depositTable where StructureType is 'ligand_bound' and (%s);"
                % " or ".join(conditions)
            )

        if self.type == "ground_state":
            self.Logfile.insert(
                "checking depositionTable for mmcif files of ground-state structures"
            )
            return self.db.execute_statement(
                "select CrystalName,mmCIF_model_file,mmCIF_SF_file,DimplePANDDApath"
                " from depositTable where StructureType is 'ground_state';"
            )

        return None

    def _stage_entry(self, n, item):
        """Copy the two mmCIF files of one row here; return its index.txt text.

        Returns an empty string when the files are missing or cannot be
        copied, so that index.txt never refers to a file that is not part of
        the archive.
        """
        import shutil

        xtal = str(item[0])
        if self.type == "ligand_bound":
            source_dir = os.path.join(self.projectDir, xtal)
        else:
            source_dir = str(item[3])
        mmcif = os.path.join(source_dir, str(item[1]))
        mmcif_sf = os.path.join(source_dir, str(item[2]))

        self.Logfile.insert("%s: %s/ %s" % (xtal, mmcif, mmcif_sf))
        if not (os.path.isfile(mmcif) and os.path.isfile(mmcif_sf)):
            self.Logfile.error("cannot find mmcif file for " + xtal)
            return ""

        model_name = os.path.basename(mmcif)
        sf_name = os.path.basename(mmcif_sf)
        if self.type == "ground_state":
            # all ground-state files share one name, so number them by row
            model_name = model_name.replace(
                "ground_state.mmcif", "ground_state_{0!s}.mmcif".format(str(n))
            )
            sf_name = sf_name.replace(
                "ground_state_sf.mmcif",
                "ground_state_{0!s}_sf.mmcif".format(str(n)),
            )

        try:
            # copy straight to the final name instead of building shell
            # commands from file paths
            for source, target in ((mmcif, model_name), (mmcif_sf, sf_name)):
                self.Logfile.insert(
                    "copying {0!s} to {1!s}".format(source, self.depositDir)
                )
                shutil.copy(source, target)
        except (IOError, OSError, shutil.Error) as error:
            self.Logfile.error(
                "%s: could not copy mmcif files: %s" % (xtal, str(error))
            )
            return ""

        return (
            "label: {0!s}-{1!s}\n".format(xtal, self.type)
            + "description: {0!s} structure of {1!s}\n".format(self.type, xtal)
            + "model: {0!s}\n".format(model_name)
            + "sf: {0!s}\n\n".format(sf_name)
        )

    def _move_existing_archive(self):
        """Rename an existing archive to the next free numbered backup name."""
        if not os.path.isfile("%s_structures.tar.bz2" % self.type):
            return

        # checking of tar.bz2 files exisit
        fileList = []
        for i in sorted(glob.glob("%s_structures.tar.bz2.*" % self.type)):
            suffix = i[i.rfind(".") + 1 :]
            # ignore files whose extension is not a backup number
            if suffix.isdigit():
                fileList.append(int(suffix))

        if fileList == []:
            self.Logfile.warning(
                "moving existing %s_structures.tar.bz2 to %s_structures.tar.bz2.1"
                % (self.type, self.type)
            )
            os.system(
                "/bin/mv %s_structures.tar.bz2 %s_structures.tar.bz2.1"
                % (self.type, self.type)
            )
        else:
            self.Logfile.warning(
                "moving existing %s_structures.tar.bz2 %s_structures.tar.bz2.%s"
                % (self.type, self.type, str(max(fileList) + 1))
            )
            os.system(
                "/bin/mv %s_structures.tar.bz2 %s_structures.tar.bz2.%s"
                % (self.type, self.type, str(max(fileList) + 1))
            )


class import_PDB_IDs(QtCore.QThread):
    """Thread that stores PDB IDs of deposited ligand-bound structures.

    ``pdbCodes`` is a block of text with one entry per line.  A line is used
    when it contains exactly one "/" and the marker "-ligand_bound": the text
    before the last "-ligand_bound" is the crystal name and the text after the
    "/" is the PDB ID.  All other lines are ignored.

    :param pdbCodes: text to parse
    :param database: passed to XChemDB.data_source
    :param xce_logfile: passed to XChemLog.updateLog
    """

    def __init__(self, pdbCodes, database, xce_logfile):
        QtCore.QThread.__init__(self)
        self.pdbCodes = pdbCodes
        self.Logfile = XChemLog.updateLog(xce_logfile)
        self.db = XChemDB.data_source(database)

    def run(self):
        """Set Deposition_PDB_ID and RefinementOutcome for every parsed line."""
        for line in self.pdbCodes.split("\n"):
            fields = line.split("/")
            if len(fields) != 2 or "-ligand_bound" not in line:
                continue
            # split/join drops every whitespace character, including the
            # carriage return left behind by Windows line endings
            xtal = "".join(line[: line.rfind("-ligand_bound")].split())
            pdbID = "".join(fields[1].split())
            if not xtal or not pdbID:
                # never flag a crystal as deposited without a PDB ID
                continue
            self.Logfile.insert("setting PDB ID for " + xtal + " to " + pdbID)
            # the values come from pasted text: double single quotes so they
            # cannot terminate the SQL string literal
            sqlite = (
                "UPDATE mainTable SET Deposition_PDB_ID='{0!s}',"
                "RefinementOutcome='6 - Deposited'"
                " where CrystalName is '{1!s}';".format(
                    pdbID.replace("'", "''"), xtal.replace("'", "''")
                )
            )
            self.db.execute_statement(sqlite)


class compare_smiles_in_db_with_ligand_in_pdb(QtCore.QThread):
    """Thread that compares ligands in refine.pdb with the SMILES in the DB.

    For every entry of ``projectDir`` that contains a ``refine.pdb`` the
    element counts of each ligand in that file are compared with the element
    counts derived from the CompoundSmiles stored in mainTable.  Mismatches
    are logged and collected in ``self.ErrorDict`` (crystal name -> list of
    messages), which is emitted with the "show_error_dict" signal at the end.

    :param projectDir: directory containing one sub-directory per crystal
    :param database: passed to XChemDB.data_source
    :param xce_logfile: passed to XChemLog.updateLog
    """

    def __init__(self, projectDir, database, xce_logfile):
        QtCore.QThread.__init__(self)
        self.projectDir = projectDir
        self.Logfile = XChemLog.updateLog(xce_logfile)
        self.db = XChemDB.data_source(database)
        self.ErrorDict = {}

    def update_ErrorDict(self, xtal, message):
        """Append ``message`` to the list of problems recorded for ``xtal``."""
        if xtal not in self.ErrorDict:
            self.ErrorDict[xtal] = []
        self.ErrorDict[xtal].append(message)

    def run(self):
        """Check every crystal directory and report progress via signals.

        Changes the working directory of the process to ``self.projectDir``.
        """
        os.chdir(self.projectDir)

        # list the directory once so that the progress increment matches the
        # entries that are actually visited
        entries = sorted(glob.glob("*"))
        if entries:
            progress_step = 100 / float(len(entries))
        else:
            progress_step = 1
        progress = 0
        self.emit(QtCore.SIGNAL("update_progress_bar"), progress)

        for xtal in entries:
            if os.path.isfile(os.path.join(xtal, "refine.pdb")):
                self._check_crystal(xtal)

            progress += progress_step
            self.emit(QtCore.SIGNAL("update_progress_bar"), progress)

        self.emit(QtCore.SIGNAL("show_error_dict"), self.ErrorDict)

    def _check_crystal(self, xtal):
        """Compare all ligands in <xtal>/refine.pdb with the SMILES in the DB."""
        smiles = self.db.execute_statement(
            "select CompoundSmiles,CompoundCode from mainTable"
            " where CrystalName is '{0!s}'".format(xtal)
        )
        try:
            LigandSmiles = str(smiles[0][0])
            LigandCode = str(smiles[0][1])
            elementDict_smiles = XChemUtils.smilestools(LigandSmiles).ElementDict()
        except IndexError:
            self.Logfile.error(
                "{0!s}: something is seems to be wrong"
                " with the CompoundCode or SMILES string: {1!s}".format(
                    xtal, str(smiles)
                )
            )
            return

        pdb = XChemUtils.pdbtools(os.path.join(xtal, "refine.pdb"))
        for ligand in pdb.ligand_details_as_list():
            resname = ligand[0]
            chainID = ligand[1]
            resseq = ligand[2]
            altLoc = ligand[3]
            elementDict_ligand = pdb.ElementDict(resname, chainID, resseq, altLoc)
            for element in elementDict_ligand:
                try:
                    expected = elementDict_smiles[element]
                except KeyError:
                    # element does not occur in the SMILES counts at all;
                    # treated as a count of zero instead of aborting the thread
                    expected = 0
                if elementDict_ligand[element] == expected:
                    continue
                self.Logfile.error(
                    "{0!s}: {1!s} {2!s} {3!s} {4!s} contains different"
                    " number of atoms than smiles in DB:"
                    " {5!s} -> {6!s}".format(
                        xtal,
                        resname,
                        chainID,
                        resseq,
                        altLoc,
                        LigandSmiles,
                        LigandCode,
                    )
                )
                self.update_ErrorDict(
                    xtal,
                    "{0!s} {1!s} {2!s} {3!s} contains different"
                    " number of atoms than smiles in DB".format(
                        resname, chainID, resseq, altLoc
                    ),
                )
                # one report per ligand is enough
                break