packages/miew/src/io/parsers/MMTFParser.js

Summary

Maintainability
D
2 days
Test Coverage
import Parser from './Parser'
import chem from '../../chem'
import MMTF from '../../vendors/mmtf'
import StructuralElement from '../../chem/StructuralElement'
import { isArrayBuffer, isUndefined } from 'lodash'
import { Matrix4, Vector3 } from 'three'

const {
  Complex,
  Chain,
  Atom,
  Element,
  Helix,
  Sheet,
  Strand,
  Bond,
  Assembly,
  Molecule
} = chem

class ArrayComparator {
  constructor(original) {
    this._original = Array.from(original)
    this._original.sort()

    this._sum = 0
    for (let i = 0; i < this._original.length; ++i) {
      this._sum += this._original[i]
    }
  }

  compare(candidate) {
    const len = candidate.length
    if (len !== this._original.length) {
      return false
    }

    let sum = 0
    let i
    for (i = 0; i < len; ++i) {
      sum += candidate[i]
    }

    if (sum !== this._sum) {
      return false
    }

    const sorted = Array.from(candidate)
    sorted.sort()

    for (i = 0; i < len; ++i) {
      if (sorted[i] !== this._original[i]) {
        return false
      }
    }

    return true
  }
}

ArrayComparator.prototype.constructor = ArrayComparator

const StructuralElementType = StructuralElement.Type

// see https://github.com/rcsb/mmtf-javascript/blob/master/src/mmtf-traverse.js
const secStructToType = [
  StructuralElementType.HELIX_PI, // 0
  StructuralElementType.BEND, // 1
  StructuralElementType.HELIX_ALPHA, // 2
  StructuralElementType.STRAND, // 3
  StructuralElementType.HELIX_310, // 4
  StructuralElementType.BRIDGE, // 5
  StructuralElementType.TURN, // 6
  StructuralElementType.COIL // 7
]

function getFirstByte(buf) {
  const bytes = new Uint8Array(buf, 0, 1)
  return bytes[0]
}

class MMTFParser extends Parser {
  constructor(data, options) {
    super(data, options)
    this._options.fileType = 'mmtf'
  }

  static canProbablyParse(data) {
    // check if it's binary MessagePack format containing a map (dictionary)
    // see https://github.com/msgpack/msgpack/blob/master/spec.md
    return isArrayBuffer(data) && (getFirstByte(data) | 1) === 0xdf
  }

  _onModel(_modelData) {}

  _onChain(chainData) {
    if (chainData.modelIndex !== 0) {
      return
    }

    const chain = new Chain(this._complex, chainData.chainName)
    this._complex._chains[chainData.chainIndex] = chain
    chain._index = chainData.chainIndex
  }

  _onGroup(groupData) {
    if (groupData.modelIndex !== 0) {
      return
    }

    if (this.settings.now.nowater) {
      // skip water
      if (groupData.groupName === 'HOH' || groupData.groupName === 'WAT') {
        return
      }
    }

    const chain = this._complex._chains[groupData.chainIndex]
    const icode = !groupData.insCode.charCodeAt(0) ? '' : groupData.insCode
    const residue = chain.addResidue(
      groupData.groupName,
      groupData.groupId,
      icode
    )
    residue._index = groupData.groupIndex

    this._updateSecStructure(this._complex, residue, groupData)
  }

  _onAtom(atomData) {
    if (atomData.modelIndex !== 0) {
      return
    }

    const altLoc = !atomData.altLoc.charCodeAt(0) ? '' : atomData.altLoc
    const atom = new Atom(
      atomData.groupIndex, // we store residue index here to replace it later with actual reference
      atomData.atomName,
      Element.getByName(atomData.element.toUpperCase()),
      new Vector3(atomData.xCoord, atomData.yCoord, atomData.zCoord),
      Element.Role[atomData.atomName],
      false, // hetero atoms will be marked later
      atomData.atomId,
      altLoc,
      atomData.occupancy,
      atomData.bFactor,
      atomData.formalCharge
    )

    this._complex._atoms[atomData.atomIndex] = atom
    atom.index = atomData.atomIndex

    this._serialAtomMap[atomData.atomId] = atom
  }

  _onBond(bondData) {
    const right = Math.max(bondData.atomIndex1, bondData.atomIndex2)
    if (right >= this._complex._atoms.length) {
      return
    }
    const left = Math.min(bondData.atomIndex1, bondData.atomIndex2)
    this._complex.addBond(
      this._complex._atoms[left],
      this._complex._atoms[right],
      bondData.bondOrder,
      Bond.BondType.UNKNOWN,
      true
    )
  }

  _updateSecStructure(complex, residue, groupData) {
    const helixClasses = [3, -1, 1, -1, 5]

    if (!isUndefined(groupData) && groupData.secStruct === this._ssType) {
      residue._secondary = this._ssStruct
      if (this._ssStruct) {
        this._ssStruct.term = residue
      }
      return
    }

    if (!isUndefined(groupData)) {
      // start new secondary structure
      const type = secStructToType[groupData.secStruct]
      this._ssType = groupData.secStruct
      this._ssStart = residue

      let struct = null
      switch (this._ssType) {
        case -1: // undefined
        case 7: // coil
          break
        case 0: // pi helix
        case 2: // alpha helix
        case 4: // 3-10 helix
          struct = new Helix(
            helixClasses[this._ssType],
            residue,
            residue,
            0,
            '',
            '',
            0
          )
          complex._helices.push(struct)
          break
        case 3: {
          // extended
          const sheet = new Sheet('', 0)
          complex._sheets.push(sheet)
          struct = new Strand(sheet, residue, residue, 0, null, null)
          break
        }
        default:
          if (type !== undefined) {
            struct = new StructuralElement(type, residue, residue)
          }
          break
      }

      this._ssStruct = struct
      residue._secondary = struct
      if (struct) {
        complex.structures.push(struct)
      }
    }
  }

  _updateMolecules(mmtfData) {
    const entities = mmtfData.entityList
    if (!entities) {
      return
    }

    const chainsInModel0 = mmtfData.chainsPerModel[0]
    for (let i = 0; i < entities.length; i++) {
      const entity = entities[i]
      const chains = entity.chainIndexList
      let residues = []
      for (let j = 0; j < chains.length; j++) {
        const chainIndex = chains[j]
        // skip chains in models other than the first one
        if (chainIndex >= chainsInModel0) {
          continue
        }
        const chain = this._complex._chains[chainIndex]
        residues = residues.concat(chain._residues.slice())
      }
      const molecule = new Molecule(this._complex, entity.description, i + 1)
      molecule.residues = residues
      this._complex._molecules[i] = molecule
    }
  }

  // populate complex with chains, residues and atoms
  _traverse(mmtfData) {
    const self = this

    // get metadata
    const { metadata } = this._complex
    metadata.id = mmtfData.structureId
    metadata.title = []
    metadata.title[0] = mmtfData.title
    metadata.date = mmtfData.releaseDate
    metadata.format = 'mmtf'

    // create event callback functions
    const eventCallbacks = {
      onModel(modelData) {
        self._onModel(modelData)
      },
      onChain(chainData) {
        self._onChain(chainData)
      },
      onGroup(groupData) {
        self._onGroup(groupData)
      },
      onAtom(atomData) {
        self._onAtom(atomData)
      },
      onBond(bondData) {
        self._onBond(bondData)
      }
    }

    // temporary variables used during traversal to track secondary structures
    this._ssType = -1
    this._ssStruct = null
    this._ssStart = null

    // traverse the structure and listen to the events
    MMTF.traverse(mmtfData, eventCallbacks)

    this._updateSecStructure(this._complex)
    this._updateMolecules(mmtfData)
  }

  // During traversal atoms and residues don't come sequentially
  // so a residue for certain atom can be unavailable. Thus we
  // store residue index in atom.
  // This function being called after traversal replaces the index
  // with actual reference, and also populates atom lists in residues.
  _linkAtomsToResidues() {
    for (let i = 0; i < this._complex._atoms.length; ++i) {
      const atom = this._complex._atoms[i]
      const residue = this._complex._residues[atom.residue]
      atom.residue = residue
      residue._atoms.push(atom)
    }
  }

  _findSynonymousChains() {
    const named = {}
    for (let i = 0; i < this._complex._chains.length; ++i) {
      const chain = this._complex._chains[i]
      const name = chain.getName()
      if (!Object.hasOwn(named, name)) {
        named[name] = []
      }

      named[name].push(chain._index)
    }

    return named
  }

  // NOTE: This function relies on original chain indices, so it must be called before any magic happens to chains.
  _parseAssemblyInfo(mmtfData) {
    let i
    let j
    let k
    const assemblies = []
    const { logger } = this

    for (i = 0; i < mmtfData.bioAssemblyList.length; ++i) {
      const baInfo = mmtfData.bioAssemblyList[i]
      if (baInfo.transformList.length === 0) {
        continue
      }

      const chains = baInfo.transformList[0].chainIndexList
      const chainListCheck = new ArrayComparator(chains)

      // build list of chain names
      const chainNames = {}
      for (j = 0; j < chains.length; ++j) {
        chainNames[this._complex._chains[chains[j]].getName()] = 1
      }

      // all chains with the same name should belong to assembly if one of them belongs
      const allChains = []
      let name
      for (name in chainNames) {
        if (Object.hasOwn(chainNames, name)) {
          // just concat arrays -- there should be no duplicates
          Array.prototype.push.apply(allChains, this._chainsByName[name])
        }
      }
      if (!chainListCheck.compare(allChains)) {
        // assembly is missing some of the chains
        logger.debug(
          'MMTF: Assembly is missing some of the synonymous chains. Skipping...'
        )
      }

      const a = new Assembly(this._complex)

      // add chains to assembly
      for (name in chainNames) {
        if (Object.hasOwn(chainNames, name)) {
          a.addChain(name)
        }
      }

      // add unique matrices to assembly
      a.addMatrix(
        new Matrix4().fromArray(baInfo.transformList[0].matrix).transpose()
      )
      for (j = 1; j < baInfo.transformList.length; ++j) {
        const transform = baInfo.transformList[j]

        if (!chainListCheck.compare(transform.chainIndexList)) {
          // list of chains for this transform doesn't match that for other transforms
          // this is illegal in our structure
          logger.debug(
            'MMTF: Chain lists differ for different transforms in one assembly. Skipping...'
          )
          continue
        }

        const m = new Matrix4().fromArray(transform.matrix).transpose()

        // check if matrix is already in the list
        for (k = 0; k < a.matrices.length; ++k) {
          if (a.matrices[k].equals(m)) {
            break
          }
        }

        if (k === a.matrices.length) {
          a.addMatrix(m)
        }
      }

      a.finalize()
      assemblies.push(a)
    }

    return assemblies
  }

  // NOTE: This function relies on original chain indices, so it must be called before any magic happens to chains.
  _markHeteroAtoms(mmtfData) {
    const chainsInModel0 = mmtfData.chainsPerModel[0]
    for (let i = 0; i < mmtfData.entityList.length; ++i) {
      const entity = mmtfData.entityList[i]
      if (entity.type !== 'polymer') {
        for (let j = 0; j < entity.chainIndexList.length; ++j) {
          const chainIndex = entity.chainIndexList[j]
          // skip chains in models other than the first one
          if (chainIndex >= chainsInModel0) {
            continue
          }
          const chain = this._complex._chains[chainIndex]
          for (let k = 0; k < chain._residues.length; ++k) {
            const res = chain._residues[k]
            for (let m = 0; m < res._atoms.length; ++m) {
              res._atoms[m].het = true
            }
          }
        }
      }
    }
  }

  // joins chains with the same name into single chain
  _joinSynonymousChains() {
    let i
    let j

    const primaryChainsArray = []
    const primaryChainsHash = {}

    // join chains
    for (i = 0; i < this._complex._chains.length; ++i) {
      const chain = this._complex._chains[i]
      const name = chain.getName()
      if (!Object.hasOwn(primaryChainsHash, name)) {
        // new name -- this is a primary chain
        primaryChainsHash[name] = chain
        chain._index = primaryChainsArray.length // update index as this array will later replace original chain list
        primaryChainsArray.push(chain)
        continue
      }

      // this chain should be joined with the primary chain of the same name
      const primary = primaryChainsHash[name]
      for (j = 0; j < chain._residues.length; ++j) {
        const residue = chain._residues[j]
        primary._residues.push(residue)
        residue._chain = primary
      }
    }

    // replace chains list with one containing only primary chains
    // dropping references to all chains but primary
    this._complex._chains = primaryChainsArray
  }

  parseSync() {
    const mmtfData = MMTF.decode(this._data)

    this._complex = new Complex()
    this._serialAtomMap = {} // filled during traversal

    this._traverse(mmtfData)
    this._linkAtomsToResidues()
    this._markHeteroAtoms(mmtfData)
    this._chainsByName = this._findSynonymousChains()
    Array.prototype.push.apply(
      this._complex.units,
      this._parseAssemblyInfo(mmtfData)
    )
    this._joinSynonymousChains()

    this._complex.finalize({
      needAutoBonding: false,
      detectAromaticLoops: this.settings.now.aromatic,
      enableEditing: this.settings.now.editing,
      serialAtomMap: this._serialAtomMap
    })

    return this._complex
  }
}

MMTFParser.formats = ['mmtf']
MMTFParser.extensions = ['.mmtf']
MMTFParser.binary = true

export default MMTFParser