JensKrumsieck/ChemSharp

View on GitHub
ChemSharp.Molecules/Formats/Mol2Format.cs

Summary

Maintainability
A
1 hr
Test Coverage
using ChemSharp.Memory;

namespace ChemSharp.Molecules.Formats;

/// <summary>
/// Line-based reader for Tripos mol2 files. Collects the records of the
/// <c>@&lt;TRIPOS&gt;ATOM</c> and <c>@&lt;TRIPOS&gt;BOND</c> blocks into <see cref="Atoms" /> and
/// <see cref="Bonds" />, and uses the <c>@&lt;TRIPOS&gt;SUBSTRUCTURE</c> block to assign chain ids
/// to the atoms that were parsed before it.
/// </summary>
public partial class Mol2Format : FileFormat, IAtomFileFormat, IBondFileFormat
{
    private const string Tripos = "@<TRIPOS>";
    private const string AtomsBlock = $"{Tripos}ATOM";
    private const string BondsBlock = $"{Tripos}BOND";
    private const string SubstructureBlock = $"{Tripos}SUBSTRUCTURE";

    //maps the substructure id of an atom record to the indices (parse order) of the atoms carrying it
    private readonly Dictionary<int, HashSet<int>> _chainIdToAtomId = new();

    //number of atoms parsed so far, i.e. the index the next parsed atom receives
    private int _i;
    private bool _pickingAtoms;
    private bool _pickingBonds;
    private bool _pickingSubstructure;

    public Mol2Format(string path) : base(path) { }

    /// <summary>
    /// Atoms read from the ATOM block, in the order their lines were parsed.
    /// </summary>
    public List<Atom> Atoms { get; } = new();

    /// <summary>
    /// Parses one record of the ATOM block.
    /// Columns: <c>atom_id atom_name x y z atom_type [subst_id [subst_name ...]]</c>.
    /// </summary>
    /// <param name="line">A single atom record; the first six columns are required.</param>
    /// <returns>
    /// The parsed atom. Its symbol is the atom type (part before the point) when that is a known
    /// element, otherwise the leading non-numeric part of the atom name.
    /// </returns>
    public Atom ParseAtom(ReadOnlySpan<char> line)
    {
        var cols = line.WhiteSpaceSplit();
        var id = line.Slice(cols[1].start, cols[1].length).ToString();
        var x = line.Slice(cols[2].start, cols[2].length).ToSingle();
        var y = line.Slice(cols[3].start, cols[3].length).ToSingle();
        var z = line.Slice(cols[4].start, cols[4].length).ToSingle();
        var type = line.Slice(cols[5].start, cols[5].length).PointSplit();

        //subst_name is optional in mol2, fall back to an empty residue without id
        var residue = string.Empty;
        var resId = 0;
        if (cols.Length > 7)
        {
            var residueRaw = line.Slice(cols[7].start, cols[7].length);
            var idPos = residueRaw.FirstNumeric();
            if (idPos != -1)
            {
                residue = residueRaw[..idPos].ToString();
                resId = residueRaw[idPos..].ToInt();
            }
            else residue = residueRaw.ToString();
        }

        //every parsed atom consumes an index, whether or not it names a substructure
        var atomIndex = _i++;
        //subst_id is optional as well; keep it as int so it matches the lookup in UpdateChainId
        if (cols.Length > 6)
        {
            var substructureId = line.Slice(cols[6].start, cols[6].length).ToInt();
            if (!_chainIdToAtomId.TryGetValue(substructureId, out var members))
            {
                members = new HashSet<int>();
                _chainIdToAtomId[substructureId] = members;
            }

            members.Add(atomIndex);
        }

        //most of the time type contains the actual type, so casting to string is ok!
        var typeStr = type.ToString();
        var pos = id.AsSpan().FirstNumeric();
        var symbol = ElementDataProvider.ColorData.ContainsKey(typeStr)
            ? typeStr
            : pos != -1
                ? id[..pos]
                : id;
        return new Atom(symbol, x, y, z) {Title = id, Residue = residue, ResidueId = resId};
    }

    /// <summary>
    /// Bonds read from the BOND block, in the order their lines were parsed.
    /// </summary>
    public List<Bond> Bonds { get; } = new();


    /// <summary>
    /// Parses one record of the BOND block.
    /// Columns: <c>bond_id origin_atom_id target_atom_id bond_type</c>.
    /// </summary>
    /// <param name="line">A single bond record referring to atoms already present in <see cref="Atoms" />.</param>
    /// <returns>
    /// The bond between both atoms. It is flagged aromatic when the type starts with <c>ar</c>;
    /// the order is the numeric type, or 1 when the type is not a number.
    /// </returns>
    public Bond ParseBond(ReadOnlySpan<char> line)
    {
        //subtract 1 as mol2 starts counting at 1
        var cols = line.WhiteSpaceSplit();
        var a1 = line.Slice(cols[1].start, cols[1].length).ToInt() - 1;
        var a2 = line.Slice(cols[2].start, cols[2].length).ToInt() - 1;
        var type = line.Slice(cols[3].start, cols[3].length);
        var aromatic = type.StartsWith("ar".AsSpan());
#if NETSTANDARD2_0
        var suc = int.TryParse(type.ToString(), out var order);
#else
        var suc = int.TryParse(type, out var order);
#endif
        return new Bond(Atoms[a1], Atoms[a2]) {IsAromatic = aromatic, Order = suc ? order : 1};
    }

    /// <summary>
    /// Dispatches a line of the file: block headers switch the active block, every other line is
    /// handed to the parser of the block that is currently active (if any).
    /// </summary>
    /// <param name="line">The current line of the file.</param>
    protected override void ParseLine(ReadOnlySpan<char> line)
    {
        //@<TRIPOS> marks block beginning
        if (line.StartsWith(Tripos.AsSpan()))
        {
            SetPickingIndicator(line);
            return;
        }

        //blank lines and comment lines (# in first column) carry no record data
        if (line.Trim().IsEmpty || line[0] == '#') return;

        //no block beginning, parse if allowed
        if (_pickingAtoms) Atoms.Add(ParseAtom(line));
        if (_pickingBonds) Bonds.Add(ParseBond(line));
        if (_pickingSubstructure) UpdateChainId(line);
    }

    /// <summary>
    /// Reads one record of the SUBSTRUCTURE block and writes its chain (sixth column) to all
    /// atoms that were registered under the record's substructure id (first column).
    /// Records without a chain column, or whose id no atom referred to, are ignored.
    /// </summary>
    /// <param name="line">A single substructure record.</param>
    private void UpdateChainId(ReadOnlySpan<char> line)
    {
        var cols = line.WhiteSpaceSplit();
        if (cols.Length <= 5 || cols[5].length == 0) return;
        var id = line.Slice(cols[0].start, cols[0].length).ToInt(); //index of chain
        //a substructure that no atom refers to has nothing to update
        if (!_chainIdToAtomId.TryGetValue(id, out var members)) return;
        var chainIdRaw = line.Slice(cols[5].start, cols[5].length).Trim();
        var chainId = chainIdRaw.Length > 0 ? chainIdRaw[0] : 'ä';
        foreach (var index in members)
        {
            //indices are handed out by ParseAtom, which may also be called without adding to Atoms
            if (index < Atoms.Count) Atoms[index].ChainId = chainId;
        }
    }

    /// <summary>
    /// Activates the block named by a <c>@&lt;TRIPOS&gt;</c> header line and deactivates the others.
    /// Headers other than ATOM, BOND and SUBSTRUCTURE deactivate all three.
    /// </summary>
    /// <param name="line">A line starting with <c>@&lt;TRIPOS&gt;</c>.</param>
    private void SetPickingIndicator(ReadOnlySpan<char> line)
    {
        //the three prefixes exclude each other, so at most one flag ends up set
        _pickingAtoms = line.StartsWith(AtomsBlock.AsSpan());
        _pickingBonds = line.StartsWith(BondsBlock.AsSpan());
        _pickingSubstructure = line.StartsWith(SubstructureBlock.AsSpan());
    }
}