// ChemSharp.Molecules/Formats/CifFormat.cs
using ChemSharp.Extensions;
using ChemSharp.Memory;
using ChemSharp.Molecules.Extensions;
namespace ChemSharp.Molecules.Formats;
/// <summary>
/// The CIF flavour that <see cref="CifFormat"/> detected for the file being read.
/// </summary>
internal enum CifType
{
/// <summary>Selected when a line starts with <c>_atom_type_symbol</c> or <c>_symmetry</c>.</summary>
CCDC,
/// <summary>Selected when a line starts with <c>_pdbx</c>; no bonds are read for this flavour.</summary>
mmCIF,
/// <summary>Selected when a line starts with <c>_chem_comp.id</c>.</summary>
Comp,
/// <summary>Initial state: no flavour has been recognised yet, so nothing is parsed.</summary>
NotSet
}
/// <summary>
/// Reads and Parses CIF Files from CCDC or PDB
/// </summary>
public partial class CifFormat : FileFormat, IAtomFileFormat, IBondFileFormat
{
// Keyword that opens a loop block; a line starting with it resets both picking flags.
private const string Loop = "loop_";
// Column index of the first atom label in a bond row (stays 0 unless the Comp flavour sets it to 1).
private int _a1;
// Column index of the second atom label in a bond row (1 by default, 2 for the Comp flavour).
private int _a2 = 1;
// Column index of the chain id in an atom row; 0 is treated as "column not available".
private int _colChainId;
// NOTE(review): not referenced in this part of the class — presumably the disorder column used by the CCDC part; confirm.
private int _colDisorder;
// Column index of the atom label in an atom row.
private int _colLabel;
// Column index of the residue name in an atom row.
private int _colRes;
// Column index of the residue sequence id in an atom row; 0 is treated as "column not available".
private int _colSeqId;
// Column index of the element symbol in an atom row.
private int _colSymbol;
// Column indices of the x, y and z coordinates in an atom row.
private int _colX;
private int _colY;
private int _colZ;
// NOTE(review): _idh and _idx are not referenced in this part of the class — presumably counters used
// while reading loop headers in the other partial files; confirm.
private int _idh;
private int _idx;
// True while the rows of an atom block are being read.
private bool _pickingAtoms;
// True while the rows of a bond block are being read.
private bool _pickingBonds;
// Detected CIF flavour; stays NotSet until DetermineType recognises a line.
private CifType _type = CifType.NotSet;
// Forwards the file path to the FileFormat base class.
public CifFormat(string path) : base(path) { }
/// <summary>
/// Atoms collected while reading the file.
/// </summary>
public List<Atom> Atoms { get; } = new();

/// <summary>
/// Parses a single atom row with the parser that belongs to the detected CIF flavour.
/// </summary>
/// <param name="line">One data row of an atom block.</param>
/// <returns>
/// The result of the flavour-specific parser, or <c>null</c> when no flavour has been detected yet.
/// </returns>
public Atom? ParseAtom(ReadOnlySpan<char> line) =>
    _type switch
    {
        CifType.CCDC => ParseAtomCCDC(line),
        // mmCIF and Comp rows share the same column-based parser
        CifType.mmCIF or CifType.Comp => ParseAtomInternal(line),
        _ => null
    };
/// <summary>
/// Bonds collected while reading the file.
/// </summary>
public List<Bond> Bonds { get; } = new();

/// <summary>
/// Parses a single bond row for the flavours that carry bond information.
/// </summary>
/// <param name="line">One data row of a bond block.</param>
/// <returns>
/// The parsed bond for CCDC and Comp files; <c>null</c> for mmCIF files and while no flavour
/// has been detected.
/// </returns>
public Bond? ParseBond(ReadOnlySpan<char> line) =>
    _type switch
    {
        CifType.CCDC or CifType.Comp => ParseBondInternal(line),
        // mmCIF does not have bonds and therefore ends up here together with NotSet
        _ => null
    };
/// <summary>
/// Builds an <see cref="Atom"/> from a whitespace-separated atom row, as used for the mmCIF and
/// Comp flavours. Which column holds which value is taken from the <c>_col*</c> fields.
/// </summary>
/// <param name="line">One data row of an atom block.</param>
/// <returns>The atom described by the row.</returns>
private Atom? ParseAtomInternal(ReadOnlySpan<char> line)
{
    var cols = line.WhiteSpaceSplit();

    var labelCol = cols[_colLabel];
    var label = line.Slice(labelCol.start, labelCol.length);

    var symbolCol = cols[_colSymbol];
    var symbol = line.Slice(symbolCol.start, symbolCol.length);

    // coordinates pass through RemoveUncertainty before they are converted to float
    var xCol = cols[_colX];
    var x = line.Slice(xCol.start, xCol.length).RemoveUncertainty().ToSingle();
    var yCol = cols[_colY];
    var y = line.Slice(yCol.start, yCol.length).RemoveUncertainty().ToSingle();
    var zCol = cols[_colZ];
    var z = line.Slice(zCol.start, zCol.length).RemoveUncertainty().ToSingle();

    var residueCol = cols[_colRes];
    var residue = line.Slice(residueCol.start, residueCol.length).ToString();

    // a chain column index of 0 means "no chain column", the chain id then stays at its default
    char chainId = default;
    if (_colChainId != 0)
    {
        var chainCol = cols[_colChainId];
        chainId = line.Slice(chainCol.start, chainCol.length)[0];
    }

    // same convention for the sequence id; values not starting with a digit keep the id at 0
    var resId = 0;
    if (_colSeqId != 0)
    {
        var seqCol = cols[_colSeqId];
        var resIdRaw = line.Slice(seqCol.start, seqCol.length);
        if (char.IsDigit(resIdRaw[0])) resId = resIdRaw.ToInt();
    }

    return new Atom(symbol.ToString().UcFirst(), x, y, z)
    {
        Title = label.ToString(),
        Residue = residue,
        ResidueId = resId,
        ChainId = chainId
    };
}
/// <summary>
/// Builds a <see cref="Bond"/> from a whitespace-separated bond row. The two atom labels are read
/// from the columns <c>_a1</c> and <c>_a2</c> and resolved against <see cref="Atoms"/> via
/// <c>FindPairwise</c>.
/// </summary>
/// <param name="line">One data row of a bond block.</param>
/// <returns>
/// The bond between both atoms, or <c>null</c> when at least one of them could not be resolved.
/// </returns>
private Bond? ParseBondInternal(ReadOnlySpan<char> line)
{
    var cols = line.WhiteSpaceSplit();
    var strAtom1 = line.Slice(cols[_a1].start, cols[_a1].length).ToString();
    var strAtom2 = line.Slice(cols[_a2].start, cols[_a2].length).ToString();
    var (a1, a2) = Atoms.FindPairwise(strAtom1, strAtom2);

    // The return type is already nullable, so null is returned as-is (no null-forgiving operator).
    // "is null" also keeps the check independent of any overloaded equality operator.
    if (a1 is null || a2 is null) return null;
    return new Bond(a1, a2);
}
/// <summary>
/// Handles one line of the file: detects the CIF flavour if still unknown, reads cell parameters
/// for CCDC files, updates the picking flags and finally parses the line as header, atom or bond
/// depending on those flags.
/// </summary>
/// <param name="line">The raw line; surrounding whitespace is trimmed before processing.</param>
protected override void ParseLine(ReadOnlySpan<char> line)
{
    var content = line.Trim();
    if (content.IsEmpty) return;

    if (_type == CifType.NotSet) DetermineType(content);

    //pick cell params when type is CCDC (only as long as no conversion matrix exists)
    if (_type == CifType.CCDC && !_conversionMatrix.HasValue)
    {
        if (content.StartsWith(LenghtLines.AsSpan()) || content.StartsWith(AngleLines.AsSpan()))
            ParseCellParams(content);
    }

    SetPickingIndicator(content);

    // lines starting with an underscore are column headers, everything else is a data row
    var isHeader = content.StartsWith("_".AsSpan());

    if (_pickingAtoms)
    {
        if (isHeader)
        {
            ExtractHeader(content);
            return;
        }

        var atom = ParseAtom(content);
        if (atom != null) Atoms.Add(atom);
        return;
    }

    // header lines of a bond block are skipped
    if (!_pickingBonds || isHeader) return;

    var bond = ParseBond(content);
    if (bond != null) Bonds.Add(bond);
}
/// <summary>
/// Tries to recognise the CIF flavour from the beginning of a line and stores it in
/// <c>_type</c>. For the Comp flavour the bond atom columns are moved to 1 and 2.
/// Lines matching none of the known prefixes leave the state untouched.
/// </summary>
/// <param name="line">The trimmed line to inspect.</param>
private void DetermineType(ReadOnlySpan<char> line)
{
    const string compPrefix = "_chem_comp.id";
    const string mmcifPrefix = "_pdbx";
    const string ccdcPrefix = "_atom_type_symbol";
    const string ccdcAltPrefix = "_symmetry";

    // the prefixes exclude each other, at most one branch can apply
    if (line.StartsWith(compPrefix.AsSpan()))
    {
        _type = CifType.Comp;
        _a1 = 1;
        _a2 = 2;
    }
    else if (line.StartsWith(mmcifPrefix.AsSpan()))
    {
        _type = CifType.mmCIF;
    }
    else if (line.StartsWith(ccdcPrefix.AsSpan()) || line.StartsWith(ccdcAltPrefix.AsSpan()))
    {
        _type = CifType.CCDC;
    }
}
/// <summary>
/// Updates the atom/bond picking flags for the given line. A <c>loop_</c> keyword or a line
/// starting with <c>#</c> clears both flags; afterwards the handler of the detected flavour runs.
/// </summary>
/// <param name="line">The trimmed line to inspect.</param>
private void SetPickingIndicator(ReadOnlySpan<char> line)
{
    //loop_ marks block beginning, '#' is handled the same way
    var resetsPicking = line.StartsWith(Loop.AsSpan()) || line.StartsWith("#".AsSpan());
    if (resetsPicking)
    {
        _pickingAtoms = false;
        _pickingBonds = false;
    }

    switch (_type)
    {
        case CifType.CCDC:
            SetPickingIndicatorCCDC(line);
            break;
        case CifType.mmCIF:
            SetPickingIndicatorMMCIF(line);
            break;
        case CifType.Comp:
            SetPickingIndicatorComp(line);
            break;
    }
}
/// <summary>
/// Passes a header line of an atom block to the header handler of the detected flavour.
/// Nothing happens while no flavour has been detected.
/// </summary>
/// <param name="line">The trimmed header line (starts with an underscore).</param>
private void ExtractHeader(ReadOnlySpan<char> line)
{
    switch (_type)
    {
        case CifType.CCDC:
            ExtractHeaderCCDC(line);
            break;
        case CifType.mmCIF:
            ExtractHeaderMMCIF(line);
            break;
        case CifType.Comp:
            ExtractHeaderComp(line);
            break;
    }
}
}