KarrLab/bpforms

View on GitHub
bpforms/alphabet/protein.canonical.yml

Summary

Maintainability
Test Coverage
# Alphabet metadata: identifier, display name, and description.
id: canonical_protein
name: Canonical protein amino acids
description: The canonical protein amino acids
# Monomers keyed by single-letter code. Each entry gives an id, a name,
# optional synonyms, database identifiers, a SMILES structure, and the atoms
# listed for the left (l_) and right (r_) bonds.
monomers:
  # A: L-alanine. `position` values are 1-based atom indexes into `structure`
  # in written order; bracketed hydrogens (the H of [C@H], the three H of
  # [NH3+]) count as atoms.
  A:
    id: Ala
    name: L-alanine
    # Alternative names for L-alanine.
    synonyms:
    - (S)-alanine
    - (2S)-2-aminopropanoic acid
    - (S)-2-aminopropanoic acid
    - L-2-Aminopropionic acid
    - L-alpha-alanine
    # Cross-references to external databases.
    identifiers:
    - ns: chebi
      id: CHEBI:16977
    - ns: pubchem.compound
      id: '5950'
    - ns: resid
      id: AA0001
    - ns: metacyc.compound
      id: L-ALPHA-ALANINE
    structure: C[C@H]([NH3+])C(=O)O
    # Left-side bond site: the [NH3+] nitrogen (atom 4); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 4
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 4
      charge: 1
    - molecule: Monomer
      element: H
      position: 4
    # Right-side bond site: the C(=O)O carbon (atom 8).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 8
    # Displaced on the right: the hydroxyl oxygen (atom 10) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 10
    - molecule: Monomer
      element: H
      position: 10
  # R: L-arginine. `position` values are 1-based atom indexes into `structure`
  # in written order; bracketed hydrogens ([C@H], [NH2+], [NH3+]) count as atoms.
  R:
    id: Arg
    name: L-arginine
    # Alternative names for L-arginine.
    synonyms:
    - (S)-2-amino-5-guanidinopentanoic acid
    - (2S)-2-amino-5-guanidinopentanoic acid
    - (2S)-2-amino-5-(carbamimidamido)pentanoic acid
    - L-(+)-arginine
    # Cross-references to external databases.
    identifiers:
    - ns: pubchem.compound
      id: '6322'
    - ns: resid
      id: AA0002
    - ns: chebi
      id: CHEBI:16467
    - ns: metacyc.compound
      id: ARG
    structure: OC(=O)[C@H](CCCNC(=[NH2+])N)[NH3+]
    # Left-side bond site: the backbone [NH3+] nitrogen (atom 15); entry
    # carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 15
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 15
      charge: 1
    - molecule: Monomer
      element: H
      position: 15
    # Right-side bond site: the C(=O) carbon of the leading OC(=O) group (atom 2).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 2
    # Displaced on the right: the hydroxyl oxygen (atom 1) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 1
    - molecule: Monomer
      element: H
      position: 1
  # N: L-asparagine. `position` values are 1-based atom indexes into
  # `structure` in written order; bracketed hydrogens count as atoms.
  N:
    id: Asn
    name: L-asparagine
    synonyms:
    - (S)-Asparagine
    - L-aspartic acid beta-amide
    - (S)-2-amino-3-carbamoylpropanoic acid
    - (2S)-2-amino-3-carbamoylpropanoic acid
    - 2-Aminosuccinamic acid
    - (2S)-2,4-diamino-4-oxobutanoic acid
    - L-Asparagine
    - Aspartamic acid
    - L-Asparagin
    - L-2-aminosuccinamic acid
    - alpha-aminosuccinamic acid
    # Cross-references to external databases.
    identifiers:
    - ns: metacyc.compound
      id: ASN
    - ns: resid
      id: AA0003
    - ns: pubchem.compound
      id: '6267'
    - ns: chebi
      id: CHEBI:17196
    structure: NC(=O)C[C@@H](C(=O)O)[NH3+]
    # Left-side bond site: the [NH3+] nitrogen (atom 10); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 10
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 10
      charge: 1
    - molecule: Monomer
      element: H
      position: 10
    # Right-side bond site: the C(=O)O carbon on the alpha carbon (atom 7).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 7
    # Displaced on the right: the hydroxyl oxygen (atom 9) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 9
    - molecule: Monomer
      element: H
      position: 9
  # D: L-aspartic acid. `position` values are 1-based atom indexes into
  # `structure` in written order; bracketed hydrogens count as atoms.
  D:
    id: Asp
    name: L-aspartic acid
    synonyms:
    - (S)-2-aminosuccinic acid
    - (S)-2-aminobutanedioic acid
    - 2-Aminosuccinic acid
    - L-Asparaginsaeure
    # Cross-references to external databases.
    identifiers:
    - ns: chebi
      id: CHEBI:17053
    - ns: resid
      id: AA0004
    - ns: pubchem.compound
      id: '5960'
    # NOTE(review): MetaCyc's frame id for this compound may be L-ASPARTATE
    # rather than ASP; confirm that this id resolves.
    - ns: metacyc.compound
      id: ASP
    # Quoted because the SMILES starts with '[', which YAML would otherwise
    # read as a flow sequence. The side-chain carboxyl is written as [O-].
    structure: '[O-]C(=O)C[C@@H](C(=O)O)[NH3+]'
    # Left-side bond site: the [NH3+] nitrogen (atom 10); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 10
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 10
      charge: 1
    - molecule: Monomer
      element: H
      position: 10
    # Right-side bond site: the C(=O)O carbon on the alpha carbon (atom 7).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 7
    # Displaced on the right: the hydroxyl oxygen (atom 9) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 9
    - molecule: Monomer
      element: H
      position: 9
  # C: L-cysteine. `position` values are 1-based atom indexes into `structure`
  # in written order; bracketed hydrogens count as atoms.
  C:
    id: Cys
    name: L-cysteine
    synonyms:
    - E920
    - E 920
    - L-2-Amino-3-mercaptopropionic acid
    - (2R)-2-amino-3-sulfanylpropanoic acid
    - E-920
    - (2R)-2-amino-3-mercaptopropanoic acid
    - L-Zystein
    - (R)-2-amino-3-mercaptopropanoic acid
    # Cross-references to external databases.
    identifiers:
    - ns: chebi
      id: CHEBI:17561
    - ns: pubchem.compound
      id: '5862'
    - ns: resid
      id: AA0005
    - ns: metacyc.compound
      id: CYS
    structure: OC(=O)[C@@H]([NH3+])CS
    # Left-side bond site: the [NH3+] nitrogen (atom 6); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 6
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 6
      charge: 1
    - molecule: Monomer
      element: H
      position: 6
    # Right-side bond site: the C(=O) carbon of the leading OC(=O) group (atom 2).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 2
    # Displaced on the right: the hydroxyl oxygen (atom 1) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 1
    - molecule: Monomer
      element: H
      position: 1
  # Q: L-glutamine. `position` values are 1-based atom indexes into
  # `structure` in written order; bracketed hydrogens count as atoms.
  Q:
    id: Gln
    name: L-glutamine
    synonyms:
    - L-Glutaminsaeure-5-amid
    - L-(+)-glutamine
    - L-2-Aminoglutaramic acid
    - Glutamic acid amide
    - (S)-2,5-diamino-5-oxopentanoic acid
    - (2S)-2-amino-4-carbamoylbutanoic acid
    - L-glutamic acid gamma-amide
    - Levoglutamide
    - (2S)-2,5-diamino-5-oxopentanoic acid
    - L-2-aminoglutaramic acid
    - Glutamic acid 5-amide
    # Cross-references to external databases.
    identifiers:
    - ns: metacyc.compound
      id: GLN
    - ns: chebi
      id: CHEBI:18050
    - ns: resid
      id: AA0007
    - ns: pubchem.compound
      id: '5961'
    structure: NC(=O)CC[C@@H](C(=O)O)[NH3+]
    # Left-side bond site: the [NH3+] nitrogen (atom 11); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 11
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 11
      charge: 1
    - molecule: Monomer
      element: H
      position: 11
    # Right-side bond site: the C(=O)O carbon on the alpha carbon (atom 8).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 8
    # Displaced on the right: the hydroxyl oxygen (atom 10) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 10
    - molecule: Monomer
      element: H
      position: 10
  # E: L-glutamic acid. `position` values are 1-based atom indexes into
  # `structure` in written order; bracketed hydrogens count as atoms.
  E:
    id: Glu
    name: L-glutamic acid
    synonyms:
    - L-Glutaminsaeure
    - (S)-2-aminopentanedioic acid
    - Glutamate
    - (S)-glutamic acid
    - L-Glutaminic acid
    # Cross-references to external databases.
    identifiers:
    - ns: resid
      id: AA0006
    - ns: chebi
      id: CHEBI:16015
    # NOTE(review): MetaCyc's frame id for L-glutamate may be GLT rather than
    # GLU; confirm that this id resolves.
    - ns: metacyc.compound
      id: GLU
    - ns: pubchem.compound
      id: '33032'
    # Quoted because the SMILES starts with '[', which YAML would otherwise
    # read as a flow sequence. The side-chain carboxyl is written as [O-].
    structure: '[O-]C(=O)CC[C@@H](C(=O)O)[NH3+]'
    # Left-side bond site: the [NH3+] nitrogen (atom 11); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 11
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 11
      charge: 1
    - molecule: Monomer
      element: H
      position: 11
    # Right-side bond site: the C(=O)O carbon on the alpha carbon (atom 8).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 8
    # Displaced on the right: the hydroxyl oxygen (atom 10) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 10
    - molecule: Monomer
      element: H
      position: 10
  # G: glycine. `position` values are 1-based atom indexes into `structure`
  # in written order.
  G:
    id: Gly
    name: glycine
    synonyms:
    - Aminoacetic acid
    - aminoethanoic acid
    # Cross-references to external databases.
    identifiers:
    - ns: pubchem.compound
      id: '750'
    - ns: metacyc.compound
      id: GLY
    - ns: chebi
      id: CHEBI:15428
    - ns: resid
      id: AA0008
    # No stereo marker is written on the alpha carbon in this SMILES.
    structure: OC(=O)C[NH3+]
    # Left-side bond site: the [NH3+] nitrogen (atom 5); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 5
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 5
      charge: 1
    - molecule: Monomer
      element: H
      position: 5
    # Right-side bond site: the C(=O) carbon of the leading OC(=O) group (atom 2).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 2
    # Displaced on the right: the hydroxyl oxygen (atom 1) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 1
    - molecule: Monomer
      element: H
      position: 1
  # H: L-histidine. `position` values are 1-based atom indexes into
  # `structure` in written order; bracketed hydrogens count as atoms.
  H:
    id: His
    name: L-histidine
    synonyms:
    - (S)-4-(2-Amino-2-carboxyethyl)imidazole
    - (S)-alpha-amino-1H-Imidazole-4-propanoic acid
    - L-(-)-histidine
    - (S)-alpha-Amino-1H-imidazole-4-propionic acid
    # Cross-references to external databases.
    identifiers:
    - ns: resid
      id: AA0009
    - ns: metacyc.compound
      id: HIS
    # PubChem CID for L-histidine; it must differ from the L-glutamic acid
    # record (33032) listed under monomer E.
    - ns: pubchem.compound
      id: '6274'
    - ns: chebi
      id: CHEBI:15971
    structure: OC(=O)[C@@H]([NH3+])Cc1c[nH]cn1
    # Left-side bond site: the backbone [NH3+] nitrogen (atom 6); entry
    # carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 6
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 6
      charge: 1
    - molecule: Monomer
      element: H
      position: 6
    # Right-side bond site: the C(=O) carbon of the leading OC(=O) group (atom 2).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 2
    # Displaced on the right: the hydroxyl oxygen (atom 1) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 1
    - molecule: Monomer
      element: H
      position: 1
  # I: L-isoleucine. `position` values are 1-based atom indexes into
  # `structure` in written order; bracketed hydrogens (one on each of the two
  # stereocentres) count as atoms.
  I:
    id: Ile
    name: L-isoleucine
    synonyms:
    - (2S,3S)-2-amino-3-methylpentanoic acid
    - alpha-amino-beta-methylvaleric acid
    - 2-Amino-3-methylvaleric acid
    # Cross-references to external databases.
    identifiers:
    - ns: resid
      id: AA0010
    - ns: metacyc.compound
      id: ILE
    - ns: chebi
      id: CHEBI:17191
    - ns: pubchem.compound
      id: '6306'
    structure: CC[C@@H]([C@@H](C(=O)O)[NH3+])C
    # Left-side bond site: the [NH3+] nitrogen (atom 10); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 10
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 10
      charge: 1
    - molecule: Monomer
      element: H
      position: 10
    # Right-side bond site: the C(=O)O carbon (atom 7).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 7
    # Displaced on the right: the hydroxyl oxygen (atom 9) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 9
    - molecule: Monomer
      element: H
      position: 9
  # L: L-leucine. `position` values are 1-based atom indexes into `structure`
  # in written order; bracketed hydrogens count as atoms (the three H of the
  # leading [NH3+] are atoms 2-4).
  L:
    id: Leu
    name: L-leucine
    synonyms:
    - (S)-leucine
    - 2-Amino-4-methylvaleric acid
    - (2S)-2-amino-4-methylpentanoic acid
    - (2S)-alpha-Leucine
    - (S)-(+)-leucine
    - (2S)-alpha-2-Amino-4-methylvaleric acid
    # Cross-references to external databases.
    identifiers:
    - ns: resid
      id: AA0011
    - ns: pubchem.compound
      id: '6106'
    - ns: chebi
      id: CHEBI:15603
    - ns: metacyc.compound
      id: LEU
    # Quoted because the SMILES starts with '[', which YAML would otherwise
    # read as a flow sequence.
    structure: '[NH3+][C@H](C(=O)O)CC(C)C'
    # Left-side bond site: the [NH3+] nitrogen (atom 1); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 1
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 1
      charge: 1
    - molecule: Monomer
      element: H
      position: 1
    # Right-side bond site: the C(=O)O carbon (atom 7).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 7
    # Displaced on the right: the hydroxyl oxygen (atom 9) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 9
    - molecule: Monomer
      element: H
      position: 9
  # K: L-lysine. `position` values are 1-based atom indexes into `structure`
  # in written order; bracketed hydrogens count as atoms. This entry has no
  # `synonyms` list.
  K:
    id: Lys
    name: L-lysine
    # Cross-references to external databases.
    identifiers:
    - ns: chebi
      id: CHEBI:18019
    - ns: pubchem.compound
      id: '5962'
    - ns: resid
      id: AA0012
    - ns: metacyc.compound
      id: LYS
    # Quoted because the SMILES starts with '['. The leading [NH3+] is the
    # side-chain amine; the trailing [NH3+] is on the alpha carbon.
    structure: '[NH3+]CCCC[C@@H](C(=O)O)[NH3+]'
    # Left-side bond site: the alpha-carbon [NH3+] nitrogen (atom 14), not the
    # side-chain nitrogen (atom 1); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 14
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 14
      charge: 1
    - molecule: Monomer
      element: H
      position: 14
    # Right-side bond site: the C(=O)O carbon (atom 11).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 11
    # Displaced on the right: the hydroxyl oxygen (atom 13) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 13
    - molecule: Monomer
      element: H
      position: 13
  # M: L-methionine. `position` values are 1-based atom indexes into
  # `structure` in written order; bracketed hydrogens count as atoms.
  M:
    id: Met
    name: L-methionine
    synonyms:
    - (2S)-2-amino-4-(methylsulfanyl)butanoic acid
    - (S)-2-amino-4-(methylthio)butanoic acid
    - (S)-methionine
    - L-alpha-amino-gamma-methylmercaptobutyric acid
    - L-(-)-methionine
    - (S)-2-amino-4-(methylthio)butyric acid
    # Cross-references to external databases.
    identifiers:
    - ns: metacyc.compound
      id: MET
    - ns: chebi
      id: CHEBI:16643
    - ns: pubchem.compound
      id: '6137'
    - ns: resid
      id: AA0013
    structure: CSCC[C@H]([NH3+])C(=O)O
    # Left-side bond site: the [NH3+] nitrogen (atom 7); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 7
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 7
      charge: 1
    - molecule: Monomer
      element: H
      position: 7
    # Right-side bond site: the C(=O)O carbon (atom 11).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 11
    # Displaced on the right: the hydroxyl oxygen (atom 13) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 13
    - molecule: Monomer
      element: H
      position: 13
  # F: L-phenylalanine. `position` values are 1-based atom indexes into
  # `structure` in written order; bracketed hydrogens count as atoms (the
  # three H of the leading [NH3+] are atoms 2-4).
  F:
    id: Phe
    name: L-phenylalanine
    synonyms:
    - 3-phenyl-L-alanine
    - (S)-2-Amino-3-phenylpropionic acid
    - (S)-alpha-Amino-beta-phenylpropionic acid
    - β-phenyl-L-alanine
    # Cross-references to external databases.
    identifiers:
    # NOTE(review): this CID is far larger than the other monomers' CIDs in
    # this file; the commonly cited CID for L-phenylalanine is 6140. Confirm
    # which record is intended.
    - ns: pubchem.compound
      id: '22848660'
    - ns: metacyc.compound
      id: PHE
    - ns: resid
      id: AA0014
    - ns: chebi
      id: CHEBI:17295
    # Quoted because the SMILES starts with '[', which YAML would otherwise
    # read as a flow sequence.
    structure: '[NH3+][C@H](C(=O)O)Cc1ccccc1'
    # Left-side bond site: the [NH3+] nitrogen (atom 1); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 1
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 1
      charge: 1
    - molecule: Monomer
      element: H
      position: 1
    # Right-side bond site: the C(=O)O carbon (atom 7).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 7
    # Displaced on the right: the hydroxyl oxygen (atom 9) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 9
    - molecule: Monomer
      element: H
      position: 9
  # P: L-proline. `position` values are 1-based atom indexes into `structure`
  # in written order; bracketed hydrogens count as atoms.
  P:
    id: Pro
    name: L-proline
    synonyms:
    - L-(-)-proline
    - (2S)-pyrrolidine-2-carboxylic acid
    - 2-Pyrrolidinecarboxylic acid
    - (-)-proline
    - L-pyrrolidine-2-carboxylic acid
    - (S)-2-carboxypyrrolidine
    - (S)-pyrrolidine-2-carboxylic acid
    - (S)-2-pyrrolidinecarboxylic acid
    - (-)-2-pyrrolidinecarboxylic acid
    - L-alpha-pyrrolidinecarboxylic acid
    - (-)-(S)-proline
    # Cross-references to external databases.
    identifiers:
    - ns: pubchem.compound
      id: '145742'
    - ns: resid
      id: AA0015
    - ns: metacyc.compound
      id: PRO
    - ns: chebi
      id: CHEBI:17203
    # The nitrogen is a ring member written as [NH2+], so it has two
    # hydrogens rather than the three of the [NH3+] used by other monomers.
    structure: OC(=O)[C@@H]1CCC[NH2+]1
    # Left-side bond site: the ring [NH2+] nitrogen (atom 9); entry carries
    # charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 9
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 9
      charge: 1
    - molecule: Monomer
      element: H
      position: 9
    # Right-side bond site: the C(=O) carbon of the leading OC(=O) group (atom 2).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 2
    # Displaced on the right: the hydroxyl oxygen (atom 1) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 1
    - molecule: Monomer
      element: H
      position: 1
  # S: L-serine. `position` values are 1-based atom indexes into `structure`
  # in written order; bracketed hydrogens count as atoms.
  S:
    id: Ser
    name: L-serine
    # One name per list item.
    synonyms:
    - (S)-serine
    - beta-Hydroxy-L-alanine
    - (S)-alpha-Amino-beta-hydroxypropionic acid
    - L-2-Amino-3-hydroxypropionic acid
    - L-3-Hydroxy-alanine
    - (S)-(-)-serine
    - (2S)-2-amino-3-hydroxypropanoic acid
    - L-3-Hydroxy-2-aminopropionic acid
    - (S)-2-amino-3-hydroxypropanoic acid
    - L-(-)-serine
    - beta-Hydroxyalanine
    # Cross-references to external databases.
    identifiers:
    - ns: resid
      id: AA0016
    - ns: metacyc.compound
      id: SER
    - ns: chebi
      id: CHEBI:17115
    - ns: pubchem.compound
      id: '5951'
    structure: OC[C@@H](C(=O)O)[NH3+]
    # Left-side bond site: the [NH3+] nitrogen (atom 8); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 8
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 8
      charge: 1
    - molecule: Monomer
      element: H
      position: 8
    # Right-side bond site: the C(=O)O carbon (atom 5).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 5
    # Displaced on the right: the carboxyl hydroxyl oxygen (atom 7), not the
    # side-chain hydroxyl (atom 1), and a hydrogen recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 7
    - molecule: Monomer
      element: H
      position: 7
  # T: L-threonine. `position` values are 1-based atom indexes into
  # `structure` in written order; bracketed hydrogens (one on each of the two
  # stereocentres) count as atoms.
  T:
    id: Thr
    name: L-threonine
    synonyms:
    - 2-Amino-3-hydroxybutyric acid
    - (2S,3R)-2-amino-3-hydroxybutanoic acid
    - L-alpha-amino-beta-hydroxybutyric acid
    - (2S)-threonine
    - (2S,3R)-(-)-Threonine
    - L-2-Amino-3-hydroxybutyric acid
    - L-(-)-Threonine
    # Cross-references to external databases.
    identifiers:
    - ns: resid
      id: AA0017
    - ns: metacyc.compound
      id: THR
    - ns: chebi
      id: CHEBI:16857
    - ns: pubchem.compound
      id: '6288'
    structure: C[C@H]([C@@H](C(=O)O)[NH3+])O
    # Left-side bond site: the [NH3+] nitrogen (atom 9); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 9
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 9
      charge: 1
    - molecule: Monomer
      element: H
      position: 9
    # Right-side bond site: the C(=O)O carbon (atom 6).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 6
    # Displaced on the right: the carboxyl hydroxyl oxygen (atom 8) and a
    # hydrogen recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 8
    - molecule: Monomer
      element: H
      position: 8
  # W: L-tryptophan. `position` values are 1-based atom indexes into
  # `structure` in written order; bracketed hydrogens ([C@H], [nH], [NH3+])
  # count as atoms.
  W:
    id: Trp
    name: L-tryptophan
    synonyms:
    - (S)-α-amino-1H-indole-3-propanoic acid
    - (S)-alpha-Amino-beta-(3-indolyl)-propionic acid
    - L-β-3-indolylalanine
    - (S)-tryptophan
    - (2S)-2-amino-3-(1H-indol-3-yl)propanoic acid
    - L-(−)-tryptophan
    # Cross-references to external databases.
    identifiers:
    - ns: chebi
      id: CHEBI:16828
    - ns: metacyc.compound
      id: TRP
    - ns: pubchem.compound
      id: '6305'
    - ns: resid
      id: AA0018
    structure: OC(=O)[C@H](Cc1c[nH]c2c1cccc2)[NH3+]
    # Left-side bond site: the backbone [NH3+] nitrogen (atom 17), not the
    # ring [nH] (atom 9); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 17
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 17
      charge: 1
    - molecule: Monomer
      element: H
      position: 17
    # Right-side bond site: the C(=O) carbon of the leading OC(=O) group (atom 2).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 2
    # Displaced on the right: the hydroxyl oxygen (atom 1) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 1
    - molecule: Monomer
      element: H
      position: 1
  # Y: L-tyrosine. `position` values are 1-based atom indexes into `structure`
  # in written order; bracketed hydrogens count as atoms.
  Y:
    id: Tyr
    name: L-tyrosine
    synonyms:
    - (S)-alpha-amino-4-hydroxybenzenepropanoic acid
    - (S)-(-)-Tyrosine
    - 4-hydroxy-L-phenylalanine
    - (2S)-2-amino-3-(4-hydroxyphenyl)propanoic acid
    - (S)-Tyrosine
    - (S)-3-(p-Hydroxyphenyl)alanine
    - (S)-2-Amino-3-(p-hydroxyphenyl)propionic acid
    - (-)-alpha-amino-p-hydroxyhydrocinnamic acid
    # Cross-references to external databases.
    identifiers:
    - ns: metacyc.compound
      id: TYR
    - ns: chebi
      id: CHEBI:17895
    - ns: resid
      id: AA0019
    - ns: pubchem.compound
      id: '6057'
    structure: OC(=O)[C@H](Cc1ccc(cc1)O)[NH3+]
    # Left-side bond site: the [NH3+] nitrogen (atom 14); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 14
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 14
      charge: 1
    - molecule: Monomer
      element: H
      position: 14
    # Right-side bond site: the C(=O) carbon of the leading OC(=O) group (atom 2).
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 2
    # Displaced on the right: the carboxyl hydroxyl oxygen (atom 1), not the
    # phenol oxygen (atom 13), and a hydrogen recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 1
    - molecule: Monomer
      element: H
      position: 1
  # V: L-valine. `position` values are 1-based atom indexes into `structure`
  # in written order; bracketed hydrogens count as atoms (the three H of the
  # leading [NH3+] are atoms 2-4, the H of [C@H] is atom 6).
  V:
    id: Val
    name: L-valine
    synonyms:
    - L-alpha-Amino-beta-methylbutyric acid
    - (S)-valine
    - 2-Amino-3-methylbutyric acid
    - L-(+)-alpha-Aminoisovaleric acid
    - (2S)-2-amino-3-methylbutanoic acid
    # Cross-references to external databases.
    identifiers:
    - ns: resid
      id: AA0020
    - ns: pubchem.compound
      id: '6287'
    - ns: metacyc.compound
      id: VAL
    - ns: chebi
      id: CHEBI:16414
    # The alpha carbon is written as [C@H] so the SMILES specifies the L
    # configuration, using the same [NH3+][C@H](C(=O)O)... form as monomers
    # L and F. Quoted because the SMILES starts with '['.
    structure: '[NH3+][C@H](C(=O)O)C(C)C'
    # Left-side bond site: the [NH3+] nitrogen (atom 1); entry carries charge -1.
    l_bond_atoms:
    - molecule: Monomer
      element: N
      position: 1
      charge: -1
    # Two hydrogens listed as displaced at that nitrogen; one carries charge 1.
    l_displaced_atoms:
    - molecule: Monomer
      element: H
      position: 1
      charge: 1
    - molecule: Monomer
      element: H
      position: 1
    # Right-side bond site: the C(=O)O carbon. It is atom 7 because the
    # explicit hydrogen of [C@H] occupies atom 6.
    r_bond_atoms:
    - molecule: Monomer
      element: C
      position: 7
    # Displaced on the right: the hydroxyl oxygen (atom 9) and a hydrogen
    # recorded at the same position.
    r_displaced_atoms:
    - molecule: Monomer
      element: O
      position: 9
    - molecule: Monomer
      element: H
      position: 9