KarrLab/datanator

View on GitHub
datanator/data_source/protein_modification/pro.monomers.csv

Summary

Maintainability
Test Coverage
PRO id,BpForms code,Name,Short label,Base monomer,Type,SMILES,RESID,PDB-CCD,PSI-MI,ChEBI,PubChem SID,CAS,Comments
CHEBI:29950,C,L-cysteine residue,,,,SC[C@H]([NH3+])C=O,AA0005,CYS,,,8146405,,
CHEBI:29952,R,L-arginine residue,,,,O=C[C@H](CCCNC(=[NH2+])N)[NH3+],AA0002,ARG,,,8146403,,
CHEBI:29958,D,L-aspartic acid residue,,,,O=C[C@H](CC(=O)[O-])[NH3+],AA0004,ASP,,,8146397,,
CHEBI:29979,H,L-histidine residue,,,,C(c1cnc[nH]1)[C@H]([NH3+])C=O,AA0009,HIS,,,8146377,,
MOD:00006,,N-glycosylated residue,NGlycoRes,"N, R, W",class of modifications,,,,MOD:00006,,,,
MOD:00036,AA0027,"(2S,3R)-3-hydroxyaspartic acid",(3R)3HyAsp,D,,O=C[C@H]([C@H](C(=O)[O-])O)[NH3+],AA0027,BH2,MOD:00036,,,,
MOD:00037,AA0028,5-hydroxy-L-lysine,5HyLys,K,,[NH3+]C[C@@H](CC[C@@H](C=O)[NH3+])O,AA0028,LYZ,MOD:00037,CHEBI:18040,,1190-94-9,
MOD:00038,AA0029,3-hydroxy-L-proline,,P,,O=C[C@H]1[NH2+]CC[C@@H]1O,AA0029,HY3,MOD:00038,CHEBI:16889,,4298-08-2,
MOD:00039,AA0030,4-hydroxy-L-proline,,P,,O[C@@H]1C[C@H]([NH2+]C1)C=O,AA0030,HYP,MOD:00039,CHEBI:61965,,51-35-4,
MOD:00041,AA0032,L-gamma-carboxyglutamic acid,,E,,O=C[C@H](CC(C(=O)[O-])C(=O)[O-])[NH3+],AA0032,CGU,MOD:00041,CHEBI:61939,,53861-57-7,
MOD:00042,AA0033,L-aspartic 4-phosphoric anhydride,,D,,O=C[C@H](CC(=O)OP(=O)([O-])[O-])[NH3+],AA0033,PHD,MOD:00042,CHEBI:15836,,22138-53-0,
MOD:00046,AA0037,O-phospho-L-serine,,S,,O=C[C@H](COP(=O)([O-])[O-])[NH3+],AA0037,SEP,MOD:00046,CHEBI:45522,,407-41-0,
MOD:00047,AA0038,O-phospho-L-threonine,,T,,O=C[C@H]([C@H](OP(=O)([O-])[O-])C)[NH3+],AA0038,TPO,MOD:00047,CHEBI:61971,,1114-81-4,
MOD:00048,AA0039,O4'-phospho-L-tyrosine,,Y,,O=C[C@H](Cc1ccc(cc1)OP(=O)([O-])[O-])[NH3+],AA0039,PTR,MOD:00048,CHEBI:61972,,21820-51-9,
MOD:00050,AA0041,N-acetyl-L-alanine,,A,,O=C[C@@H](NC(=O)C)C,AA0041,AYA,MOD:00050,CHEBI:61920,,97-69-8,
MOD:00053,AA0044,N-acetyl-L-glutamic acid,,E,,O=C[C@@H](NC(=O)C)CCC(=O)[O-],AA0044,,MOD:00053,CHEBI:17533,,1188-37-0,
MOD:00055,AA0046,N-acetylglycine,,G,,O=CCNC(=O)C,AA0046,,MOD:00055,CHEBI:61888,,543-24-8,
MOD:00058,AA0049,N-acetyl-L-methionine,,M,,CSCC[C@H](NC(=O)C)C=O,AA0049,,MOD:00058,CHEBI:21557,,65-82-7,
MOD:00060,AA0051,N-acetyl-L-serine,,S,,OC[C@H](NC(=O)C)C=O,AA0051,SAC,MOD:00060,CHEBI:45441,,16354-58-8,
MOD:00064,AA0055,N6-acetyl-L-lysine,,K,,O=C[C@H](CCCCNC(=O)C)[NH3+],AA0055,ALY,MOD:00064,CHEBI:61930,,692-04-6,
MOD:00068,AA0059,N-myristoyl-glycine,,G,,CCCCCCCCCCCCCC(=O)NCC=O,AA0059,MYR,MOD:00068,,,14246-55-0,
MOD:00069,AA0060,N-palmitoyl-L-cysteine,,C,,CCCCCCCCCCCCCCCC(=O)N[C@H](C=O)CS,AA0060,,MOD:00069,,,67603-49-0,
MOD:00076,AA0067,"omega-N,omega-N'-dimethyl-L-arginine",,R,,CNC(NCCC[C@@H](C=O)[NH3+])NC,AA0067,2MR,MOD:00076,CHEBI:61916,,,
MOD:00077,AA0068,"omega-N,omega-N-dimethyl-L-arginine",,R,,O=C[C@H](CCCNC(=[NH2])N(C)C)[NH3+],AA0068,DA2,MOD:00077,CHEBI:61896,,30315-93-6,
MOD:00078,AA0069,omega-N-methyl-L-arginine,,R,,O=C[C@H](CCCNC(=[NH2])NC)[NH3+],AA0069,,MOD:00078,CHEBI:28229,,17035-90-4,
MOD:00083,AA0074,"N6,N6,N6-trimethyl-L-lysine",,K,,O=C[C@H](CCCC[N](C)(C)C)N,AA0074,M3L,MOD:00083,CHEBI:61961,,19253-88-4,
MOD:00084,AA0075,"N6,N6-dimethyl-L-lysine",,K,,O=C[C@H](CCCC[NH+](C)C)[NH3+],AA0075,MLY,MOD:00084,CHEBI:61969,,,
MOD:00085,AA0076,N6-methyl-L-lysine,,K,,C[NH2+]CCCC[C@@H](C=O)[NH3+],AA0076,MLZ,MOD:00085,CHEBI:61928,,1188-07-4,
MOD:00087,AA0078,N6-myristoyl-L-lysine,,K,,CCCCCCCCCCCCCC(=O)NCCCC[C@@H](C=O)[NH3+],AA0078,MYK,MOD:00087,CHEBI:21894,,62471-07-2,
MOD:00111,AA0102,S-farnesyl-L-cysteine,,C,,O=C[C@H](CSC/C=C(/CC/C=C(/CCC=C(C)C)\C)\C)[NH3+],AA0102,FAR,MOD:00111,,,68000-92-0,
MOD:00113,AA0104,S-geranylgeranyl-L-cysteine,,C,,O=C[C@H](CSC/C=C(/CC/C=C(/CC/C=C(/CCC=C(C)C)\C)\C)\C)[NH3+],AA0104,GER,MOD:00113,,,131404-69-8,
MOD:00114,AA0105,L-cysteine methyl ester,,C,,COC(=O)[C@H](CS)N,AA0105,CMT,MOD:00114,CHEBI:61989,,2485-63-3,
MOD:00115,AA0106,S-palmitoyl-L-cysteine,,C,,CCCCCCCCCCCCCCCC(=O)SC[C@@H](C=O)N,AA0106,PLM,MOD:00115,,,114507-35-6,
MOD:00126,AA0117,N6-biotinyl-L-lysine,,K,,O=C[C@H](CCCCNC(=O)CCCC[C@@H]1SC[C@H]2[C@@H]1NC(=O)N2)[NH3+],AA0117,BTN,MOD:00126,,,576-19-2,
MOD:00127,AA0118,N6-lipoyl-L-lysine,,K,,O=C[C@H](CCCCNC(=O)CCCC[C@@H]1CCSS1)[NH3+],AA0118,LPA,MOD:00127,CHEBI:14919,,1200-22-2,
MOD:00128,AA0119,N6-pyridoxal phosphate-L-lysine,,K,,O=C[C@H](CCCC[NH2+]Cc1c(cnc(c1O)C)COP(=O)([O-])[O-])[NH3+],AA0119,LLP,MOD:00128,,,2440-59-7,
MOD:00159,AA0150,O3-phosphopantetheine-L-serine,,S,,SCCNC(=O)CCNC(=O)[C@H](C(COP(=O)(OC[C@@H](C=O)[NH3+])[O-])(C)C)O,AA0150,PNS,MOD:00159,,,,
MOD:00160,,N4-glycosyl-L-asparagine,N4GlycoAsn,N,class of modifications,O=C[C@H](CC(=O)N[C@H]1O[C@H](CO)[C@H]([C@@H]([C@H]1O)O)O)N,"AA0151, AA0420, AA0421",,MOD:00160,,,,
MOD:00164,AA0155,O-(N-acetylaminogalactosyl)-L-threonine,,T,,O=C[C@H]([C@H](O[C@@H]1O[C@H](CO)[C@@H]([C@@H]([C@H]1NC(=O)C)O)O)C)[NH3+],AA0155,GTH,MOD:00164,CHEBI:53605,,,
MOD:00167,,N-asparaginyl-glycosylphosphatidylinositolethanolamine,GPIAsn,N,,,AA0158,,MOD:00167,,,,Structure model missing from RESID
MOD:00213,,chondroitin sulfate D-glucuronosyl-D-galactosyl-D-galactosyl-D-xylosyl-L-serine,,S,crosslink,,AA0208,,MOD:00213,,,,
MOD:00215,,heparan sulfate D-glucuronosyl-D-galactosyl-D-galactosyl-D-xylosyl-L-serine,,S,crosslink,,AA0210,,MOD:00215,,,,
MOD:00219,AA0214,L-citrulline,,R,,O=C[C@H](CCCNC(=O)N)[NH3+],AA0214,CIR,MOD:00219,,,372-75-8,
MOD:00222,AA0217,2'-alpha-mannosyl-L-tryptophan,,W,,O=C[C@H](Cc1c([nH]c2c1cccc2)[C@H]1O[C@H](CO)[C@H]([C@@H]([C@@H]1O)O)O)[NH3+],AA0217,BMA,MOD:00222,,,,
MOD:00237,AA0232,L-beta-methylthioaspartic acid,,D,,CS[C@@H]([C@@H](C=O)[NH3+])C(=O)[O-],AA0232,0TD,MOD:00237,,,180420-54-6,
MOD:00295,AA0290,O-octanoyl-L-serine,,S,,CCCCCCCC(=O)OC[C@@H](C=O)[NH3+],AA0290,,MOD:00295,,,,
MOD:00300,,L-glutamyl-5-poly(ADP-ribose),ADP-Ribosyl,E,class of modifications,,AA0295,,MOD:00300,,,,
MOD:00314,AA0309,glycine cholesterol ester,,G,,NCC(=O)O[C@H]1CC[C@]2(C(=CC[C@@H]3[C@@H]2CC[C@]2([C@H]3CC[C@@H]2[C@@H](CCCC(C)C)C)C)C1)C,AA0309,,MOD:00314,,,57-88-5,
MOD:00394,,acetylated residue,AcRes,,class of modifications,,,,MOD:00394,,,,
MOD:00414,,monomethylated L-arginine,Me1Arg,R,class of modifications,,,,MOD:00414,,,,
MOD:00437,,farnesylated residue,FarnRes,"C, W",class of modifications,,,,MOD:00437,,,,
MOD:00599,,monomethylated residue,Me1Res,"A, C, E, F, G, H, K, L, M, N, P, Q, R, S",class of modifications,,,,MOD:00599,,,,
MOD:00658,,methylated arginine,MeArg,R,class of modifications,,,,MOD:00658,,,,
MOD:00663,,methylated lysine,MeLys,K,class of modifications,,,,MOD:00663,,,,
MOD:00693,,glycosylated residue,GlycoRes,"C, K, N, P, R, S, T, W, Y",class of modifications,,,,MOD:00693,,,,
MOD:00696,,phosphorylated residue,PhosRes,"C, D, H, R, S, T, Y",class of modifications,,,,MOD:00696,,,,
MOD:00723,,N-acetylated L-lysine,NAcLys,K,class of modifications,,,,MOD:00723,,,,
MOD:00752,,adenosine diphosphoribosyl (ADP-ribosyl) modified residue,ADPRibRes,"C, E, H, K, N, Q, R, S, T",class of modifications,,,,MOD:00752,,,,
MOD:00783,,dimethylated L-arginine,NNMe2Arg,R,class of modifications,,,,MOD:00783,,,,
MOD:00805,AA0398,O-(N-acetylamino)glucosyl-L-serine,,S,,O=C[C@H](CO[C@@H]1O[C@H](CO)[C@H]([C@@H]([C@H]1NC(=O)C)O)O)[NH3+],AA0398,,MOD:00805,,,10036-64-3,
MOD:00806,AA0399,O-(N-acetylaminoglucosyl)-L-threonine,,T,,O=C[C@H]([C@H](O[C@@H]1O[C@H](CO)[C@H]([C@@H]([C@H]1NC(=O)C)O)O)C)[NH3+],AA0399,NDG,MOD:00806,,,10036-64-3,
MOD:00812,AA0404,O-fucosyl-L-serine,,S,,O=C[C@H](CO[C@H]1O[C@H](C)[C@@H]([C@@H]([C@H]1O)O)O)[NH3+],AA0404,,MOD:00812,,,,
MOD:00813,AA0405,O-fucosyl-L-threonine,,T,,O=C[C@H]([C@H](O[C@H]1O[C@H](C)[C@@H]([C@@H]([C@H]1O)O)O)C)[NH3+],AA0405,,MOD:00813,,,,
MOD:00814,AA0406,O-xylosyl-L-serine,,S,,O[C@H]1[C@@H](OC[C@H]([NH3+])C=O)OC[C@H]([C@@H]1O)O,AA0406,XYS,MOD:00814,,,6050-71-1,
MOD:00831,AA0151,N4-(N-acetylamino)glucosyl-L-asparagine,,N,,O=C[C@H](CC(=O)N[C@@H]1O[C@H](CO)[C@H]([C@@H]([C@H]1NC(=O)C)O)O)N,AA0151,,MOD:00831,CHEBI:17261,,10036-64-3,
MOD:00890,,phosphorylated L-histidine,NPhosHis,H,class of modifications,,,,MOD:00890,,,,
MOD:01048,,2-pyrrolidone-5-carboxylic acid,PyrGlu,Q,class of modifications,,,,MOD:01048,,,,
MOD:01110,,isoprenylated cysteine,IpCys,C,class of modifications,,,,MOD:01110,,,,
MOD:01119,SGergerOMeCys,S-geranylgeranyl-L-cysteine methyl ester,SGergerOMeCys,C,,COC(=O)[C@@H](N)CSC\C=C(/C)CC\C=C(/C)CC\C=C(/C)CCC=C(C)C,,,MOD:01119,,,,Related to AA0104 and AA0105
MOD:01148,,ubiquitinylated lysine,,K,crosslink,,,,MOD:01148,,,,
MOD:01149,,sumoylated lysine,,K,crosslink,,,,MOD:01149,,,,
MOD:01150,,neddylated lysine,,K,crosslink,,,,MOD:01150,,,,
MOD:01399,AA0476,N6-(ADP-ribosyl)-L-lysine,,K,,O=C[C@H](CCCC[NH2+][C@H]1O[C@@H]([C@H]([C@H]1O)O)CO[P@](=O)(O[P@@](=O)(OC[C@H]1O[C@H]([C@@H]([C@@H]1O)O)n1cnc2c1NC=N[C@H]2N)[O-])[O-])[NH3+],AA0476,,MOD:01399,,,,
MOD:01684,,palmitoylated-L-cysteine,PamCys,C,class of modifications,,,,MOD:01684,,,,
MOD:01786,AA0537,3'-nitro-L-tyrosine,,Y,,O=C[C@H](Cc1ccc(c(c1)N(=O)=O)[O-])[NH3+],AA0537,NIY,MOD:01786,CHEBI:44454,,3604-79-3,
PR:000026291,,unmodified amino-acid residue,,,class of modifications,,,,,,,,
PR:000044772,,K63-polyubiquitinylated lysine,K63polyUbiqLys,K,crosslink,,,,,,,,"A ubiquitinylated lysine in which the crosslink is to a polymer of ubiquitin chains, each crosslinked at the Lys-63 position with the C-terminal glycine of the succeeding chain. [PRO:DAN]"
CHEBI:16044,M,L-methionine residue,,M,,CSCC[C@H]([NH3+])C=O,AA0013,MET,,CHEBI:16044,6137,,
CHEBI:29947,G,glycine residue,,G,,C([NH3+])C=O,AA0008,GLZ,,CHEBI:29947,750,,
CHEBI:29954,W,L-tryptophan residue,,W,,O=C[C@H](Cc1c[nH]c2c1cccc2)[NH3+],AA0018,TRP,,CHEBI:29954,6305,,
CHEBI:29967,K,L-lysine residue,,K,,O=C[C@H](CCCC[NH3+])[NH3+],AA0012,LYS,,CHEBI:29967,5962,,
CHEBI:29972,E,L-glutamic acid residue,,E,,O=C[C@H](CCC(=O)[O-])[NH3+],AA0006,GLU,,CHEBI:29972,33032,,
CHEBI:29997,F,L-phenylalanine residue,,F,,O=C[C@H](Cc1ccccc1)[NH3+],AA0014,PHE,,CHEBI:29997,22848660,,
CHEBI:29999,S,L-serine residue,,S,,OC[C@H]([NH3+])C=O,AA0016,SER,,CHEBI:29999,5951,,
CHEBI:30006,L,L-leucine residue,,L,,O=C[C@H](CC(C)C)[NH3+],AA0011,LEU,,CHEBI:30006,6106,,
CHEBI:30009,I,L-isoleucine residue,,I,,CC[C@@H]([C@@H](C=O)[NH3+])C,AA0010,ILE,,CHEBI:30009,6306,,
CHEBI:30011,Q,L-glutamine residue,,Q,,O=C[C@H](CCC(=O)N)[NH3+],AA0007,GLN,,CHEBI:30011,5961,,
CHEBI:30013,T,L-threonine residue,,T,,C[C@H]([C@H]([NH3+])C=O)O,AA0017,THR,,CHEBI:30013,6288,,
CHEBI:30015,V,L-valine residue,,V,,CC(C)C([NH3+])C=O,AA0020,VAL,,CHEBI:30015,6287,,
CHEBI:46217,A,L-alanine residue,,A,,C[C@H]([NH3+])C=O,AA0001,ALA,,CHEBI:46217,5950,,
CHEBI:46858,Y,L-tyrosine residue,,Y,,O=C[C@H](Cc1ccc(cc1)O)[NH3+],AA0019,TYR,,CHEBI:46858,6057,,
CHEBI:50342,P,L-proline residue,,P,,O=C[C@@H]1CCC[NH2+]1,AA0015,PRO,,CHEBI:50342,145742,,
CHEBI:50347,N,L-asparagine residue,,N,,NC(=O)C[C@H]([NH3+])C=O,AA0003,ASN,,CHEBI:50347,6267,,
MOD:00235,AA0230,S-nitrosyl-L-cysteine,SNOCys,C,,O=NSC[C@@H](C=O)N,AA0230,SNC,MOD:00235,,,51209-75-7,
MOD:00267,AA0262,L-cysteine sulfinic acid,,C,,O=C[C@H](C[S](O)O)N,AA0262,CSD,MOD:00267,CHEBI:61964,,1115-65-7,
MOD:00818,,glycosylphosphatidylinositolated residue,GPIRes,"A, C, D, G, N, S",class of modifications,,,,MOD:00818,,,,
MOD:01116,AA0102,,SFarnOMeCys,C,,O=C[C@H](CSC/C=C(/CC/C=C(/CCC=C(C)C)\C)\C)[NH3+],AA0102,FAR,MOD:01116,,,68000-92-0,
PR:000044771,,monoubiquitinylated lysine,MonoUbiqLys,K,crosslink,,,,,,,,
PR:000049733,,K63-diubiquitinylated lysine,K63diUbiq,K,crosslink,,,,,,,,
UniCarbKB:7674,,N-LINKED glycan structure,,,,,,,,,,,
UniCarbKB:7763,,N-LINKED glycan structure,,,,,,,,,,,
UniCarbKB:7764,,N-LINKED glycan structure,,,,,,,,,,,
UniCarbKB:7834,,N-LINKED glycan structure,,,,,,,,,,,
UniCarbKB:7846,,N-LINKED glycan structure,,,,,,,,,,,
UniCarbKB:7863,,N-LINKED glycan structure,,,,,,,,,,,