src/pydna/tm.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2013-2023 by Björn Johansson. All rights reserved.
# This code is part of the Python-dna distribution and governed by its
# license. Please see the LICENSE.txt file that should have been included
# as part of this package.
"""This module provide functions for melting temperature calculations."""
import math as _math
from Bio.SeqUtils import MeltingTemp as _mt
from Bio.SeqUtils import gc_fraction as _GC
import textwrap as _textwrap
from pydna._pretty import pretty_str as _pretty_str
# See the documentation for Bio.SeqUtils.MeltingTemp for more details
# The 10X Taq Buffer with (NH4)2SO4 is commercialized by companies like
# ThermoFisher, although we make it ourselves
# 10X Buffer Composition
# 750 mM Tris-HCl (pH 8.8 at 25°C),
# 200 mM (NH4)2SO4,
# 0.1% (v/v) Tween 20.
def tm_default(
seq,
check=True,
strict=True,
c_seq=None,
shift=0,
nn_table=_mt.DNA_NN4, # DNA_NN4: values from SantaLucia & Hicks (2004)
tmm_table=None,
imm_table=None,
de_table=None,
dnac1=500 / 2, # I assume 500 µM of each primer in the PCR mix
dnac2=500 / 2, # This is what MELTING and Primer3Plus do
selfcomp=False,
Na=40,
K=0,
Tris=75.0, # We use the 10X Taq Buffer with (NH4)2SO4 (above)
Mg=1.5, # 1.5 mM Mg2+ is often seen in modern protocols
dNTPs=0.8, # I assume 200 µM of each dNTP
saltcorr=7, # Tm = 81.5 + 0.41(%GC) - 600/N + 16.6 x log[Na+]
func=_mt.Tm_NN, # Used by Primer3Plus to calculate the product Tm.
):
return func(
seq,
check=check,
strict=strict,
c_seq=c_seq,
shift=shift,
nn_table=nn_table,
tmm_table=tmm_table,
imm_table=imm_table,
de_table=de_table,
dnac1=dnac1,
dnac2=dnac2,
selfcomp=selfcomp,
Na=Na,
K=K,
Tris=Tris,
Mg=Mg,
dNTPs=dNTPs,
saltcorr=saltcorr,
)
def tm_dbd(
seq,
check=True,
strict=True,
c_seq=None,
shift=0,
nn_table=_mt.DNA_NN3,
tmm_table=None,
imm_table=None,
de_table=None,
dnac1=250,
dnac2=250,
selfcomp=False,
Na=50,
K=0,
Tris=0,
Mg=1.5,
dNTPs=0.8,
saltcorr=1,
func=_mt.Tm_NN,
):
return func(
seq,
check=check,
strict=strict,
c_seq=c_seq,
shift=shift,
nn_table=nn_table,
tmm_table=tmm_table,
imm_table=imm_table,
de_table=de_table,
dnac1=dnac1,
dnac2=dnac2,
selfcomp=selfcomp,
Na=Na,
K=K,
Tris=Tris,
Mg=Mg,
dNTPs=dNTPs,
saltcorr=saltcorr,
)
def tm_product(seq: str, K=0.050):
"""Tm calculation for the amplicon.
according to:
Rychlik, Spencer, and Rhoads, 1990, Optimization of the anneal
ing temperature for DNA amplification in vitro
http://www.ncbi.nlm.nih.gov/pubmed/2243783
"""
tmp = 81.5 + 0.41 * _GC(seq) * 100 + 16.6 * _math.log10(K) - 675 / len(seq)
return tmp
def ta_default(fp: str, rp: str, seq: str, tm=tm_default, tm_product=tm_product):
"""Ta calculation.
according to:
Rychlik, Spencer, and Rhoads, 1990, Optimization of the anneal
ing temperature for DNA amplification in vitro
http://www.ncbi.nlm.nih.gov/pubmed/2243783
The formula described uses the length and GC content of the product and
salt concentration (monovalent cations)
"""
return 0.3 * min((tm(fp), tm(rp))) + 0.7 * tm_product(seq) - 14.9
def ta_dbd(fp, rp, seq, tm=tm_dbd, tm_product=None):
return min((tm(fp), tm(rp))) + 3
def program(amplicon, tm=tm_default, ta=ta_default):
r"""Returns a string containing a text representation of a suggested
PCR program using Taq or similar polymerase.
::
|95°C|95°C | |tmf:59.5
|____|_____ 72°C|72°C|tmr:59.7
|3min|30s \ 59.1°C _____|____|60s/kb
| | \______/ 0:32|5min|GC 51%
| | 30s | |1051bp
"""
taq_extension_rate = 45 # seconds/kB PCR product length (1min/kb)
extension_time_taq = max(30, int(taq_extension_rate * len(amplicon) / 1000))
# seconds
f = _textwrap.dedent(
r"""
|95°C|95°C | |tmf:{tmf:.1f}
|____|_____ 72°C|72°C|tmr:{tmr:.1f}
|3min|30s \ {ta:.1f}°C _____|____|{rate}s/kb
| | \______/{0:2}:{1:0>2}|5min|GC {GC}%
| | 30s | |{size}bp
""".format(
rate=taq_extension_rate,
size=len(amplicon.seq),
ta=round(
ta(
amplicon.forward_primer.footprint,
amplicon.reverse_primer.footprint,
str(amplicon.seq),
),
1,
),
tmf=tm(amplicon.forward_primer.footprint),
tmr=tm(amplicon.reverse_primer.footprint),
GC=int(amplicon.gc() * 100),
*map(int, divmod(extension_time_taq, 60)),
)
).strip()
return _pretty_str(f)
taq_program = program
def dbd_program(amplicon, tm=tm_dbd, ta=ta_dbd):
r"""Text representation of a suggested PCR program.
Using a polymerase with a DNA binding domain such as Pfu-Sso7d.
::
|98°C|98°C | |tmf:53.8
|____|_____ 72°C|72°C|tmr:54.8
|30s |10s \ 57.0°C _____|____|15s/kb
| | \______/ 0:15|5min|GC 51%
| | 10s | |1051bp
|98°C|98°C | |tmf:82.5
|____|____ | |tmr:84.4
|30s |10s \ 72°C|72°C|15s/kb
| | \____|____|GC 52%
| | 3:45|5min|15058bp
"""
PfuSso7d_extension_rate = 15 # seconds/kB PCR product
extension_time_PfuSso7d = max(10, int(PfuSso7d_extension_rate * len(amplicon) / 1000)) # seconds
# The program returned is eaither a two step or three step progrem
# This depends on the tm and length of the primers in the
# original instructions from finnzyme. These do not seem to be
# Ta calculation for enzymes with dsDNA binding domains like phusion or Pfu-Sso7d
# https://www.finnzymes.fi/tm_determination.html
tmf = tm(amplicon.forward_primer.footprint)
tmr = tm(amplicon.reverse_primer.footprint)
if tmf >= 69.0 and tmr >= 69.0:
f = _textwrap.dedent(
r"""
|98°C|98°C | |tmf:{tmf:.1f}
|____|____ | |tmr:{tmr:.1f}
|30s |10s \ 72°C|72°C|{rate}s/kb
| | \____|____|GC {GC_prod}%
| | {0:2}:{1:0>2}|5min|{size}bp
""".format(
rate=PfuSso7d_extension_rate,
tmf=tmf,
tmr=tmr,
GC_prod=int(amplicon.gc() * 100),
size=len(amplicon.seq),
*map(int, divmod(extension_time_PfuSso7d, 60)),
)
).strip()
else:
f = _textwrap.dedent(
r"""
|98°C|98°C | |tmf:{tmf:.1f}
|____|_____ 72°C|72°C|tmr:{tmr:.1f}
|30s |10s \ {ta:.1f}°C _____|____|{rate}s/kb
| | \______/{0:2}:{1:0>2}|5min|GC {GC}%
| | 10s | |{size}bp
""".format(
rate=PfuSso7d_extension_rate,
size=len(amplicon.seq),
ta=round(
ta(
amplicon.forward_primer.footprint,
amplicon.reverse_primer.footprint,
amplicon.seq,
),
1,
),
tmf=tmf,
tmr=tmr,
GC=int(amplicon.gc() * 100),
*map(int, divmod(extension_time_PfuSso7d, 60)),
)
).strip()
return _pretty_str(f)
pfu_sso7d_program = dbd_program
def Q5(primer: str, *args, **kwargs):
"""For Q5 Ta they take the lower of the two Tms and add 1C
(up to 72C). For Phusion they take the lower of the two
and add 3C (up to 72C).
"""
raise NotImplementedError
def tmbresluc(primer: str, *args, primerc=500.0, saltc=50, **kwargs):
"""Returns the tm for a primer using a formula adapted to polymerases
with a DNA binding domain, such as the Phusion polymerase.
Parameters
----------
primer : string
primer sequence 5'-3'
primerc : float
primer concentration in nM), set to 500.0 nm by default.
saltc : float, optional
Monovalent cation concentration in mM, set to 50.0 mM by default.
thermodynamics : bool, optional
prints details of the thermodynamic data to stdout. For
debugging only.
Returns
-------
tm : float
the tm of the primer
"""
from . import _thermodynamic_data
saltc = float(saltc) / 1000
pri = primerc / 1e9
dS = -12.4
dH = -3400
STR = primer.lower()
for i in range(len(STR) - 1):
n1 = ord(STR[i])
n2 = ord(STR[i + 1])
dH += _thermodynamic_data.dHBr[n1 - 97][n2 - 97]
dS += _thermodynamic_data.dSBr[n1 - 97][n2 - 97]
tm = (dH / (1.9872 * _math.log(pri / 1600) + dS) + (16.6 * _math.log(saltc)) / _math.log(10)) - 273.15
return tm
if __name__ == "__main__":
import os as _os
cached = _os.getenv("pydna_cached_funcs", "")
_os.environ["pydna_cached_funcs"] = ""
import doctest
doctest.testmod(verbose=True, optionflags=doctest.ELLIPSIS)
_os.environ["pydna_cached_funcs"] = cached