tlslite/utils/brotlidecpy/transform.py
# Copyright 2021 Sidney Markowitz All Rights Reserved.
# Distributed under MIT license.
# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
from .dictionary import BrotliDictionary
"""
Transformations on dictionary words
"""
class Transform:
def __init__(self, prefix, transform, suffix):
self.prefix = bytearray(prefix)
self.transform = transform
self.suffix = bytearray(suffix)
@staticmethod
def transformDictionaryWord(dst, idx, word, length, transform):
prefix = kTransforms[transform].prefix
suffix = kTransforms[transform].suffix
t = kTransforms[transform].transform
skip = t < (0 if kOmitFirst1 else (t - (kOmitFirst1 - 1)))
start_idx = idx
if skip > length:
skip = length
prefix_pos = 0
while prefix_pos < len(prefix):
dst[idx] = prefix[prefix_pos]
idx += 1
prefix_pos += 1
word += skip
length -= skip
if t <= kOmitLast9:
length -= t
for i in range(0, length):
dst[idx] = BrotliDictionary.dictionary[word + i]
idx += 1
uppercase = idx - length
if t == kUppercaseFirst:
_to_upper_case(dst, uppercase)
elif t == kUppercaseAll:
while length > 0:
step = _to_upper_case(dst, uppercase)
uppercase += step
length -= step
suffix_pos = 0
while suffix_pos < len(suffix):
dst[idx] = suffix[suffix_pos]
idx += 1
suffix_pos += 1
return idx - start_idx
kIdentity = 0
kOmitLast1 = 1
kOmitLast2 = 2
kOmitLast3 = 3
kOmitLast4 = 4
kOmitLast5 = 5
kOmitLast6 = 6
kOmitLast7 = 7
kOmitLast8 = 8
kOmitLast9 = 9
kUppercaseFirst = 10
kUppercaseAll = 11
kOmitFirst1 = 12
kOmitFirst2 = 13
kOmitFirst3 = 14
kOmitFirst4 = 15
kOmitFirst5 = 16
kOmitFirst6 = 17
kOmitFirst7 = 18
kOmitFirst8 = 19
kOmitFirst9 = 20
kTransforms = [
Transform(b"", kIdentity, b""),
Transform(b"", kIdentity, b" "),
Transform(b" ", kIdentity, b" "),
Transform(b"", kOmitFirst1, b""),
Transform(b"", kUppercaseFirst, b" "),
Transform(b"", kIdentity, b" the "),
Transform(b" ", kIdentity, b""),
Transform(b"s ", kIdentity, b" "),
Transform(b"", kIdentity, b" of "),
Transform(b"", kUppercaseFirst, b""),
Transform(b"", kIdentity, b" and "),
Transform(b"", kOmitFirst2, b""),
Transform(b"", kOmitLast1, b""),
Transform(b", ", kIdentity, b" "),
Transform(b"", kIdentity, b", "),
Transform(b" ", kUppercaseFirst, b" "),
Transform(b"", kIdentity, b" in "),
Transform(b"", kIdentity, b" to "),
Transform(b"e ", kIdentity, b" "),
Transform(b"", kIdentity, b"\""),
Transform(b"", kIdentity, b"."),
Transform(b"", kIdentity, b"\">"),
Transform(b"", kIdentity, b"\n"),
Transform(b"", kOmitLast3, b""),
Transform(b"", kIdentity, b"]"),
Transform(b"", kIdentity, b" for "),
Transform(b"", kOmitFirst3, b""),
Transform(b"", kOmitLast2, b""),
Transform(b"", kIdentity, b" a "),
Transform(b"", kIdentity, b" that "),
Transform(b" ", kUppercaseFirst, b""),
Transform(b"", kIdentity, b". "),
Transform(b".", kIdentity, b""),
Transform(b" ", kIdentity, b", "),
Transform(b"", kOmitFirst4, b""),
Transform(b"", kIdentity, b" with "),
Transform(b"", kIdentity, b"'"),
Transform(b"", kIdentity, b" from "),
Transform(b"", kIdentity, b" by "),
Transform(b"", kOmitFirst5, b""),
Transform(b"", kOmitFirst6, b""),
Transform(b" the ", kIdentity, b""),
Transform(b"", kOmitLast4, b""),
Transform(b"", kIdentity, b". The "),
Transform(b"", kUppercaseAll, b""),
Transform(b"", kIdentity, b" on "),
Transform(b"", kIdentity, b" as "),
Transform(b"", kIdentity, b" is "),
Transform(b"", kOmitLast7, b""),
Transform(b"", kOmitLast1, b"ing "),
Transform(b"", kIdentity, b"\n\t"),
Transform(b"", kIdentity, b":"),
Transform(b" ", kIdentity, b". "),
Transform(b"", kIdentity, b"ed "),
Transform(b"", kOmitFirst9, b""),
Transform(b"", kOmitFirst7, b""),
Transform(b"", kOmitLast6, b""),
Transform(b"", kIdentity, b"("),
Transform(b"", kUppercaseFirst, b", "),
Transform(b"", kOmitLast8, b""),
Transform(b"", kIdentity, b" at "),
Transform(b"", kIdentity, b"ly "),
Transform(b" the ", kIdentity, b" of "),
Transform(b"", kOmitLast5, b""),
Transform(b"", kOmitLast9, b""),
Transform(b" ", kUppercaseFirst, b", "),
Transform(b"", kUppercaseFirst, b"\""),
Transform(b".", kIdentity, b"("),
Transform(b"", kUppercaseAll, b" "),
Transform(b"", kUppercaseFirst, b"\">"),
Transform(b"", kIdentity, b"=\""),
Transform(b" ", kIdentity, b"."),
Transform(b".com/", kIdentity, b""),
Transform(b" the ", kIdentity, b" of the "),
Transform(b"", kUppercaseFirst, b"'"),
Transform(b"", kIdentity, b". This "),
Transform(b"", kIdentity, b","),
Transform(b".", kIdentity, b" "),
Transform(b"", kUppercaseFirst, b"("),
Transform(b"", kUppercaseFirst, b"."),
Transform(b"", kIdentity, b" not "),
Transform(b" ", kIdentity, b"=\""),
Transform(b"", kIdentity, b"er "),
Transform(b" ", kUppercaseAll, b" "),
Transform(b"", kIdentity, b"al "),
Transform(b" ", kUppercaseAll, b""),
Transform(b"", kIdentity, b"='"),
Transform(b"", kUppercaseAll, b"\""),
Transform(b"", kUppercaseFirst, b". "),
Transform(b" ", kIdentity, b"("),
Transform(b"", kIdentity, b"ful "),
Transform(b" ", kUppercaseFirst, b". "),
Transform(b"", kIdentity, b"ive "),
Transform(b"", kIdentity, b"less "),
Transform(b"", kUppercaseAll, b"'"),
Transform(b"", kIdentity, b"est "),
Transform(b" ", kUppercaseFirst, b"."),
Transform(b"", kUppercaseAll, b"\">"),
Transform(b" ", kIdentity, b"='"),
Transform(b"", kUppercaseFirst, b","),
Transform(b"", kIdentity, b"ize "),
Transform(b"", kUppercaseAll, b"."),
Transform(b"\xc2\xa0", kIdentity, b""),
Transform(b" ", kIdentity, b","),
Transform(b"", kUppercaseFirst, b"=\""),
Transform(b"", kUppercaseAll, b"=\""),
Transform(b"", kIdentity, b"ous "),
Transform(b"", kUppercaseAll, b", "),
Transform(b"", kUppercaseFirst, b"='"),
Transform(b" ", kUppercaseFirst, b","),
Transform(b" ", kUppercaseAll, b"=\""),
Transform(b" ", kUppercaseAll, b", "),
Transform(b"", kUppercaseAll, b","),
Transform(b"", kUppercaseAll, b"("),
Transform(b"", kUppercaseAll, b". "),
Transform(b" ", kUppercaseAll, b"."),
Transform(b"", kUppercaseAll, b"='"),
Transform(b" ", kUppercaseAll, b". "),
Transform(b" ", kUppercaseFirst, b"=\""),
Transform(b" ", kUppercaseAll, b"='"),
Transform(b" ", kUppercaseFirst, b"='")
]
kNumTransforms = len(kTransforms)
def _to_upper_case(p, i):
"""
Overly simplified model of uppercase in utf-8, but what RFC7932 specifies
to use
"""
if p[i] < 0xc0:
if 97 <= p[i] <= 122:
p[i] ^= 32
return 1
if p[i] < 0xe0:
p[i + 1] ^= 32
return 2
p[i + 2] ^= 5
return 3