tlslite/utils/brotlidecpy/huffman.py
# Copyright 2021 Sidney Markowitz All Rights Reserved.
# Distributed under MIT license.
# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
MAX_LENGTH = 15
def _get_next_key(key, length):
"""
Returns reverse(reverse(key, len) + 1, len), where reverse(key, len) is
the bit-wise reversal of the length least significant bits of key
"""
step = 1 << (length - 1)
while key & step:
step >>= 1
return (key & (step - 1)) + step
def _replicate_value(table, i, step, end, code):
"""
Stores code in table[0], table[step], table[2*step], ..., table[end]
Assumes end is integer multiple of step
"""
for index in range(i+end-step, i - step, -step):
table[index] = HuffmanCode(code.bits, code.value)
def _next_table_bit_size(count, length, root_bits):
"""
Returns the table width of the next 2nd level table. count is the histogram
of bit lengths for the remaining symbols, len is the code length of the
next processed symbol
"""
left = 1 << (length - root_bits)
while length < MAX_LENGTH:
left -= count[length]
if left <= 0:
break
length += 1
left <<= 1
return length - root_bits
class HuffmanCode:
def __init__(self, bits, value):
self.bits = bits # number of bits used for this symbol
self.value = value # symbol value or table offset
def brotli_build_huffman_table(
root_table, table, root_bits, code_lengths, code_lengths_size):
start_table = table
# Local variables used
# code current table entry
# len; current code length
# symbol; symbol index in original or sorted table
# key; reversed prefix code
# step; step size to replicate values in current table
# low; low bits for current root entry
# mask; mask for low bits
# table_bits; key length of current table
# table_size; size of current table
# total_size; sum of root table size and 2nd level table sizes
# sorted_symbols; symbols sorted by code length
count = [0] * (MAX_LENGTH + 1) # number of codes of each length
offset = [0] * (MAX_LENGTH + 1) # offsets in sorted table for each length
sorted_symbols = [0] * code_lengths_size
# build histogram of code lengths
for symbol in range(0, code_lengths_size):
count[code_lengths[symbol]] += 1
# generate offsets into sorted symbol table by code length
offset[1] = 0
for length in range(1, MAX_LENGTH):
offset[length + 1] = offset[length] + count[length]
# sort symbols by length, by symbol order within each length
for symbol in range(0, code_lengths_size):
length = code_lengths[symbol]
if length != 0:
sorted_symbols[offset[length]] = symbol
offset[length] += 1
table_bits = root_bits
table_size = 1 << table_bits
total_size = table_size
# special case code with only one value
if offset[MAX_LENGTH] == 1:
for key in range(0, total_size):
root_table[table + key] = HuffmanCode(
0, sorted_symbols[0] & 0xffff)
return total_size
# fill in root table
key = 0
symbol = 0
step = 2
for length in range(1, root_bits+1):
while count[length] > 0:
code = HuffmanCode(length & 0xff, sorted_symbols[symbol] & 0xffff)
symbol += 1
_replicate_value(root_table, table + key, step, table_size, code)
key = _get_next_key(key, length)
count[length] -= 1
step <<= 1
# fill in 2nd level tables and add pointers to root table
mask = total_size - 1
low = -1
step = 2
for length in range(root_bits + 1, MAX_LENGTH+1):
while count[length] > 0:
if (key & mask) != low:
table += table_size
table_bits = _next_table_bit_size(count, length, root_bits)
table_size = 1 << table_bits
total_size += table_size
low = key & mask
root_table[start_table + low] = HuffmanCode(
(table_bits + root_bits) & 0xff,
((table - start_table) - low) & 0xffff)
code = HuffmanCode((length - root_bits) & 0xff,
sorted_symbols[symbol] & 0xffff)
symbol += 1
_replicate_value(root_table, table + (key >> root_bits), step,
table_size, code)
key = _get_next_key(key, length)
count[length] -= 1
step <<= 1
return total_size