# chromaterm/__init__.py
'''Color your output to terminal'''
import re
import sys
import time
__version__ = '0.10.8-dev'

# Registry of the supported color types, keyed by type name. Each entry has:
#   * 'reset': the SGR code that restores the default for that type,
#   * 're': a pattern recognising the SGR codes that belong to that type, and
#   * 'code' (styles only): the SGR code that switches the style on.
# The 'fg' and 'bg' entries carry no 'code'; `Color` builds those codes from
# the requested color. The order of the entries is significant because the
# style names are derived from it.
COLOR_TYPES = {
    'fg': {
        'reset': b'\x1b[39m',
        're': re.compile(br'\x1b\[(?:3[0-79]|9[0-7]|38;[0-9;]+)m'),
    },
    'bg': {
        'reset': b'\x1b[49m',
        're': re.compile(br'\x1b\[(?:4[0-79]|10[0-7]|48;[0-9;]+)m'),
    },
    'blink': {
        'code': b'\x1b[5m',
        'reset': b'\x1b[25m',
        're': re.compile(br'\x1b\[2?5m'),
    },
    'bold': {
        'code': b'\x1b[1m',
        # Normal intensity
        'reset': b'\x1b[22m',
        # Any intensity type
        're': re.compile(br'\x1b\[(?:1|2?2)m'),
    },
    'invert': {
        'code': b'\x1b[7m',
        'reset': b'\x1b[27m',
        're': re.compile(br'\x1b\[2?7m'),
    },
    'italic': {
        'code': b'\x1b[3m',
        'reset': b'\x1b[23m',
        're': re.compile(br'\x1b\[2?3m'),
    },
    'strike': {
        'code': b'\x1b[9m',
        'reset': b'\x1b[29m',
        're': re.compile(br'\x1b\[2?9m'),
    },
    'underline': {
        'code': b'\x1b[4m',
        'reset': b'\x1b[24m',
        're': re.compile(br'\x1b\[2?4m'),
    },
}

# A palette color reference: target (`b` or `f`), a dot, then the color name
PALETTE_COLOR_RE = re.compile(r'\b([bf])\.([a-z0-9-_]+)\b')

# Select Graphic Rendition sequences: `SGR_RE` accepts any parameter and
# intermediate bytes, `SGR_COLOR_RE` only digits and semicolons (colors)
SGR_RE = re.compile(br'\x1b\x5b[\x30-\x3f]*[\x20-\x2f]*\x6d')
SGR_COLOR_RE = re.compile(br'\x1b\x5b[0-9;]*\x6d')
class Color:
    '''A color and its ANSI escape codes.'''

    # pylint: disable=too-many-instance-attributes

    def __init__(self, color, palette=None, rgb=None):
        '''Constructor.

        Args:
            color (str): A string which must contain:
                * one foreground color (hex color `f#` or palette color `f.`),
                * one background color (hex color `b#` or palette color `b.`),
                * at least one style (blink, bold, invert, italic, strike,
                  underline), or
                * a combination of the above, separated by spaces.
                Example: `"b#123123 f.status-1 bold"`
            palette (chromaterm.Palette): Palette to resolve palette colors.
            rgb (bool): Whether the color is meant for RGB-enabled terminals or
                not. Within this class, a falsy value (`False` or `None`)
                downscales the RGB colors to xterm-256.

        Raises:
            TypeError: If `color` is not a string. If `rgb` is not a boolean.
            ValueError: If the format of `color` is invalid. If palette color is
                used with `palette=None`.
        '''
        # The color setter reads `palette` and `rgb`, so they are set first
        self.palette = palette
        self.rgb = rgb
        self.color = color

    @property
    def color(self):
        '''String that represents the color.'''
        return self._color

    @color.setter
    def color(self, value):
        if not isinstance(value, str):
            raise TypeError('color must be a string')

        # `color` keeps the normalized text as provided (palette names intact);
        # `value` is what gets resolved and parsed
        color = value = value.lower().strip()
        styles = tuple(k for k, v in COLOR_TYPES.items() if v.get('code'))
        color_re = r'(([bf]#[0-9a-f]{6}|' + '|'.join(styles) + r')(\s+|$))+'
        color_code = color_reset = b''
        color_types = []

        if PALETTE_COLOR_RE.search(value):
            if not self.palette:
                raise ValueError(
                    'palette color name present, but no palette specified')

            value = self.palette.resolve(value)

        if not re.fullmatch(color_re, value):
            raise ValueError(f'invalid color format {repr(value)}')

        # Colors
        for target, hex_code in re.findall(r'([bf])#([0-9a-f]{6})', value):
            if target == 'f':
                target, color_type = b'\x1b[38;', 'fg'
            else:
                target, color_type = b'\x1b[48;', 'bg'

            if any(color_type == existing for existing, _ in color_types):
                raise ValueError('color accepts one foreground and one '
                                 'background colors')

            # Break down hex color to RGB integers
            rgb_int = [int(hex_code[i:i + 2], 16) for i in (0, 2, 4)]

            if self.rgb:
                target += b'2;'
                color_id = b'%d;%d;%d' % tuple(rgb_int)
            else:
                target += b'5;'
                color_id = b'%d' % self.rgb_to_xterm256(*rgb_int)

            # Resets are prepended so they end up in reverse order of the codes
            color_code = color_code + target + color_id + b'm'
            color_reset = COLOR_TYPES[color_type]['reset'] + color_reset
            color_types.append((color_type, target + color_id + b'm'))

        # Styles; `dict.fromkeys` drops duplicates while keeping their order
        for style in dict.fromkeys(re.findall('|'.join(styles), value)):
            color_code = color_code + COLOR_TYPES[style]['code']
            color_reset = COLOR_TYPES[style]['reset'] + color_reset
            color_types.append((style, COLOR_TYPES[style]['code']))

        self._color = ' '.join(dict.fromkeys(color.split()))
        self.color_code = color_code
        self.color_reset = color_reset
        self.color_types = color_types

    @property
    def rgb(self):
        '''Flag for RGB-support. When changed, updates `self.color`.'''
        return self._rgb

    @rgb.setter
    def rgb(self, value):
        if value is not None and not isinstance(value, bool):
            raise TypeError('rgb must be a boolean')

        self._rgb = value

        # Update the color if present; it won't be during __init__
        if hasattr(self, '_color'):
            self.color = self._color

    @staticmethod
    def decode_sgr(source_color_code, is_reset=False):
        '''Decodes an SGR, splitting it into a list of colors, each being a list
        containing color code (bytes), is reset (bool), and color type (str key
        of `COLOR_TYPES`, or `None` when no type applies).

        Input that is not exactly one color SGR is returned untouched as a
        single entry of `[source_color_code, False, None]`.

        Args:
            source_color_code (bytes): Bytes to be split into individual colors.
            is_reset (bool): Consider all identified colors as resets.
        '''
        # Includes non-color characters; don't touch it. A full match is
        # required so that anything surrounding the SGR cannot reach `int()`
        if not SGR_COLOR_RE.fullmatch(source_color_code):
            return [[source_color_code, False, None]]

        def make_sgr(code_id):
            '''Wraps a code in the SGR introducer and terminator.'''
            return b'\x1b[' + code_id + b'm'

        colors = []
        # Drop the leading `ESC [` and the trailing `m`
        codes = source_color_code[2:-1].split(b';')
        skip = 0

        for index, code in enumerate(codes):
            # Code processed by an index look-ahead; skip it
            if skip:
                skip -= 1
                continue

            # Full reset
            if code == b'' or int(code) == 0:
                colors.append([make_sgr(b'0'), True, None])
            # Multi-code SGR
            elif code in (b'38', b'48'):
                color_type = 'fg' if code == b'38' else 'bg'

                # xterm-256
                if len(codes) > index + 2 and codes[index + 1] == b'5':
                    skip = 2
                    code = b';'.join(codes[index:index + 3])
                # RGB
                elif len(codes) > index + 4 and codes[index + 1] == b'2':
                    skip = 4
                    code = b';'.join(codes[index:index + 5])
                # Does not conform to format; do not touch code
                else:
                    return [[source_color_code, False, None]]

                colors.append([make_sgr(code), is_reset, color_type])
            # Single-code SGR
            else:
                color = [make_sgr(b'%d' % int(code)), False, None]

                for name, color_type in COLOR_TYPES.items():
                    if color_type['re'].search(color[0]):
                        color[1] = is_reset or color[0] == color_type['reset']
                        color[2] = name

                        # Types don't overlap; only one can match
                        break

                colors.append(color)

        return colors

    @staticmethod
    def rgb_to_xterm256(_r, _g, _b):
        '''Downscale from 24-bit RGB to xterm-256.

        Args:
            _r (int): Red component.
            _g (int): Green component.
            _b (int): Blue component.

        Returns the xterm-256 index of either the closest color-cube entry or
        the closest grayscale entry, whichever is nearer to the input.
        '''
        def index(value, steps):
            '''Returns index of the step closest to value.'''
            return steps.index(min(steps, key=lambda x: abs(x - value)))

        def distance(new_r, new_g, new_b):
            '''Magnify the differences (like stdev, but avg/sqrt not needed).'''
            return (new_r - _r)**2 + (new_g - _g)**2 + (new_b - _b)**2

        # Steps between 2 shades https://www.ditig.com/256-colors-cheat-sheet,
        # the index of the closest step, and the distance to the input color
        rgb_steps = (0, 95, 135, 175, 215, 255)
        rgb_index = [index(x, rgb_steps) for x in (_r, _g, _b)]
        rgb_distance = distance(*[rgb_steps[x] for x in rgb_index])

        gray_steps = tuple(range(8, 239, 10))
        gray_index = index((_r + _g + _b) // 3, gray_steps)
        gray_distance = distance(*[gray_steps[gray_index]] * 3)

        # Grayscale entries start at 232; the color cube starts at 16
        if gray_distance < rgb_distance:
            return 232 + gray_index

        return 16 + (36 * rgb_index[0]) + (6 * rgb_index[1]) + rgb_index[2]

    @staticmethod
    def strip_colors(data):
        '''Returns data after stripping the existing colors and a list of inserts
        containing the stripped colors. The format of the insert is that of
        `Config.get_inserts`.

        Args:
            data (bytes): Bytes from which the colors should be stripped.
        '''
        inserts = []
        match = SGR_RE.search(data)

        while match:
            start, end = match.span()

            # Existing colors are marked as resets to indicate that they can be
            # updated if ChromaTerm already matched the same data
            for color in Color.decode_sgr(match.group(), is_reset=True):
                color.insert(0, start)
                # Prepending keeps the list in descending position order
                inserts.insert(0, color)

            # Next color's start index ignores length of this color
            data = data[:start] + data[end:]
            match = SGR_RE.search(data)

        return data, inserts
class Palette:
    '''A color palette that maps names to RGB hex codes.'''

    def __init__(self):
        '''Creates a palette without any colors.'''
        self.colors = {}

    def add_color(self, name, value):
        '''Registers `value` in the palette under `name`.

        Both arguments are lower-cased and stripped of surrounding whitespace
        before they are validated and stored.

        Args:
            name (str): The color name to be referenced. Accepts `[a-z0-9-_]+`.
            value (str): A hex color, like `#123abc`.

        Raises:
            ValueError: If `name` is reserved, already exists, or uses invalid
                characters. If `value` uses invalid characters.
            TypeError: If `name` or `value` are not strings.
        '''
        if not isinstance(name, str):
            raise TypeError('color name must be a string')
        if not isinstance(value, str):
            raise TypeError('color value must be a string')

        key = name.lower().strip()
        hex_color = value.lower().strip()

        # Names of the color types cannot double as palette names
        if key in COLOR_TYPES:
            raise ValueError('color name is reserved')

        if key in self.colors:
            raise ValueError('a color with the same name already exists')

        if re.fullmatch(r'[a-z0-9-_]+', key) is None:
            raise ValueError('name accepts alphanumerics, dashes, and '
                             'underscores only')

        if re.fullmatch(r'#[0-9a-f]{6}', hex_color) is None:
            raise ValueError('palette color must be in `#123abc` format')

        self.colors[key] = hex_color

    def resolve(self, color):
        '''Returns `color` with every palette reference swapped for its hex
        value (e.g. `b.color_name` becomes `b#123abc`).

        Args:
            color (str): the string that describes a color.

        Raises:
            TypeError: If `color` is not a string.
            ValueError: If a color is not found in the palette.
        '''
        if not isinstance(color, str):
            raise TypeError('color must be a string')

        resolved = color.lower().strip()
        references = list(PALETTE_COLOR_RE.finditer(resolved))

        # Replace from right to left so the spans of the earlier references
        # remain valid after each substitution
        for reference in reversed(references):
            target, name = reference.groups()

            if name not in self.colors:
                raise ValueError(f'color {repr(name)} not in palette')

            head = resolved[:reference.start()]
            tail = resolved[reference.end():]
            resolved = head + f'{target}{self.colors[name]}' + tail

        return resolved
class Rule:
    '''A rule containing a regex and colors corresponding to the regex's groups.'''

    # pylint: disable=import-outside-toplevel,too-many-arguments,too-many-instance-attributes

    def __init__(self,
                 regex,
                 color=None,
                 description=None,
                 exclusive=False,
                 pcre=False):
        '''Constructor.

        Args:
            regex (str): Regular expression for getting matches in data.
            color (chromaterm.Color, dict): Color used to highlight the entire
                match. Can be a dictionary of {group: color} format.
            description (str): String to help identify the rule.
            exclusive (bool): Whether other rules should overlap with this one.
            pcre (bool): Whether to use PCRE2 or default to Python's RE.

        Raises:
            TypeError: If `regex` or `description` is not a string. If
                `exclusive` or `pcre` is not a boolean. If `color` is not an
                instance of `Color` or not `None`.
            ValueError: If a group in `color` does not exist in `regex`.
        '''
        if description is not None and not isinstance(description, str):
            raise TypeError('description must be a string')

        if not isinstance(exclusive, bool):
            raise TypeError('exclusive must be a boolean')

        self.colors = {}
        self.description = description
        self.exclusive = exclusive
        # `pcre` goes before `regex`; the regex setter picks the engine from it
        self.pcre = pcre
        self.regex = regex

        # A lone color applies to the full match (group 0)
        if not isinstance(color, dict):
            color = {0: color}

        for group, value in color.items():
            self.set_color(value, group)

    @property
    def color(self):
        '''Color used for highlight the full match (group 0) of regex.'''
        return self.colors.get(0)

    @color.setter
    def color(self, value):
        self.set_color(value)

    @property
    def pcre(self):
        '''True when the PCRE engine is used. False means Python's RE.'''
        return self._pcre

    @pcre.setter
    def pcre(self, value):
        if not isinstance(value, bool):
            raise TypeError('pcre must be a boolean')

        self._pcre = value

        # Recompile the regex if present; it won't be during __init__
        if hasattr(self, '_regex'):
            self.regex = self._regex

    @property
    def regex(self):
        '''The regex pattern.'''
        return self._regex

    @regex.setter
    def regex(self, value):
        if not isinstance(value, str):
            raise TypeError('regex must be a string')

        self._regex = value

        # Matching is done against bytes, so the pattern is compiled as bytes
        if self.pcre:
            import chromaterm.pcre
            self._regex_object = chromaterm.pcre.Pattern(value.encode())
        else:
            self._regex_object = re.compile(value.encode())

    def get_matches(self, data):
        '''Returns a list of tuples, each containing a start index, an end index,
        and the `Color` object for that match.

        Args:
            data (bytes): Bytes to match regex against.
        '''
        matches = []

        for match in self._regex_object.finditer(data):
            for group in self.colors:
                start, end = match.span(group)

                # Ignore zero-length matches, like unmatched optional groups
                # New lines in data can only come from exclusive rules
                if start != end and b'\n' not in data[start:end]:
                    matches.append((start, end, self.colors[group]))

        return matches

    def set_color(self, color, group=0):
        '''Sets a color to be used when highlighting. The group can be used to
        limit the parts of the match which are highlighted. Group 0 (the default)
        will highlight the entire match. If a color already exists for the group,
        it is overwritten. If `color` is None, the color of `group` is cleared.

        Args:
            color (chromaterm.Color): A color for highlighting the matched input.
            group (int, str): The regex group to be be highlighted with the color.

        Raises:
            TypeError: If `color` is not an instance of `Color` or None. If
                `group` is not an integer or string.
            ValueError: If `group` does not exist in the regular expression,
                which includes negative group numbers.
        '''
        if not isinstance(group, (int, str)):
            raise TypeError('group must be an integer or a string')

        if isinstance(group, str):
            if not self._regex_object.groupindex.get(group):
                raise ValueError(f'named group {repr(group)} not in regex')

            # Resolve the named group to its index
            group = self._regex_object.groupindex[group]

        if color is None:
            self.colors.pop(group, None)
            return

        if not isinstance(color, Color):
            raise TypeError('color must be a chromaterm.Color')

        # Negative numbers are rejected as well; storing one would only fail
        # later, when `get_matches` asks the match for that group's span
        if not 0 <= group <= self._regex_object.groups:
            raise ValueError(f'regex has {self._regex_object.groups} group(s);'
                             f' {group} is invalid')

        self.colors[group] = color

        # Sort by group number to ensure deterministic highlighting
        self.colors = {k: self.colors[k] for k in sorted(self.colors)}
class Config:
    '''An aggregation of multiple rules which highlights by performing the regex
    matching of the rules before any colors are added.'''

    def __init__(self, benchmark=False):
        '''Constructor.

        Args:
            benchmark (bool): Measure usage (duration, match count) of the rules.
        '''
        # Reset code currently in effect for each color type; `highlight`
        # updates it from the inserts it applies
        self._reset_codes = {k: v['reset'] for k, v in COLOR_TYPES.items()}
        self.benchmark = benchmark
        # Maps a rule to its accumulated (duration, match count)
        self.benchmark_results = {}
        self.rules = []

    @staticmethod
    def get_insert_index(start, end, inserts):
        '''Returns a tuple containing the start and end indices for where they
        should be inserted into the inserts list in order to maintain the
        position-based descending (reverse) order.

        Args:
            start (int): The start position of a match.
            end (int): The end position of a match.
            inserts (list): A list of inserts, where the first item of each insert
                is the position.
        '''
        start_index = end_index = None
        # Stays at -1 for an empty list so that the fallback below yields 0
        index = -1

        # Arrange the inserts in reverse order (index magic over data)
        for index, (position, _, _, _) in enumerate(inserts):
            if start_index is None and start >= position:
                start_index = index

            # In the case of overlapping matches, other colors exist between
            # the start and the end, so the end index needs to be located
            # independently
            if end_index is None and end > position:
                end_index = index

            if start_index is not None and end_index is not None:
                return start_index, end_index

        # If an index wasn't found, then it belongs at the end of the list
        if start_index is None:
            start_index = index + 1

        if end_index is None:
            end_index = index + 1

        return start_index, end_index

    def get_inserts(self, data, inserts):
        '''Returns a list containing the inserts for the color codes relative to
        data. An insert is a list containing a position (index relative to data),
        the code to be inserted, a boolean indicating if its a reset code or not,
        and the color type which corresponds to COLOR_TYPES or `None` if it's a
        full SGR reset.

        The list of inserts is ordered in descending order based on the position
        of each insert relative to the data. This makes them easy to insert into
        data without calculating index offsets.

        The `inserts` list is modified in place and also returned.

        Args:
            data (bytes): Bytes from which the inserts are gathered.
            inserts (list): Any pre-existing inserts to be added to it.
        '''
        # A lot of the code here is difficult to comprehend directly, because the
        # intent might not be immediately visible. You may find it easier to take
        # a test-driven approach by looking at the test_config_highlight_* tests
        for start, end, color in self.get_matches(data):
            start_index, end_index = self.get_insert_index(start, end, inserts)

            # Each color type requires tracking of its respective type
            for color_type, color_code in color.color_types:
                # Find the last color before the end of this match (if any) and
                # use it as the reset code for this color
                for insert in inserts[end_index:]:
                    if insert[3] == color_type:
                        reset = insert[1]
                        break

                    # No type (a full reset); use the default for this type
                    if insert[2] and insert[3] is None:
                        reset = COLOR_TYPES[color_type]['reset']
                        break
                else:
                    # Nothing earlier in this data; use the tracked reset code
                    reset = self._reset_codes[color_type]

                start_insert = [start, color_code, False, color_type]
                end_insert = [end, reset, True, color_type]

                # Replace every color reset of the current color type with our
                # color code to prevent them from interrupting this color
                for insert in inserts[end_index:start_index]:
                    if insert[2] and insert[3] in (color_type, None):
                        # A full reset is moved forward to our reset (replaced)
                        if insert[3] is None:
                            end_insert[1:4] = insert[1:4]

                        insert[1:4] = color_code, False, color_type

                # Relative to data, the inserts are added in reverse order LI-FO
                inserts.insert(start_index, start_insert)
                inserts.insert(end_index, end_insert)

                # Advance to ensure the slices above search appropriately if
                # multiple color types exist
                start_index += 1
                end_index += 1

        return inserts

    def get_matches(self, data):
        '''Returns a list of tuples, each of which containing a start index, an
        end index, and the `Color` object for that match. The tuples of the
        latter rules are towards the end of the list.

        Args:
            data (bytes): Bytes against which each rule is matched.
        '''
        matches = []

        for rule in self.rules:
            if self.benchmark:
                duration, count = self.benchmark_results.get(rule, (0, 0))

                checkpoint = time.perf_counter()
                rule_matches = rule.get_matches(data)
                duration += time.perf_counter() - checkpoint
                count += len(rule_matches)

                self.benchmark_results[rule] = (duration, count)
            else:
                rule_matches = rule.get_matches(data)

            # If overlap is not allowed, replace any matches with \n to prevent
            # overlap while maintaining correct indices on other rules' matches
            if rule.exclusive:
                for start, end, _ in rule_matches:
                    data = data[:start] + b'\n' * (end - start) + data[end:]

            matches += rule_matches

        return matches

    def highlight(self, data):
        '''Returns a highlighted bytes of `data`. The matches from the rules
        are gathered prior to inserting any color codes, making it so the rules
        can match without the color codes interfering.

        Args:
            data (bytes): Bytes to highlight.
        '''
        data, inserts = Color.strip_colors(data)
        inserts = self.get_inserts(data, inserts)
        # Types whose tracked reset code has not been refreshed by this data
        resets_to_update = list(self._reset_codes)

        # Inserts are in descending position order, so adding a code never
        # shifts the position of the inserts that follow
        for position, color_code, is_reset, color_type in inserts:
            data = data[:position] + color_code + data[position:]

            if resets_to_update:
                # A full reset; default the remaining resets
                if color_type is None and is_reset:
                    for key in resets_to_update:
                        self._reset_codes[key] = COLOR_TYPES[key]['reset']

                    resets_to_update = []
                elif color_type in resets_to_update:
                    self._reset_codes[color_type] = color_code
                    resets_to_update.remove(color_type)

        return data

    def print_benchmark_results(self, descending=True, file=sys.stderr):
        '''Prints the benchmark results, sorted by time spent.

        Each line shows the rule's share of the total time, its duration, its
        match count, and its description (or the start of its regex). Nothing
        is printed when there are no results.

        Args:
            descending (bool): Ordering of the results.
            file (object): The file to which the results are printed.
        '''
        total = sum(x[0] for x in self.benchmark_results.values())

        if self.benchmark_results:
            print('Benchmark results (time spent, match count):', file=file)

        for rule, (duration, count) in sorted(self.benchmark_results.items(),
                                              key=lambda x: x[1],
                                              reverse=descending):
            # The total can be zero (e.g. every duration recorded as 0); report
            # a 0% share instead of dividing by zero
            share = duration / total if total else 0.0

            print(
                f'{share:^7.2%} {duration:.3f}s {count:<7} '
                f'{rule.description or repr(rule.regex[:30])}',
                file=file,
            )