utils/fix_rules.py
File `fix_rules.py` has 544 lines of code (exceeds 400 allowed). Consider refactoring.#!/usr/bin/python3 from __future__ import print_function import sysimport osimport jinja2import argparseimport jsonimport re from ssg import yaml, cce, productsfrom ssg.shims import input_funcfrom ssg.utils import read_file_listimport ssgimport ssg.productsimport ssg.rulesimport ssg.rule_yaml SSG_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))TO_SORT = ['identifiers', 'references'] _COMMANDS = dict() def command(name, description): def wrapper(wrapped): _COMMANDS[name] = wrapped wrapped.description = description return wrapped return wrapper Function `has_empty_identifier` has a Cognitive Complexity of 8 (exceeds 7 allowed). Consider refactoring.
Similar blocks of code found in 2 locations. Consider refactoring.def has_empty_identifier(rule_path, rule, rule_lines): if 'identifiers' in rule and rule['identifiers'] is None: return True if 'identifiers' in rule and rule['identifiers'] is not None: for _, value in rule['identifiers'].items(): if str(value).strip() == "": return True return False def has_no_cce(yaml_file, product_yaml=None): rule = yaml.open_and_macro_expand(yaml_file, product_yaml) product = product_yaml["product"] if 'identifiers' in rule and rule['identifiers'] is None: return True if 'identifiers' in rule and rule['identifiers'] is not None: for ident in rule['identifiers']: if ident == "cce@" + product: return False return True Similar blocks of code found in 2 locations. Consider refactoring.def has_empty_references(rule_path, rule, rule_lines): if 'references' in rule and rule['references'] is None: return True if 'references' in rule and rule['references'] is not None: for _, value in rule['references'].items(): if str(value).strip() == "": return True return False def has_prefix_cce(rule_path, rule, rule_lines): if 'identifiers' in rule and rule['identifiers'] is not None: for i_type, i_value in rule['identifiers'].items(): if i_type[0:3] == 'cce': has_prefix = i_value[0:3].upper() == 'CCE' remainder_valid = cce.is_cce_format_valid("CCE-" + i_value[3:]) remainder_valid |= cce.is_cce_format_valid("CCE-" + i_value[4:]) return has_prefix and remainder_valid return False Function `has_invalid_cce` has a Cognitive Complexity of 10 (exceeds 7 allowed). Consider refactoring.def has_invalid_cce(rule_path, rule, rule_lines): if 'identifiers' in rule and rule['identifiers'] is not None: for i_type, i_value in rule['identifiers'].items(): if i_type[0:3] == 'cce':Merge this if statement with the enclosing one. if not cce.is_cce_value_valid("CCE-" + str(i_value)): return True return False Similar blocks of code found in 2 locations. Consider refactoring.def has_int_identifier(rule_path, rule, rule_lines): if 'identifiers' in rule and rule['identifiers'] is not None: for _, value in rule['identifiers'].items(): if type(value) != str: return True return False Similar blocks of code found in 2 locations. Consider refactoring.def has_int_reference(rule_path, rule, rule_lines): if 'references' in rule and rule['references'] is not None: for _, value in rule['references'].items(): if type(value) != str: return True return False def has_duplicated_subkeys(rule_path, rule, rule_lines): return ssg.rule_yaml.has_duplicated_subkeys(rule_path, rule_lines, TO_SORT) def _human_sort(line): # Based on: https://blog.codinghorror.com/sorting-for-humans-natural-sort-order/ def convert(text): return int(text) if text.isdigit() else text return [convert(text) for text in re.split(r'(\d+)', line)] def has_unordered_sections(rule_path, rule, rule_lines): if 'references' in rule or 'identifiers' in rule: new_lines = ssg.rule_yaml.sort_section_keys(rule_path, rule_lines, TO_SORT, sort_func=_human_sort) # Compare string representations to avoid issues with references being # different. return "\n".join(rule_lines) != "\n".join(new_lines) return False def rule_data_generator(args): # Iterates over all know rules in the build system (according to # rule_dir_json.py) and attempts to load the resulting YAML files. # If they parse correctly, yield them as a result. # # Note: this has become a generator rather than returning a list of # results. product_yamls = dict() rule_dirs = json.load(open(args.json)) for rule_id in rule_dirs: rule_obj = rule_dirs[rule_id] if 'products' not in rule_obj or not rule_obj['products']: print(rule_id, rule_obj) assert rule_obj['products'] product = rule_obj['products'][0] if product not in product_yamls: product_path = ssg.products.product_yaml_path(args.root, product) product_yaml = ssg.products.load_product_yaml(product_path) properties_directory = os.path.join(args.root, "product_properties") product_yaml.read_properties_from_directory(properties_directory) product_yamls[product] = product_yaml local_env_yaml = dict(cmake_build_type='Debug') local_env_yaml.update(product_yamls[product]) local_env_yaml['rule_id'] = rule_id rule_path = ssg.rules.get_rule_dir_yaml(rule_obj['dir']) try: rule = yaml.open_and_macro_expand(rule_path, local_env_yaml) rule_lines = read_file_list(rule_path) yield rule_path, rule, rule_lines, product_path, local_env_yaml except jinja2.exceptions.UndefinedError as ue: msg = "Failed to parse file {0} (with product.yml: {1}). Skipping. {2}" msg = msg.format(rule_path, product_path, ue) print(msg, file=sys.stderr) def find_rules_generator(args, func): for item in rule_data_generator(args): rule_path, rule, rule_lines, product_path, local_env_yaml = item if func(rule_path, rule, rule_lines): yield (rule_path, product_path, local_env_yaml) def find_rules(args, func): # Returns find_rules_generator as a list return list(find_rules_generator(args, func)) def print_file(file_contents): for line_num in range(0, len(file_contents)): print("%d: %s" % (line_num, file_contents[line_num])) Function `find_section_lines` has a Cognitive Complexity of 16 (exceeds 7 allowed). Consider refactoring.
Refactor this function to reduce its Cognitive Complexity from 16 to the 15 allowed.def find_section_lines(file_contents, sec): # Hack to find a global key ("section"/sec) in a YAML-like file. # All indented lines until the next global key are included in the range. # For example: # # 0: not_it: # 1: - value # 2: this_one: # 3: - 2 # 4: - 5 # 5: # 6: nor_this: # # for the section "this_one", the result [(2, 5)] will be returned. # Note that multiple sections may exist in a file and each will be # identified and returned. sec_ranges = [] sec_id = sec + ":" sec_len = len(sec_id) end_num = len(file_contents) line_num = 0 while line_num < end_num: if len(file_contents[line_num]) >= sec_len:Merge this if statement with the enclosing one. if file_contents[line_num][0:sec_len] == sec_id: begin = line_num line_num += 1 while line_num < end_num:Avoid deeply nested control flow statements. if len(file_contents[line_num]) > 0 and file_contents[line_num][0] != ' ': break line_num += 1 end = line_num - 1 sec_ranges.append((begin, end)) line_num += 1 return sec_ranges def remove_lines(file_contents, lines): # Returns a series of lines and returns a new copy new_file = [] for line_num in range(0, len(file_contents)): if line_num not in lines: new_file.append(file_contents[line_num]) return new_file Function `remove_section_keys` has a Cognitive Complexity of 14 (exceeds 7 allowed). Consider refactoring.def remove_section_keys(file_contents, yaml_contents, section, removed_keys): # Remove a series of keys from a section. Refuses to operate if there is more # than one instance of the section. If the section is empty (because all keys # are removed), then the section is also removed. Otherwise, only matching keys # are removed. Note that all instances of the keys will be removed, if it appears # more than once. sec_ranges = find_section_lines(file_contents, section) if len(sec_ranges) != 1: raise RuntimeError("Refusing to fix file: %s -- could not find one section: %d" % (path, sec_ranges)) begin, end = sec_ranges[0] r_lines = set() if (yaml_contents[section] is None or len(yaml_contents[section].keys()) == len(removed_keys)): r_lines = set(range(begin, end+1)) print("Removing entire section since all keys are empty") else: # Don't include section header for line_num in range(begin+1, end+1): line = file_contents[line_num].strip() len_line = len(line) for key in removed_keys: k_l = len(key)+1 k_i = key + ":" if len_line >= k_l and line[0:k_l] == k_i: r_lines.add(line_num) break return remove_lines(file_contents, r_lines) def rewrite_value_int_str(line): # Rewrites a key's value to explicitly be a string. Assumes it starts # as an integer. Takes a line. key_end = line.index(':') key = line[0:key_end] value = line[key_end+1:].strip() str_value = '"' + value + '"' return key + ": " + str_value def rewrite_keyless_section(file_contents, yaml_contents, section, content): new_contents = file_contents[:] sec_ranges = find_section_lines(file_contents, section) if len(sec_ranges) != 1: raise RuntimeError("Refusing to fix file: %s -- could not find one section: %d" % (path, sec_ranges)) if len(sec_ranges[0]) != 2: raise RuntimeError("Section has more than one line") Line too long (100 > 99 characters) new_contents[sec_ranges[0][0]] = "{section}: {content}".format(section=section, content=content) return new_contents def rewrite_value_remove_prefix(line): # Rewrites a key's value to remove a "CCE" prefix. key_end = line.index(':') key = line[0:key_end] value = line[key_end+1:].strip() new_value = value if cce.is_cce_format_valid("CCE-" + value[3:]): new_value = value[3:] elif cce.is_cce_format_valid("CCE-" + value[4:]): new_value = value[4:] return key + ": " + new_value def add_to_the_section(file_contents, yaml_contents, section, new_keys): to_insert = [] sec_ranges = find_section_lines(file_contents, section) if len(sec_ranges) != 1: raise RuntimeError("could not find one section: %s" % section) begin, end = sec_ranges[0] assert end > begin, "We need at least one identifier there already" template_line = str(file_contents[end - 1]) leading_whitespace = re.match(r"^\s*", template_line).group() for key, value in new_keys.items(): to_insert.append(leading_whitespace + key + ": " + value) new_contents = file_contents[:end] + to_insert + file_contents[end:] return new_contents def sort_section(file_contents, yaml_contents, section): new_contents = ssg.rule_yaml.sort_section_keys(yaml_contents, file_contents, section) return new_contents Function `rewrite_section_value` has 5 arguments (exceeds 4 allowed). Consider refactoring.def rewrite_section_value(file_contents, yaml_contents, section, keys, transform): # For a given section, rewrite the keys in int_keys to be strings. Refuses to # operate if the given section appears more than once in the file. Assumes all # instances of key are an integer; all will get updated. new_contents = file_contents[:] sec_ranges = find_section_lines(file_contents, section) if len(sec_ranges) != 1: raise RuntimeError("Refusing to fix file: %s -- could not find one section: %d" % (path, sec_ranges)) begin, end = sec_ranges[0] r_lines = set() # Don't include section header for line_num in range(begin+1, end+1): line = file_contents[line_num].strip() len_line = len(line) for key in keys: k_l = len(key)+1 k_i = key + ":" if len_line >= k_l and line[0:k_l] == k_i: new_contents[line_num] = transform(file_contents[line_num]) break return new_contents def rewrite_section_value_int_str(file_contents, yaml_contents, section, int_keys): return rewrite_section_value(file_contents, yaml_contents, section, int_keys, rewrite_value_int_str) Similar blocks of code found in 2 locations. Consider refactoring.def fix_empty_identifier(file_contents, yaml_contents): section = 'identifiers' empty_identifiers = [] if yaml_contents[section] is not None: for i_type, i_value in yaml_contents[section].items(): if str(i_value).strip() == "": empty_identifiers.append(i_type) return remove_section_keys(file_contents, yaml_contents, section, empty_identifiers) Similar blocks of code found in 2 locations. Consider refactoring.def fix_empty_reference(file_contents, yaml_contents): section = 'references' empty_identifiers = [] if yaml_contents[section] is not None: for i_type, i_value in yaml_contents[section].items(): if str(i_value).strip() == "": empty_identifiers.append(i_type) return remove_section_keys(file_contents, yaml_contents, section, empty_identifiers) Function `fix_prefix_cce` has a Cognitive Complexity of 10 (exceeds 7 allowed). Consider refactoring.def fix_prefix_cce(file_contents, yaml_contents): section = 'identifiers' prefixed_identifiers = [] if yaml_contents[section] is not None: for i_type, i_value in yaml_contents[section].items(): if i_type[0:3] == 'cce': has_prefix = i_value[0:3].upper() == 'CCE' remainder_valid = cce.is_cce_format_valid("CCE-" + str(i_value[3:])) remainder_valid |= cce.is_cce_format_valid("CCE-" + str(i_value[4:])) if has_prefix and remainder_valid: prefixed_identifiers.append(i_type) return rewrite_section_value(file_contents, yaml_contents, section, prefixed_identifiers, rewrite_value_remove_prefix) Function `fix_invalid_cce` has a Cognitive Complexity of 10 (exceeds 7 allowed). Consider refactoring.def fix_invalid_cce(file_contents, yaml_contents): section = 'identifiers' invalid_identifiers = [] if yaml_contents[section] is not None: for i_type, i_value in yaml_contents[section].items(): if i_type[0:3] == 'cce':Merge this if statement with the enclosing one. if not cce.is_cce_value_valid("CCE-" + str(i_value)): invalid_identifiers.append(i_type) return remove_section_keys(file_contents, yaml_contents, section, invalid_identifiers) def has_product_cce(yaml_contents, product): section = 'identifiers' invalid_identifiers = [] if not yaml_contents[section]: return False for i_type, i_value in yaml_contents[section].items(): if i_type[0:3] != 'cce' or "@" not in i_type: continue _, cce_product = i_type.split("@", 1) if product == cce_product: return True return False def add_product_cce(file_contents, yaml_contents, product, cce): section = 'identifiers' if section not in yaml_contents: return file_contents new_contents = add_to_the_section( file_contents, yaml_contents, section, {"cce@{product}".format(product=product): cce}) new_contents = sort_section(new_contents, yaml_contents, section) return new_contents Similar blocks of code found in 2 locations. Consider refactoring.def fix_int_identifier(file_contents, yaml_contents): section = 'identifiers' int_identifiers = [] for i_type, i_value in yaml_contents[section].items(): if type(i_value) != str: int_identifiers.append(i_type) return rewrite_section_value_int_str(file_contents, yaml_contents, section, int_identifiers) Similar blocks of code found in 2 locations. Consider refactoring.def fix_int_reference(file_contents, yaml_contents): section = 'references' int_identifiers = [] for i_type, i_value in yaml_contents[section].items(): if type(i_value) != str: int_identifiers.append(i_type) return rewrite_section_value_int_str(file_contents, yaml_contents, section, int_identifiers) def sort_rule_subkeys(file_contents, yaml_contents): return ssg.rule_yaml.sort_section_keys(None, file_contents, TO_SORT, sort_func=_human_sort) def _fixed_file_contents(path, file_contents, product_yaml, func): if file_contents[-1] == '': file_contents = file_contents[:-1] subst_dict = product_yaml yaml_contents = yaml.open_and_macro_expand(path, subst_dict) Similar blocks of code found in 5 locations. Consider refactoring. try: new_file_contents = func(file_contents, yaml_contents) except Exception as exc: msg = "Refusing to fix file: {path}: {error}".format(path=path, error=str(exc)) raise RuntimeError(msg) return new_file_contents def fix_file(path, product_yaml, func): file_contents = open(path, 'r').read().split("\n") new_file_contents = _fixed_file_contents(path, file_contents, product_yaml, func) if file_contents == new_file_contents: return False with open(path, 'w') as f: for line in new_file_contents: print(line, file=f) return True Function `fix_file_prompt` has a Cognitive Complexity of 10 (exceeds 7 allowed). Consider refactoring.def fix_file_prompt(path, product_yaml, func, args): file_contents = open(path, 'r').read().split("\n") new_file_contents = _fixed_file_contents(path, file_contents, product_yaml, func) changes = file_contents != new_file_contents if not changes: return changes need_input = not args.assume_yes and not args.dry_run if need_input: print("====BEGIN BEFORE====") print_file(file_contents) print("====END BEFORE====") if need_input: print("====BEGIN AFTER====") print_file(new_file_contents) print("====END AFTER====") response = 'n' if need_input: response = input_func("Confirm writing output to %s: (y/n): " % path) if args.assume_yes or response.strip().lower() == 'y': changes = True with open(path, 'w') as f: for line in new_file_contents: print(line, file=f) else: changes = False return changes def add_cce(args, product_yaml): directory = os.path.join(args.root, args.subdirectory) cce_pool = cce.CCE_POOLS[args.cce_pool]() return _add_cce(directory, cce_pool, args.rule, product_yaml, args) Function `_add_cce` has a Cognitive Complexity of 11 (exceeds 7 allowed). Consider refactoring.
Function `_add_cce` has 5 arguments (exceeds 4 allowed). Consider refactoring.def _add_cce(directory, cce_pool, rules, product_yaml, args): product = product_yaml["product"] def is_relevant_rule(rule_path, rule, rule_lines): for r in rules: if ( rule_path.endswith("/{r}/rule.yml".format(r=r)) and has_no_cce(rule_path, product_yaml)): return True return False results = find_rules(args, is_relevant_rule) for result in results: rule_path = result[0] cce = cce_pool.random_cce() def fix_callback(file_contents, yaml_contents): return add_product_cce(file_contents, yaml_contents, product_yaml["product"], cce) Similar blocks of code found in 5 locations. Consider refactoring. try: changes = fix_file(rule_path, product_yaml, fix_callback) except RuntimeError as exc: msg = ( "Error adding CCE into {rule_path}: {exc}" .format(rule_path=rule_path, exc=str(exc))) raise RuntimeError(exc) if changes: cce_pool.remove_cce_from_file(cce) @command("empty_identifiers", "check and fix rules with empty identifiers")def fix_empty_identifiers(args, product_yaml): results = find_rules(args, has_empty_identifier) print("Number of rules with empty identifiers: %d" % len(results)) for result in results: rule_path = result[0] product_yaml_path = result[2] if product_yaml_path is not None: product_yaml = yaml.open_raw(product_yaml_path) if args.dry_run: print(rule_path + " has one or more empty identifiers") continue fix_file_prompt(rule_path, product_yaml, fix_empty_identifier, args) exit(int(len(results) > 0)) Similar blocks of code found in 5 locations. Consider refactoring.@command("empty_references", "check and fix rules with empty references")def fix_empty_references(args, product_yaml): results = find_rules(args, has_empty_references) print("Number of rules with empty references: %d" % len(results)) for result in results: rule_path = result[0] product_yaml = result[2] if args.dry_run: print(rule_path + " has one or more empty references") continue fix_file_prompt(rule_path, product_yaml, fix_empty_reference, args) exit(int(len(results) > 0)) @command("prefixed_identifiers", "check and fix rules with prefixed (CCE-) identifiers")def find_prefix_cce(args): results = find_rules(args, has_prefix_cce) print("Number of rules with prefixed CCEs: %d" % len(results)) for result in results: rule_path = result[0] product_yaml = result[2] if args.dry_run: print(rule_path + " has one or more CCE with CCE- prefix") continue fix_file_prompt(rule_path, product_yaml, fix_prefix_cce, args) exit(int(len(results) > 0)) Similar blocks of code found in 5 locations. Consider refactoring.@command("invalid_identifiers", "check and fix rules with invalid identifiers")def find_invalid_cce(args, product_yamls): results = find_rules(args, has_invalid_cce) print("Number of rules with invalid CCEs: %d" % len(results)) for result in results: rule_path = result[0] product_yaml = result[2] if args.dry_run: print(rule_path + " has one or more invalid CCEs") continue fix_file_prompt(rule_path, product_yaml, fix_invalid_cce, args) exit(int(len(results) > 0)) Similar blocks of code found in 5 locations. Consider refactoring.@command("int_identifiers", "check and fix rules with pseudo-integer identifiers")def find_int_identifiers(args, product_yaml): results = find_rules(args, has_int_identifier) print("Number of rules with integer identifiers: %d" % len(results)) for result in results: rule_path = result[0] product_yaml = result[2] if args.dry_run: print(rule_path + " has one or more integer references") continue fix_file_prompt(rule_path, product_yaml, fix_int_identifier, args) exit(int(len(results) > 0)) Similar blocks of code found in 5 locations. Consider refactoring.@command("int_references", "check and fix rules with pseudo-integer references")def find_int_references(args, product_yaml): results = find_rules(args, has_int_reference) print("Number of rules with integer references: %d" % len(results)) for result in results: rule_path = result[0] product_yaml = result[2] if args.dry_run: print(rule_path + " has one or more unsorted integer references") continue fix_file_prompt(rule_path, product_yaml, fix_int_reference, args) exit(int(len(results) > 0)) @command("duplicate_subkeys", "check for duplicated references and identifiers")def duplicate_subkeys(args, product_yaml): results = find_rules(args, has_duplicated_subkeys) print("Number of rules with duplicated subkeys: %d" % len(results)) for result in results: print(result[0] + " has one or more duplicated subkeys") exit(int(len(results) > 0)) Similar blocks of code found in 5 locations. Consider refactoring.@command("sort_subkeys", "sort references and identifiers")def sort_subkeys(args, product_yaml): results = find_rules(args, has_unordered_sections) print("Number of modified rules: %d" % len(results)) for result in results: rule_path = result[0] product_yaml = result[2] if args.dry_run: print(rule_path + " has one or more unsorted references") continue fix_file_prompt(rule_path, product_yaml, sort_rule_subkeys, args) exit(int(len(results) > 0)) @command("test_all", "Perform all checks on all rules")def test_all(args, product_yaml): result = 0 checks = [ (has_empty_identifier, "empty identifiers"), (has_invalid_cce, "invalid CCEs"), (has_int_identifier, "integer references"), (has_empty_references, "empty references"), (has_int_reference, "unsorted references"), (has_duplicated_subkeys, "duplicated subkeys"), (has_unordered_sections, "unsorted references") ] for item in rule_data_generator(args): rule_path, rule, rule_lines, _, _ = item for func, msg in checks: if func(rule_path, rule, rule_lines): print("Rule '%s' has %s" % (rule_path, msg)) result = 1 exit(result) def create_parser_from_functions(subparsers): for name, function in _COMMANDS.items(): subparser = subparsers.add_parser(name, description=function.description) subparser.set_defaults(func=function) def create_other_parsers(subparsers): subparser = subparsers.add_parser("add-cce", description="Add CCE to rule files") subparser.add_argument("rule", nargs="+") subparser.add_argument("--subdirectory", default="linux_os") subparser.add_argument( "--cce-pool", "-p", default="redhat", choices=list(cce.CCE_POOLS.keys()), ) subparser.set_defaults(func=add_cce) def parse_args(): parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description="Utility for fixing mistakes in rule files") parser.add_argument( "-y", "--assume-yes", default=False, action="store_true", help="Assume yes and overwrite all files (no prompt)") parser.add_argument( "-d", "--dry-run", default=False, action="store_true", help="Assume no and don't overwrite any files") parser.add_argument( "-j", "--json", type=str, action="store", default="build/rule_dirs.json", help="File to read json " "output of rule_dir_json.py from (defaults to " "build/rule_dirs.json") parser.add_argument( "-r", "--root", default=SSG_ROOT, help="Path to root of the project directory") parser.add_argument("--product", "-p", help="Path to the main product.yml") subparsers = parser.add_subparsers(title="command", help="What to perform.") subparsers.required = True create_parser_from_functions(subparsers) create_other_parsers(subparsers) return parser.parse_args() def __main__(): args = parse_args() project_root = args.root if not project_root: project_root = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.path.pardir) subst_dict = dict() if args.product: subst_dict = dict() product = products.load_product_yaml(args.product) product.read_properties_from_directory(os.path.join(project_root, "product_properties")) subst_dict.update(product) args.func(args, subst_dict) if __name__ == "__main__": __main__()