File subsetter.py has 569 lines of code (exceeds 250 allowed). Consider refactoring.
"""
Generate a random sample of rows from a relational database that preserves
referential integrity - so long as constraints are defined, all parent rows
will exist for child rows.
Function create_row_in has a Cognitive Complexity of 56 (exceeds 5 allowed). Consider refactoring.
def create_row_in(self, source_row, target_db, target, prioritized=False):
logging.debug('create_row_in %s:%s ' %
(target.name, target.pk_val(source_row)))
pks = hashable((source_row[key] for key in target.pk))
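Complexity at this level usually comes from interleaving bookkeeping with the actual lookups. One common first step is to pull the "has this row already been copied?" check into an early-return helper. A minimal sketch, with made-up names (already_created and the seen_pks dict are not the project's actual bookkeeping):

def already_created(seen_pks, table_name, pk_values):
    # Remember which primary keys have already been copied for each table so
    # the caller can return early instead of re-checking inside nested branches.
    seen = seen_pks.setdefault(table_name, set())
    if pk_values in seen:
        return True
    seen.add(pk_values)
    return False

create_row_in could then start with a guard such as "if already_created(self.seen_pks, target.name, pks): return", keeping the rest of the body one level flatter.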
Similar blocks of code found in 2 locations. Consider refactoring.
for (parent_col, child_col) in zip(fk['referred_columns'],
fk['constrained_columns']):
slct = slct.where(target_parent.c[parent_col] ==
source_row[child_col])
if source_row[child_col] is not None:
Similar blocks of code found in 2 locations. Consider refactoring.
for (referred_col, constrained_col) in zip(
constraint['referred_columns'],
constraint['constrained_columns']):
slct = slct.where(target_referred.c[referred_col] ==
source_row[constrained_col])
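Both flagged locations build the same kind of filtered SELECT: loop over paired referred/constrained column names and add one equality condition per pair. A single shared helper removes the duplication; a minimal sketch, assuming SQLAlchemy 1.4+ select() style (the name filtered_parent_select is invented here):

import sqlalchemy as sa

def filtered_parent_select(parent_table, column_pairs, source_row):
    # column_pairs is a list of (parent_col, child_col) names, e.g. built from
    # zip(fk['referred_columns'], fk['constrained_columns']).
    # Returns the SELECT plus a flag saying whether any FK value is non-NULL,
    # so callers can skip the lookup for all-NULL foreign keys.
    slct = sa.select(parent_table)
    any_non_null = False
    for (parent_col, child_col) in column_pairs:
        slct = slct.where(parent_table.c[parent_col] == source_row[child_col])
        if source_row[child_col] is not None:
            any_non_null = True
    return slct, any_non_null

Both call sites then reduce to one call plus their own handling of the result.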
Function __init__ has a Cognitive Complexity of 22 (exceeds 5 allowed). Consider refactoring.
def __init__(self, sqla_conn, args, schemas=[None]):
self.args = args
self.sqla_conn = sqla_conn
self.schemas = schemas
self.engine = sa.create_engine(sqla_conn)
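An __init__ this complex usually mixes plain attribute assignment with reflection and filtering work. One hedged sketch of how the setup could be split; the Db class name and _reflect_schemas helper are illustrative, and the mutable schemas=[None] default is swapped for a tuple, a separate but commonly recommended fix:

import sqlalchemy as sa

class Db(object):
    def __init__(self, sqla_conn, args, schemas=(None, )):
        self.args = args
        self.sqla_conn = sqla_conn
        self.schemas = list(schemas)
        self.engine = sa.create_engine(sqla_conn)
        self.meta = sa.MetaData()
        self.tables = {}
        self._reflect_schemas()

    def _reflect_schemas(self):
        # Reflection (and any per-table filtering) lives in its own method,
        # so its loops and branches no longer count against __init__ itself.
        for schema in self.schemas:
            self.meta.reflect(bind=self.engine, schema=schema)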
Function create_subset_in has a Cognitive Complexity of 20 (exceeds 5 allowed). Consider refactoring.
def create_subset_in(self, target_db):
for (tbl_name, pks) in self.args.force_rows.items():
if '.' in tbl_name:
(tbl_schema, tbl_name) = tbl_name.split('.', 1)
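Part of the branching here is just splitting schema-qualified table names. That can be isolated in a small pure function, which is also easy to unit test (the helper name is hypothetical):

def split_table_name(qualified_name, default_schema=None):
    """Split 'schema.table' into (schema, table); bare names keep the default schema."""
    if '.' in qualified_name:
        schema, name = qualified_name.split('.', 1)
        return (schema, name)
    return (default_schema, qualified_name)

The loop body then becomes "(tbl_schema, tbl_name) = split_table_name(tbl_name)" with no inline conditional.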
Function _random_row_gen_fn has a Cognitive Complexity of 15 (exceeds 5 allowed). Consider refactoring.
def _random_row_gen_fn(self):
"""
Random sample of *approximate* size n
"""
if self.n_rows:
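The docstring's "approximate size n" is the key idea: one way to get a sample of roughly n rows is to keep each row with probability n/total, so no exact count or second pass is needed. The stand-alone illustration below shows that idea only; it is not necessarily the query subsetter.py issues:

import random

def approximate_sample(rows, fraction):
    # Keep each row with probability `fraction`; the resulting sample size is
    # approximately fraction * total without knowing the total up front.
    for row in rows:
        if random.random() < fraction:
            yield row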
Function assign_target has a Cognitive Complexity of 14 (exceeds 5 allowed). Consider refactoring.
def assign_target(self, target_db):
for ((tbl_schema, tbl_name), tbl) in self.tables.items():
tbl._random_row_gen_fn = types.MethodType(_random_row_gen_fn, tbl)
tbl.random_rows = tbl._random_row_gen_fn()
tbl.next_row = types.MethodType(_next_row, tbl)
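The lines above use types.MethodType to bind free functions onto individual table objects at runtime. A tiny self-contained example of that pattern, with made-up names, in case the idiom is unfamiliar:

import types

class Table(object):
    pass

def describe(self):
    return 'rows for %s' % self.name

tbl = Table()
tbl.name = 'users'
# Bind the module-level function to this one instance; this is the same
# mechanism assign_target uses for _random_row_gen_fn and _next_row.
tbl.describe = types.MethodType(describe, tbl)
print(tbl.describe())   # -> rows for users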
Function _find_n_rows has a Cognitive Complexity of 11 (exceeds 5 allowed). Consider refactoring.
def _find_n_rows(self, estimate=False):
self.n_rows = 0
if estimate:
try:
if self.db.engine.driver in ('psycopg2', 'pg8000', ):
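The driver check suggests the estimate path relies on PostgreSQL catalog statistics rather than a full COUNT(*). A hedged sketch of that technique, reading pg_class.reltuples (approximate, and stale or -1 on never-analyzed tables):

import sqlalchemy as sa

def estimated_row_count(engine, table_name):
    # pg_class.reltuples holds the planner's row estimate; it is cheap to read
    # but only as fresh as the last VACUUM/ANALYZE.
    qry = sa.text("SELECT reltuples FROM pg_class WHERE relname = :tbl")
    with engine.connect() as conn:
        value = conn.execute(qry, {"tbl": table_name}).scalar()
    return max(int(value or 0), 0)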
Function fix_postgres_array_of_enum has a Cognitive Complexity of 11 (exceeds 5 allowed). Consider refactoring.
def fix_postgres_array_of_enum(connection, tbl):
"Change type of ENUM[] columns to a custom type"
for col in tbl.c:
col_str = str(col.type)
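The function appears to inspect str(col.type) to find ENUM-array columns whose type needs replacing. Purely as an illustration, one plausible detection heuristic is sketched below; it is an assumption, not necessarily the project's exact check:

def looks_like_array_of_enum(col_type_str):
    # A reflected ENUM[] column renders roughly as 'mood[]' (the enum type name
    # plus array brackets); built-in array types like 'INTEGER[]' are left alone.
    builtin_prefixes = ('INTEGER', 'BIGINT', 'TEXT', 'VARCHAR', 'BOOLEAN',
                        'NUMERIC', 'DOUBLE')
    return (col_type_str.endswith('[]')
            and not col_type_str.upper().startswith(builtin_prefixes))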
Function update_sequences has a Cognitive Complexity of 10 (exceeds 5 allowed). Consider refactoring.
def update_sequences(source, target, schemas, tables, exclude_tables):
"""Set database sequence values to match the source db
Needed to avoid subsequent unique key violations after DB build.
Currently only implemented for postgresql -> postgresql."""
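The docstring pins down the goal: after rows are copied, target-side sequences must be advanced so new inserts do not reuse copied primary key values. A hedged PostgreSQL-only sketch of that step for a single sequence (names and transaction handling are simplified; this is not the project's exact implementation):

import sqlalchemy as sa

def copy_sequence_value(source_engine, target_engine, seq_name):
    # Read the sequence's current value on the source, then setval() the
    # same-named sequence on the target so later inserts start above the
    # highest copied key. seq_name is interpolated for illustration only.
    with source_engine.connect() as src:
        value = src.execute(
            sa.text("SELECT last_value FROM %s" % seq_name)).scalar()
    with target_engine.begin() as tgt:
        tgt.execute(sa.text("SELECT setval(:seq, :val)"),
                    {"seq": seq_name, "val": value})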
Function _completeness_score has a Cognitive Complexity of 9 (exceeds 5 allowed). Consider refactoring.
def _completeness_score(self):
"""Scores how close a target table is to being filled enough to quit"""
table = (self.schema if self.schema else "") + self.name
fetch_all = self.fetch_all
requested = len(self.requested)
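The docstring says the score measures how close a table is to being "filled enough to quit". The exact scoring is not shown in this excerpt; purely as an illustration of the idea, a toy per-table fill score could look like the following (not the project's formula):

def toy_completeness_score(n_copied, n_requested, n_required, fetch_all=False):
    # Tables that must be copied whole, or that still have required rows
    # outstanding, score 0 so the main loop keeps working on them; otherwise
    # the score rises toward 1.0 as requested rows are satisfied.
    if fetch_all or n_required:
        return 0.0
    if not n_requested:
        return 1.0
    return min(n_copied / float(n_requested), 1.0)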
Avoid deeply nested control flow statements.
if source_referred_row:
self.create_row_in(source_referred_row, target_db,
target_referred)
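The standard cure for this kind of nesting is a guard clause: skip the missing row early so the call that matters stays at the outer indentation level. A sketch of the restructured shape (the enclosing loop is paraphrased, not copied from the source):

def copy_referred_rows(self, source_referred_rows, target_db, target_referred):
    for source_referred_row in source_referred_rows:
        if source_referred_row is None:
            continue  # guard clause: nothing to copy for this constraint
        self.create_row_in(source_referred_row, target_db, target_referred)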
Function update_sequences has 5 arguments (exceeds 4 allowed). Consider refactoring.
def update_sequences(source, target, schemas, tables, exclude_tables):
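When an argument list grows past the limit like this, the filtering options can be folded into one parameter object. A sketch using a dataclass (the class and field names are invented for illustration):

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class SequenceSyncOptions:
    schemas: List[Optional[str]] = field(default_factory=lambda: [None])
    tables: List[str] = field(default_factory=list)
    exclude_tables: List[str] = field(default_factory=list)

def update_sequences(source, target, opts):
    # Same behaviour, three parameters instead of five; callers build
    # SequenceSyncOptions(...) once and pass it through.
    ...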
Function generate has a Cognitive Complexity of 7 (exceeds 5 allowed). Consider refactoring.
def generate():
args = argparser.parse_args()
_import_modules(args.import_list)
args.force_rows = {}
for force_row in (args.force or []):
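Part of generate()'s branching is the parsing of --force entries into args.force_rows. That parsing can live in a small helper; the sketch below assumes each entry has the form 'table:primary_key' (the helper name and that format are assumptions here):

def parse_force_rows(force_entries):
    # Turn repeated --force 'table:pk' options into {table_name: [pk, ...]}.
    force_rows = {}
    for entry in (force_entries or []):
        table_name, _, pk = entry.partition(':')
        force_rows.setdefault(table_name, []).append(pk)
    return force_rows

generate() would then reduce to a single assignment such as "args.force_rows = parse_force_rows(args.force)".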