test_load_table.py

Summary

Maintainability
A
0 mins
Test Coverage
import config
import utils


import pytest
from sqlalchemy import create_engine, inspect, MetaData
from sqlalchemy.sql import select


def assert_column_type(column, expected_type):
    assert column.type.python_type == expected_type


def assert_columns_have_non_unique_indexes(engine, table_name, *columns):
    inspection = inspect(engine)

    non_unique_1_column_indexes = [
        index['column_names'][0]
        for index in inspection.get_indexes(table_name)
        if not index['unique'] and len(index['column_names']) == 1
    ]

    assert set(columns) <= set(non_unique_1_column_indexes)


class TestLoadTable():
    def setup_method(self, method):
        self.engine = create_engine(config.SQLA_URI)
        self.meta = MetaData(bind=self.engine)

    def teardown_method(self, method):
        self.meta.drop_all()

    def load_table(self, tmpdir, csv_str, table_name, **kwargs):
        file_name = '{}.csv'.format(table_name)
        csv_file = tmpdir.join(file_name)
        csv_file.write(csv_str)

        utils.load_table(str(csv_file), table_name, **kwargs)
        self.meta.reflect()

    def test_good_csv(self, tmpdir):
        CSV = """name, dob, number_of_pets
        Tom, 1980-02-26, 0
        Dick, 1982-03-14, 3
        Harry, 1972-11-24, 2
        """

        self.load_table(tmpdir, CSV, 'people')

        assert 'people' in self.meta.tables
        people = self.meta.tables['people']

        assert_column_type(people.columns['index'], int)
        assert_column_type(people.columns['name'], str)
        assert_column_type(people.columns['dob'], str)
        assert_column_type(people.columns['number_of_pets'], str)

        assert people.columns['index'].primary_key is True
        assert people.columns['name'].primary_key is False

        assert_columns_have_non_unique_indexes(self.engine, 'people', 'name',
                                               'dob', 'number_of_pets')

        connection = self.engine.connect()
        results = connection.execute(
            select([people]).order_by(people.c.index)).fetchall()
        assert (0, 'Tom', '1980-02-26', '0') == results[0]
        assert (1, 'Dick', '1982-03-14', '3') == results[1]
        assert (2, 'Harry', '1972-11-24', '2') == results[2]

    def test_load_with_chunking(self, tmpdir):
        CSV = """name, dob, number_of_pets
        Tom, 1980-02-26, 0
        Dick, 1982-03-14, 3
        Harry, 1972-11-24, 2
        Chris, 1991-08-03, 0
        Jill, 1990-04-15, 1
        Lorenzo, 2003-01-01, 0
        Alyssa, 2005-02-02, 1
        Andrew, 2008-03-03, 1
        Javi, 2008-04-04, 2
        """

        self.load_table(tmpdir, CSV, 'people', chunksize=4)

        people = self.meta.tables['people']

        connection = self.engine.connect()
        results = connection.execute(
            select([people]).order_by(people.c.index)).fetchall()

        assert (0, 'Tom', '1980-02-26', '0') == results[0]
        assert (1, 'Dick', '1982-03-14', '3') == results[1]
        assert (2, 'Harry', '1972-11-24', '2') == results[2]
        assert (3, 'Chris', '1991-08-03', '0') == results[3]
        assert (4, 'Jill', '1990-04-15', '1') == results[4]
        assert (5, 'Lorenzo', '2003-01-01', '0') == results[5]
        assert (6, 'Alyssa', '2005-02-02', '1') == results[6]
        assert (7, 'Andrew', '2008-03-03', '1') == results[7]
        assert (8, 'Javi', '2008-04-04', '2') == results[8]


    def test_empty_csv(self, tmpdir):
        file_name = 'empty.csv'
        csv_file = tmpdir.join(file_name)
        csv_file.ensure()

        with pytest.raises(Exception):
            utils.load_table(str(csv_file), 'empty')

    def test_embedded_space_in_columnname(self, tmpdir):
        CSV = """name, number of pets
        Tom, 0
        Dick, 3
        Harry, 2
        """

        self.load_table(tmpdir, CSV, 'people')
        people = self.meta.tables['people']

        assert 'number of pets' in people.columns

    def test_null_empty_string_handling(self, tmpdir):
        CSV = """name, number_of_pets
        Tom, 0
        "", 3
        , 2
        """

        self.load_table(tmpdir, CSV, 'people')

        people = self.meta.tables['people']

        connection = self.engine.connect()
        results = connection.execute(
            select([people]).order_by(people.c.index)).fetchall()
        assert (0, 'Tom', '0') == results[0]
        assert (1, None, '3') == results[1]
        assert (2, None, '2') == results[2]

    def test_embedded_space_in_tablename_and_filename(self, tmpdir):
        CSV = """name, number_of_pets
        Tom, 0
        Dick, 3
        Harry, 2
        """
        self.load_table(tmpdir, CSV, 'We the people')

        assert 'We the people' in self.meta.tables