okfn-brasil/serenata-de-amor

View on GitHub
jarbas/chamber_of_deputies/management/commands/update.py

Summary

Maintainability
A
0 mins
Test Coverage
from csv import DictReader, DictWriter
from pathlib import Path
from urllib.request import urlretrieve

from django.core.management import call_command
from django.core.management.base import BaseCommand

from jarbas.chamber_of_deputies.models import Reimbursement, Tweet


class Command(BaseCommand):
    """Reload reimbursement data from the files found in a directory.

    The command runs the steps listed in ``help`` in order: it writes the
    current tweets to a CSV file inside the given directory, re-imports
    reimbursements, suspicions and receipt texts through their dedicated
    management commands, rebuilds the search vector and finally re-creates
    the tweets from the CSV backup.
    """

    help = (
        'Load Serenata de Amor reimbursements from a given directory. '
        '(1) Does a backup of Twitter data to re-link reimbursements; '
        '(2) Deletes all data from Reimbursement model; '
        '(3) Loads all reimbursements-YYYY.csv files; '
        '(4) Loads suspicions.xz file; '
        '(5) Reload receipt texts; '
        '(6) Rebuilds the search vector; '
        '(7) Restores Twitter data.'
    )
    RECEIPT_TEXTS = '2017-02-15-receipts-texts.xz'
    SPACES_URL = 'https://serenata-de-amor-data.nyc3.digitaloceanspaces.com/'

    # Name of the CSV file (inside the given directory) holding the backup.
    TWEETS_BACKUP = 'tweets.csv'
    TWEETS_FIELDS = ('status', 'document_id')

    def add_arguments(self, parser):
        """Register the positional ``path`` argument (the data directory)."""
        parser.add_argument('path', help='Directory of the CSV/LZMA files')

    def handle(self, *args, **options):
        """Run the whole update pipeline against ``options['path']``."""
        self.path = Path(options['path'])

        # (1) Does a backup of Twitter data to re-link reimbursements
        self._backup_tweets()

        # (2) Deletes all data from Reimbursement model
        # (3) Loads all reimbursements-YYYY.csv files
        # Only the first imported file asks the `reimbursements` command to
        # drop the existing rows; the following files are appended.
        first_round = True
        for file in sorted(self.path.glob('reimbursements-*.csv')):
            print(f'Importing {file}')
            call_command('reimbursements', file, drop_all=first_round)
            first_round = False

        # (4) Loads suspicions.xz file
        suspicions = self.path / 'suspicions.xz'
        print(f'Importing {suspicions}')
        call_command('suspicions', suspicions)

        # (5) Reload receipt texts
        receipt_texts = self.path / self.RECEIPT_TEXTS
        urlretrieve(f'{self.SPACES_URL}{self.RECEIPT_TEXTS}', receipt_texts)
        print(f'Importing {receipt_texts}')
        call_command('receipts_text', receipt_texts)

        # (6) Rebuilds the search vector
        print('Rebuilding the search vector')
        call_command('searchvector')

        # (7) Restores Twitter data
        self._restore_tweets()

    def _backup_tweets(self):
        """Write every tweet's status and reimbursement document ID to CSV."""
        print(f'Backing up {Tweet.objects.count()} tweets')

        # select_related avoids one extra query per tweet when reading
        # `tweet.reimbursement.document_id` below.
        tweets = Tweet.objects.select_related('reimbursement')

        # newline='' is what the csv module expects from file objects.
        backup = self.path / self.TWEETS_BACKUP
        with open(backup, 'w', newline='', encoding='utf-8') as fobj:
            writer = DictWriter(fobj, fieldnames=self.TWEETS_FIELDS)
            writer.writeheader()
            writer.writerows(
                {
                    'status': tweet.status,
                    'document_id': tweet.reimbursement.document_id,
                }
                for tweet in tweets
            )

    def _restore_tweets(self):
        """Re-create tweets from the CSV backup.

        Rows whose document ID no longer matches a reimbursement are
        skipped silently.
        """
        print('Restoring tweets')
        backup = self.path / self.TWEETS_BACKUP
        with open(backup, newline='', encoding='utf-8') as fobj:
            for row in DictReader(fobj):
                try:
                    reimbursement = Reimbursement.objects.get(
                        document_id=row['document_id']
                    )
                except Reimbursement.DoesNotExist:
                    continue

                Tweet.objects.create(
                    status=row['status'],
                    reimbursement=reimbursement
                )