codeforamerica/intake

View on GitHub
intake/services/statistics.py

Summary

Maintainability
C
7 hrs
Test Coverage
"""
This module handles queries to retrieve common statistics
"""
from datetime import datetime, timedelta
from intake import models, constants, utils
from user_accounts.models import Organization
from django.db.models.aggregates import Count
from django.db.models.functions import ExtractYear
from django.db.models.functions import Extract
from django.db.models import DateTimeField
from pytz import timezone


@DateTimeField.register_lookup
class ExtractWeek(Extract):
    lookup_name = 'week'


TOTAL = 'Total'
ALL = {'slug': 'all', 'name': 'Total (All Organizations)'}


def make_year_weeks():
    start_date = utils.get_start_date()
    year_weeks = []
    date_cursor = start_date
    last_date = utils.get_todays_date()
    while date_cursor <= last_date:
        year_week = as_year_week(date_cursor)
        year_weeks.append(year_week)
        date_cursor += timedelta(days=7)
    return year_weeks


def as_year_week(dt):
    return dt.strftime('%Y-%W-1')


def get_org_data_dict():
    """
    Generates a data dictionary for displaying both total and
    organization-specific statistics.

    [
      {
        'org': {
            'name': 'Total (All Organizations)'.
            'slug': 'all'
        },
        'total': 1501,
        'apps_this_week': 33,
        'weekly_totals': [
            {'date': '2016-04-24', 'count': 0}
        ]
      },
    ]

    :return: list of data dicts including one for aggregate totals and one for
    each organization we serve.
    """

    # Figure out all the weeks of the year since we started
    year_weeks = make_year_weeks()

    # Get all the organizations receiving applications
    orgs = list(Organization.objects.filter(
        is_receiving_agency=True, is_live=True
    ).values('name', 'slug', 'id').order_by('name'))

    # Get overall counts of applications by weeks of the year
    total_app_data = models.Application.objects.annotate(
        wk=ExtractWeek('form_submission__date_received',
                       tzinfo=timezone('US/Pacific')),
        yr=ExtractYear('form_submission__date_received',
                       tzinfo=timezone('US/Pacific'))
    ).values('yr', 'wk').annotate(
        fs_count=Count('form_submission_id', distinct=True)
    ).values('yr', 'wk', 'fs_count').order_by('yr', 'wk')

    # Convert the db results to a dict keyed by the week of the year in
    # datetime format. The '-1' at the end indicates weeks starting on Mondays.
    total_app_data_by_week = {
        '%d-%02d-1' % (row['yr'], row['wk']): row['fs_count'] for row in
        total_app_data}

    # Transform the totals data into something formatted for charting purposes
    # with the grand total and weekly time-series data and add it to the
    # results we will return for display.
    display_data = [total_display_data(total_app_data_by_week, year_weeks)]

    # Get counts of applications by organization and week of year
    org_app_data = models.Application.objects.annotate(
        wk=ExtractWeek('form_submission__date_received',
                       tzinfo=timezone('US/Pacific')),
        yr=ExtractYear('form_submission__date_received',
                       tzinfo=timezone('US/Pacific'))
    ).values('organization_id', 'yr', 'wk').annotate(
        fs_count=Count('form_submission_id')
    ).values('organization_id', 'yr', 'wk', 'fs_count').order_by('yr', 'wk')

    # Convert the db results to a dict of dicts keyed by org_id and week of the
    # year in datetime format. The '-1' at the end indicates weeks starting on
    # Mondays.
    org_app_data_by_week = {}
    for row in org_app_data:
        current_org_app_data_by_week = org_app_data_by_week.setdefault(
            row['organization_id'], {})
        current_org_app_data_by_week['%d-%02d-1' % (row['yr'], row['wk'])] = \
            row['fs_count']

    # Transform the org-specific data into something formatted for charting
    # purposes with the grand total and weekly time-series data and add it to
    # the results we will return for display.
    display_data_by_org = orgs_display_data(org_app_data_by_week, orgs,
                                            year_weeks)
    display_data += display_data_by_org

    return display_data


def orgs_display_data(org_app_data_by_week, orgs, year_weeks):
    """
    Generate a list of dictionaries of data suitable for our view to render
    stats for each individual organization which receives applications. Each
    dictionary includes metadata about the organization whose stats are being
    displayed, a cumulative total of applications, a count of applications for
    the current and previous weeks, and time-series data representing
    applications for each week since launch.

    :param org_app_data_by_week: dict of dicts data structured as
    {organization_id: {%Y-%W-%w: count}}
    :param orgs: list of all organizations.
    :param year_weeks: list of year-weeks to generate display data for, in the
    format %Y-%W-%w.
    :return: dict of data suitable for rendering statistics.
    """

    # Iterate over each organization, calculate its overall total plus a weekly
    # time-series data-set for charting
    org_data = []
    org_totals = {}
    org_weekly_totals = {}

    for org in orgs:
        org_id = org['id']
        current_org_app_data_by_week = org_app_data_by_week.get(org_id, None)
        if current_org_app_data_by_week is None:
            continue

        # Iterate over each week of the year for the current organization
        for year_week in year_weeks:
            # Get the actual start date for the week
            date = datetime.strptime(year_week, "%Y-%W-%w")

            # Add the counts if we have a data point for the week, otherwise 0
            org_apps = current_org_app_data_by_week.get(year_week, None)
            if org_apps is not None:
                current_org_total = org_totals.setdefault(org_id, 0)
                org_totals[org_id] = current_org_total + org_apps

                current_org_weekly_totals = org_weekly_totals.setdefault(
                    org_id, [])
                current_org_weekly_totals.append({
                    'date': date.strftime('%Y-%m-%d'),
                    'count': org_apps
                })
            else:
                current_org_weekly_totals = org_weekly_totals.setdefault(
                    org_id, [])
                current_org_weekly_totals.append({
                    'date': date.strftime('%Y-%m-%d'),
                    'count': 0
                })

        # Add the data for this org formatted for display
        org_data.append({
            'org': org,
            'total': org_totals.get(org_id, 0),
            'weekly_totals': org_weekly_totals[org_id],
            'apps_this_week': org_weekly_totals[org_id][-1]['count'] if len(
                org_weekly_totals[org_id]) > 0 else 0,
            'apps_last_week': org_weekly_totals[org_id][-2]['count'] if len(
                org_weekly_totals[org_id]) > 1 else 0,
        })

    return org_data


def total_display_data(total_app_data_by_week, year_weeks):
    """
    Generate a dictionary of data suitable for our view to render total stats
    across all organizations that receive applications. It includes metadata
    about the organization whose stats are being displayed, a cumulative total
    of applications, a count of applications for the current and previous
    weeks, and time-series data representing applications for each week since
    launch.

    :param total_app_data_by_week: dict of data where the key is week of the
    year and the value is the count of applications submitted that week
    :param year_weeks: list of year-weeks to generate display data for, in the
    format %Y-%W-%w.
    :return: dict of data suitable for rendering statistics.
    """

    # Iterate over the weekly data, calculate the overall total plus a weekly
    # time-series data-set for charting
    total_count = 0
    weekly_totals = []
    for year_week in year_weeks:
        # Get the actual start date for the week
        date = datetime.strptime(year_week, "%Y-%W-%w")

        # Add the counts if we have a data point for the week, otherwise add 0
        total_apps = total_app_data_by_week.get(year_week, None)
        if total_apps is not None:
            total_count += total_apps
            weekly_totals.append({
                'date': date.strftime('%Y-%m-%d'),
                'count': total_apps
            })
        else:
            weekly_totals.append({
                'date': date.strftime('%Y-%m-%d'),
                'count': 0
            })

    # Return the total data formatted for display
    return {
        'org': ALL,
        'total': total_count,
        'apps_this_week': weekly_totals[-1]['count'] if len(
            weekly_totals) > 0 else 0,
        'apps_last_week': weekly_totals[-2]['count'] if len(
            weekly_totals) > 1 else 0,
        'weekly_totals': weekly_totals
    }