gfw-api/forest-change-analysis-elastic

View on GitHub
gladanalysis/services/summary_service.py

Summary

Maintainability
A
1 hr
Test Coverage
F
16%
import pandas as pd


class SummaryService(object):
    """Aggregate alert counts into day, week, month, quarter or year totals.

    Takes the query result handed over by the router and sums the per-day
    alert counts over the interval requested by the user.
    """

    @staticmethod
    def _iso_week(dates):
        """Return the ISO week number for each value of a datetime Series."""
        accessor = dates.dt

        # Series.dt.week was deprecated in pandas 1.1 and removed in 2.0;
        # only fall back to it on versions that predate isocalendar().
        if not hasattr(accessor, 'isocalendar'):
            return accessor.week

        week = accessor.isocalendar().week

        # isocalendar() yields a nullable UInt32 column. Cast back to the
        # plain dtypes .dt.week used to produce (int64, or float64 with NaN
        # when some dates are missing) so the output records are unchanged.
        return week.astype('float64' if week.isna().any() else 'int64')

    @staticmethod
    def create_time_table(dataset, data, agg_type):
        """Sum alert counts per requested time interval.

        Args:
            dataset: Dataset name. For 'terrai' the incoming 'day' column is
                renamed to 'julian_day' so all datasets share one layout.
            data: Mapping whose 'data' entry is a list of row dicts providing
                'year', 'julian_day' ('day' for terrai) and 'COUNT(*)'.
            agg_type: Interval to group by: 'day', 'week', 'month',
                'quarter' or 'year'.

        Returns:
            A list with one dict per group, holding the grouping columns and
            the summed 'count'. An empty list when data['data'] is empty.

        Raises:
            KeyError: If agg_type does not name a column of the table.
        """
        if not data['data']:
            return []

        df = pd.DataFrame(data['data'])
        df = df.rename(columns={'COUNT(*)': 'count'})

        # standardize the output table to use julian_day
        if dataset == 'terrai':
            df = df.rename(columns={'day': 'julian_day'})

        group_key = 'julian_day' if agg_type == 'day' else agg_type

        # Build a real datetime column (1 January of the year plus the
        # zero-based day offset) so pandas' datetime accessors can derive
        # the coarser intervals.
        df['alert_date'] = (
            pd.to_datetime(df['year'], format='%Y')
            + pd.to_timedelta(df['julian_day'] - 1, unit='d')
        )

        df['month'] = df['alert_date'].dt.month
        df['quarter'] = df['alert_date'].dt.quarter
        # NOTE(review): ISO weeks can straddle a year boundary (1 January may
        # fall in week 52/53), so a 'week' summary pairs the calendar year
        # with the ISO week number -- confirm this is what consumers expect.
        df['week'] = SummaryService._iso_week(df['alert_date'])

        # every summary is broken down by year first
        groupby_list = ['year']

        # day summaries additionally carry the date as a formatted string
        if group_key == 'julian_day':
            df['alert_date'] = df['alert_date'].dt.strftime('%Y-%m-%d')
            groupby_list.append('alert_date')

        if group_key != 'year':
            groupby_list.append(group_key)

        # Select 'count' before summing: summing the whole frame would also
        # try to add up the datetime column, which pandas 2.x rejects.
        grouped = df.groupby(groupby_list)['count'].sum().reset_index()

        return grouped.to_dict(orient='records')