mangroveorg/datawinners

View on GitHub
datawinners/blue/xlsform_utils.py

Summary

Maintainability
A
3 hrs
Test Coverage
import os
import pyexcel as pe

from pyexcel.ext import xlsx #This import is needed for loading excel. Please don't remove them
from pyexcel.ext import xls #This import is needed for loading excel. Please don't remove them
from collections import OrderedDict

XLSFORM_PREDEFINED_COLUMN_NAMES={
                                 "survey": ['type','name','label','calculation','hint','required','appearance',
                                            'constraint','constraint_message','relevant','default', 'choice_filter',
                                            'required_message', 'repeat_count'],
                                 "choices": ['list_name','name', 'label']
                                 }
XLSFORM_EXCLUDE_COLUMN_NAMES={
                              "cascades":['base_index']
                              }
XLSFORM_EXCLUDE_FOR_DEFAULT=['begin_group','end_group', 'begin repeat', 'end repeat', 'note']

def purify_posted_data(excel_as_dict):
    for survey in excel_as_dict['survey']:
        if survey['type'] in XLSFORM_EXCLUDE_FOR_DEFAULT:
            if survey['default'] and survey['default'].strip():
                try:
                    del survey['default']
                except KeyError:
                    pass
    return excel_as_dict

def convert_excel_to_dict(file_name=None, file_content=None, file_type='xlsx'):
    book = pe.get_book(file_name=file_name, file_content=file_content, file_type=file_type)
    excel_as_dict = OrderedDict()
    for sheet in book:
        records = []
        if len(sheet.array) > 0:
            sheet.name_columns_by_row(0)
            if sheet.name == 'survey':
                _add_supported_attributes_that_doesnt_exists(sheet)
            records = _to_records(sheet)
        excel_as_dict[sheet.name] = records
    return excel_as_dict

def convert_json_to_excel(json_as_dict, file_type='xlsx'):
    book_content = OrderedDict()
    for sheet_name in json_as_dict:
        book_content[sheet_name] = convert_json_record_to_array(
                                                                json_as_dict[sheet_name], 
                                                                sheet_name,
                                                                XLSFORM_PREDEFINED_COLUMN_NAMES.get(sheet_name),
                                                                XLSFORM_EXCLUDE_COLUMN_NAMES.get(sheet_name)
                                                                )
    excel_raw_stream = pe.save_book_as(dest_file_type=file_type, bookdict=book_content)

    return excel_raw_stream

def convert_json_record_to_array(records, sheet_name, xlsform_predefined_column_names=None, xlsform_exclude_column_names=None):
    rows = []
    if len(records) < 1:
        if xlsform_predefined_column_names:
            rows.append(xlsform_predefined_column_names)
            empty_content = ['' for k in xlsform_predefined_column_names]
            rows.append(empty_content)
        return rows
    else:
        keys = records[0].keys()
        if xlsform_predefined_column_names:
            column_names = [k for k in keys if k in xlsform_predefined_column_names]
        elif xlsform_exclude_column_names:
            column_names = [k for k in keys if k not in xlsform_exclude_column_names]
        else:
            column_names = keys
        rows.append(column_names)
        for r in records:
            row = []
            for k in column_names:
                row.append(r[k])
            rows.append(row)
    return rows

def _to_records(reader):
    """
    Make an array of dictionaries.
    This is a trimmed down version of pyexcel.utils -> to_records. This adds support for Ordered dictionary.
    """
    ret = []
    if len(reader.colnames) > 0:
        headers = reader.colnames
        for row in reader.rows():
            the_dict = OrderedDict(zip(headers, row))
            ret.append(the_dict)
    else:
        raise ValueError('Unable to convert excel to json due to invalid excel structure')
    return ret

def _yield_from_records(records):
    """Reverse function of to_records
        Trimmed down version of pyexcel.utils -> to_records.
    """
    if len(records) < 1:
        yield []
    else:
        keys = records[0].keys()
        yield list(keys)
        for r in records:
            row = []
            for k in keys:
                row.append(r[k])
            yield row
        
def _add_supported_attributes_that_doesnt_exists(sheet):
    supported_attributes_that_doesnt_exists = list(set(XLSFORM_PREDEFINED_COLUMN_NAMES[sheet.name]) - set(sheet.colnames))
    additional_data = list()
    additional_data.append(supported_attributes_that_doesnt_exists)
    for row in sheet.rows():
        additional_data.append(['' for x in supported_attributes_that_doesnt_exists])

    additional_supported_attr_sheet = pe.Sheet(additional_data)
    sheet.column += additional_supported_attr_sheet