mangroveorg/datawinners

View on GitHub
datawinners/entity/import_data.py

Summary

Maintainability
F
3 days
Test Coverage
# vim: ai ts=4 sts=4 et sw=4 encoding=utf-8
import os
import logging
import re

from django.conf import settings
from django.core.exceptions import ValidationError
from django.db.utils import IntegrityError
from django.utils.translation import ugettext as _, ugettext_lazy, ugettext
from django.contrib.auth.models import User, Group
from django.core.validators import email_re
from django.contrib.auth.tokens import default_token_generator
from django.contrib.sites.models import get_current_site
from django.core.mail.message import EmailMessage
from django.template.loader import render_to_string
from django.utils.http import int_to_base36
from datawinners.entity.datasender_search import datasender_count_with
from datawinners.entity.subject_template_validator import SubjectTemplateValidator
from datawinners.entity.helper import get_country_appended_location, entity_type_as_sequence, \
    get_organization_telephone_number
from datawinners.entity.entity_exceptions import InvalidFileFormatException
from datawinners.exceptions import InvalidEmailException, NameNotFoundException
from datawinners.location.LocationTree import get_location_tree
from mangrove.errors.MangroveException import CodeSheetMissingException
from mangrove.datastore.entity import get_all_entities, Entity
from mangrove.errors.MangroveException import MangroveException, DataObjectAlreadyExists, EmptyRowException, \
    MultipleReportersForANumberException, MobileNumberMandatoryException
from mangrove.errors.MangroveException import CSVParserInvalidHeaderFormatException, \
    XlsParserInvalidHeaderFormatException
from mangrove.form_model.form_model import get_form_model_by_entity_type
from mangrove.form_model.form_model import REPORTER, get_form_model_by_code, \
    NAME_FIELD_CODE, SHORT_CODE, MOBILE_NUMBER_FIELD
from mangrove.form_model.project import get_entity_type_fields, tabulate_data
from mangrove.transport.player.parser import CsvParser, XlsParser, XlsDatasenderParser, XlsxDataSenderParser
from mangrove.transport.contract.transport_info import Channel
from mangrove.transport.contract.response import Response
from mangrove.transport.player.player import Player
from mangrove.transport.work_flow import RegistrationWorkFlow
from datawinners.location.LocationTree import get_location_hierarchy
from datawinners.submission.location import LocationBridge
from mangrove.contrib.registration_validators import case_insensitive_lookup
from mangrove.form_model.form_model import ENTITY_TYPE_FIELD_CODE
from datawinners.utils import get_organization
from datawinners.accountmanagement.models import NGOUserProfile, DataSenderOnTrialAccount
from datawinners.settings import HNI_SUPPORT_EMAIL_ID, EMAIL_HOST_USER
from datawinners.questionnaire.helper import get_location_field_code
from mangrove.transport.player.parser import XlsxParser
from datawinners.exceptions import ImportValidationError
from django.db import transaction


class FormCodeDoesNotMatchException(Exception):
    """Signals that an imported file carries a different form code than expected.

    ``message`` is the text shown to the user; ``data`` is the form code that
    was passed in (``None`` when not supplied).
    """

    def __init__(self, message, form_code=None):
        self.message, self.data = message, form_code

    def __str__(self):
        # The string form of the exception is exactly the stored message.
        return self.message


class FilePlayer(Player):
    """Player that imports rows parsed from an uploaded CSV/XLS/XLSX file.

    ``accept`` parses the file contents with the configured parser and submits
    every row against a single form model, returning one response per row.
    """

    def __init__(self, dbm, parser, channel_name, location_tree=None, is_datasender=False, is_update=False):
        """Store the parser, channel name and import flags; ``form_code`` starts unset."""
        Player.__init__(self, dbm, location_tree)
        self.parser = parser
        self.channel_name = channel_name
        # Expected form code; when set, a file whose first row names another
        # form code is rejected (see _get_form_model).
        self.form_code = None
        self.is_datasender = is_datasender
        self.is_update = is_update

    @classmethod
    def build(cls, manager, extension, default_parser=None, form_code=None, is_datasender=False, is_update=False):
        """Create a player for a file with the given extension.

        :param manager: database manager handed to the player.
        :param extension: file extension including the dot (``.csv``, ``.xls``, ``.xlsx``).
        :param default_parser: optional callable returning a parser; when given it
            replaces the parser normally chosen from the extension.
        :param form_code: form code the file is expected to carry, if any.
        :raises InvalidFileFormatException: when the extension is not supported.
        """
        channels = {".xls": Channel.XLS, ".xlsx": Channel.XLSX, ".csv": Channel.CSV}
        parsers = {".csv": CsvParser, ".xls": XlsParser, ".xlsx": XlsxParser}
        if extension not in channels:
            raise InvalidFileFormatException()
        channel = channels[extension]
        parser_factory = default_parser if default_parser is not None else parsers[extension]
        parser = parser_factory()
        location_bridge = LocationBridge(get_location_tree(), get_loc_hierarchy=get_location_hierarchy)
        player = FilePlayer(manager, parser, channel, location_tree=location_bridge, is_datasender=is_datasender,
                            is_update=is_update)
        player.form_code = form_code
        return player

    def _process(self, form_model, values):
        """Run rows of entity-registration forms through RegistrationWorkFlow; return other rows as-is."""
        if form_model.is_entity_registration_form():
            values = RegistrationWorkFlow(self.dbm, form_model, self.location_tree).process(values)
        return values

    def _appendFailedResponse(self, message, values=None):
        """Build an unsuccessful Response whose errors hold the translated message and the row."""
        response = Response(reporters=[], survey_response_id=None)
        response.success = False
        response.errors = dict(error=ugettext(message), row=values)
        return response

    def _create_user(self, email, organization, response):
        """Create a Django user plus NGOUserProfile for an imported data sender and return the user."""
        # NOTE(review): security - every account is created with the literal
        # password 'password'; confirm callers force a reset/activation before use.
        user = User.objects.create_user(email, email, 'password')
        # Raises IndexError when the "Data Senders" group does not exist.
        group = Group.objects.filter(name="Data Senders")[0]
        user.groups.add(group)
        user.first_name = case_insensitive_lookup(response.processed_data, NAME_FIELD_CODE)
        user.save()
        profile = NGOUserProfile(user=user, org_id=organization.org_id, title="Mr",
                                 reporter_id=case_insensitive_lookup(response.processed_data, SHORT_CODE))
        profile.save()
        return user

    def _validate_duplicate_email_address(self, email):
        """Raise DataObjectAlreadyExists when the email is already used by a user or data sender."""
        mail_filter = User.objects.filter(email=email)
        matching_email_count = datasender_count_with(email)
        if matching_email_count > 0 or mail_filter.exists():
            raise DataObjectAlreadyExists(_("User"), _("email address"), email)

    def _import_data_sender(self, form_model, organization, values):
        """Validate and submit one data sender row.

        :raises MobileNumberMandatoryException: when the row has no mobile number.
        :raises MultipleReportersForANumberException: when, for a trial organization,
            the mobile number is already taken.
        :raises MangroveException: when the joined location name exceeds 500 characters.
        :raises InvalidEmailException: when the email does not match ``email_re``.
        :raises DataObjectAlreadyExists: when the email is already registered.
        """
        mobile_number = case_insensitive_lookup(values, "m")
        if not mobile_number:
            raise MobileNumberMandatoryException()

        if organization.in_trial_mode:
            from accountmanagement.helper import is_mobile_number_unique_for_trial_account
            if not is_mobile_number_unique_for_trial_account(organization, mobile_number):
                raise MultipleReportersForANumberException(mobile_number)
            data_sender = DataSenderOnTrialAccount.objects.model(mobile_number=mobile_number,
                                                                 organization=organization)
            try:
                data_sender.save(force_insert=True)
            except IntegrityError:
                # The insert was rejected by the database: report it as a duplicate number.
                raise MultipleReportersForANumberException(mobile_number)

        # NOTE(review): assumes the row always has an "l" entry holding a sequence
        # of strings; a missing key raises KeyError here.
        if len(",".join(values["l"])) > 500:
            raise MangroveException("Location Name cannot exceed 500 characters.")

        email = case_insensitive_lookup(values, "email")
        if email:
            if not email_re.match(email):
                raise InvalidEmailException(message="Invalid email address.")
            self._validate_duplicate_email_address(email)

        return self.submit(form_model, values, [])

    def _append_country_for_location_field(self, form_model, values, organization):
        """Append the organization's country to the row's location answer, when there is one."""
        location_field_code = get_location_field_code(form_model)
        if location_field_code is None:
            return values
        if location_field_code in values and values[location_field_code]:
            values[location_field_code] = get_country_appended_location(values[location_field_code],
                                                                        organization.country_name())
        return values

    def _import_submission(self, organization, values, form_model=None):
        """Submit a single row inside a savepoint and return its response.

        Known validation failures roll the savepoint back and are turned into
        unsuccessful responses; ImportValidationError is propagated to the caller.
        """
        self._append_country_for_location_field(form_model, values, organization)
        sid = transaction.savepoint()
        # Bound before the try block: the DataObjectAlreadyExists handler reads it,
        # and the exception may be raised before the row type has been determined.
        is_reporter = False
        try:
            non_empty_answers = [answer for answer in values.values() if len(answer)]
            if not non_empty_answers:
                raise EmptyRowException()
            values = self._process(form_model, values)
            is_reporter = case_insensitive_lookup(values, ENTITY_TYPE_FIELD_CODE) == REPORTER
            if is_reporter:
                values['is_data_sender'] = 'True' if self.is_datasender else 'False'
                response = self._import_data_sender(form_model, organization, values)
            else:
                SubjectTemplateValidator(form_model).validate(values)
                response = self.submit(form_model, values, [], self.is_update)

            if not response.success:
                response.errors = dict(error=response.errors, row=values)
            transaction.savepoint_commit(sid)
            return response
        except DataObjectAlreadyExists as e:
            transaction.savepoint_rollback(sid)
            if e.data[0] != 'Unique ID Number':
                msg = _("%s with %s = %s already exists.") % (e.data[2], e.data[0], e.data[1])
            elif is_reporter:
                msg = _("%s with Unique ID Number = %s already exists.") % (e.data[2], e.data[1])
            else:
                msg = _("%s with ID Number '%s' already exists or has previously collected data.") % (
                    e.data[2], e.data[1])
            return self._appendFailedResponse(msg, values=values)
        except EmptyRowException as e:
            transaction.savepoint_rollback(sid)
            return self._appendFailedResponse(e.message)
        except (InvalidEmailException, MangroveException, NameNotFoundException, ValidationError) as e:
            transaction.savepoint_rollback(sid)
            return self._appendFailedResponse(e.message, values=values)
        except ImportValidationError:
            # Deliberately not converted into a per-row failure.
            raise

    def _get_registered_emails(self):
        """Return all user emails when a data sender parser is in use, else an empty list."""
        if type(self.parser) in [XlsDatasenderParser, XlsxDataSenderParser]:
            registered_emails = User.objects.values_list('email', flat=True)
        else:
            registered_emails = []
        return registered_emails

    def _get_form_model(self, rows):
        """Return the form model named by the first parsed row, or None when there are no rows.

        :raises FormCodeDoesNotMatchException: when an expected form code is set
            and the file's form code differs from it.
        """
        if not rows:
            return None
        form_code, _values = rows[0]
        form_model = get_form_model_by_code(self.dbm, form_code)
        if self.form_code is not None and form_code != self.form_code:
            # The message has no placeholder, so it must not be %-formatted:
            # doing so raises TypeError and hides this exception from callers.
            raise FormCodeDoesNotMatchException(
                ugettext(
                    'Some unexpected error happened. Please check the excel file or download the latest template and import again.'),
                form_code=form_code)
        return form_model

    def accept(self, file_contents):
        """Parse ``file_contents`` and import every row; return the list of responses."""
        from datawinners.utils import get_organization_from_manager

        organization = get_organization_from_manager(self.dbm)
        rows = self.parser.parse(file_contents)
        form_model = self._get_form_model(rows)
        # Every row is imported against the form model resolved from the first row.
        return [self._import_submission(organization, values, form_model) for _form_code, values in rows]


# TODO This is a hack. To be fixed after release. Introduce handlers and get error objects from mangrove
def translate_errors(items, question_dict=None, question_answer_dict=None):
    """Turn raw validation errors into translated, user-facing messages.

    :param items: iterable of ``(question_code, error_text)`` pairs.
    :param question_dict: mapping of question code to label; a missing code is
        shown as the code itself.
    :param question_answer_dict: mapping of question code to the submitted answer.
    :return: list with one message per item, in the same order.
    """
    question_dict = {} if question_dict is None else question_dict
    question_answer_dict = {} if question_answer_dict is None else question_answer_dict
    errors = []
    for key, value in items:
        answer, question_label = _get_answer_and_question_label(question_answer_dict, question_dict, key)
        errors.append(_translate_error(value, answer, question_label))
    return errors


_UNIQUE_ID_NOT_FOUND = re.compile(r"([A-Za-z0-9 ]+) with Unique Identification Number \(ID\) = (\w+) not found")
_TRAILING_ZERO_FRACTION = re.compile(r"\.0+$")


def _without_trailing_zero_fraction(answer):
    """Drop a trailing ``.0`` from a textual answer; non-string answers are returned unchanged."""
    try:
        return _TRAILING_ZERO_FRACTION.sub("", answer)
    except TypeError:
        return answer


def _translate_error(value, answer, question_label):
    """Map one raw error text to its translated message; the checks are order-sensitive."""
    # todo the ds & subject import errors will now start showing question_label than quotes. Do we need to have that?
    if 'is required' in value:
        return _('Answer for question %s is required.') % (question_label, )

    # NOTE(review): the first displayed format keeps its leading space, which
    # yields a double space in the rendered message - confirm whether intended.
    for expected_format, displayed_format in (('mm.yyyy', ' mm.yyyy'),
                                              ('dd.mm.yyyy', 'dd.mm.yyyy'),
                                              ('mm.dd.yyyy', 'mm.dd.yyyy')):
        if 'Expected date in %s format' % expected_format in value:
            return _('Answer %s for question %s is invalid. Expected date in %s format') % (
                answer, question_label, displayed_format)

    if 'smaller than allowed' in value:
        return _('Answer %s for question %s is smaller than allowed.') % (answer, question_label)

    if 'greater than allowed' in value:
        return _('Answer %s for question %s is greater than allowed.') % (answer, question_label)

    if 'is of the wrong type' in value:
        return _('Answer %s for question %s is of the wrong type.') % (answer, question_label)

    if 'contains more than one value' in value:
        return _('Answer %s for question %s contains more than one value.') % (answer, question_label)

    if 'not present in the allowed options' in value:
        return _('Answer %s for question %s is not present in the allowed options.') % (answer, question_label)

    if 'xx.xxxx yy.yyyy' in value:
        return _(
            'Incorrect GPS format. The GPS coordinates must be in the following format: xx.xxxx,yy.yyyy. Example -18.8665,47.5315')

    if 'longer' in value:
        return _("Answer %s for question %s is longer than allowed.") % (
            _without_trailing_zero_fraction(answer), question_label)

    unique_id_match = _UNIQUE_ID_NOT_FOUND.match(value)
    if unique_id_match:
        unique_id_type = unique_id_match.group(1)
        return _(
            "The unique ID %s of the %s does not match with any existing Identification number. Please correct and import again.") % (
                   answer, unique_id_type)

    if 'Data Sender ID not matched' in value:
        return _(
            "The unique ID %s of the Data Sender does not match with any existing Data Sender ID. Please correct and import again.") % answer

    if 'shorter' in value:
        return _("Answer %s for question %s is shorter than allowed.") % (
            _without_trailing_zero_fraction(answer), question_label)

    if 'Sorry, the telephone number' in value:
        return _("Sorry, the telephone number %s has already been registered.") % _without_trailing_zero_fraction(
            answer)

    if 'must be between' in value:
        # todo check the usage and remove the split
        words = value.split(' ')
        return _("The answer %s must be between %s and %s.") % (words[2], words[6], words[8])

    return _(value)


def _get_answer_and_question_label(question_answer_dict, question_dict, question_code):
    return question_answer_dict.get(question_code), question_dict.get(question_code, question_code)


def _get_form_model_questions(manager, row):
    return {'n': ''Name'', 'm': ''Mobile Number''} if 'reporter' in row[1].entity_type else \
        get_form_model_by_code(manager, row[1].form_code).get_field_code_label_dict()


def tabulate_failures(rows, manager):
    """Convert failed ``(index, response)`` pairs into a list of error dicts.

    Responses whose ``errors["row"]`` is empty are skipped. For the others the
    response's ``errors`` dict is updated in place: ``row_num`` is added,
    ``error`` becomes a single string with ``<li>`` before each message, and
    ``row`` is removed.
    """
    failures = []
    labels_by_code = {}

    for row in rows:
        row_index, response = row
        error_info = response.errors
        if not error_info["row"]:
            continue
        if response.form_code:
            labels_by_code = _get_form_model_questions(manager, row)
        # presumably maps the 0-based index to a spreadsheet line below a header row - confirm
        error_info['row_num'] = row_index + 2

        raw_error = error_info['error']
        if isinstance(raw_error, dict):
            messages = translate_errors(items=raw_error.items(), question_dict=labels_by_code,
                                        question_answer_dict=error_info['row'])
        else:
            messages = [_(raw_error)]

        # The leading empty entry makes join() emit "<li>" in front of every message.
        error_info['error'] = "<li>".join([""] + messages)
        error_info.pop('row')
        failures.append(error_info)
    return failures


def tabulate_success(success_responses):
    """Return the ``processed_data`` of every response, in order."""
    return [response.processed_data for response in success_responses]


def _get_entity_type_from_row(row):
    type = row['doc']['aggregation_paths']['_type']
    return type


def load_entity_registration_data(manager,
                                  type=REPORTER, tabulate_function=tabulate_data):
    """Load every entity of ``type`` and tabulate it against its registration form.

    For ``REPORTER`` the form model is looked up under 'registration'; for any
    other type it is looked up under the type itself.

    :return: ``(data, fields, labels)`` where ``data`` holds one
        ``tabulate_function(entity, form_model, codes)`` result per entity.
    """
    form_entity_type = 'registration' if type == REPORTER else type
    form_model = get_form_model_by_entity_type(manager, entity_type_as_sequence(form_entity_type))

    fields, labels, codes = get_entity_type_fields(manager, form_model.form_code)
    entities = get_all_entities(dbm=manager, entity_type=entity_type_as_sequence(type))
    data = [tabulate_function(entity, form_model, codes) for entity in entities]
    return data, fields, labels


def get_field_infos(fields):
    """Return parallel lists of names, labels and codes, skipping the 'entity_type' field."""
    kept = [field for field in fields if field.name != 'entity_type']
    fields_names = [field.name for field in kept]
    labels = [field.label for field in kept]
    codes = [field.code for field in kept]
    return fields_names, labels, codes


def get_entity_type_info(entity_type, manager=None):
    """Describe an entity type's registration form as a dict.

    The result has the keys ``entity``, ``code``, ``names``, ``labels``,
    ``codes`` and ``data``. ``code`` stays '' and the lists stay empty when
    ``entity_type`` is falsy or no form model is found for it.
    """
    info = dict(entity=entity_type, code='', names=[], labels=[], codes=[], data=[])
    if not entity_type:
        return info

    form_model = get_form_model_by_entity_type(manager, entity_type_as_sequence(entity_type))
    if form_model:
        info['code'] = form_model.form_code
        info['names'], info['labels'], info['codes'] = get_field_infos(form_model.fields)
    return info


def _from_row_to_subject(dbm, row):
    """Build an Entity from the document stored under the row's 'value' key."""
    document = Entity.__document_class__.wrap(row.get('value'))
    return Entity.new_from_doc(dbm=dbm, doc=document)


def _get_subject_type_infos(subject_types, form_models_grouped_by_subject_type):
    subject_types_dict = {}
    default_form_model = form_models_grouped_by_subject_type.get('registration')

    for subject_type in subject_types:
        form_model = form_models_grouped_by_subject_type.get(subject_type, default_form_model)
        names, labels, codes = zip(*[(field.name, field.label, field.code) for field in form_model.fields])
        subject_types_dict[subject_type] = dict(entity=subject_type,
                                                code=form_model.form_code,
                                                names=names,
                                                labels=labels,
                                                codes=codes,
                                                data=[], )
    return subject_types_dict


def load_all_entities_of_type(manager, type=REPORTER):
    """Return ``load_entity_registration_data`` for ``type`` with its default tabulation."""
    return load_entity_registration_data(manager, type=type)


def _handle_uploaded_file(file_name, file, manager, default_parser=None, form_code=None, is_datasender=False,
                          is_update=False):
    """Build a FilePlayer from the file name's extension and return its responses for ``file``."""
    extension = os.path.splitext(file_name)[1]
    player = FilePlayer.build(manager, extension, default_parser=default_parser, form_code=form_code,
                              is_datasender=is_datasender, is_update=is_update)
    return player.accept(file)


def _get_imported_entities(responses):
    imported_entities = dict()
    datarecords_id = []
    for response in responses:
        if response.success:
            datarecords_id.append(response.datarecord_id)
            imported_entities.update({response.short_code: response.processed_data})
    return {"imported_entities": imported_entities, "datarecords_id": datarecords_id}


def _get_failed_responses(responses):
    return [i for i in enumerate(responses) if not i[1].success]


def _get_successful_responses(responses):
    return [response for response in responses if response.success]


def import_data(request, manager, default_parser=None, form_code=None, is_datasender=False, is_update=False):
    """Import the file uploaded with ``request`` and summarise the outcome.

    :return: ``(error_message, failure_imports, response_message, imported_entities)``.
        ``error_message`` is ``None`` unless the file was empty or could not be
        processed; ``failure_imports`` is the tabulated list of failed rows (``None``
        when processing stopped early); ``imported_entities`` maps short codes to
        processed data.
    """
    response_message = ''
    error_message = None
    failure_imports = None
    imported_entities = []
    try:
        # IE sends the file in request.FILES['qqfile'] whereas all other browsers in request.GET['qqfile']. The following flow handles that flow.
        file_name, file = get_filename_and_contents(request)
        responses = _handle_uploaded_file(file_name=file_name, file=file, manager=manager,
                                          default_parser=default_parser, form_code=form_code,
                                          is_datasender=is_datasender, is_update=is_update)

        imported_entities_dict = _get_imported_entities(responses)
        datarecords_id = imported_entities_dict.get("datarecords_id")

        # For the CRS organization, newly imported records are passed to the
        # crs_datamigration management command.
        if form_code is not None and len(datarecords_id) and \
                settings.CRS_ORG_ID == get_organization(request).org_id:
            from django.core.management import call_command

            call_command('crs_datamigration', form_code, *datarecords_id)

        imported_entities = imported_entities_dict.get("imported_entities")
        successful_import_count = len(imported_entities)
        if len(responses) == 0:
            error_message = _("The imported file is empty.")
        failure_imports = tabulate_failures(_get_failed_responses(responses), manager)
        # The reported total only counts failures that were tabulated.
        total = len(failure_imports) + successful_import_count
        response_message = ugettext_lazy('%s of %s records uploaded') % (successful_import_count, total)
    except CodeSheetMissingException:
        error_message = _("The template you are using is not correct, please use DataWinners template and try again")
    except (CSVParserInvalidHeaderFormatException, XlsParserInvalidHeaderFormatException) as e:
        # A tuple is required here: "A or B" evaluates to A alone, which left
        # the XLS header exception unhandled.
        error_message = e.message
    except InvalidFileFormatException:
        error_message = _(
            u"We could not import your data ! You are using a document format we canʼt import. Please use the excel (.xlsx) template file!")
    except FormCodeDoesNotMatchException as e:
        error_message = e.message
    except ImportValidationError as e:
        error_message = e.message

    return error_message, failure_imports, response_message, imported_entities


def _file_and_name_for_ie(request):
    file_name = request.FILES.get('qqfile').name
    file = request.FILES.get('qqfile').read()
    return file_name, file


def _file_and_name(request):
    file_name = request.GET.get('qqfile')
    file = request.raw_post_data
    return file_name, file


def get_datasenders_mobile(manager):
    """Return the mobile-number column of every entity loaded with the default type."""
    all_data_senders, fields, _labels = load_all_entities_of_type(manager)
    mobile_column = fields.index(MOBILE_NUMBER_FIELD)
    return [data_sender["cols"][mobile_column] for data_sender in all_data_senders]


def send_email_to_data_sender(user, language_code, request=None, type="activation", organization=None):
    """Render and send an HTML email to ``user``.

    :param language_code: suffix appended to the subject and body template names.
    :param request: optional request; adds the creator's first name (and, with an
        organization, its telephone number) to the template context.
    :param type: "activation" or "created_user"; any other value sends nothing.
    :param organization: optional organization supplying the account type
        ('Pro SMS' is used when absent).
    """
    site = get_current_site(request)
    if organization:
        account_type = organization.account_type
    else:
        account_type = 'Pro SMS'
    ctx_dict = dict(
        domain=site.domain,
        uid=int_to_base36(user.id),
        user=user,
        token=default_token_generator.make_token(user),
        protocol='http',
        site=site,
        account_type=account_type,
    )
    # Template name prefixes per email kind; the language code and extension are appended below.
    email_kinds = {
        "activation": {
            "subject": 'activatedatasenderemail/activation_email_subject_for_data_sender_account_',
            "subject_param": False,
            "template": 'activatedatasenderemail/activation_email_for_data_sender_account_',
        },
        "created_user": {
            "subject": 'registration/created_user_email_subject_',
            "subject_param": site.domain,
            "template": 'registration/created_user_email_',
        },
    }
    action = email_kinds.get(type)
    if action is None:
        return

    # The subject template may span several lines; collapse it to one.
    subject_lines = render_to_string(action["subject"] + str(language_code) + '.txt').splitlines()
    subject = ''.join(subject_lines)
    subject_param = action["subject_param"]
    if subject_param:
        subject = subject % subject_param

    if request is not None:
        ctx_dict["creator_user"] = request.user.first_name
        if organization:
            ctx_dict["org_number"] = get_organization_telephone_number(request)
    message = render_to_string(action["template"] + language_code + '.html', ctx_dict)
    email = EmailMessage(subject, message, EMAIL_HOST_USER, [user.email], [HNI_SUPPORT_EMAIL_ID])
    email.content_subtype = "html"
    email.send()


def get_filename_and_contents(request):
    """Return ``(file_name, contents)`` of the upload, wherever the request carries it."""
    if 'qqfile' in request.GET:
        return _file_and_name(request)
    # No 'qqfile' GET parameter: read the upload from request.FILES instead.
    return _file_and_name_for_ie(request)