uktrade/directory-api

View on GitHub
company/documents.py

Summary

Maintainability
A
0 mins
Test Coverage
from urllib.parse import urljoin

from django.conf import settings
from elasticsearch_dsl import Document, InnerDoc, MetaField, analysis, field

from company import helpers, search_filters, serializers

# Custom analyzer applied to the company free-text fields. The arguments are
# listed in the order the analysis chain runs: character filters first, then
# the tokenizer, then the token filters (which are applied in list order).
american_english_analyzer = analysis.analyzer(
    'normalize_american_english',
    char_filter=[search_filters.american_english_normalizer_filter],
    tokenizer='standard',
    filter=[
        'lowercase',
        'stop',
        search_filters.companies_stopwords_filter,
        search_filters.lovins_stemmer,
        search_filters.american_english_synonyms_filter,
    ],
)


class CaseStudyInnerDoc(InnerDoc):
    """Inner-document mapping describing a single case study.

    The searchable fields are copied into ``wildcard`` through ``copy_to``;
    ``pk``, ``image``, ``company_number`` and ``slug`` are declared with
    ``index=False``.
    """
    # Catch-all field: the target of every ``copy_to`` declared below.
    wildcard = field.Text()
    pk = field.Integer(index=False)
    title = field.Text(copy_to='wildcard')
    short_summary = field.Text(copy_to='wildcard')
    description = field.Text(copy_to='wildcard')
    sector = field.Text(copy_to='wildcard')
    keywords = field.Text(copy_to='wildcard')
    image = field.Text(index=False)
    company_number = field.Text(index=False)
    image_one_caption = field.Text(copy_to='wildcard')
    image_two_caption = field.Text(copy_to='wildcard')
    image_three_caption = field.Text(copy_to='wildcard')
    testimonial = field.Text(copy_to='wildcard')
    # The only Keyword field in this mapping that is copied into ``wildcard``.
    testimonial_name = field.Keyword(copy_to='wildcard')
    testimonial_job_title = field.Text(copy_to='wildcard')
    slug = field.Text(index=False)


class CompanyDocument(Document):
    """Elasticsearch mapping for a company and its nested case studies.

    Three catch-all fields aggregate values from the other fields via
    ``copy_to``: ``wildcard`` (company free text), ``casestudy_wildcard``
    (case study free text) and ``keyword_wildcard`` (exact-match values).
    """
    # Catch-all targets. The two text ones are analysed with
    # ``american_english_analyzer``; ``keyword_wildcard`` is a plain Keyword.
    wildcard = field.Text(analyzer=american_english_analyzer)
    casestudy_wildcard = field.Text(analyzer=american_english_analyzer)
    keyword_wildcard = field.Keyword()

    # Company attributes. Fields declared with ``index=False, store=True`` are
    # not indexed for search but are kept as stored fields.
    case_study_count = field.Integer()
    company_type = field.Keyword(index=False, store=True)
    date_of_creation = field.Date(index=False)
    description = field.Text(copy_to='wildcard', analyzer=american_english_analyzer)
    has_description = field.Boolean()
    employees = field.Keyword(index=False, store=True)
    facebook_url = field.Keyword(index=False, store=True)
    pk = field.Integer(index=False)
    keywords = field.Text(copy_to='wildcard')
    linkedin_url = field.Keyword(index=False, store=True)
    logo = field.Keyword(index=False, store=True)
    has_single_sector = field.Boolean()
    modified = field.Date(index=False)
    # Keyword copy of ``name`` (populated through the ``copy_to`` on ``name``).
    ordering_name = field.Keyword()
    name = field.Text(copy_to=['wildcard', 'ordering_name'])
    number = field.Keyword(
        copy_to='keyword_wildcard',
    )
    # Multi-valued keyword fields, all copied into ``keyword_wildcard``.
    sectors = field.Keyword(multi=True, copy_to='keyword_wildcard', store=True)
    sectors_label = field.Keyword(multi=True, copy_to='keyword_wildcard', store=True)
    expertise_industries = field.Keyword(multi=True, copy_to='keyword_wildcard', store=True)
    expertise_regions = field.Keyword(multi=True, copy_to='keyword_wildcard', store=True)
    expertise_languages = field.Keyword(multi=True, copy_to='keyword_wildcard', store=True)
    expertise_countries = field.Keyword(multi=True, copy_to='keyword_wildcard', store=True)
    # Represents Dict as it's the primitive datatype for this field.
    # ``dynamic=True`` is set on this object only; the document as a whole is
    # declared strict in ``Meta`` below.
    expertise_products_services = field.Object(dynamic=True)
    expertise_products_services_labels = field.Keyword(multi=True, copy_to='keyword_wildcard', store=True)
    expertise_labels = field.Keyword(multi=True, copy_to='keyword_wildcard', store=True)
    slug = field.Keyword(copy_to='keyword_wildcard', store=True)
    summary = field.Text(copy_to='wildcard', analyzer=american_english_analyzer)
    twitter_url = field.Keyword(index=False, store=True)
    website = field.Keyword(copy_to='keyword_wildcard', store=True)
    # Nested case studies. Their text fields are copied into the top-level
    # ``casestudy_wildcard``; ``sector`` and ``slug`` go to ``keyword_wildcard``.
    supplier_case_studies = field.Nested(
        properties={
            'pk': field.Integer(index=False),
            'title': field.Text(copy_to='casestudy_wildcard'),
            'short_summary': field.Text(copy_to='casestudy_wildcard'),
            'description': field.Text(copy_to='casestudy_wildcard'),
            'sector': field.Keyword(copy_to='keyword_wildcard', store=True),
            'keywords': field.Text(copy_to='casestudy_wildcard'),
            'image_one_caption': field.Text(copy_to='casestudy_wildcard'),
            'image_two_caption': field.Text(copy_to='casestudy_wildcard'),
            'image_three_caption': field.Text(copy_to='casestudy_wildcard'),
            'testimonial': field.Text(copy_to='casestudy_wildcard'),
            'website': field.Keyword(copy_to='casestudy_wildcard', store=True),
            'slug': field.Keyword(copy_to='keyword_wildcard', store=True),
            'testimonial_name': field.Keyword(copy_to='casestudy_wildcard', store=True),
            'testimonial_company': field.Text(copy_to='casestudy_wildcard'),
            'testimonial_job_title': field.Text(copy_to='casestudy_wildcard'),
        }
    )
    # Publication / showcase flags.
    is_showcase_company = field.Boolean()
    is_published_investment_support_directory = field.Boolean()
    is_published_find_a_supplier = field.Boolean()

    class Meta:
        # Sets the mapping-level ``dynamic`` parameter to 'strict', so every
        # field sent to the index is expected to be declared above.
        dynamic = MetaField('strict')

    class Index:
        # Index (alias) name is taken from the Django settings.
        name = settings.ELASTICSEARCH_COMPANY_INDEX_ALIAS


def get_absolute_url(url):
    """Return ``url``, resolved against the local storage domain when needed.

    When ``settings.STORAGE_CLASS_NAME`` is ``'local-storage'`` the value is
    joined onto ``settings.LOCAL_STORAGE_DOMAIN``; for any other storage class
    it is returned unchanged.
    """
    uses_local_storage = settings.STORAGE_CLASS_NAME == 'local-storage'
    return urljoin(settings.LOCAL_STORAGE_DOMAIN, url) if uses_local_storage else url


def company_model_to_document(company, index=settings.ELASTICSEARCH_COMPANY_INDEX_ALIAS):
    """Build a ``CompanyDocument`` from a company model instance.

    Args:
        company: The company model instance to convert. It may be a historic
            model (inside a migration) that lacks some of the copied fields.
        index: Name of the index (or alias) the document is bound to.

    Returns:
        A ``CompanyDocument`` (not saved by this function) whose
        ``supplier_case_studies`` holds one entry per related case study.
    """
    # getattr is used on the company to allow this function to be used in
    # migrations (historic models won't have all the fields listed below).
    company_fields = {
        'date_of_creation',
        'description',
        'company_type',
        'employees',
        'facebook_url',
        'keywords',
        'linkedin_url',
        'modified',
        'name',
        'number',
        'sectors',
        'expertise_industries',
        'expertise_regions',
        'expertise_languages',
        'expertise_countries',
        'expertise_products_services',
        'slug',
        'summary',
        'twitter_url',
        'website',
        'is_showcase_company',
        'is_published_investment_support_directory',
        'is_published_find_a_supplier',
    }
    case_study_fields = {
        'description',
        'image_one_caption',
        'image_three_caption',
        'image_two_caption',
        'keywords',
        'pk',
        'sector',
        'short_summary',
        'slug',
        'testimonial',
        'testimonial_company',
        'testimonial_job_title',
        'testimonial_name',
        'title',
        'website',
    }

    # Truthiness rather than `!= ''`, so a missing or None description is not
    # indexed as "has a description".
    has_description = bool(getattr(company, 'description', ''))
    company_data_dict = serializers.CompanySerializer(company).data
    company_parser = helpers.CompanyParser(company_data_dict)

    # Flatten the {category: [labels]} mapping into a single list of labels.
    # Read defensively (missing attribute or None -> no labels), in line with
    # the getattr-based access used for the other company fields.
    products_services = getattr(company, 'expertise_products_services', None) or {}
    expertise_products_services_labels = [
        label for labels in products_services.values() for label in labels
    ]

    # Fetch the related case studies once; the same result provides both the
    # count and the nested documents, so the two can never disagree.
    case_studies = list(company.supplier_case_studies.all())

    document = CompanyDocument(
        meta={'id': company.pk, '_index': index},
        pk=str(company.pk),
        case_study_count=len(case_studies),
        has_single_sector=len(company.sectors) == 1,
        has_description=has_description,
        logo=get_absolute_url(company.logo.url if company.logo else ''),
        sectors_label=[helpers.get_sector_label(v) for v in company.sectors],
        expertise_products_services_labels=expertise_products_services_labels,
        expertise_labels=company_parser.expertise_labels_for_search,
        **{key: getattr(company, key, '') for key in company_fields},
    )

    for case_study in case_studies:
        document.supplier_case_studies.append({key: getattr(case_study, key, '') for key in case_study_fields})

    return document