uktrade/directory-api

View on GitHub
company/search_filters.py

Summary

Maintainability
A
3 hrs
Test Coverage
from elasticsearch_dsl import analysis

# `stop` token filter registered as 'companies_stopwords'; the stopword list
# holds company legal-form words (limited, ltd, plc, ...).
companies_stopwords_filter = analysis.token_filter(
    'companies_stopwords',
    type='stop',
    stopwords=[
        'limited',
        'ltd',
        'plc',
        'llp',
        'lp',
        'rc',
        'partnership',
        'ngo',
    ],
)

# `mapping` char filter registered as 'american_english_normalizer'.
# Every entry has the form 'key=>value'; the visible pairs rewrite American
# spellings to their British counterparts (e.g. color => colour).
# NOTE(review): a mapping char filter replaces character sequences, not whole
# words, so short keys such as 'og', 'gram' or 'draft' presumably also rewrite
# longer words that merely contain them - confirm this is intended.
american_english_normalizer_filter = analysis.char_filter(
    'american_english_normalizer',
    type='mapping',
    mappings=[
        # fixes (specific corrections; the rationale is not documented in this file)
        'axerie=>aerie',
        'airplane=>aeroplane',
        'aloxe=>aloe',
        'canoxe=>canoe',
        'coxerce=>coerce',
        'poxem=>poem',
        'prixse=>prise',
        # whole words: American spelling => British spelling
        'armor=>armour',
        'behavior=>behaviour',
        'center=>centre',
        'color=>colour',
        'clamor=>clamour',
        'draft=>draught',
        'endeavor=>endeavour',
        'favor=>favour',
        'flavor=>flavour',
        'harbor=>harbour',
        'honor=>honour',
        'humor=>humour',
        'labor=>labour',
        'liter=>litre',
        'meter=>metre',
        'mold=>mould',
        'neighbor=>neighbour',
        'plow=>plough',
        'savior=>saviour',
        'savor=>savour',
        # generic transformations: word fragments / suffixes
        'xion=>ction',
        'disk=>disc',
        'gram=>gramme',
        'izable=>isable',
        'ization=>isation',
        'ize=>ise',
        'izing=>ising',
        'og=>ogue',
        'zation=>sation',
        'yze=>yse',
        'yzing=>ysing',
    ],
)

# `synonym` token filter registered as 'american_english_synonyms'.
# Each string below is one comma-separated group of terms. The groups mix
# British/American spelling variants (e.g. 'grey, gray'), common misspellings
# (e.g. 'bussines, bussiness, business') and loosely related words
# (e.g. 'conference, meeting'). Entry order is kept as found.
american_english_synonyms_filter = analysis.token_filter(
    'american_english_synonyms',
    type='synonym',
    synonyms=(
        'accolade, prize, award',
        'accoutrement, accouterment',
        'aching, pain, hurt',
        'acw, anticlockwise, counterclockwise, counter-clockwise, ccw',
        'adaptor, adapter',
        'advocate, attorney, barrister, procurator, solicitor, lawyer',
        'ageing, aging',
        'agendas, agendum, agenda',
        'almanack, almanac',
        'aluminium, aluminum',
        'america, united states, usa',
        'amphitheatre, amphitheater',
        'anti-aliased, anti-aliasing, antialiased',
        'arbour, arbor',
        'ardour, ardor',
        'arse, ass',
        'artefact, artifact',
        'aubergine, eggplant',
        'automobile, motorcar, car',
        'axe, ax',
        'bannister, banister',
        'barbecue, bbq',
        'battleaxe, battleax',
        'baulk, balk',
        'beetroot, beet',
        'biassed, biased',
        'biassing, biasing',
        'biscuit, cookie',
        'bobsleigh, bobsled',
        'bonnet, hood',
        'bulb, electric bulb, light bulb, lightbulb',
        'burned, burnt',
        'bussines, bussiness, business',
        'business man, business people, businessman',
        'business woman, business people, businesswoman',
        'bussing, busing',
        'cactus, cactuses, cacti',
        'calibre, caliber',
        'candour, candor',
        'candy floss, cotton candy, candyfloss',
        'car park, parking area, parking ground, parking lot, parking-lot, parking place, parking',
        'carburettor, carburetor',
        'castor, caster',
        'cataloguing, cataloging',
        'catboat, sailboat, sailing boat',
        'champion, gainer, victor, win, winner, victory',
        'chat, talk',
        'chequebook, checkbook',
        'chequer, checker',
        'chequerboard, checkerboard',
        'chequered, checkered',
        'christmas tree ball, christmas tree ball ornament, christmas ball ornament, christmas bauble',
        'christmas, x-mas, xmas',
        'cinema, movies',
        'clangour, clangor',
        'clarinettist, clarinetist',
        'conditioning, conditioner',
        'conference, meeting',
        'coriander, cilantro',
        'corporate, company',
        'cosmos, universe, outer space',
        'cosy, cosiness, cozy',
        'criminal, crime',
        'curriculums, curricula',
        'cypher, cipher',
        'daddy, father, pa, papa, dad',
        'defence, defense',
        'defenceless, defenseless',
        'demeanour, demeanor',
        'departure platform, station platform, train platform, train station',
        'dishrag, dish cloth',
        'dishtowel, dishcloth, dish towel',
        'doughnut, donut',
        'downspout, drainpipe',
        'drugstore, pharmacy',
        'e-mail, email',
        'enamoured, enamored',
        'england, britain',
        'english, british',
        'epaulette, epaulet',
        'exercise, excercise, training, workout, fitness',
        'expressway, motorway, highway, freeway',
        # NOTE(review): 'facebook' appears twice in this group.
        'facebook, facebook, social media',
        'fanny, buttocks',
        'fanny pack, bum bag',
        'farmyard, barnyard',
        'faucet, tap',
        'fervour, fervor',
        'fibre, fiber',
        'fibreglass, fiberglass',
        'flashlight, torch',
        'flautist, flutist',
        'flier, flyer',
        'flower fly, hoverfly, syrphid fly, syrphus fly',
        'foot-walk, sidewalk, sideway, pavement',
        'football, soccer',
        'forums, fora',
        'fourth, 4',
        'freshman, fresher',
        'chips, fries, french fries',
        'gaol, jail',
        'gaolbird, jailbird',
        'gaolbreak, jailbreak',
        'gaoler, jailer',
        'garbage, rubbish, trash',
        'gasoline, petrol',
        'gases, gasses',
        'gauge, gage',
        'gauged, gaged',
        'gauging, gaging',
        'glamour, glamor',
        'glueing, gluing',
        'gravesite, sepulchre, sepulture, sepulcher',
        'grey, gray',
        'greyish, grayish',
        'greyness, grayness',
        'groyne, groin',
        'gryphon, griffon, griffin',
        'hand shake, shake hands, shaking hands, handshake',
        'haulier, hauler',
        # Double-quoted because the entry itself contains an apostrophe.
        "new year, new year's eve, hogmanay, silvester, sylvester",
        'holiday, vacation',
        'holidaymaker, holiday-maker, vacationer, vacationist, tourist',
        'inbox, letterbox, outbox, postbox, mailbox',
        'infant, suckling, toddler, baby',
        'infeasible, unfeasible',
        'inquire, inquiry, enquire',
        'insure, ensure',
        'internet, website, www',
        'jelly, jam',
        'jewelery, jewellery, jewelry',
        'jogging, running',
        'journey, travel',
        'judgement, judgment',
        'kerb, curb',
        'kiwifruit, kiwi',
        'laborer, worker',
        'lacklustre, lackluster',
        'ladybeetle, ladybird, ladybug, ladybird beetle',
        'larrikin, scalawag, rascal, scallywag, naughty boy',
        'leaf, leaves',
        'licence, licenced, licencing, license',
        'liquorice, licorice',
        'lorry, truck',
        'loupe, magnifier, magnifying, magnifying glass, magnifying lens, zoom',
        'louvred, louvered',
        'louvres, louver',
        'lustre, luster',
        'mail, post',
        'mailman, postman',
        'marriage, married, marry, marrying, wedding, wed',
        'mayonaise, mayo',
        'meagre, meager',
        'misdemeanour, misdemeanor',
        'mitre, miter',
        'mom, momma, mummy, mother, mum',
        'moonlight, moon light',
        'moult, molt',
        'moustache, moustached, mustache',
        'nappy, diaper',
        'nightlife, night life',
        'normalcy, normality',
        'octopus, kraken',
        'odour, odor',
        'odourless, odorless',
        'offence, offense',
        'omelette, omelet',
        'paine, painee',
        'pajamas, pyjamas',
        'pantyhose, tights',
        'parenthesis, parentheses, bracket',
        'parliament, congress',
        'parlour, parlor',
        'persnickety, pernickety',
        'philtre, filter',
        'phoney, phony',
        'popsicle, iced-lolly',
        'porch, veranda',
        'pretence, pretense',
        'pullover, jumper, sweater',
        'pyjama, pajama',
        'railway, railroad',
        'rancour, rancor',
        'rappel, abseil',
        'row house, serial house, terrace house, terraced house, terraced housing, town house',
        'rigour, rigor',
        'rumour, rumor',
        'sabre, saber',
        'saltpetre, saltpeter',
        'sanitarium, sanatorium',
        'santa, santa claus, st nicholas, st nicholas day',
        'sceptic, sceptical, scepticism, sceptics, skeptic',
        'sceptre, scepter',
        'shaikh, sheikh, sheik',
        'shivaree, charivari',
        'silverware, flatware, cutlery',
        'simultaneous, simultanous',
        'sleigh, sled',
        'smoulder, smouldering, smolder',
        'sombre, somber',
        'speciality, specialty',
        'spectre, specter',
        'splendour, splendor',
        'spoilt, spoiled',
        'street, road',
        'streetcar, tramway, tram, trolley-car',
        'succour, succor',
        'sulphate, sulphide, sulphur, sulphurous, sulfurous, sulfur',
        'super hero, superhero, hero',
        'surname, last name',
        'sweets, candy',
        'syphon, siphon',
        'syphoning, siphoning',
        'tack, thumb-tack, thumbtack, drawing pin',
        'tailpipe, exhaust pipe',
        'taleban, taliban',
        'teenager, teen',
        'television, tv',
        'thank you, thanks',
        'theatre, theater',
        'tickbox, checkbox',
        'ticked, checked',
        'timetable, schedule',
        'tinned, canned',
        'titbit, tidbit',
        'toffee, taffy',
        'tonne, ton',
        'transportation, transport',
        'trapezium, trapezoid',
        'trousers, pants',
        'tumour, tumor',
        # NOTE(review): 'twitter' appears twice in this group.
        'twitter, twitter, social media',
        'tyre, tire',
        'tyres, tires',
        'undershirt, singlet',
        'university, college',
        'upmarket, upscale',
        'valour, valor',
        'vapour, vapor',
        'vigour, vigor',
        'waggon, wagon',
        'windscreen, windshield, front shield',
        'world championship, world cup, worldcup',
        'worshipper, worshipping, worshiping',
        'yoghourt, yoghurt, yogurt',
        'zip, zip code, postal code, postcode',
        'zucchini, courgette',
    ),
)


# `stemmer` token filter registered as 'english_stemmer'; the `name` setting
# selects the 'lovins' stemming algorithm.
lovins_stemmer = analysis.token_filter('english_stemmer', type='stemmer', name='lovins')