# company/search_filters.py
from elasticsearch_dsl import analysis
# Words handed to the 'companies_stopwords' stop filter below
# (entries such as 'limited', 'ltd' and 'plc').
_COMPANIES_STOPWORDS = [
    'limited',
    'ltd',
    'plc',
    'llp',
    'lp',
    'rc',
    'partnership',
    'ngo',
]

# Token filter of type 'stop', registered under the name 'companies_stopwords'.
companies_stopwords_filter = analysis.token_filter(
    'companies_stopwords',
    type='stop',
    stopwords=_COMPANIES_STOPWORDS,
)
# Rules for the 'american_english_normalizer' char filter. Every entry has the
# form "<text to find>=><replacement>"; the three groups are concatenated in
# the order: fixes, whole words, generic transformations.

# fixes
# NOTE(review): the left-hand sides here (e.g. "aloxe", "poxem") look like
# placeholder spellings; where they originate is not visible in this file --
# confirm before editing.
_NORMALIZER_FIXES = [
    "axerie=>aerie",
    "airplane=>aeroplane",
    "aloxe=>aloe",
    "canoxe=>canoe",
    "coxerce=>coerce",
    "poxem=>poem",
    "prixse=>prise",
]

# whole words (American spelling on the left, British spelling on the right)
_NORMALIZER_WHOLE_WORDS = [
    "armor=>armour",
    "behavior=>behaviour",
    "center=>centre",
    "color=>colour",
    "clamor=>clamour",
    "draft=>draught",
    "endeavor=>endeavour",
    "favor=>favour",
    "flavor=>flavour",
    "harbor=>harbour",
    "honor=>honour",
    "humor=>humour",
    "labor=>labour",
    "liter=>litre",
    "meter=>metre",
    "mold=>mould",
    "neighbor=>neighbour",
    "plow=>plough",
    "savior=>saviour",
    "savor=>savour",
]

# generic transformations (word fragments rather than whole words)
_NORMALIZER_GENERIC_TRANSFORMATIONS = [
    "xion=>ction",
    "disk=>disc",
    "gram=>gramme",
    "izable=>isable",
    "ization=>isation",
    "ize=>ise",
    "izing=>ising",
    "og=>ogue",
    "zation=>sation",
    "yze=>yse",
    "yzing=>ysing",
]

# Char filter of type 'mapping', registered as 'american_english_normalizer'.
american_english_normalizer_filter = analysis.char_filter(
    'american_english_normalizer',
    type='mapping',
    mappings=_NORMALIZER_FIXES + _NORMALIZER_WHOLE_WORDS + _NORMALIZER_GENERIC_TRANSFORMATIONS,
)
# Synonym groups for the 'american_english_synonyms' filter. Each string is a
# single comma-separated group of terms; the groups mix spelling variants
# (e.g. "grey, gray"), regional vocabulary (e.g. "lorry, truck") and some
# common misspellings (e.g. "bussines, bussiness, business").
_AMERICAN_ENGLISH_SYNONYM_GROUPS = (
    "accolade, prize, award",
    "accoutrement, accouterment",
    "aching, pain, hurt",
    "acw, anticlockwise, counterclockwise, counter-clockwise, ccw",
    "adaptor, adapter",
    "advocate, attorney, barrister, procurator, solicitor, lawyer",
    "ageing, aging",
    "agendas, agendum, agenda",
    "almanack, almanac",
    "aluminium, aluminum",
    "america, united states, usa",
    "amphitheatre, amphitheater",
    "anti-aliased, anti-aliasing, antialiased",
    "arbour, arbor",
    "ardour, ardor",
    "arse, ass",
    "artefact, artifact",
    "aubergine, eggplant",
    "automobile, motorcar, car",
    "axe, ax",
    "bannister, banister",
    "barbecue, bbq",
    "battleaxe, battleax",
    "baulk, balk",
    "beetroot, beet",
    "biassed, biased",
    "biassing, biasing",
    "biscuit, cookie",
    "bobsleigh, bobsled",
    "bonnet, hood",
    "bulb, electric bulb, light bulb, lightbulb",
    "burned, burnt",
    "bussines, bussiness, business",
    "business man, business people, businessman",
    "business woman, business people, businesswoman",
    "bussing, busing",
    "cactus, cactuses, cacti",
    "calibre, caliber",
    "candour, candor",
    "candy floss, cotton candy, candyfloss",
    "car park, parking area, parking ground, parking lot, parking-lot, parking place, parking",
    "carburettor, carburetor",
    "castor, caster",
    "cataloguing, cataloging",
    "catboat, sailboat, sailing boat",
    "champion, gainer, victor, win, winner, victory",
    "chat, talk",
    "chequebook, checkbook",
    "chequer, checker",
    "chequerboard, checkerboard",
    "chequered, checkered",
    "christmas tree ball, christmas tree ball ornament, christmas ball ornament, christmas bauble",
    "christmas, x-mas, xmas",
    "cinema, movies",
    "clangour, clangor",
    "clarinettist, clarinetist",
    "conditioning, conditioner",
    "conference, meeting",
    "coriander, cilantro",
    "corporate, company",
    "cosmos, universe, outer space",
    "cosy, cosiness, cozy",
    "criminal, crime",
    "curriculums, curricula",
    "cypher, cipher",
    "daddy, father, pa, papa, dad",
    "defence, defense",
    "defenceless, defenseless",
    "demeanour, demeanor",
    "departure platform, station platform, train platform, train station",
    "dishrag, dish cloth",
    "dishtowel, dishcloth, dish towel",
    "doughnut, donut",
    "downspout, drainpipe",
    "drugstore, pharmacy",
    "e-mail, email",
    "enamoured, enamored",
    "england, britain",
    "english, british",
    "epaulette, epaulet",
    "exercise, excercise, training, workout, fitness",
    "expressway, motorway, highway, freeway",
    # NOTE(review): "facebook" is listed twice in this group.
    "facebook, facebook, social media",
    "fanny, buttocks",
    "fanny pack, bum bag",
    "farmyard, barnyard",
    "faucet, tap",
    "fervour, fervor",
    "fibre, fiber",
    "fibreglass, fiberglass",
    "flashlight, torch",
    "flautist, flutist",
    "flier, flyer",
    "flower fly, hoverfly, syrphid fly, syrphus fly",
    "foot-walk, sidewalk, sideway, pavement",
    "football, soccer",
    "forums, fora",
    "fourth, 4",
    "freshman, fresher",
    "chips, fries, french fries",
    "gaol, jail",
    "gaolbird, jailbird",
    "gaolbreak, jailbreak",
    "gaoler, jailer",
    "garbage, rubbish, trash",
    "gasoline, petrol",
    "gases, gasses",
    "gauge, gage",
    "gauged, gaged",
    "gauging, gaging",
    "glamour, glamor",
    "glueing, gluing",
    "gravesite, sepulchre, sepulture, sepulcher",
    "grey, gray",
    "greyish, grayish",
    "greyness, grayness",
    "groyne, groin",
    "gryphon, griffon, griffin",
    "hand shake, shake hands, shaking hands, handshake",
    "haulier, hauler",
    "new year, new year's eve, hogmanay, silvester, sylvester",
    "holiday, vacation",
    "holidaymaker, holiday-maker, vacationer, vacationist, tourist",
    "inbox, letterbox, outbox, postbox, mailbox",
    "infant, suckling, toddler, baby",
    "infeasible, unfeasible",
    "inquire, inquiry, enquire",
    "insure, ensure",
    "internet, website, www",
    "jelly, jam",
    "jewelery, jewellery, jewelry",
    "jogging, running",
    "journey, travel",
    "judgement, judgment",
    "kerb, curb",
    "kiwifruit, kiwi",
    "laborer, worker",
    "lacklustre, lackluster",
    "ladybeetle, ladybird, ladybug, ladybird beetle",
    "larrikin, scalawag, rascal, scallywag, naughty boy",
    "leaf, leaves",
    "licence, licenced, licencing, license",
    "liquorice, licorice",
    "lorry, truck",
    "loupe, magnifier, magnifying, magnifying glass, magnifying lens, zoom",
    "louvred, louvered",
    "louvres, louver",
    "lustre, luster",
    "mail, post",
    "mailman, postman",
    "marriage, married, marry, marrying, wedding, wed",
    "mayonaise, mayo",
    "meagre, meager",
    "misdemeanour, misdemeanor",
    "mitre, miter",
    "mom, momma, mummy, mother, mum",
    "moonlight, moon light",
    "moult, molt",
    "moustache, moustached, mustache",
    "nappy, diaper",
    "nightlife, night life",
    "normalcy, normality",
    "octopus, kraken",
    "odour, odor",
    "odourless, odorless",
    "offence, offense",
    "omelette, omelet",
    "paine, painee",
    "pajamas, pyjamas",
    "pantyhose, tights",
    "parenthesis, parentheses, bracket",
    "parliament, congress",
    "parlour, parlor",
    "persnickety, pernickety",
    "philtre, filter",
    "phoney, phony",
    "popsicle, iced-lolly",
    "porch, veranda",
    "pretence, pretense",
    "pullover, jumper, sweater",
    "pyjama, pajama",
    "railway, railroad",
    "rancour, rancor",
    "rappel, abseil",
    "row house, serial house, terrace house, terraced house, terraced housing, town house",
    "rigour, rigor",
    "rumour, rumor",
    "sabre, saber",
    "saltpetre, saltpeter",
    "sanitarium, sanatorium",
    "santa, santa claus, st nicholas, st nicholas day",
    "sceptic, sceptical, scepticism, sceptics, skeptic",
    "sceptre, scepter",
    "shaikh, sheikh, sheik",
    "shivaree, charivari",
    "silverware, flatware, cutlery",
    "simultaneous, simultanous",
    "sleigh, sled",
    "smoulder, smouldering, smolder",
    "sombre, somber",
    "speciality, specialty",
    "spectre, specter",
    "splendour, splendor",
    "spoilt, spoiled",
    "street, road",
    "streetcar, tramway, tram, trolley-car",
    "succour, succor",
    "sulphate, sulphide, sulphur, sulphurous, sulfurous, sulfur",
    "super hero, superhero, hero",
    "surname, last name",
    "sweets, candy",
    "syphon, siphon",
    "syphoning, siphoning",
    "tack, thumb-tack, thumbtack, drawing pin",
    "tailpipe, exhaust pipe",
    "taleban, taliban",
    "teenager, teen",
    "television, tv",
    "thank you, thanks",
    "theatre, theater",
    "tickbox, checkbox",
    "ticked, checked",
    "timetable, schedule",
    "tinned, canned",
    "titbit, tidbit",
    "toffee, taffy",
    "tonne, ton",
    "transportation, transport",
    "trapezium, trapezoid",
    "trousers, pants",
    "tumour, tumor",
    # NOTE(review): "twitter" is listed twice in this group.
    "twitter, twitter, social media",
    "tyre, tire",
    "tyres, tires",
    "undershirt, singlet",
    "university, college",
    "upmarket, upscale",
    "valour, valor",
    "vapour, vapor",
    "vigour, vigor",
    "waggon, wagon",
    "windscreen, windshield, front shield",
    "world championship, world cup, worldcup",
    "worshipper, worshipping, worshiping",
    "yoghourt, yoghurt, yogurt",
    "zip, zip code, postal code, postcode",
    "zucchini, courgette",
)

# Token filter of type 'synonym', registered as 'american_english_synonyms'.
american_english_synonyms_filter = analysis.token_filter(
    'american_english_synonyms',
    type='synonym',
    synonyms=_AMERICAN_ENGLISH_SYNONYM_GROUPS,
)
# Token filter of type 'stemmer', registered as 'english_stemmer' and
# configured with name='lovins' (presumably selecting the Lovins stemming
# algorithm -- confirm against the Elasticsearch stemmer filter docs).
lovins_stemmer = analysis.token_filter('english_stemmer', type='stemmer', name='lovins')