# View on GitHub
#
#
# 3 days
# Test Coverage
module Impressionist
  module Bots

    # Reports whether +user_agent+ matches a known crawler/bot signature.
    #
    # A user agent counts as a bot when either:
    # * its lowercased form contains any WILD_CARDS fragment, or
    # * it is exactly equal (case-sensitive) to an entry in LIST.
    #
    # NOTE(review): the method header was garbled to `def = nil)` in this copy
    # of the file; the name `self.bot?` is reconstructed — confirm it against
    # the callers before merging.
    #
    # user_agent - the user-agent String to test, or nil (the default).
    #
    # Returns true or false. A nil user agent always returns false.
    def self.bot?(user_agent = nil)
      return false if user_agent.nil?

      # Lowercase once up front rather than once per wildcard comparison.
      normalized = user_agent.downcase
      WILD_CARDS.any? { |wc| normalized.include?(wc) } || LIST.include?(user_agent)
    end

    # Lowercase fragments; a user agent whose lowercased form contains any of
    # these is classified as a bot.
    WILD_CARDS = %w[bot yahoo slurp google msn crawler]

    LIST = ["<a href=''> UnChaos </a> From Chaos To Order Hybrid Web Search Engine.(",
      "<a href=''> UnChaos Bot Hybrid Web Search Engine. </a> (",
      "<b> UnChaosBot From Chaos To Order UnChaos Hybrid Web Search Engine at </b> (",
      " (+Have Good Day)",
      " LinkChecker v2.0",
      "8484 Boston Project v 1.0",
      ":robot/1.0 (linux) ( admin e-mail: undefined )",
      "A-Online Search",
      "A1 Sitemap Generator/1.0 (+ miggibot/2006.01.24",
      "AbachoBOT (Mozilla compatible)",
      "ABCdatos BotLink/",
      "Aberja Checkomat",
      "abot/0.1 (abot;;",
      "Accelatech RSSCrawler/0.4",
      "Accoona-AI-Agent/1.1.1 (crawler at accoona dot com)",
      "Accoona-AI-Agent/1.1.2 (aicrawler at accoonabot dot com)",
      "Ack (",
      "Acoon Robot v1.50.001",
      "Acoon Robot v1.52 (",
      "Acoon-Robot 4.0.x.[xx] (",
      "Acoon-Robot v3.xx ( and",
      "Acorn/Nutch-0.9 (Non-Profit Search Engine;; acorn at isara dot org)",
      "agadine/1.x.x (+",
      "AgentName/0.1 libwww-perl/5.48",
      "AIBOT/2.1 By +( A Real artificial intelligence search engine China)",
      "aipbot/1.0 (aipbot;;",
      "aipbot/2-beta (aipbot dev;;",
      "Aleksika Spider/1.0 (+",
      "AlkalineBOT/1.4 (1.4.0326.0 RTM)",
      "Allesklar/0.1 libwww-perl/5.46",
      "Allrati/1.1 (+)",
      "AltaVista Intranet V2.0 AVS EVAL",
      "AltaVista Intranet V2.0 Compaq Altavista Eval",
      "AltaVista Intranet V2.0",
      "AltaVista V2.0B",
      "Amfibibot/0.06 (Amfibi Web Search;;",
      "Amfibibot/0.07 (Amfibi Robot;;",
      "AnnoMille spider 0.1 alpha -",
      "AnswerBus (",
      "AnzwersCrawl/2.0 (;Engine)",
      "Apexoo Spider 1.x",
      "appie 1.1 (",
      "ArabyBot (compatible; Mozilla/5.0; GoogleBot; FAST Crawler 6.4;;)",
      "Arachnoidea (",
      "Arquivo-web-crawler  (compatible; heritrix/1.12.1 +",
      "ASAHA Search Engine Turkey V.001 (",
      "Asahina-Antenna/1.x ( ;",
      "AskAboutOil/0.06-rcp (Nutch;;",
      "asked/Nutch-0.8 (web crawler;; epicurus at gmail dot com)",
      "AtlocalBot/1.1 +(",
      "Attentio/Nutch-0.9-dev (Attentio's beta blog crawler;;",
      "augurnfind V-1.x",
      "autowebdir 1.1 (",
      "AV Fetch 1.0",
      "axadine/ (Axadine Crawler;; )",
      "AxmoRobot - Crawling your site for better indexing on search engine.",
      "BabalooSpider/1.3 (BabalooSpider;;",
      "BaboomBot/1.x.x (+",
      "Balihoo/Nutch-1.0-dev (Crawler for search engine - obeys robots.txt and robots meta tags ;; robot at balihoo dot com)",
      "BarraHomeCrawler (",
      "bdcindexer_2.6.2 (research@bdc)",
      "BDNcentral Crawler v2.3 [en] ( (X11; I; Linux 2.0.44 i686)",
      "beautybot/1.0 (+",
      "BebopBot/2.5.1 ( crawler )",
      "BigCliqueBOT/1.03-dev (bigclicbot;;",
      "BIGLOTRON (Beta 2;GNU/Linux)",
      " ( Internet Spider;;",
      "BilgiBetaBot/0.8-dev ( (Beta) ;;",
      "BilgiBot/1.0(beta) (; bilgi at bilgi dot com)",
      "Bitacle bot/1.1",
      "Bitacle Robot (V:1.0;) (",
      "Blaiz-Bee/1.0 (+",
      "Blaiz-Bee/2.00.8222 (BE Internet Search Engine",
      "Blaiz-Bee/2.00.xxxx (+",
      " (Mozilla compatible)",
      "Bloglines Title Fetch/1.0 (",
      "Bloglines-Images/0.1 (",
      "Bloglines/3.1 (",
      "Blogpulse (",
      "BlogPulseLive (",
      "BlogSearch/1.x +",
      "BlogsNowBot, V 2.01 (+",
      "BlogVibeBot-v1.1 (",
      "blogWatcher_Spider/0.1 (",
      "BlogzIce/1.0 (+;",
      "BlogzIce/1.0 +",
      "Bloodhound/Nutch-0.9 (Testing Crawler for Research - obeys robots.txt and robots meta tags ;; robot at balihoo dot com)",
      " (",
      " (",
      "BPImageWalker/2.0 (",
      "BravoBrian SpiderEngine MarcoPolo",
      "BruinBot (+ ",
      "BTbot/0.x (+",
      "BuildCMS crawler (",
      "BurstFindCrawler/1.1 (;;",
      "Buscaplus Robi/1.0 (",
      "Cabot/Nutch-0.9 (Amfibi's web-crawling robot;;",
      "Cabot/Nutch-1.0-dev (Amfibi's web-crawling robot;;",
      "Carnegie_Mellon_University_Research_WebBOT-->PLEASE READ-->",
      "Catall Spider",
      "CazoodleBot/CazoodleBot-0.1 (CazoodleBot Crawler;;",
      "CCBot/1.0 (+",
      "Ceramic Tile Installation Guide (",
      "China Local Browse 2.6",
      "ChristCRAWLER 2.0",
      "CipinetBot (",
      "CloakDetect/0.9 (+",
      "Clushbot/2.x (+",
      "Clushbot/3.x-BinaryFury (+",
      "Clushbot/3.xx-Ajax (+",
      "Clushbot/3.xx-Hector (+",
      "Clushbot/3.xx-Peleus (+",
      "Cogentbot/1.X (+",
      "Comrite/0.7.1 (Nutch;;",
      "Convera Internet Spider V6.x",
      "ConveraCrawler/0.9d (+",
      "ConveraMultiMediaCrawler/0.1 (+",
      "CougarSearch/0.x (+",
      "Covac TexAs Arachbot",
      "Cowbot-0.1 (NHN Corp. / +82-2-3011-1954 /",
      "Cowbot-0.1.x (NHN Corp. / +82-2-3011-1954 /",
      "CrawlConvera0.1 (",
      "Crawler (",
      "Crawler V 0.2.x",
      "Crawllybot/0.1 (Crawllybot; +;",
      "CreativeCommons/0.06-dev (Nutch;;",
      "CrocCrawler vx.3 [en] ( (X11; I; Linux 2.0.44 i686)",
      "Cuasarbot/0.9b ",
      "CurryGuide SiteScan 1.1",
      "Custom Spider /1.0",
      "CyberPatrol SiteCat Webbot (",
      "CydralSpider/1.x (Cydral Web Image Search;",
      "CydralSpider/3.0 (Cydral Image Search;",
      "DataFountains/DMOZ Downloader",
      "DataFountains/Dmoz Downloader (",
      "DataFountains/DMOZ Feature Vector Corpus Creator (",
      "DataparkSearch/4.47 (+",
      "DataparkSearch/4.xx (",
      "DataSpear/1.0 (Spider;;",
      "DataSpearSpiderBot/0.2 (DataSpear Spider Bot;;",
      "DaviesBot/1.7 (",
      "DBrowse 1.4b",
      "DBrowse 1.4d",
      "de.searchengine.comBot 1.2 (",
      "DeepIndex ( )",
      "DeepIndex (",
      "Demo Bot DOT 16b",
      "Demo Bot Z 16b",
      "Denmex websearch (",
      "DiaGem/1.1 (",
      "Digger/1.0 JDK/1.3.0rc3",
      "disco/Nutch-0.9 (experimental crawler;;",
      "disco/Nutch-1.0-dev (experimental crawler;;",
      "DoCoMo/2.0 P900iV(c100;TB;W24H11) ",
      "DoCoMo/2.0 SH902i (compatible; Y!J-SRD/1.0;",
      "DoCoMo/2.0/SO502i (compatible; Y!J-SRD/1.0;",
      "Download-Tipp Linkcheck (",
      "Drecombot/1.0 (",
      "DSurf15a 01",
      "DSurf15a 71",
      "DSurf15a 81",
      "DSurf15a VA",
      "DuckDuckBot/1.0; (+",
      "Dumbot(version 0.1 beta -",
      "Dumbot(version 0.1 beta -",
      "Dumbot(version 0.1 beta)",
      "e-sense 1.0 ea(",
      "eApolloBot/2.0 (compatible; heritrix/2.0.0-SNAPSHOT-20071024.170148 +",
      " []",
      " []",
      "EBrowse 1.4b",
      "Educate Search VxB",
      "egothor/3.0a (+",
      "EgotoBot/4.8 (+",
      "elfbot/1.0 (+",
      "ELI/20070402:2.0 (DAUM RSS Robot, Daum Communications Corp.; +",
      "EmailWolf 1.00",
      "EnaBot/1.x (",
      "Enfish Tracker",
      "Enterprise_Search/;MSSQL (",
      "envolk/1.7 (+",
      "ES.NET_Crawler/2.0 (",
      "eseek-larbin_2.6.2 (",
      "eStyleSearch 4 (compatible; MSIE 6.0; Windows NT 5.0)",
      "ESurf15a 15",
      "EuripBot/0.x (+ GetFile",
      "EuripBot/0.x (+ GetRobots",
      "EuripBot/0.x (+ PreCheck",
      "Eurobot/1.0 (",
      "EvaalSE -",
      "eventax/1.3 (eventax;;",
      "Everest-Vulcan Inc./0.1 (R&D project; host=e-1-24;",
      "Everest-Vulcan Inc./0.1 (R&D project;",
      "ExactSeek Crawler/0.1",
      "exactseek-crawler-2.63 (",
      "exactseek-pagereaper-2.63 (",
      "Exalead NG/MimeLive Client (convert/http/0.120)",
      "Excalibur Internet Spider V6.5.4",
      "Execrawl/1.0 (Execrawl;;",
      "exooba crawler/exooba crawler (crawler for;; info at exooba dot com)",
      "exooba/exooba crawler (exooba; exooba)",
      "EyeCatcher (",
      "Factbot 1.09 (see",
      "factbot :",
      "Fast Crawler Gold Edition",
      "FAST Enterprise Crawler 6 (Experimental)",
      "FAST Enterprise Crawler 6 / Scirus;",
      "FAST Enterprise Crawler 6 used by Cobra Development (",
      "FAST Enterprise Crawler 6 used by Comperio AS (",
      "FAST Enterprise Crawler 6 used by FAST (FAST)",
      "FAST Enterprise Crawler 6 used by Pages Jaunes (",
      "FAST Enterprise Crawler 6 used by Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
      "FAST Enterprise Crawler 6 used by Singapore Press Holdings (",
      "FAST Enterprise Crawler/6 (",
      "FAST Enterprise Crawler/6.4 (helpdesk at",
      "FAST FirstPage retriever (compatible; MSIE 5.5; Mozilla/4.0)",
      "FAST MetaWeb Crawler (helpdesk at fastsearch dot com)",
      "Fast PartnerSite Crawler",
      "FAST-WebCrawler/2.2.10 (Multimedia Search) (;",
      "FAST-WebCrawler/2.2.6 (;",
      "FAST-WebCrawler/2.2.7 (;",
      "FAST-WebCrawler/2.2.8 (;",
      "FAST-WebCrawler/3.2 test",
      "FAST-WebCrawler/3.3 (;",
      "FAST-WebCrawler/3.4/Nirvana (;",
      "FAST-WebCrawler/3.4/PartnerSite (;",
      "FAST-WebCrawler/3.5 (atw-crawler at fast dot no;",
      "FAST-WebCrawler/3.6 (atw-crawler at fast dot no;",
      "FAST-WebCrawler/3.6/FirstPage (;",
      "FAST-WebCrawler/3.7 (atw-crawler at fast dot no;",
      "FAST-WebCrawler/3.7/FirstPage (atw-crawler at fast dot no;",
      "FAST-WebCrawler/3.8 (atw-crawler at fast dot no;",
      "FAST-WebCrawler/3.8/Fresh (atw-crawler at fast dot no;",
      "FAST-WebCrawler/3.x Multimedia",
      "FAST-WebCrawler/3.x Multimedia (mm dash crawler at fast dot no)",
      "fastbot crawler beta 2.0 (+",
      "FastCrawler 3.0.1 (",
      "FastSearch Web Crawler for Verizon SuperPages (",
      "Favcollector/2.0 (",
      " crawler/0.6 (",
      "Feed Seeker Bot (RSS Feed Seeker",
      "Feedfetcher-Google; (+",
      "FeedHub FeedDiscovery/1.0 (",
      "FeedHub MetaDataFetcher/1.0 (",
      "Feedjit Favicon Crawler 1.0",
      "Feedster Crawler/3.0; Feedster, Inc.",
      "Felix - Mixcat Crawler (+",
      "FFC Trap Door Spider",
      "Findexa Crawler (",
      "findlinks/ (+ ",
      "Firefly/1.0 (compatible; Mozilla 4.0; MSIE 5.5)",
      "Firefox (",
      "Firefox_1.0.6 (",
      " Search -",
      "Flapbot/0.7.2 (Flaptor Crawler;; crawler at flaptor period com)",
      "Flexum spider",
      "FlickBot 2.0 RPT-HTTPClient/0.3-3",
      "FnooleBot/2.5.2 (+",
      " Spider/0.1 beta 1 (",
      "Francis/1.0 (",
      "Franklin Locator 1.8",
      " (;",
      "FreshNotes crawler< report problems to crawler-at-freshnotes-dot-com",
      "FSurf15a 01",
      "Full Web Bot 0416B",
      "Full Web Bot 0516B",
      "Full Web Bot 2816B",
      "FyberSpider (+",
      "GAIS Robot/1.0B2",
      "Gaisbot/3.0 (;",
      "GalaxyBot/1.0 (",
      "Gallent Search Spider v1.4 Robot 2 (",
      "gamekitbot/1.0 (+",
      "gazz/x.x (",
      "genieBot (",
      "GeonaBot 1.x;",
      "gigabaz/3.1x (;",
      "Gigabot/2.0 (",
      "Gigabot/3.0 (",
      "GigabotSiteSearch/2.0 (",
      "GNODSPIDER (",
      "Goblin/0.9 (",
      "Goblin/0.9.x (",
      "GOFORITBOT ( )",
      "gonzo1[P] +",
      "gonzo2[P] +",
      "Googlebot-Image/1.0 (",
      "Googlebot/2.1 (",
      "Googlebot/2.1 (",
      "Googlebot/Test (",
      "GrapeFX/0.3 libwww/5.4.0",
      "great-plains-web-spider/flatlandbot (Flatland Industries Web Spider;;",
      "GrigorBot 0.8 (",
      "grub crawler(",
      "gsa-crawler (Enterprise; GID-01422;",
      "gsa-crawler (Enterprise; GID-01742;",
      "gsa-crawler (Enterprise; GIX-02057;",
      "gsa-crawler (Enterprise; GIX-03519;",
      "gsa-crawler (Enterprise; GIX-0xxxx;",
      "Guestbook Auto Submitter",
      "Gulper Web Bot 0.2.4 (",
      "Gungho/0.08004 (",
      "GurujiBot/1.0 (+",
      "GurujiImageBot/1.0 (+",
      "Hatena Antenna/0.4 (",
      "Hatena Pagetitle Agent/1.0",
      "Hatena RSS/0.3 (",
      "hbtronix.spider.2 --",
      "HeinrichderMiragoRobot (",
      "Helix/1.x (",
      "HenriLeRobotMirago (",
      "HenryTheMiragoRobot (",
      "Hi! I'm CsCrawler my homepage: RPT-HTTPClient/0.3-3",
      "Hippias/0.9 Beta",
      "Hitwise Spider v1.0",
      "holmes/3.11 (",
      "holmes/3.9 (",
      "holmes/3.xx (OnetSzukaj/5.0; +",
      "HolmesBot (",
      "Honda-Search/0.7.2 (Nutch;;",
      "HooWWWer/2.1.3 (debugging run) (+ | mailto:crawler-info<at>",
      "HooWWWer/2.1.x ( | mailto:crawler-info<at>",
      "HPL/Nutch-0.9 -",
      "htdig/3.1.6 (",
      "htdig/3.1.6 (unconfigured@htdig.searchengine.maintainer)",
      "htdig/3.1.x (root@localhost)",
      "http://Ask.24x.Info/ (",
      " ACONTBOT",
      " []",
      " [wf216]",
      "i1searchbot/2.0 (i1search web crawler;;",
      "iaskspider2 (",
      "ICC-Crawler(Mozilla-compatible;; icc-crawl(at)ml(dot)nict(dot)go(dot)jp)",
      "iCCrawler (",
      "ICCrawler - ICjobs (",
      "ichiro/x.0 (",
      "ichiro/x.0 (",
      "IconSurf/2.0 favicon finder (see",
      "IconSurf/2.0 favicon monitor (see",
      "ideare - SignSite/1.x",
      " (; 0 subscribers)",
      "igdeSpyder (compatible;; +",
      "IIITBOT/1.1 (Indian Language Web Search Engine;; pvvpr at iiit dot ac dot in)",
      "ilial/Nutch-0.9 (Ilial, Inc. is a Los Angeles based Internet startup company. For more information please visit;;",
      "IlTrovatore-Setaccio (",
      "Iltrovatore-Setaccio/0.3-dev (Indexing;;",
      "IlTrovatore-Setaccio/1.2 (",
      "Iltrovatore-Setaccio/1.2 (It-bot;;",
      "iltrovatore-setaccio/1.2-dev (spidering;",
      "IlTrovatore/1.2 (IlTrovatore;;",
      "ImageWalker/2.0 (",
      "IncyWincy data gatherer(",
      "IncyWincy page crawler(",
      " Crawler7",
      "Industry Program 1.0.x",
      "Inet library",
      " ( il Sud dei Motori di Ricerca",
      "InfoFly/1.0 (",
      "INFOMINE/8.0 Adders",
      "INFOMINE/8.0 RemoteServices",
      "INFOMINE/8.0 VLCrawler (",
      "InfoSeek Sidewinder/0.9",
      "InfoSeek Sidewinder/1.0A",
      "InfoSeek Sidewinder/1.1A",
      "Infoseek SideWinder/1.45 (Compatible; MSIE 10.0; UNIX)",
      "Infoseek SideWinder/2.0B (Linux 2.4 i686)",
      "INGRID/3.0 MT (;",
      "Inktomi Search",
      "InnerpriseBot/1.0 (",
      " search and find world wide!",
      "Internet Ninja x.0",
      "IOI/2.0 (ISC Open Index crawler;;",
      "IPiumBot laurion(dot)com",
      "IpselonBot/0.xx-beta (Ipselon;;",
      "IRLbot/1.0 (",
      "IRLbot/3.0 (compatible; MSIE 6.0;",
      "ISC Systems iRc Search 2.1",
      "IUPUI Research Bot v 1.9a",
      "IWAgent/ 1.0 -",
      "Jabot/6.x (",
      "Jabot/7.x.x (",
      "Jambot/0.1.x (Jambot;;",
      "Jambot/0.2.1 (Jambot;;",
      "Jayde Crawler.",
      "KAIST AITrc Crawler",
      "KakleBot - (KakleBot -; http://;",
      "kalooga/kalooga-4.0-dev-datahouse (Kalooga;;",
      "kalooga/KaloogaBot (Kalooga;;",
      "Kenjin Spider",
      "KE_1.0/2.0 libwww/5.2.8",
      "KFSW-Bot (Version: 1.01 powered by KFSW",
      "kinja-imagebot (",
      "kinjabot (",
      "KIT-Fireball/2.0 (compatible; Mozilla 4.0; MSIE 5.5)",
      "Krugle/Krugle,Nutch/0.8+ (Krugle web crawler;;",
      "KSbot/1.0 (KnowledgeStorm crawler;;",
      "LapozzBot/1.4 (",
      "LapozzBot/1.5 (+",
      "larbin (",
      "larbin_2.2.0 (",
      "larbin_2.2.1_de_Viennot (",
      "larbin_2.2.2 (",
      "larbin_2.2.2_guillaume (",
      "larbin_2.6.0 (larbin2.6.0@unspecified.mail)",
      "larbin_2.6.1 (larbin2.6.1@unspecified.mail)",
      "larbin_2.6.2 (",
      "larbin_2.6.2 (larbin2.6.2@unspecified.mail)",
      "larbin_2.6.2 (listonATccDOTgatechDOTedu)",
      "larbin_2.6.2 (",
      "larbin_2.6.2 (",
      "larbin_2.6.2 (",
      "larbin_2.6.3 (",
      "larbin_2.6.3 (",
      "larbin_2.6_basileocaml (",
      "larbin_devel (",
      "lawinfo-crawler/Nutch-0.9-dev (Crawler for pages;;",
      "LECodeChecker/3.0 libgetdoc/1.0",
      "LEIA/3.01pr (LEIAcrawler; [SNIP])",
      " +",
      "LibertyW (+",
      "libWeb/clsHTTP --",
      "libwww-perl/5.52 FP/2.1",
      "libwww-perl/5.52 FP/4.0",
      "LijitSpider/Nutch-0.9 (Reports crawler;; info(a)lijit(d)com)",
      "Lincoln State Web Browser",
      "Links 2.0 (",
      "Links SQL (",
      "LinkScan/11.0beta2 UnixShareware robot from (used by Indiafocus/Indiainfo)",
      "LinkScan/9.0g Unix",
      "LinkScan/x.x Unix",
      "LiveTrans/Nutch-0.9 (maintainer: cobain at iis dot sinica dot edu dot tw;",
      "Llaut/1.0 (",
      "lmspider (",
      "LocalBot/1.0 (",
      "LocalcomBot/1.2.x (",
      "Lockstep Spider/1.0",
      "Lovel as 1.0 ( +",
      "LTI/LemurProject Nutch Spider/Nutch-1.0-dev (lti crawler for CMU;; changkuk at cmu dot edu)",
      "LTI/LemurProject Nutch Spider/Nutch-1.0-dev (Research spider using Nutch;;",
      "Lynx/2.8.4rel.1 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/0.9.6c (",
      "Mac Finder 1.0.xx",
      "Mackster( )",
      "Mahiti.Com/Mahiti Crawler-1.0 (Mahiti.Com; ;",
      "mammoth/1.0 (",
      " (",
      "Mariner/5.1b [de] (Win95; I ;Kolibri gncwebbot)",
      "Marketwave Hit List",
      "Marvin v0.3",
      "MaSagool/1.0 (MaSagool;;",
      "Mata Hari/2.00 ",
      "Matrix S.p.A. - FAST Enterprise Crawler 6 (Unknown admin e-mail address)",
      "maxomobot/dev-20051201 (maxomo;;",
      "MDbot/1.0 (+",
      "MediaCrawler-1.0 (Experimental)",
      "Mediapartners-Google/2.1 (",
      "MegaSheep v1.0 ( internet sheep)",
      "Megite2.0 (",
      "Metaeuro Web Crawler/0.2 (MetaEuro Web Search Clustering Engine;; crawler at metaeuro dot com)",
      "MetagerBot/0.8-dev (MetagerBot;;  )",
      "Metaspinner/0.01 (Metaspinner;;",
      "metatagsdir/0.7 (+",
      "MFC Foundation Class Library 4.0",
      "Microsoft Small Business Indexer",
      "Microsoft URL Control - 6.00.8xxx",
      "MicrosoftPrototypeCrawler (How's my crawling?",
      "Missauga Locate 1.0.0",
      "Missigua Locator 1.9",
      "Missouri College Browse",
      "Misterbot-Nutch/0.7.1 (Misterbot-Nutch;;",
      "Miva (",
      "Mizzu Labs 2.2",
      "MJ12bot/vx.x.x (",
      "MJ12bot/vx.x.x (",
      "MJBot (SEO assessment)",
      "MLBot (",
      "Mo College 1.9",
      "moget/x.x (",
      "MojeekBot/0.x (archi;",
      "Morris - Mixcat Crawler (",
      "Mouse-House/7.4 (spider_monkey spider info at",
      "mozDex/0.xx-dev (mozDex;;",
      "Mozilla (",
      "Mozilla 4.0(compatible; BotSeer/1.0; +",
      "Mozilla/2.0 (compatible; Ask Jeeves)",
      "Mozilla/2.0 (compatible; Ask Jeeves/Teoma)",
      "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; ",
      "Mozilla/2.0 (compatible; Ask Jeeves/Teoma;",
      "Mozilla/2.0 (compatible; EZResult -- Internet Search Engine)",
      "Mozilla/2.0 (compatible; NEWT ActiveX; Win32)",
      "Mozilla/2.0 (compatible; T-H-U-N-D-E-R-S-T-O-N-E)",
      "Mozilla/3.0 (compatible; Fluffy the spider;;",
      "Mozilla/3.0 (compatible; Indy Library)",
      "Mozilla/3.0 (compatible; MuscatFerret/1.5.4;",
      "Mozilla/3.0 (compatible; MuscatFerret/1.5;",
      "Mozilla/3.0 (compatible; MuscatFerret/1.6.x;",
      "Mozilla/3.0 (compatible; scan4mail (advanced version)",
      "Mozilla/3.0 (compatible; ScollSpider;",
      "Mozilla/3.0 (compatible;",
      "Mozilla/3.0 (compatible;",
      "Mozilla/3.0 (INGRID/3.0 MT;;",
      "Mozilla/3.0 (;;",
      "Mozilla/3.0 (Slurp/cat;;",
      "Mozilla/3.0 (Slurp/si;;",
      "Mozilla/3.0 (Vagabondo/1.1 MT;;",
      "Mozilla/3.0 (Vagabondo/1.x MT;;",
      "Mozilla/3.0 (Vagabondo/2.0 MT;;",
      "Mozilla/3.0 (Vagabondo/2.0 MT;;",
      "Mozilla/3.01 (Compatible; Links2Go Similarity Engine)",
      "Mozilla/4.0 (agadine3.0)",
      "Mozilla/4.0 (compatible: AstraSpider V.2.1 :",
      "Mozilla/4.0 (compatible;  Vagabondo/2.2; webcrawler at wise-guys dot nl;",
      "Mozilla/4.0 (compatible;  Vagabondo/4.0Beta; webcrawler at wise-guys dot nl;",
      "Mozilla/4.0 (compatible; Advanced Email Extractor v2.xx)",
      "Mozilla/4.0 (compatible; B_L_I_T_Z_B_O_T)",
      "Mozilla/4.0 (compatible;",
      "Mozilla/4.0 (compatible; crawlx,",
      "Mozilla/4.0 (compatible; DAUMOA-video; +",
      "Mozilla/4.0 (compatible; FastCrawler3",
      "Mozilla/4.0 (compatible; FDSE robot)",
      "Mozilla/4.0 (compatible; GPU p2p crawler",
      "Mozilla/4.0 (compatible; grub-client-0.2.x; Crawl your stuff with",
      "Mozilla/4.0 (compatible; grub-client-0.3.x; Crawl your own stuff with",
      "Mozilla/4.0 (compatible; grub-client-2.x)",
      "Mozilla/4.0 (compatible; Iplexx Spider/1.0",
      "Mozilla/4.0 (compatible; MSIE 4.01; b o t)",
      "Mozilla/4.0 (compatible; MSIE 4.01; Windows CE; PPC; 240x320; SPV M700; OpVer OrangeBot-Mobile 2008.0 (",
      "Mozilla/4.0 (compatible; MSIE 4.0; Windows NT; Site Server 3.0 Robot) Indonesia Interactive",
      "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0) (",
      "Mozilla/4.0 (compatible; MSIE 5.0; NetNose-Crawler 2.0; A New Search Experience:",
      "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) TrueRobot; 1.5",
      "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot BETA 1.2 (",
      "Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot; 1.6",
      "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt; DTS Agent",
      "Mozilla/4.0 (compatible; MSIE 5.0;;",
      "Mozilla/4.0 (compatible; MSIE 5.0;;;",
      "Mozilla/4.0 (compatible; MSIE 5.0; YANDEX)",
      "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; obot)",
      "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; QXW03018)",
      "Mozilla/4.0 (compatible; MSIE 6.0 compatible; Asterias Crawler v4; +;; SpiderThread  Revision: 3.10",
      "Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1) Skampy/0.9.x [en]",
      "Mozilla/4.0 (compatible; MSIE 6.0; TargetSeek/1.0; +",
      "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP entries t_st;",
      "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP links test;",
      "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; bot; .NET CLR 1.1.4322)",
      "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; heritrix/1.3.0",
      "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; QihooBot 1.0",
      "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT; MS Search 4.0 Robot)",
      "Mozilla/4.0 (compatible; MSIE enviable; DAUMOA 2.0; DAUM Web Robot; Daum Communications Corp., Korea; +",
      "Mozilla/4.0 (compatible; MSIE is not me; DAUMOA/1.0.1; DAUM Web Robot; Daum Communications Corp., Korea)",
      "Mozilla/4.0 (compatible; NaverBot/1.0;",
      "Mozilla/4.0 (compatible; SpeedySpider;",
      "Mozilla/4.0 (compatible;",
      "Mozilla/4.0 (compatible; Y!J; for robot study; keyoshid)",
      "Mozilla/4.0 (compatible; Yahoo Japan; for robot study; kasugiya)",
      "Mozilla/4.0 (JemmaTheTourist;",
      "Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 (compatible; Googlebot/2.1;",
      "Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 FAKE (compatible; Googlebot/2.1;",
      "Mozilla/4.0 (Mozilla;;",
      "Mozilla/4.0 (Sleek Spider/1.2)",
      "Mozilla/4.0 compatible FurlBot/Furl Search 2.0 (FurlBot;;",
      "Mozilla/4.0 compatible ZyBorg/1.0 (;",
      "Mozilla/4.0 compatible ZyBorg/1.0 (;",
      "Mozilla/4.0 compatible ZyBorg/1.0 Dead Link Checker (;",
      "Mozilla/4.0 compatible ZyBorg/1.0 for Homepage (;",
      "Mozilla/4.0 [en] (Ask Jeeves Corporate Spider)",
      "Mozilla/4.0(compatible; Zealbot 1.0)",
      "Mozilla/4.04 (compatible; Dulance bot; +",
      "Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_TrueRobot/1.4 libwww/5.2.8",
      "Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_VoilaBot/1.6 libwww/5.3.2",
      "Mozilla/4.6 [en] (",
      "Mozilla/4.7 (compatible;",
      "Mozilla/4.7 (compatible; Intelliseek;",
      "Mozilla/4.7 (compatible; Whizbang)",
      "Mozilla/4.7 (compatible; WhizBang;",
      "Mozilla/4.7 [en](",
      "Mozilla/4.7 [en](",
      "Mozilla/4.72 [en] (BACS",
      "Mozilla/5.0 (+ Mammoth/0.1",
      "Mozilla/5.0 (+ Mammoth/0.1",
      "Mozilla/5.0 (Clustered-Search-Bot/1.0;;",
      "Mozilla/5.0 (compatible; +",
      "Mozilla/5.0 (compatible; 008/0.83;;) Gecko/2008032620",
      "Mozilla/5.0 (compatible; Abonti/0.8 -",
      "Mozilla/5.0 (compatible; aiHitBot/1.0; +",
      "Mozilla/5.0 (compatible; AnsearchBot/1.x; +",
      "Mozilla/5.0 (compatible; archive.org_bot/1.10.0 +",
      "Mozilla/5.0 (compatible; archive.org_bot/1.13.1x",
      "Mozilla/5.0 (compatible; archive.org_bot/1.5.0-200506132127 Hurricane Katrina",
      "Mozilla/5.0 (compatible; Ask Jeeves/Teoma;",
      "Mozilla/5.0 (compatible; BecomeBot/1.23;",
      "Mozilla/5.0 (compatible; BecomeBot/1.xx; MSIE 6.0 compatible;",
      "Mozilla/5.0 (compatible; BecomeBot/2.0beta;",
      "Mozilla/5.0 (compatible; BecomeBot/2.x; MSIE 6.0 compatible;",
      "Mozilla/5.0 (compatible; BecomeJPBot/2.3; MSIE 6.0 compatible; +",
      "Mozilla/5.0 (compatible; BlogRefsBot/0.1;",
      "Mozilla/5.0 (compatible; Bot; +",
      "Mozilla/5.0 (compatible; BuzzRankingBot/1.0; +",
      "Mozilla/5.0 (compatible; Charlotte/1.0b;",
      "Mozilla/5.0 (compatible; Charlotte/1.0b;",
      "Mozilla/5.0 (compatible; Crawling jpeg;",
      "Mozilla/5.0 (compatible; de/1.13.2 +",
      "Mozilla/5.0 (compatible; Diffbot/0.1; +",
      "Mozilla/5.0 (compatible; DNS-Digger-Explorer/1.0; +",
      "Mozilla/5.0 (compatible; DNS-Digger/1.0; +",
      "Mozilla/5.0 (compatible;;",
      "Mozilla/5.0 (compatible; EARTHCOM/2.2; +",
      "Mozilla/5.0 (compatible; Exabot Test/3.0; +",
      "Mozilla/5.0 (compatible; FatBot 2.0;",
      "Mozilla/5.0 (compatible; Galbot/1.0; +",
      "mozilla/5.0 (compatible; genevabot",
      "Mozilla/5.0 (compatible; Googlebot/2.1;",
      "mozilla/5.0 (compatible; heritrix/1.0.4",
      "Mozilla/5.0 (compatible; heritrix/1.10.2 +",
      "Mozilla/5.0 (compatible; heritrix/1.12.1 +",
      "Mozilla/5.0 (compatible; heritrix/1.12.1 +",
      "Mozilla/5.0 (compatible; heritrix/1.12.1 + []",
      "mozilla/5.0 (compatible; heritrix/1.3.0",
      "Mozilla/5.0 (compatible; heritrix/1.4.0 +",
      "Mozilla/5.0 (compatible; heritrix/1.4t",
      "Mozilla/5.0 (compatible; heritrix/1.5.0",
      "Mozilla/5.0 (compatible; heritrix/1.5.0-200506231921",
      "Mozilla/5.0 (compatible; heritrix/1.6.0",
      "Mozilla/5.0 (compatible; heritrix/1.7.0 +",
      "Mozilla/5.0 (compatible; heritrix/1.x.x +",
      "Mozilla/5.0 (compatible; heritrix/2.0.0-RC1 +",
      "Mozilla/5.0 (compatible; Hermit Search. Com; +",
      "Mozilla/5.0 (compatible; HyperixScoop/1.3; +",
      "Mozilla/5.0 (compatible; IDBot/1.0; +",
      "Mozilla/5.0 (compatible; InterseekWeb/3.x)",
      "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Exabot-Thumbnails)",
      "Mozilla/5.0 (compatible; LemSpider 0.1)",
      "Mozilla/5.0 (compatible; MojeekBot/2.0;",
      "Mozilla/5.0 (compatible; MSIE 6.0; Podtech Network;",
      "Mozilla/5.0 (compatible; OnetSzukaj/5.0;",
      "Mozilla/5.0 (compatible; PalmeraBot; Version 0.001",
      "Mozilla/5.0 (compatible;",
      "Mozilla/5.0 (compatible;",
      "Mozilla/5.0 (compatible; PWeBot/3.1;",
      "Mozilla/5.0 (compatible; Quantcastbot/1.0;",
      "Mozilla/5.0 (compatible; ScoutJet; +",
      "Mozilla/5.0 (compatible; Scrubby/2.2;",
      "Mozilla/5.0 (compatible; ShunixBot/1.x.x +",
      "Mozilla/5.0 (compatible; ShunixBot/1.x;",
      "Mozilla/5.0 (compatible; SkreemRBot +",
      "Mozilla/5.0 (compatible; SummizeBot +",
      "Mozilla/5.0 (compatible; Synoobot/0.9;",
      "Mozilla/5.0 (compatible; Theophrastus/x.x;",
      "Mozilla/5.0 (compatible; TridentSpider/3.1)",
      "Mozilla/5.0 (compatible; Vagabondo/2.1; webcrawler at wise-guys dot nl;",
      "Mozilla/5.0 (compatible; Webduniabot/1.0; +",
      "Mozilla/5.0 (compatible; worio bot heritrix/1.10.0 +",
      "Mozilla/5.0 (compatible; WoW Lemmings Kathune/2.0;",
      "Mozilla/5.0 (compatible; Yahoo! DE Slurp;",
      "Mozilla/5.0 (compatible; Yahoo! Slurp China;",
      "Mozilla/5.0 (compatible; Yahoo! Slurp;",
      "Mozilla/5.0 (compatible; Yoono;",
      "Mozilla/5.0 (compatible; YoudaoBot/1.0;; )",
      "Mozilla/5.0 (compatible; Zenbot/1.3; +",
      "Mozilla/5.0 (compatible; zermelo + [,]",
      "Mozilla/5.0 (compatible;archive.org_bot/1.7.1; collectionId=316; Archive-It; +",
      "Mozilla/5.0 (compatible;archive.org_bot/heritrix-1.9.0-200608171144 +",
      "Mozilla/5.0 (compatible;MAINSEEK_BOT)",
      "Mozilla/5.0 (Slurp/cat;;",
      "Mozilla/5.0 (Slurp/si;;",
      "Mozilla/5.0 (Twiceler-0.9",
      "Mozilla/5.0 (Version: xxxx Type:xx)",
      "Mozilla/5.0 (",
      "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.7) NimbleCrawler 1.11 obeys UserAgent NimbleCrawler For problems contact:",
      "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (",
      "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (",
      "Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawler@health",
      "Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact:",
      "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:Spinn3r (Spinn3r 3.1); Gecko/20021130",
      "Mozilla/5.0 URL-Spider",
      "Mozilla/5.0 [en] (compatible; Gulper Web Bot 0.2.4",
      "MQBOT/Nutch-0.9-dev (MQBOT Nutch Crawler;;",
      "msnbot-media/1.0 (+",
      "msnbot-Products/1.0 (+",
      "MSNBOT/0.xx (",
      "msnbot/x.xx (",
      "MSNBOT_Mobile MSMOBOT Mozilla/2.0 (compatible; MSIE 4.02; Windows CE; Default)",
      "MSRBOT (",
      "multicrawler (",
      "MusicWalker2.0 (",
      " Crawler 2.0",
      "Naamah 1.0.1/Blogbot (",
      "Naamah 1.0a/Blogbot (",
      "NameOfAgent (CMS Spider)",
      "NASA Search 1.0",
      "NaverBot-1.0 (NHN Corp. / +82-2-3011-1954 /",
      "NavissoBot/1.7  (+",
      "NCSA Beta 1 (",
      "Nebullabot/2.2 (",
      "NEC Research Agent -- compuman at",
      "Net-Seekr Bot/Net-Seekr Bot V1 (",
      "NetinfoBot/1.0 (",
      "Netluchs/0.8-dev ( ;; ___don'",
      "Netprospector JavaCrawler",
      "NetSeer/Nutch-0.9 (NetSeer Crawler;;",
      "NetSprint -- 2.0",
      "NetWhatCrawler/0.06-dev (NetWhatCrawler from;;",
      "NextGenSearchBot 1 (for information visit",
      "NextopiaBOT (+ distributed crawler client beta v0.x",
      "NG-Search/0.90 (NG-SearchBot;;  )",
      "NITLE Blog Spider/0.01",
      "Noago Spider",
      "Nokia-WAPToolkit/1.2 googlebot(at)",
      "Nokia6610/1.0 (3.09) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible;YahooSeeker/M1A1-R2D2;",
      "NokodoBot/1.x (+",
      "Norbert the Spider(",
      "noxtrumbot/1.0 (",
      "NP/0.1 (NP;;",
      "NPBot (",
      " (;",
      " (;",
      "nttdirectory_robot/0.9 (",
      "nuSearch Spider <a href=''></a> (compatible; MSIE 4.01)",
      "NuSearch Spider (compatible; MSIE 6.0)",
      "NuSearch Spider",
      "Nutch crawler/Nutch-0.9 (;",
      "Nutch/Nutch-0.9 (Eurobot; )",
      "NutchCVS/0.0x-dev (Nutch;;",
      "NutchCVS/0.7.1 (Nutch running at UW;;",
      "NutchEC2Test/Nutch-0.9-dev (Testing Nutch on Amazon EC2.;; ec2test at",
      "NutchOrg/0.0x-dev (Nutch;;",
      "nutchsearch/Nutch-0.9 (Nutch Search 1.0; herceg_novi at yahoo dot com)",
      "NutchVinegarCrawl/Nutch-0.8.1 (Vinegar;; eytanadar at gmail dot com)",
      "obidos-bot (just looking for books.)",
      "ObjectsSearch/0.01-dev (ObjectsSearch;;",
      "ObjectsSearch/0.0x (ObjectsSearch;;",
      "oBot ((compatible;Win32))",
      "Ocelli/1.x (",
      "Octora Beta -",
      "Octora Beta Bot -",
      "OmniExplorer_Bot/1.0x (+ Internet CategorizerOmniExplorer car & shopping search (",
      "OmniExplorer_Bot/1.0x (+ Job Crawler",
      "OmniExplorer_Bot/1.1x (+ Torrent Crawler",
      "OmniExplorer_Bot/x.xx (+ WorldIndexer",
      " SA-",
      "OntoSpider/1.0 libwww-perl/5.65",
      "OOZBOT/0.20 ( ; agentname at setooz dot_com )",
      "OpenAcoon v4.0.x (",
      "Openfind data gatherer- Openbot/3.0+(;+",
      "Openfind Robot/1.1A2",
      "OpenISearch/1.x (",
      "OpenTaggerBot (",
      "OpenWebSpider/0.x.x (",
      "OpidooBOT (larbin2.6.3@unspecified.mail)",
      "Oracle Ultra Search",
      "Orbiter/T-2.0 (+",
      "Overture-WebCrawler/3.8/Fresh (atw-crawler at fast dot no;",
      "ozelot/2.7.3 (Search engine indexer;;",
      "PADLibrary Spider",
      "PageBitesHyperBot/600 (",
      "page_verifier (",
      "ParaSite/1.0b (",
      "Patwebbot (",
      "PBrowse 1.4b",
      "PEval 1.4b",
      "pipeLiner/0.3a (PipeLine Spider;; webmaster'at'",
      "pipeLiner/0.xx (PipeLine Spider;",
      "PJspider/3.0 (;",
      "PluckFeedCrawler/2.0 (compatible; Mozilla 4.0; MSIE 5.5;; 1 subscribers)",
      "Pluggd/Nutch-0.9 (automated crawler;support at pluggd dot com)",
      "polybot 1.0 (",
      "Port Huron Labs",
      "PortalBSpider/2.0 (",
      "potbot 1.0",
      "PRCrawler/Nutch-0.9 (data mining development project;",
      "PrivacyFinder Cache Bot v1.0",
      "Production Bot 0116B",
      "Production Bot 2016B",
      "Production Bot DOT 3016B",
      "Program Shareware 1.0.2",
      "Project XP5 [2.03.07-111203]",
      "PROve AnswerBot 4.0",
      "ProWebGuide Link Checker (",
      "psbot/0.1 (+",
      "PSurf15a 11",
      "PSurf15a 51",
      "PSurf15a VA",
      "PubCrawl (",
      "pulseBot (pulse Web Miner)",
      "PWeBot/1.2 Inspector (",
      " Web Directory (",
      "QEAVis Agent/Nutch-0.9 (Quantitative Evaluation of Academic Websites Visibility;",
      "QPCreep Test Rig ( We are not indexing- just testing )",
      "QuepasaCreep ( )",
      "QuepasaCreep v0.9.1x",
      "QueryN Metasearch",
      "QweeryBot/3.01 (",
      "Qweery_robot.txt_CheckBot/3.01 (",
      "rabaz (rabaz at gigabaz dot com)",
      "RaBot/1.0 Agent-admin/",
      "ramBot xtreme x.x",
      "RAMPyBot - (RAMPyBot -;;",
      "RAMPyBot/0.8-dev (Nutch;;",
      "Rankivabot/3.2 (; 3.2; vzmxikn)",
      "Rational SiteCheck (Windows NT)",
      "Reaper [2.03.10-031204] (",
      "Reaper/2.0x (+",
      "RedCarpet/1.2 (",
      "RedCell/0.1 (InfoSec Search Bot (Coming Soon);;",
      "RedCell/0.1 (RedCell;;",
      "RedKernel WWW-Spider 2/0 (+",
      "RixBot (",
      "RoboCrawl (",
      "RoboCrawl (",
      "RoboPal (",
      "Robot: NutchCrawler- Owner:",
      "Rotondo/3.1 libwww/5.3.1",
      "RRC (",
      " RSS/Atom Feed Robot",
      "RSurf15a 41",
      "RSurf15a 51",
      "RSurf15a 81",
      "RufusBot (Rufus Web Miner;",
      "RufusBot (Rufus Web Miner;",
      "sait/Nutch-0.9 (SAIT Research;",
      "SandCrawler - Compatibility Testing",
      "SapphireWebCrawler/1.0 (Sapphire Web Crawler using Nutch;;",
      "SapphireWebCrawler/Nutch-1.0-dev (Sapphire Web Crawler using Nutch;;",
      "SBIder/0.7 (SBIder;;",
      "SBIder/0.8-dev (SBIder;;",
      "ScholarUniverse/0.8 (Nutch;+;",
      "ScollSpider/2.0 (+",
      "Scooter/1.1 (custom)",
      "Scooter/2.0 G.R.A.B. V1.1.0",
      "Scooter/2.0 G.R.A.B. X2.0",
      "ScoutAnt/0.1; +",
      "Scrubby/2.x (",
      "Scrubby/3.0 (+",
      " V1.4",
      " V1.4.2 (;",
      "Search/1.0 (",
      "SearchByUsa/2 (SearchByUsa;;",
      "SearchExpress Spider0.99",
      "SearchGuild/DMOZ/Experiment (",
      "SearchGuild_DMOZ_Experiment (",
      "Searchit-Now Robot/2.2 (+",
      "Searchmee! Spider v0.98a",
      "SearchSight/2.0 (",
      "Searchspider/1.2 (SearchSpider;;",
      "SearchTone2.0 - IDEARE",
      "Seekbot/1.0 ( HTTPFetcher/0.3",
      "Seekbot/1.0 ( RobotsTxtFetcher/1.0 (XDF)",
      "Seekbot/1.0 ( RobotsTxtFetcher/1.2",
      "Semager/1.1 (",
      "Semager/1.x (",
      "Sensis Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
      " Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
      "SeznamBot/1.0 (+",
      "SeznamBot/2.0-test (+",
      "ShablastBot 1.0",
      "Shim Crawler",
      "ShopWiki/1.0 ( +",
      "ShopWiki/1.0 ( +",
      " Crawler 2.0",
      "SietsCrawler/1.1 (+",
      "Sigram/Nutch-1.0-dev (Test agent for Nutch development;; bot at sigram dot com)",
      "Siigle Orumcex v.001 Turkey (",
      "silk/1.0 (+",
      "Sirketcebot/v.01 (",
      "SiteSpider +(",
      " site rating system",
      "Skampy/0.9.x (",
      "Skimpy/0.x (",
      "Skywalker/0.1 (Skywalker; anonymous; anonymous)",
      "Slurp/2.0 (;",
      "Slurp/2.0-KiteWeekly (;",
      "Slurp/si (;",
      "Slurpy Verifier/1.0",
      "SlySearch (",
      " beta crawler v0",
      "Snapbot/1.0 (Snap Shots, +",
      "SnykeBot/0.6 (",
      "SocSciBot ()",
      "sogou develop spider",
      "Sogou Orion spider/3.0(+",
      "sogou spider",
      "Sogou web spider/3.0(+",
      "sohu agent",
      "speedfind ramBot xtreme 8.1",
      "Speedy Spider (Beta/x.x;",
      "Speedy Spider (Entireweb; Beta/1.0;",
      "Speedy_Spider (",
      "Sphere Scout&v4.0 - scout at sphere dot com",
      "Spider-Sleek/2.0 (+",
      " -",
      "SpiderMonkey/7.0x ( info at",
      "Spinne/2.0 med",
      "Spinne/2.0 med_AH",
      "Spock Crawler (",
      " (Version: 1.02- powered by",
      "sproose/0.1-alpha (sproose crawler;;",
      "Sqworm/2.9.81-BETA (beta_release; 20011102-760; i686-pc-linux-gnu)",
      "Sqworm/2.9.85-BETA (beta_release; 20011115-775; i686-pc-linux-gnu)",
      "SSurf15a 11 ",
      "StackRambler/x.x ",
      "Steeler/1.x (",
      "Steeler/3.3 (",
      "Strategic Board Bot (+",
      "Strategic Board Bot (+",
      "Submission Spider at",
      " (CrawlerAgent v0.103)",
      "suchpadbot/1.0 (+",
      "SurferF3 1/0",
      "Swooglebot/2.0. (+",
      "Syntryx ANT Scout Chassis Pheromone; Mozilla/4.0 compatible crawler",
      "Szukacz/1.x (robot;;",
      " (+",
      "Tagword (",
      "Talkro Web-Shot/1.0 (E-mail: Home:",
      "TCDBOT/Nutch-0.8 (PhD student research;; mcgettrs at t c d dot IE)",
      "Tecomi Bot (",
      "Teemer (NetSeer, Inc. is a Los Angeles based Internet startup company.;;",
      "Teoma MP",
      "teomaagent1 []",
      "Teradex Mapper;;",
      "terraminds-bot/1.0 (",
      "TerrawizBot/1.0 (+",
      "Test spider",
      "TestCrawler/Nutch-0.9 (Testing Crawler for Research ;; tgautier at balihoo dot com)",
      "TheRarestParser/0.2a (",
      "TheSuBot/0.1 (",
      "thumbshots-de-Bot (Version: 1.02- powered by",
      "TinEye/1.1 (",
      "tivraSpider/1.0 (",
      "Topodia/1.2-dev (Topodia - Crawler for HTTP content indexing;;",
      "Toutatis x-xx.x (",
      "Toutatis x.x (",
      "Toutatis x.x-x",
      "traazibot/testengine (+",
      "TSurf15a 11",
      "Tumblr/1.0 RSS syndication (+ (",
      "TurnitinBot/x.x (",
      "Turnpike Emporium LinkChecker/0.1",
      "TutorGig/1.5 (+",
      "Tutorial Crawler 1.4 (",
      "Tycoon Agent/Nutch-1.0-dev",
      "UKWizz/Nutch-0.8.1 (UKWizz Nutch crawler;",
      "Under the Rainbow 2.2",
      "UofTDB_experiment (",
      "updated/0.1-alpha (updated crawler;;",
      "updated/0.1beta (;;",
      "URL Spider Pro/x.xx (",
      "urlfan-bot/1.0; +",
      "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
      "User-Agent: Mozilla/4.0 (SKIZZLE! Distributed Internet Spider v1.0 -",
      "USyd-NLP-Spider (",
      "Vagabondo-WAP/2.0 (webcrawler at wise-guys dot nl; Profile",
      "Vagabondo/1.x MT (",
      "Vagabondo/2.0 MT",
      "Vagabondo/2.0 MT (webagent at wise-guys dot nl)",
      "Vagabondo/2.0 MT (",
      "Vagabondo/3.0 (webagent at wise-guys dot nl)",
      "Vakes/0.01 (Vakes;;",
      "versus 0.2 (+",
      "versus crawler",
      " - Networking4all Bot/x.x",
      "Verzamelgids/2.2 (",
      "Vespa Crawler",
      "VisBot/2.0 ( Crawler;;",
      "Vision Research Lab image spider at",
      "VMBot/0.x.x (VMBot;;",
      "Vortex/2.2 (+",
      "voyager/2.0 (",
      "VSE/1.0 (",
      "VSE/1.0 (",
      "VWBOT/Nutch-0.9-dev (VWBOT Nutch Crawler;;",
      "W3SiteSearch Crawler_v1.1",
      " 0.2 (",
      "Wavefire/0.8-dev (Wavefire;;",
      "Waypath development crawler - info at waypath dot com",
      "Waypath Scout v2.x - info at waypath dot com",
      "Web Snooper",
      " (leveled playing field;; info at",
      "WebAlta Crawler/1.2.1 (",
      "WebarooBot (Webaroo Bot;",
      "WebarooBot (Webaroo Bot;",
      "WebCompass 2.0",
      "Webglimpse 2.xx.x (",
      "Weblog Attitude Diffusion 1.0",
      "WebRankSpider/1.37 (+",
      "WebSearch.COM.AU/3.0.1 (The Australian Search Engine; http://WebSearch.COM.AU; Search@WebSearch.COM.AU)",
      "WebSearchBench WebCrawler v0.1(Experimental)",
      "WebsiteWorth v1.0",
      " (Add url robot)",
      "WebStat/1.0 (Unix; beta; 20040314)",
      "Webster v0.3 ( )",
      "WebVac (",
      " - Telefon: 01908 / 26005",
      "WebVulnCrawl.unknown/1.0 libwww-perl/5.803",
      "Wells Search II",
      "WEP Search 00",
      "WhizBang! Lab",
      "Willow Internet Crawler by Twotrees V2.1",
      "WinHTTP Example/1.0",
      "WinkBot/0.06 ( search engine web crawler;;",
      "WIRE/0.11 (Linux; i686; Bot,Robot,Spider,Crawler,",
      "WIRE/0.x (Linux; i686; Bot,Robot,Spider,Crawler)",
      "WISEbot/1.0 (;",
      "worio heritrix bot (+",
      "woriobot (",
      "Wotbox/alpha0.6 (;",
      "Wotbox/alpha0.x.x (; Java/1.4.1_02",
      "WSB WebCrawler V1.0 (Beta)-",
      "wume_crawler/1.1 (",
      "WWWeasel Robot v1.00 (",
      "wwwster/1.x (Beta-",
      "X-Crawler ",
      "xirq/0.1-beta (xirq;;",
      "Y!J-BSC/1.0 (",
      "Y!J/1.0 (",
      "yacy (; v20040602; i386 Linux 2.4.26-gentoo-r13; java 1.4.2_06; MET/en)",
      "yacybot (x86 Windows XP 5.1; java 1.5.0_06; Europe/de)",
      "Yahoo Pipes 1.0",
      "Yahoo! Mindset",
      "Yahoo-Blogs/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; )",
      "Yahoo-MMAudVid/1.0 (mms dash mmaudvidcrawler dash support at yahoo dash inc dot com)",
      "Yahoo-MMAudVid/2.0(mms dash mm aud vid crawler dash support at yahoo dash ;Mozilla 4.0 compatible; MSIE 7.0;Windows NT 5.0; .NET CLR 2.0)",
      "Yahoo-MMCrawler/3.x (mm dash crawler at trd dot overture dot com)",
      "Yahoo-VerticalCrawler-FormerWebCrawler/3.9 crawler at trd dot overture dot com;",
      "YahooFeedSeeker/2.0 (compatible; Mozilla 4.0; MSIE 5.5;",
      "YahooSeeker-Testing/v3.9 (compatible; Mozilla 4.0; MSIE 5.5;",
      "YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5;",
      "YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5;",
      "YahooSeeker/1.1 (compatible; Mozilla 4.0; MSIE 5.5;",
      "YahooSeeker/bsv3.9 (compatible; Mozilla 4.0; MSIE 5.5; )",
      "YahooSeeker/CafeKelsa-dev (compatible; Konqueror/3.2; FreeBSD ; )",
      "Yandex/1.01.001 (compatible; Win16; I)",
      "Yanga WorldSearch Bot v1.1/beta (",
      "Yeti/0.01 (nhn/1noon,, check robots.txt daily and follows it)",
      "Yeti/1.0 (NHN Corp.;",
      "yggdrasil/Nutch-0.9 (yggdrasil biorelated search engine; www dot biotec dot tu minus dresden do de slash schroeder; heiko dot dietze at biotec dot tu minus dresden dot de)",
      "YodaoBot/1.0 (; )",
      "yoofind/yoofind-0.1-dev (yoono webcrawler; ; MyEmail)",
      "yoono/1.0 web-crawler/1.0",
      "YottaCars_Bot/4.12 (+ Car Search Engine ",
      "YottaShopping_Bot/4.12 (+ Shopping Search Engine",
      "Zao-Crawler 0.2b",
      "Zao/0.1 (",
      "ZBot/1.00 (",
      " (",
      " (",
      "zedzo.digest/0.1 (",
      "zermelo Mozilla/5.0 compatible; heritrix/1.12.1 (+ [,]",
      "zerxbot/Version 0.6 libwww-perl/5.79",
      "Zeus ThemeSite Viewer Webster Pro V2.9 Win32",
      "Zeus xxxxx Webster Pro V2.9 Win32",
      "Zeusbot/0.07 (Ulysseek's web-crawling robot;;",
      "ZipppBot/0.xx (ZipppBot;;",
      "ZIPPPCVS/0.xx (ZipppBot/.xx;;",
      "Zippy v2.0 -",
      "ZoomSpider -",
      "ZyBorg/1.0 (;"]