# AWSTATS ROBOTS DATABASE
#-------------------------------------------------------
# If you want to add robots to extend AWStats database detection capabilities,
# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib.

# The entry in RobotsSearchIDOrder_listx is a Perl regular expression
# (see http://perldoc.perl.org/perlreref.html). AWStats applies these
# expressions to the user agent string in the order given by the lists. The
# first match specifies the robot.
#
# Note: This regular expression must not contain any whitespace.
# Otherwise AWStats will produce lines in the database that
# will be misinterpreted and as a consequence the corresponding data in the
# generated HTML reports will be wrong. If you want to match whitespace in
# the user agent string, use other constructs like '\s', '[:blank:]',
# '\p{IsSpace}', '\x20' etc.
#
# The corresponding entry in RobotsHashIDLib contains the regular expression
# as key, followed by a string containing HTML-text. AWStats inserts this
# text into reports to describe the bot. If possible the text should contain
# a link to the bot home page. This makes it easier for sysadmins to find
# the information necessary e.g. to adapt the robots.txt file.
#
# An entry in the RobotsAffiliateLib is not necessary. An entry in this list
# contains as first part the regular expression specifying the bot. The
# second part is a string that gives the Company or product managing the bot.
# This information is not used yet.
#
# There are several sorts of bots that AWStats is not able to detect and
# therefore a considerable amount of bot generated traffic counts
# as user traffic:
#
# a) A crawler that identifies itself in the referrer string, but not in
#    the user agent string. An example is the crawler from semalt.semalt.com.
#
# b) Crawlers that correctly access robots.txt but identify themselves in
#    the user agent string only once or just a few times. Most of the
#    time a user agent string is used that does not contain hints that
#    a bot is involved. An example is the iCjobs spider.
#    msnbot-UDiscovery/2.0b seems to show this behaviour too.
#
#
#
#-------------------------------------------------------

# 2018-03-13 RobC 
#              Added 36 robots and one generic ( survey ) using v 7.7 robots file as base. 
#              Also moved robot "Obot" into generics so that it is singled out as an individual Robot.         
#
# 2016-09-02 RobC 
#              Fixed a few errors and added a few missing bots from awstats 7.5 release.
#
# 2016-08-28 RobC 
#              Complete re-build of this file almost from scratch.
#              dropped many old bots, added many new bots and reordered file.
#              edited and added regex expressions to stop spaces causing problems.
#              You should tune file by placing the most common robots crawling your site at top 
#              in List1.
#
#
#              N.B. many bots need to be in the correct order, so don't change the order without checking
#              whether the change will cause counts to be allocated to the wrong bot. Not always simple.
#
#
# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
#              added dipsie (not tested with real data).
#              added DomainsDB.net http://domainsdb.net/
#              added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic)
#              added Nutch (used by looksmart (furl?))
#              added rssImagesBot
#              added Sqworm
#              added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e
#              added w3c css-validator
#              added documentation link to bot home pages for above and selected major bots.
#                    In the case of international bots, choose .com page.
#                    Included tool tip (html "title").
#                    To do: parameterize to match both AWStats language and tooltips settings.
#                    To do: add html links for all bots based on current documentation in source
#                           files referenced below.
#              changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma)
#              made minor grammar corrections to notes below
# 2005-08-24	added YahooSeeker-Testing
#              	added w3c-checklink
#              	updated url for ask.com
# 2005-08-24   	added Girafabot http://www.girafa.com/
# 2005-08-30   	added PluckFeedCrawler http://www.pluck.com/
#		added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; )
#		added geniebot (wgao@genieknows.com)
#		added BecomeBot link http://www.become.com/site_owners.html
#		added topicblogs http://www.topicblogs.com/
#		added Powermarks; seen used by referrer spam
#		added YahooSeeker
#		added NG/2. http://www.exabot.com/
# 2005-09-15	added link for Walhello appie
#		added bender focused_crawler
#		updated YahooSeeker description (blog crawler)
# 2005-09-16	added link for http://linkchecker.sourceforge.net
# 		added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl)
#		added Blogslive  info@blogslive.com intelliseek.com
#		added BlogPulse (ISSpider-3.0) intelliseek.com
# 2005-09-26	added Feedfetcher-Google (http://www.google.com/feedfetcher.html)
#		added EverbeeCrawler
#		added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html
#		added link for Bloglines http://www.bloglines.com
# 2005-10-19	fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html)
# 		added Blogshares Spiders (Synchronized V1.5.1)
#		added yacy
# 2005-11-21	added Argus www.simpy.com
#		added BlogsSay :: RSS Search Crawler (http://www.blogssay.com/)
#		added MJ12bot http://majestic12.co.uk/bot.php
#		added OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)
#		added OutfoxBot/0.3 (For internet experiments; outfox.agent@gmail.com)
#		added RufusBot Rufus Web Miner http://64.124.122.252.webaroo.com/feedback.html
#		added Seekbot (http://www.seekbot.net/bot.html)
#		added Yahoo-MMCrawler/3.x (mms-mmcrawler-support@yahoo-inc.com)
#               added link for BaiDuSpider
#		added link for Blogshares Spider
#		added link for StackRambler http://www.rambler.ru/doc/faq.shtml
#		added link for WISENutbot
#		added link for ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com.  Moved location to above wisenut to avoid classification as wisenut
# 2005-12-15
#		added FAST Enteprise Crawler/6 (www dot fastsearch dot com). Note spelling Enteprise not Enterprise.
#		added findlinks http://wortschatz.uni-leipzig.de/findlinks/
#		added IBM Almaden Research Center WebFountain http://www.almaden.ibm.com/cs/crawler [hc3]
#		added INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)
#		added lmspider (lmspider@scansoft.com) http://www.nuance.com/
#		added noxtrumbot http://www.noxtrum.com/
#		added SandCrawler (Microsoft)
#		added SBIder http://www.sitesell.com/sbider.html
#		added SeznamBot http://fulltext.seznam.cz/
#		added sohu-search http://corp.sohu.com/ (looked for //robots.txt not /robots.txt)
#		added the ruffle SemanticWeb crawler v0.5 - http://www.unreach.net
#		added WebVulnCrawl/1.0 libwww-perl/5.803 (looked for //robots.txt not /robots.txt)
#		added Yahoo! Japan keyoshid http://www.yahoo.co.jp/
#		added Y!J http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html
#		added link for GigaBot
#		added link for MagpieRSS
#		added link for MSIECrawler
# 2005-12-21
#		added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net]
#		added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)
#		added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70  users.sourceforge.net]
#		added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/
#		added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt).  May be used as robot or browser - a site may want to remove this entry.
#		added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net]
#		added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ?
# 2005-12-22
#		added EARTHCOM.info www.earthcom.info
#		added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor]
#		added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor]
# 2006-01-01
#		added Dulance http://www.dulance.com/bot.jsp
#		added MojeekBot http://www.mojeek.com/bot.html
#		added nicebot http://www.egghelp.org/setup.htm ?
#		added Snappy http://www.urltrends.com/faq.php
#		added sohu agent
#		added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]
#		added zspider http://feedback.redkolibri.com/
# 2006-01-13
#		added boitho.com-dc http://www.boitho.com/dcbot.html
#		added IRLbot http://irl.cs.tamu.edu/crawler
#		added virus_detector virus_harvester@securecomputing.com
#		added Wavefire http://www.wavefire.com; info@wavefire.com

#		added WebFilter Robot
# 2006-01-24
#		added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp
#		added Exabot exabot.com
#		added LetsCrawl.com http://letscrawl.com
#		added ichiro http://help.goo.ne.jp/door/crawlerE.html
# 2006-01-27    additional 22 robots from a list provided by Moizes Gabor
#		added ALeadSoftbot	http://www.aleadsoft.com/bot.htm
#		added CipinetBot	http://www.cipinet.com/bot.html
#		added Cuasarbot	http://www.cuasar.com/
#		added Dumbot	http://www.dumbfind.com/
#		added Extreme_Picture_Finder	http://www.exisoftware.com/
#		added Fooky.com/ScorpionBot/ScoutOut	http://www.fooky.com/scorpionbots
#		added IlTrovatore-Setaccio	http://www.iltrovatore.it/aiuto/motore_di_ricerca.html	bot@iltrovatore.it
#		added InsurancoBot	http://www.fastspywareremoval.com/
#		added InternetArchive	http://lucene.apache.org/nutch/bot.html 	nutch-agent@lucene.apache.org
#		added KazoomBot	http://www.kazoom.ca/bot.html	kazoombot@kazoom.ca
#		added Kurzor	http://www.easymail.hu/	cursor@easymail.hu
#		added NutchCVS	http://lucene.apache.org/nutch/bot.html	nutch-agent@lucene.apache.org
#		added NutchOSU-VLIB	http://lucene.apache.org/nutch/bot.html	nutch-agent@lucene.apache.org
#		added Orbiter	http://www.dailyorbit.com/bot.htm
#		added PHP_version_tracker	http://www.nexen.net/phpversion/bot.php
#		added SuperBot	http://www.sparkleware.com/superbot/
#		added SynooBot	http://www.synoo.de/bot.html	webmaster@synoo.com
#		added TestBot	http://www.agbrain.com/
#		added TutorGigBot	http://www.tutorgig.info/
#		added WebIndexer	mailto://webindexerv1@yahoo.com
#		added WebMiner	http://64.124.122.252/feedback.html
# 2006-02-01
#		added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202
#		added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164
#               additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ]
#		added Candlelight_Favorites_Inspector
#		added DomainChecker
#		added EasyDL
#		added FavOrg
#		added Favorites_Sweeper
#		added Html_Link_Validator
#		added Internet_Ninja
#		added JRTwine_Software_Check_Favorites_Utility
#		fixed Microsoft_URL_Control
#		added miniRank
#		added Missigua_Locator
#		added NPBot
#		added Ocelli
#		added Onet.pl_SA
#		added proodleBot
#		added SearchGuild_DMOZ_Experiment
#		added Susie
#		added Website_Monitoring_Bot
#		added Xenu_Link_Sleuth
# 2006-05-15
#		added ASPseek http://www.aspseek.org/
#		added AdamM Bot http://home.blic.net/adamm/
#		added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html
#		added arianna.libero.it (Italian Portal/search engine)
#		added Biz360 spider http://www.biz360.com
#		added BlogBridge Service http://www.blogbridge.com/
#		added BlogSearch http://www.icerocket.com/
#		added libcrawl
#		added edgeio-retriever http://www.edgeio.com
#		added FeedFlow http://feedflow.com/about
#		added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt
#		added Java catchall - used by many spam bots
#		added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_g_l_140406_1%5Cb
#		added msnbot-media http://search.msn.com/msnbot.htm
#		added MT::Telegraph::Agent
#		added Netluchs http://www.netluchs.de/ (German SE bot)
#		added oBot http://www.webmasterworld.com/forum11/1616.htm
#		added Onfolio http://www.onfolio.com/  (IE Toolbar plugin) - hit rss feeds.
#		added ping.blo.gs http://blo.gs/ping.php blog bot
#		added Sphere Scout http://www.sphere.com/
#		added sproose crawler http://www.sproose.com/bot.html
#		added SyndicAPI http://syndicapi.com/bot.html
#		added Yahoo! Mindset http://mindset.research.yahoo.com/
#		added msrabot
#		added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents)#=uk
#		fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator)
#		changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser.
#			This requires you to reprocess historic logs if you want EchO! to be recognized for older reports.
# 2006-05-17
#		added Alpha Search Agent # 62.152.125.60 Eurologon Srl
#		added Krugle http://www.krugle.com/crawler/info.html the search engine for developers
#		added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine
#		added UbiCrawler http://law.dsi.unimi.it/ubicrawler/
#		added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html
#			You must reprocess old logs for the Yahoo! Slurp China bot to be detected in old reports
# 2006-05-20
#		added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml
#		added Accoona-AI-Agent http://www.accoona.com/
#		added ActiveBookmark http://www.libmaster.com/active_bookmark.php
#		added BIGLOTRON http://www.biglotron.com/robot.html
#		added Bookmark-Manager http://bkm.sourceforge.net/
#		added cbn00glebot
#		added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240
#		added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork
#		added CheckWeb link validator http://p.duby.free.fr/chkweb.htm
#		added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html
#		added ConveraCrawler http://www.authoritativeweb.com/crawl/
#		added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/
#		added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php
#		added Cursor http://adcenter.hu/docs/en/bot.html
#		added Custo http://www.netwu.com/custo/
#		added DataFountains/DMOZ Downloader http://infomine.ucr.edu/
#		added Deepindex http://www.deepindex.net/faq.php
#		added DNSGroup http://www.dnsgroup.com/
#		added DoCoMo http://www.nttdocomo.co.jp/
#		added dumm.de-Bot http://www.dumm.de/
#		added ETS v http://www.freetranslation.com/help/
#		added eventax http://www.eventax.de/
#		added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/
#		added FAST Enterprise Crawler http://www.fast.no/
#		added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/
#		added FeedValidator http://feedvalidator.org/
#		added FilmkameraBot http://www.filmkamera.at/bot.html
#		added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece
#		added Global Fetch http://www.wesonet.com/
#		added GOFORITBOT http://www.goforit.com/about/
#		added GoForIt.com http://www.goforit.com/about/
#		added GPU p2p crawler http://gpu.sourceforge.net/search_engine.php
#		added HooWWWer http://cosco.hiit.fi/search/hoowwwer/
#		added HPPrint
#		added HTMLParser http://htmlparser.sourceforge.net/
#		added Hundesuche.com-Bot http://www.hundesuche.com/
#		added InfoBot http://www.infobot.org/
#		added InfociousBot http://corp.infocious.com/tech_crawler.php
#		added InternetSupervision http://internetsupervision.com/
#		added isearch2006 http://www.yahoo.com.cn/
#		added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/
#		added KalamBot http://64.124.122.251/feedback.html
#		added kamano.de NewsFeedVerzeichnis http://www.kamano.de/
#		added Kevin http://dznet.com/kevin/
#		added KnowItAll http://www.cs.washington.edu/research/knowitall/
#		added Knowledge.com http://www.knowledge.com/
#		added Kouaa Krawler http://www.kouaa.com/
#		added ksibot http://ego.ms.mff.cuni.cz/
#		added Link Valet Online http://www.htmlhelp.com/tools/valet/
#		added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request
#		added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm
#		added MapoftheInternet.com http://MapoftheInternet.com/
#		added Matrix S.p.A. - FAST Enterprise Crawler http://tin.virgilio.it/
#		added Megite http://www.megite.com/
#		added Metaspinner http://index.meta-spinner.de/
#		added Mini-reptile
#		added Misterbot http://www.misterbot.fr/
#		added Miva http://www.miva.com/
#		added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b
#		added MSRBOT http://research.microsoft.com/research/sv/msrbot/
#		added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022
#		added Mydoyouhike http://www.doyouhike.net/my
#		added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b
#		added NetSprint http://www.netsprint.pl/serwis/
#		added NimbleCrawler http://www.healthline.com/
#		added OpenWebSpider http://www.openwebspider.org/
#		added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html
#		added OSSProxy http://www.marketscore.com/FAQ.Aspx
#		added passwordmaker.org http://passwordmaker.org/
#		added PEAR HTTP Request class http://pear.php.net/
#		added PEERbot http://www.peerbot.com/
#		added PHP version tracker http://www.nexen.net/phpversion/bot.php
#		added PictureOfInternet http://malfunction.org/poi/
#		added plinki http://www.plinki.com/
#		added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1133\b
#		added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b
#		added ProjectWF-java-test-crawler
#		added PyQuery http://sourceforge.net/projects/pyquery/
#		added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/
#		added Scumbot
#		added Sensis Web Crawler http://www.sensis.com.au/
#		added snap.com beta crawler http://www.snap.com/
#		added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/
#		added STEROID  Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm
#		added Suchfin-Bot http://www.suchfin.de/
#		added Sunrise http://www.sunrisexp.com/
#		added Tagyu Agent http://www.tagyu.com/
#		added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm
#		added TeragramCrawlerSURF http://www.teragram.com/
#		added Test Crawler http://netp.ath.cx/
#		added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/
#		added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html
#		added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com)
#		added updated http://www.updated.com/
#		added Vermut http://vermut.aol.com
#		added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html
#		added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb
#		added VSE http://www.vivisimo.com/
#		added webcrawl.net http://www.webcrawl.net/
#		added Web Downloader http://www.krasu.ru/soft/chuchelo/
#		added Webdup http://www.webdup.com/en/index.html
#		added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b
#		added WordPress http://wordpress.org/
#		added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/
#		added Xenu's Link Sleuth (with ')
#		added xirq http://www.xirq.com/
#		added yoogliFetchAgent http://www.yoogli.com/
#		added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/
#		-- fix - some robots were reported with _ where _ should have been a space.
#		changed Xenu Link Sleuth
#		changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control
#		changed favorites_sweeper -> favorites_sweeper
#		-- updates
#		updated AskJeeves to Ask
# 2012-06-05 Albrecht Mueller
#              added Grabber from SDSC (San Diego Supercomputer Center).
# 2013-09-30 Albrecht Mueller
# AWStats probably cannot detect this bot as it identifies itself in
# the referrer field and not in the user agent string.
#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"

# to do  MS Search 4.0 Robot

#package AWSROB;


# Robots list was found at http://www.robotstxt.org/wc/active/all.txt
# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
# Rem: To avoid bad detection, some robot's ids were removed from this list:
#      - Robots with ID of 3 letters only
#      - Robots called 'webs' and 'tcl'
# Rem: directhit changed into direct_hit (its real id)
# Rem: calif changed into calif[^r] to avoid confusion with the Tiscalifreenet browser
# Rem: fish changed into [^a]fish to avoid confusion with the Madsafish browser
# Rem: roadrunner changed into road_runner
# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser
# Rem: voyager changed into ^voyager\/ to avoid excluding the voyager and amigavoyager browsers

# RobotsSearchIDOrder
# It contains all matching criteria to search for in log fields. This list is
# used to know in which order to search Robot IDs.
# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more
# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more
# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+\s]' and are quoted.
#-------------------------------------------------------


# @RobotsSearchIDOrder_list1
# Ordered list of Perl regular expressions (kept as single-quoted strings)
# for the more frequent robots. Per the notes in this file:
#   - the list is used when LevelForRobotsDetection is 1 or more;
#   - the patterns are applied to the user agent string in list order and the
#     first match decides the robot, so a specific pattern must be placed
#     before any shorter pattern that would match the same string;
#   - a pattern must not contain literal whitespace; a blank in the user
#     agent is written as [\x20] (or \s);
#   - each pattern needs an entry in RobotsHashIDLib whose key is the very
#     same string, so a pattern must not be edited here without editing the
#     corresponding key there.
@RobotsSearchIDOrder_list1 = (
# Common robots (In robot file)
'bingbot/',
'bingpreview',
'MSIECrawler',
'msnbot/',
'msnbot\-media/',
'AdIdxBot/',
# Must stay ahead of 'Googlebot/': that pattern also matches a string
# containing "NOT Googlebot/" and would win if it came first.
'NOT[\x20]Googlebot/',
'Googlebot/',
'Google[\x20]Web[\x20]Preview',
'Googlebot\-Image/',
'Googlebot\-Mobile/',
'Google[\x20]Page[\x20]Speed',
'google\-sitemaps',
'Googlebot\-News',
'Googlebot\-Video/',
'AdsBot\-Google[\x20]\(',
'AdsBot\-Google\-Mobile\-Apps',
'Mediapartners-Google',
'Feedfetcher\-Google',
'Google\-Adwords\-Instant',
'Firefox/1\.5',
# The longer "Slurp China" pattern has to precede the plain "Slurp" one.
'Yahoo![\x20]Slurp[\x20]China',
'Yahoo![\x20]Slurp',
'Baiduspider/',
'Baiduspider\-image',
# Catch-all for anything else containing 'baidu'; keep after the
# specific Baiduspider patterns.
'baidu',
'YandexBot/',
'YandexImages/',
'YandexImageResizer',
'YandexMetrika/',
'YandexMobileBot/',
# Catch-all for anything else containing 'yandex'; keep after the
# specific Yandex* patterns.
'yandex',
'electricmonk/',
'spbot/',
'SeznamBot/',
'msie8',
'AhrefsBot/',
'007ac9[\x20]Crawler',
'2345Explorer/',
'360Spider',
'A[\x20]Simple[\x20]Crawler',
'Abrave',
'acapbot/',
'Accoona\-AI\-Agent/',
'arcemedia',
# Listed before the shorter 'CatchBot/' further down, which matches the
# same string.
'AdnormCrawlerCatchBot/',
'adscanner',
'aiHitBot/',
'aipbot/',
'AlphaBot',
'Apache\-HttpClient/',
'Apexoo[\x20]Spider',
'Applebot/',
'archive\.org_bot',
'Babya[\x20]Discoverer',
'Barkrowler',
'BDCbot/',
'BellPagesCA/',
'BeNosey[\x20]Mohawk[\x20]Search',
'bhcBot',
'bidswitchbot',
'BigBozz/',
'BinGet/',
'bitlybot',
'bl\.uk_lddc_bot/',
'BLEXBot/',
'bnf.fr_bot',
'boitho\.com\-dc/',
'BoogleBot',
'BusinessBot:',
'CatchBot/',
'CB/Nutch',
'CCBot/',
'Cliqzbot/',
'CMS[\x20]Crawler',
'Companybook\-Crawler',
'ConveraCrawler/',
'Contacts-Crawler',
'contxbot',
'cosmos/',
'crawl/Nutch',
'crawler4j',
'CRAZYWEBCRAWLER',
'CRMNLCrawlAgent',
'CSE[\x20]HTML[\x20]Validator',
'C\-T[\x20]bot',
'CUBOT',
'Curl/PHP',
'cyencebot',
'Dalvik/',
'DataCrawler/',
# 'daumoa' must precede 'daum', which matches the same string.
'daumoa',
'daum',
'Deepnet[\x20]Explorer',
'DeuSu/',
'Digincore',
'Discordbot/',
'Dispatch/',
'DnyzBot',
'DoCoMo/',
'Domain[\x20]Re\-Animator[\x20]Bot',
'DomainCrawler/',
'DomainMacroCrawler/',
'DomainSONOCrawler/',
'DomainStatsBot/',
'DotBot/',
'DuckDuckBot-Https',
'DuckDuckGo\-Favicons\-Bot/',
# Both ELinks patterns precede 'Links[\x20]\(' further down, which would
# also match "ELinks (".
'ELinks/',
'ELinks[\x20]\(',
'EmailMarketingRobot/',
'EmeraldShield\.com[\x20]WebBot',
'envolk\[ITS\]spider/',
'eright',
'EsperanzaBot',
'Exabot/',
'ExtLinksBot',
'ExperianCrawlUK',
'facebookexternalhit/',
# NOTE(review): "scrawleradmin" in the next pattern looks like a typo for
# "crawleradmin" (compare the entry after it) - confirm. Any correction must
# be applied to the matching RobotsHashIDLib key as well.
'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de',
'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',
'FAST\-WebCrawler/',
'Feosey[\x20]Mohk[\x20]Crawler',
'findlinks/',
'Findxbot/',
'FirePHP/',
'firstdirectory\-bot',
'flamingo',
'FlippyBearBot/',
# Anchored at both ends: matches only when the whole string is "foo".
'^foo$',
'FreeWebMonitoring[\x20]SiteChecker/',
'fujilabol',
'FurlBot/',
'Gaisbot/',
'Gallent[\x20]Spider',
'GarlikCrawler/',
'Getintent[\x20]Crawler',
'GetintentCrawler[\x20]getintent\.com',
'Gigabot/',
'gipo\-crawler/Nutch',
'Girafabot',
'Gluten[\x20]Free[\x20]Crawler/',
'gocrawl',
'Gowikibot',
'Go\-http\-client/',
'GrapeshotCrawler/',
'GSiteCrawler/',
'GurujiBot/',
'hadiBot',
'HaosouSpider',
'HELLO[\x20]Crawler',
'holmes/',
'houzzbot',
'HTTP_Request2/',
'HubSpot[\x20]Webcrawler',
'HyperCrawl/',
'ICC\-Crawler/',
'iconoclast',
'IDGCrawler/Nutch',
'IDG/UK',
'idmarch[\x20]Automatic\.beta/',
'InbyBot',
'Incutio[\x20]XML',
'IndeedBot',
'InfluenceBot',
'IRLbot/',
'IssueCrawler',
'istellabot/',
'James[\x20]BOT',
'Jigsaw/',
'JobFeed',
'Jooblebot',
'KomodiaBot/',
'Konqueror/',
'laserlikebot',
'Lightspeed',
'linkapediabot',
# Most specific first: 'metager\-linkchecker', then 'linkchecker', then the
# shortest form 'LinkCheck'.
'metager\-linkchecker',
'linkchecker',
'LinkCheck',
'linkdexbot/',
'LinkedInBot/',
'LinkpadBot/',
'Links[\x20]\(',
'LinksManager\.com_bot',
'LWP::Simple/',
'Mail\.RU_Bot/',
'makecontact',
'mappy',
'MauiBot',
'meanpathbot/',
'Mechanize',
'Mediatoolkitbot',
'MegaIndex\.ru/',
'merzscope',
'Meta_Bot',
'mfibot/',
'microsoft.*discovery',
'missigua_locator',
'MixrankBot',
'MJ12bot/',
'MojeekBot',
'Mojolicious',
'MXT/Nutch',
'My[\x20]Nutch[\x20]Spider/',
'myse/Nutch',
'Naaraa',
'NerdyBot',
'netEstate[\x20]NE[\x20]Crawler',
'NetResearchServer/',
'Nimbostratus-Bot',
'nominet',
'NRLCorpusBuilder/Nutch',
'nutch\-1\.4/',
'nutch\-1\.8/',
'NutchCVS/',
'o\.uk[\x20]robot',
'ocrawler;',
'ODP[\x20]link[\x20]checker',
'Offline[\x20]Explorer/',
'OmniExplorer_Bot/',
'OrangeBot/',
'Orliac',
'OutclicksBot',
'PageBitesHyperBot/',
'Pcore',
'pdffillerbot/',
'peopleman',
'PhantomJS',
'PHP/5\.2\.8',
'Pinterestbot',
'PiplBot',
'Ploetz[\x20]\+[\x20]Zeller',
'Plukkie/',
'Princetonbot/',
'PrivacyAwareBot/',
'Prlog/',
'proximic',
'psbot/',
'psbot\-image',
'python_wk_crawler',
'Python\-urllib/',
'QCrawl',
'Quick-Crawler',
'ResearchBot',
'roboto',
'rogerbot/',
'RSSingBot',
'RukiCrawler/',
'SafeDNS[\x20]search[\x20]bot/',
'SafeDNSBot',
# The full "SafeSearch microdata crawler" pattern precedes the bare
# 'safesearch' fallback.
'SafeSearch[\x20]microdata[\x20]crawler',
'safesearch',
'SBL\-BOT',
'scrapy',
'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/',
'ScreenerBot[\x20]Crawler[\x20]Beta',
'Scrubby',
'Searchie/',
'SecurityResearch\.bot',
'Seekmo',
'semanticbot',
'SemrushBot/',
'SemrushBot-SI',
'seo\-audit\-check\-bot/',
'Seobility',
'SEOkicks\-Robot',
'SEOlyticsCrawler/',
'SEOstats',
'Seosys/Nutch',
'Seoterritory\.com[\x20]bot',
'serendeputy',
'Shim\-Crawler',
'SiteExplorer/',
'siteexplorer\.info',
'siteimprove',
'Slackbot\-LinkExpanding',
'SmabblerBot/',
'Sogou[\x20]web[\x20]spider/',
'special_archiver/',
'Spiderbot/',
'SpuhexBot',
'spyonweb',
'ssearch_bot',
'Streamline3Bot',
'SurdotlyBot/',
'SurveyBot/',
'taiil/Nutch',
'tbot\-nutch',
'TeeRaidBot',
'TelegramBot',
'Test/Nutch',
'Test[\x20]Spider',
'TestCrawler',
'The[\x20]Knowledge[\x20]AI',
'tracemyfile',
'trendiction',
# With trailing slash first; the slash-less form below is the fallback.
'TurnitinBot/',
'TurnitinBot',
'TweetmemeBot/',
'UCY/Nutch',
'uni-leipzig\.de',
'Uptimebot/',
'UptimeRobot/',
'URL[\x20]Checker',
'UXCrawlerBot',
'Validator\.nu/',
'vBSEO',
'vBulletin[\x20]via[\x20]PHP',
'vebidoobot',
'vegi[\x20]bot',
'Velen',
'viz/Nutch',
'VoilaBot',
'VORTEX/',
'voyager/',
'W3C_Validator/',
'W3C\-checklink/',
'WBSearchBot/',
'WbSrch/',
'WeSEE:Ads/PageBot',
'WeSEE:Ads/PictureBot',
'WeSEE_Bot',
'Wget/',
'Who\.is[\x20]Bot',
'wonderbot/',
'woobot/',
'Wotbox/',
'Xaldon[\x20]WebSpider',
'Xenu[\x20]Link[\x20]Sleuth',
'xenu_link_sleuth',
'XML[\x20]Sitemaps[\x20]Generator',
'XoviBot/',
'yacybot',
'Yahoo[\x20]Link[\x20]Preview',
'yak',
'YisouSpider',
'yoozBot',
'Your\-Website\-Sucks',
'zoominfobot',
'zspider/',
'ZumBot/',
# below placed at end to catch some generics
# These short patterns also match many of the specific IDs above (e.g.
# 'nutch' matches every "*/Nutch" entry, 'perl' matches 'libwww\-perl'), so
# they must remain after them. Within this group the longer pattern still
# comes first ('javabee' before 'java', 'perlcrawler' before 'perl').
'nbot',
'ng/1\.',
'ng/2\.',
'libwww\-perl',
'CFNetwork',
'urllib',
'javabee',
'projectwf\-java\-test\-crawler',
'java',
'loocalcrawler/nutch',
'nutchosu\-vlib',
'nutch',
'perlcrawler',
'perl',
# old robots using firefox < version 11 not identifying themselves as a robot.
# The pattern matches "firefox/" followed by a single digit and a dot
# (versions 0.x - 9.x) or by "00." / "10.". It is the last entry, so the
# 'Firefox/1\.5' pattern earlier in the list takes precedence for that version.
'(firefox/)([0-9]\.|[0-1][0]\.)'
);

# RobotsSearchIDOrder_list2
# Second list of robot detection patterns. Every entry is a Perl regular
# expression that is applied to the user agent string; the entries are tried in
# the order given and the first match specifies the robot.
#
# Rules for maintaining this list:
# - A pattern must not contain literal whitespace; use constructs such as
#   '[\x20]' or '\s' instead.
# - The identical string must also exist as a key in %RobotsHashIDLib, which
#   supplies the text shown in the reports.
# - Where one pattern is contained in another (e.g. 'infoseeksidewinder' and
#   'infoseek', 'jobot' and 'jobo', 'webwalker' and 'webwalk'), the longer
#   pattern is listed first; keep that order when adding entries, otherwise the
#   shorter pattern wins.
@RobotsSearchIDOrder_list2 = (
# Less common robots (in robot file): bare or truncated Mozilla strings and
# obsolete MSIE / Windows versions. %RobotsHashIDLib labels these entries
# as "( Rogue Robot )".
'^Mozilla$',
'^mozilla\/3\.0\s\(compatible$',
'^mozilla\/4\.0$',
'^mozilla\/4\.0\s\(compatible;\)$',
'^mozilla\/5\.0$',
'^mozilla\/5\.0\s\(compatible;$',
'^mozilla\/5\.0\s\(en\-us\)$',
'^mozilla\/5\.0\sfirefox\/3\.0\.5$',
'^Mozilla/6\.0[\x20]\(compatible\)$',
'^Mozilla/(.*)Beta[\x20]\(Windows\)',
'MSIE[\x20]2',
'MSIE[\x20]3',
'MSIE[\x20]4',
'MSIE[\x20]5',
'MSIE[\x20]6',
'MSIE\+6\.0\;',
'Windows[\x20]95',
'Windows[\x20]98',

# Rarely seen robots, mostly in alphabetical order. These entries could be
# removed to speed up processing.
'a6\-indexer',
'abcdatos',
'abonti\.com',
'acme\.spider',
'activebookmark',
'adamm_bot',
'advbot',
'affectv\.co\.uk',
'ahoythehomepagefinder',
'aleadsoftbot',
'alkaline',
'allrati',
'alltop',
'almaden',
'alpha_search_agent',
'anthill',
'antibot',
'aport',
'appie',
'applesyndication',
'arachnophilia',
'arale',
'araneo',
'architext',
'archive\-de\.com',
'aretha',
'argus',
'ariadne',
'arianna\.libero\.it',
'arks',
'aspider',
'aspseek',
'asterias',
'asynchttpclient',
'atn\.txt',
'atomz',
'auresys',
'awbot',
'backlinktest\.com',
'backrub',
'bbot',
'becomebot',
'bender',
'betabot',
'bigbrother',
'biglotron',
'BingLocalSearch',
'bittorrent_bot',
'biz360[_+\s]spider',
'bjaaland',
'blackwidow',
'blindekuh',
'blogbridge[_+\s]service',
'blogged_crawl',
'bloglines',
'bloglovin',
'blogpulse',
'blogsearch',
'blogshares',
'blogslive',
'blogssay',
'bloodhound',
'bncf\.firenze\.sbn\.it/raccolta\.txt',
'bobby',
'bookmark\-manager',
'borg\-bot',
'boris',
'brightnet',
'bruinbot',
'bspider',
'bubing',
'bumblebee',
'butterfly',
'buzztracker',
'cactvschemistryspider',
'calif[^r]',
'candlelight[_+\s]favorites[_+\s]inspector',
'careerbot',
'carpathia',
'cassandra',
'catbot',
'cbn00glebot',
'cerberian_drtrs',
'cfetch',
'cgireader',
'chattertrap',
'check_http',
'checkbot',
'checkweb_link_validator',
'christcrawler',
'churl',
'cienciaficcion',
'cipinetbot',
# 'imagecoccoc' has to stay ahead of 'coccoc', which would also match it.
'imagecoccoc',
'coccoc',
'coldfusion',
'collective',
'combine',
'commons\-httpclient',
'computer_and_automation_research_institute_crawler',
'conceptbot',
'contentmatch',
'converamultimediacrawler',
'coolbot',
'copubbot',
'core',
'covario',
'cruiser',
'cscrawler',
'cuasarbot',
'cursor',
'cusco',
'custo',
'cyberspyder',
'datafountains/dmoz_downloader',
'dataprovider\.com',
'daviesbot',
'daylifefeedfetcher',
'daypopbot',
'deepindex',
'desertrealm',
'deweb',
'dienstspider',
'digger',
'digout4u',
'diibot',
'dipsie\.bot',
'direct_hit',
'discobot',
'dlvr\.it',
'dnabot',
'dnsgroup',
'doccheckbot',
'domainappender',
'domainchecker',
'domainsdb\.net',
'download_express',
'dragonbot',
'dreamwidth',
'drupal',
'dulance',
'dumbot',
'dumm\.de\-bot',
'dwcp',
'e\-collector',
'earthcom\.info',
'easydl',
'ebiness',
'eccp',
'echo!',
'edgeio\-retriever',
'elfinbot',
'emacs',
'emcspider',
# NOTE(review): possibly a misspelling of 'enterprise'; the same spelling is
# used as the %RobotsHashIDLib key, so confirm before changing either one.
'enteprise',
# NOTE(review): '[:blank:]' is not wrapped in an outer bracketed class here.
# Per perlrecharclass, POSIX classes are only recognised inside [...], so this
# presumably matches one of the literal characters ':', 'b', 'l', 'a', 'n', 'k'
# rather than a blank. '[\x20]' or '[[:blank:]]' looks like the intent, but the
# identical string is the key in %RobotsHashIDLib, so both places must be
# changed together.
'ernst[:blank:]2\.0',
'esther',
'ets_v',
'eventax',
'everbeecrawler',
'everest\-vulcan',
'evliyacelebi',
'exactseek',
'extreme[_+\s]picture[_+\s]finder',
'ezoom',
'ezresult',
'facebook',
'facebot',
'fast\-search\-engine',
# The longer 'matrix_..._fast_enterprise_crawler' pattern has to precede the
# plain 'fast_enterprise_crawler' pattern, which would also match it.
'matrix_s\.p\.a\._\-_fast_enterprise_crawler',
'fast_enterprise_crawler',
'fastbot',
'fastcrawler',
'favicon',
'favorg',
'favorites_sweeper',
'fdse',
'feedburner',
'feedcrawl',
'feedflow',
'feedmyinbox',
'feedroll\.com',
'feedsky',
'feedster',
'feedvalidator',
'feedzira',
'felix',
'ferret',
'fetchbot',
'fetchrover',
'fever/',
'fido',
'filmkamerabot',
'filterdb\.iss\.net',
'finderlein[_+\s]research[_+\s]crawler',
'findexa_crawler',
'finnish',
'fireball',
'firmilybot',
'flexum',
'foaf\-search\.net',
'fooky\.com/ScorpionBot',
'fouineur',
'francoroute',
'freecrawl',
'freenews',
'funnelweb',
'g2crawler',
'gama',
'gazz',
'gcreep',
'geniebot',
'genieo',
'geohasher',
'getbot',
'geturl',
'gigablastopensource',
'global_fetch',
'gnodspider',
'goforit\.com',
'goforitbot',
'golem',
'gonzo',
'gougou',
'gpu_p2p_crawler',
'grabber',
'grapeshot',
'grapnel',
'griffon',
'gromit',
'grub',
'gulliver',
'gulperbot',
'hambot',
'hanrss',
'harvest',
'havindex',
'henrythemiragorobot',
'heritrix',
'hl_ftien_spider',
'hometown',
'hoowwwer',
'hpprint',
'htdig',
'html[_+\s]link[_+\s]validator',
'htmlgobble',
'htmlparser',
'httrack',
'hundesuche\.com\-bot',
'hyperdecontextualizer',
'ia_archiver\-web\.archive\.org',
'ia_archiver',
'iajabot',
'iaskspider',
'i\-bot',
'icarus6j',
'ichiro',
'icjobs\.de',
'ilse',
'iltrovatore\-setaccio',
'imagelock',
'implisensebot',
'inagist',
'incywincy',
'infobot',
'infociousbot',
'infohelfer',
'infomine',
'informant',
'infoseeksidewinder',
'infoseek',
'infospider',
'inspectorwww',
'insurancobot',
'integromedb\.org',
'intelliagent',
'internet[_+\s]ninja',
'internetarchive',
'internetseer',
'internetsupervision',
'ips\-agent',
'irobot',
'iron33',
'isearch2006',
'israelisearch',
'iupui_research_bot',
'izsearch',
'jacobin[\x20]club',
'jakarta',
'jbot',
'jcrawler',
'jeeves',
'jennybot',
'jobboerse',
'jobot',
'jobo',
'joebot',
'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility',
'js\-kit',
'jubii',
'jumpstation',
'justview',
'kalambot',
'kamano\.de_newsfeedverzeichnis',
'kapsi',
'katipo',
'kazoombot',
'kevin',
'keyoshid',
'kilroy',
'kinja\-imagebot',
'kinjabot',
'knowitall',
'knowledge\.com',
'ko[_+\s]yappo[_+\s]robot',
'kouaa_krawler',
'krugle',
'ksibot',
'kummhttp',
'kurzor',
'labelgrabber\.txt',
'lanshanbot',
'larbin',
'largesmall[\x20]crawler',
'legs',
'letscrawl\.com',
'libcrawl',
'lilina',
'link_valet_online',
'linkbot',
'linkdex\.com',
'linkidator',
'linkscan',
'linkstats[\x20]bot',
'linkwalker',
'lipperhey',
'livejournal\.com',
'lmspider',
'loadtimebot',
'lockon',
'logo_gif',
'longurl',
'lssrocketcrawler',
'ltbot',
'ltx71',
'lwp\-request',
'lwp\-trivial',
'lycos[_+\s]',
'macworm',
'madaali\.de',
'magpierss',
'magpie',
'mapoftheinternet\.com',
'marvin',
'mattie',
'mediabot',
'mediafox',
'megaindex',
'megite',
'memorybot',
'mercator',
'meshexplorer',
'metager2\-verification\-bot',
'metajobbot',
'metaspinner',
'metauri',
'miadev',
'microsoft[_+\s]url[_+\s]control',
'microsoft[\x20]bits',
'microsoft\-webdav\-miniredir',
'mindcrawler',
'mindupbot',
'mini\-reptile',
'minirank',
'misterbot',
'miva',
'mizzu_labs',
'mnogosearch',
'moget',
'momspider',
'monster',
'motor',
'movabletype',
'ms[_+\s]search[_+\s]6\.0[_+\s]robot',
'ms_search_4\.0_robot',
'msnbot\-udiscovery',
'msrabot',
'msrbot',
'mt::telegraph::agent',
'muncher',
'muscatferret',
'mwdsearch',
'mydoyouhike',
'myweb',
'nagios',
'nasa_search',
'ndspider',
'nederland\.zoek',
'netcarta',
'netcraft',
'netluchs',
'netmechanic',
'netnewswire',
'netscoop',
'netsprint',
'netvibes',
'newrelicpinger',
'newscan\-online',
'newsfox',
'newsgatoronline',
'nextgensearchbot',
'nhse',
'nicebot',
'nimblecrawler',
'ning',
'nomad',
'northstar',
'noxtrumbot',
'npbot',
'nzexplorer',
'objectssearch',
'occam',
'ocelli',
'octopus',
'octora_beta_bot',
'onet\.pl[_+\s]sa',
'onfolio',
'openfind',
'opentaggerbot',
'openwebspider',
'optimizer',
'oracle_ultra_search',
'orb_search',
'orbiter',
'packrat',
'pageboy',
'panscient',
'parasite',
'passwordmaker\.org',
'patric',
'pear_http_request_class',
'peerbot',
'pegasus',
'perignator',
'perman',
'petersnews',
'phantom',
'php[_+\s]version[_+\s]tracker',
'phpcrawl',
'phpdig',
'picmole',
'pictureofinternet',
'piltdownman',
'pimptrain',
'ping\.blo\.gs',
'pingdom',
'pioneer',
'pita',
'pitkow',
'pjspider',
'plinki',
'pluckfeedcrawler',
'plumtreewebaccessor',
'pogodak',
'pompos',
'popdexter',
'poppi',
'port_huron_labs',
'portalb',
'postfavorites',
'postpost',
'postrank',
'powermarks',
'printfulbot',
'proodlebot',
'protopage',
'publiclibraryarchive',
'pyquery',
'python',
'qihoobot',
'quipply',
'qwantify',
'r6\_',
'rambler',
'ratingburner',
'raven',
'rbse',
'redalert',
'regator',
'relevantnoise\.com',
'resumerobot',
'rhcs',
'riddler',
'road_runner',
'robbie',
'robi',
'robocrawl',
'robofox',
'robozilla',
'rojo',
'rome[\x20]client',
'roverbot',
'rpt\-httpclient',
'rssgraffiti',
'rssimagesbot',
'ruffle',
'rufusbot',
'rules',
'safeads\.xyz',
'safetynetrobot',
'sage\+\+',
'sandcrawler',
'savetheworldheritage',
'sbider',
'schizozilla',
'scooter',
'scoutjet',
'scumbot',
'search\-info',
'search_au',
'searchguild[_+\s]dmoz[_+\s]experiment',
'searchmetricsbot',
'searchprocess',
'seekbot',
'semalt',
'senrigan',
'sensis_web_crawler',
'seodiver',
'seokicks\.de',
'seoscanners',
'sgscout',
'shaggy',
'shaihulud',
'shareaholicbot',
'shoutcast',
'sift',
'simbot',
'simplepie',
'sistrix',
'site\-valet',
'sitebot',
'sitedomain\-bot',
'sitetech',
'skimbot',
'skymob',
'slcrawler',
'slurp',
'slysearch',
'smartspider',
'smtbot',
'snap\.com_beta_crawler',
'snappy',
'snooper',
'sohu\-search',
'sohu',
'solbot',
'speedy',
'sphere_scout',
'spider[_+\s]monkey',
'spiderline',
'spiderlytics',
'spiderman',
'spiderview',
'spip',
'sproose_crawler',
'spry',
'sqworm',
'ssearcher',
'steeler',
'steroid__download',
'stq_bot',
'Stratagems[\x20]Kumo',
'suchfin\-bot',
'suke',
'summify\.com',
'sunrise',
'suntek',
'superbot',
'superfeedr',
'susie',
'sven',
'syndic8',
'syndicapi',
'synoobot',
'synthesio',
't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',
'tach_bw',
'tagyu_agent',
'tailrank',
'tarantula',
'tarspider',
'tcl_http_client_package',
'techbot',
'technoratibot',
'templeton',
'teoma',
'teragramcrawlersurf',
'test_crawler',
'testbot',
'thumbsniper',
'titan',
'titin',
'tkwww',
'tlspider',
'topblogsinfo',
'topicblogs',
'topix\.net',
'trapit',
'trileet',
'turtlescanner',
'turtle',
'tutorgigbot',
'tweetedtimes',
'twiceler',
'twisted[\x20]pagegetter',
'twitterbot',
'twitterfeed',
'ubicrawler',
'ucsd',
'udmsearch',
'ultraseek',
'um\-IC',
'um\-LN',
'unchaos_bot_hybrid_web_search_engine',
'unido\-bot',
'unisterbot',
'universalfeedparser',
'unlost_web_crawler',
'unwindfetchor',
'updated',
'urlck',
'ustc\-semantic\-group',
'vagabondo\-wap',
'vagabondo',
'valkyrie',
'vermut',
'versus_crawler_from_eda\.baykan@epfl\.ch',
'verticrawl',
'vespa_crawler',
'victoria',
'virus[_+\s]detector',
'visionsearch',
'voidbot',
'voltron',
'vse/',
'vwbot',
'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa',
'w3index',
'w3m2',
'wallpaper',
'wanderer',
'wapspider',
# NOTE(review): possibly a garbled variant of 'wapspider' above - confirm
# against real user agent strings before relying on or removing it.
'wapspIRLider',
'watchmouse',
'wavefire',
'waybackarchive\.org',
'wazzup',
'web_downloader',
'webbandit',
'webbase',
'webcatcher',
'webclipping\.com',
'webcollage',
'webcompass',
'webcopy',
'webcrawl\.net',
'webdup',
'webfetcher',
'webfilter',
'webfoot',
'webinator',
'webindexer',
'weblayers',
'weblinker',
'webminer',
'webmirror',
'webmoose',
'webquest',
'webreader',
'webreaper',
'website[_+\s]monitoring[_+\s]bot',
'websnarf',
'webspider',
'webvac',
'webvulncrawl',
'webwalker',
'webwalk',
'webwatch',
'wells_search',
'wer\-liefert\-was',
'wesee:search',
'wevikabot',
'whatuseek',
'whowhere',
'windows\-rss\-platform',
'wired\-digital',
'zyborg',
'wisenutbot',
'wiumi',
'wmir',
'wolp',
'wombat',
'wonderer',
'woozweb',
'wordpress',
'worm',
'wume_crawler',
'wwwc',
'wwweasel',
'wz101',
'xget',
'xirq',
'xydo',
'y!j',
'yahoo![\x20]searchmonkey',
'yahoo!_mindset',
'yahoo\-blogs',
'yahoo\-mmcrawler',
'yahoo\-newscrawler',
'yahoo[\x20]pipes',
'yahoo\-verticalcrawler',
'yahoocachesystem',
'yahooexternalcache',
'yahoofeedseeker',
'yahooseeker\-testing',
'yahooseeker',
'yahooysmcm',
'yammer',
'yanga',
'yet\-another\-spider',
'yeti',
'yie8',
'yodaobot',
'yooglifetchagent',
'youdao',
'yourls',
'z\-add_link_checker',
'zealbot',
'zemanta',
'zend_http_client',
'zeus',
'zhuaxia',
# Special-case patterns listed after the alphabetical run.
'[^a]fish',
'[\x20]netseer[\x20]',
'^[1-3]$',
'^finbot',
'^motorola$',
'^msie',
'^voyager/',
'^webindex$',
'1\-more_scanner'
);

# RobotsSearchIDOrder_listgen
# Generic robot patterns: common words and tool names instead of the names of
# individual robots. Like the other RobotsSearchIDOrder lists, every entry is
# a Perl regular expression for the user agent string and must not contain
# literal whitespace.
@RobotsSearchIDOrder_listgen = (
# Generic robot
'robot',
'blog',
'checker',
'crawl',
'discover',
'feed',
'fetcher',
'hunter',
'link',
'scanner',
'seek',
'sitemap',
'spider',
'sucker',
'survey',
'validator',
# 'bot' directly followed by a separator character
# (whitespace _ + : , . ; / \ -) ...
'bot[\s_+:,\.\;\/\\\-]',
# ... or directly preceded by one of the same separator characters.
'[\s_+:,\.\;\/\\\-]bot',
'curl',
'php',
'ruby/',
# Moving oBot here so it doesn't get assigned for other *obot robots
'oBot/',
# NOTE(review): presumably a placeholder token for requests that carry no
# user agent at all - confirm against the code that consumes this list.
'no_user_agent'
);


# RobotsHashIDLib
# List of robot names ('robot id','robot clear text')
#-------------------------------------------------------
%RobotsHashIDLib   = (
# Common robots (In robot file)
'bingbot/','bingbot',
'bingpreview','BingPreview',
'MSIECrawler','MSIECrawler',
'msnbot/','msnbot',
'msnbot\-media/','msnbot-media',
'AdIdxBot/','AdIdxBot Microsoft Ad Quality control',
'NOT[\x20]Googlebot/','NOT Googlebot',
'Googlebot/','Googlebot',
'Google[\x20]Web[\x20]Preview','Google Web Preview',
'Googlebot\-Image/','Googlebot-Image',
'Googlebot\-Mobile/','Googlebot-Mobile',
'Google[\x20]Page[\x20]Speed','Google Page Speed',
'google\-sitemaps','google-sitemaps',
'Googlebot\-News','Googlebot-News',
'Googlebot\-Video/','Googlebot-Video',
'AdsBot\-Google[\x20]\(','AdsBot-Google',
'AdsBot\-Google\-Mobile\-Apps','AdsBot-Google-Mobile-Apps',
'Mediapartners\-Google','Mediapartners-Google',
'Feedfetcher\-Google','Feedfetcher-Google',
'Google\-Adwords\-Instant','Google-Adwords-Instant',
'Firefox/1\.5','Nautic Expo using Firefox/1.5',
'Yahoo![\x20]Slurp[\x20]China','Yahoo! Slurp China',
'Yahoo![\x20]Slurp','Yahoo! Slurp',
'Baiduspider/','Baiduspider',
'Baiduspider\-image','Baiduspider-image',
'baidu','Baidu ( catchall )',
'YandexBot/','YandexBot',
'YandexImages/','YandexImages',
'YandexImageResizer','YandexImageResizer',
'YandexMetrika/','YandexMetrika',
'YandexMobileBot/','YandexMobileBot',
'yandex','Yandex ( catchall )',
'electricmonk/','electricmonk',
'spbot/','spbot',
'SeznamBot/','SeznamBot',
'msie8','msie8 - ( Rogue Robot )',
'AhrefsBot/','AhrefsBot',
'007ac9[\x20]Crawler','007ac9 Crawler',
'2345Explorer/','2345Explorer',
'360Spider','360Spider',
'A[\x20]Simple[\x20]Crawler','A Simple Crawler',
'Abrave','Abrave',
'acapbot/','acapbot',
'Accoona\-AI\-Agent/','Accoona-AI-Agent',
'AdnormCrawlerCatchBot/','AdnormCrawlerCatchBot',
'adscanner','adscanner',
'aiHitBot/','aiHitBot',
'aipbot/','aipbot',
'AlphaBot','AlphaBot',
'Apache\-HttpClient/','Apache-HttpClient',
'Apexoo[\x20]Spider','Apexoo Spider',
'Applebot/','Applebot',
'arcemedia','AdsBot-ArceMedia',
'archive\.org_bot','archive.org_bot',
'Babya[\x20]Discoverer','Babya Discoverer',
'Barkrowler','Barkrowler',
'BDCbot/','BDCbot',
'BellPagesCA/','BellPagesCA',
'BeNosey[\x20]Mohawk[\x20]Search','BeNosey Mohawk Search',
'bhcBot','bhcBot',
'bidswitchbot','bidswitchbot',
'BigBozz/','BigBozz',
'BinGet/','BinGet',
'bitlybot','bit.ly',
'bl\.uk_lddc_bot/','bl.uk_lddc_bot',
'BLEXBot/','BLEXBot',
'bnf.fr_bot','bnf.fr_bot',
'boitho\.com\-dc/','boitho.com-dc',
'BoogleBot','BoogleBot',
'BusinessBot:','BusinessBot:',
'CatchBot/','CatchBot',
'CB/Nutch','CB/Nutch',
'CCBot/','CCBot',
'Cliqzbot/','Cliqzbot',
'CMS[\x20]Crawler','CMS Crawler',
'Companybook\-Crawler','Companybook-Crawler',
'ConveraCrawler/','ConveraCrawler',
'Contacts-Crawler','Contacts-Crawler',
'contxbot','contxbot',
'cosmos/','cosmos',
'CRMNLCrawlAgent','CRMNLCrawlAgent',
'crawl/Nutch','crawl/Nutch',
'crawler4j','crawler4j',
'CRAZYWEBCRAWLER','CRAZYWEBCRAWLER',
'CSE[\x20]HTML[\x20]Validator','CSE HTML Validator',
'C\-T[\x20]bot','C-T bot',
'CUBOT','CUBOT',
'Curl/PHP','Curl/PHP',
'cyencebot','cyencebot',
'Dalvik/','Dalvik',
'DataCrawler/','DataCrawler',
'daumoa','daumoa',
'daum','daum',
'Deepnet[\x20]Explorer','Deepnet Explorer',
'DeuSu/','DeuSu',
'Digincore','Digincore',
'Discordbot/','Discordbot',
'Dispatch/','Dispatch',
'DnyzBot','DnyzBot',
'DoCoMo/','DoCoMo',
'Domain[\x20]Re\-Animator[\x20]Bot','Domain Re-Animator Bot',
'DomainCrawler/','DomainCrawler',
'DomainMacroCrawler/','DomainMacroCrawler',
'DomainSONOCrawler/','DomainSONOCrawler',
'DomainStatsBot/','DomainStatsBot',
'DotBot/','DotBot',
'DuckDuckBot-Https','DuckDuckBot-Https',
'DuckDuckGo\-Favicons\-Bot/','DuckDuckGo-Favicons-Bot',
'ELinks/','ELinks',
'ELinks[\x20]\(','ELinks (',
'EmailMarketingRobot/','EmailMarketingRobot',
'EmeraldShield\.com[\x20]WebBot','EmeraldShield.com WebBot',
'envolk\[ITS\]spider/','envolk ITS spider',
'eright','eright',
'EsperanzaBot','EsperanzaBot',
'Exabot/','Exabot',
'ExtLinksBot','ExtLinksBot',
'ExperianCrawlUK','ExperianCrawlUK',
'facebookexternalhit/','facebookexternalhit',
'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','FAST Enterprise crawleradmin.t-info@telekom.de',
'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','FAST Enterprise T-Info_BI_cluster crawleradmin.t-info@telekom.de',
'FAST\-WebCrawler/','FAST-WebCrawler',
'Feosey[\x20]Mohk[\x20]Crawler','Feosey Mohk Crawler',
'findlinks/','findlinks',
'Findxbot/','Findxbot',
'FirePHP/','FirePHP',
'firstdirectory\-bot','firstdirectory-bot',
'flamingo','Flamingo_SearchEngine',
'FlippyBearBot/','FlippyBearBot',
'^foo$','foo',
'FreeWebMonitoring[\x20]SiteChecker/','FreeWebMonitoring SiteChecker',
'fujilabol','fujilabol',
'FurlBot/','FurlBot',
'Gaisbot/','Gaisbot',
'Gallent[\x20]Spider','Gallent Spider',
'GarlikCrawler/','GarlikCrawler',
'Getintent[\x20]Crawler','GetIntent Crawler',
'GetintentCrawler[\x20]getintent\.com','GetintentCrawler getintent.com',
'Gigabot/','Gigabot',
'gipo\-crawler/Nutch','gipo-crawler/Nutch',
'Girafabot','Girafabot',
'Gluten[\x20]Free[\x20]Crawler/','Gluten Free Crawler',
'gocrawl','gocrawl',
'Gowikibot','Gowikibot',
'Go\-http\-client/','Go-http-client',
'GrapeshotCrawler/','GrapeshotCrawler',
'GSiteCrawler/','GSiteCrawler',
'GurujiBot/','GurujiBot',
'hadiBot','hadiBot',
'HaosouSpider','HaosouSpider',
'HELLO[\x20]Crawler','HELLO Crawler',
'holmes/','holmes',
'houzzbot','houzzbot',
'HTTP_Request2/','HTTP_Request2',
'HubSpot[\x20]Webcrawler','HubSpot Webcrawler',
'HyperCrawl/','HyperCrawl',
'ICC\-Crawler/','ICC-Crawler',
'iconoclast','iconoclast',
'IDGCrawler/Nutch','IDGCrawler/Nutch',
'IDG/UK','IDG/UK',
'idmarch[\x20]Automatic\.beta/','idmarch Automatic.beta',
'InbyBot','InbyBot',
'Incutio[\x20]XML','Incutio XML',
'IndeedBot','IndeedBot',
'InfluenceBot','InfluenceBot',
'IRLbot/','IRLbot',
'IssueCrawler','IssueCrawler',
'istellabot/','istellabot',
'James[\x20]BOT','James BOT',
'Jigsaw/','Jigsaw',
'JobFeed','JobFeed',
'Jooblebot','Jooblebot',
'KomodiaBot/','KomodiaBot',
'Konqueror/','Konqueror',
'Lightspeed','Lightspeed',
'linkapediabot','linkapediabot',
'metager\-linkchecker','metager-linkchecker',
'linkchecker','linkchecker',
'LinkCheck','LinkCheck',
'linkdexbot/','linkdexbot',
'LinkedInBot/','LinkedInBot',
'LinkpadBot/','LinkpadBot',
'Links[\x20]\(','Links (',
'LinksManager\.com_bot','LinksManager.com_bot',
'LWP::Simple/','LWP::Simple',
'Mail\.RU_Bot/','Mail.RU Bot',
'makecontact','makecontact',
'mappy','Mappy Crawler',
'MauiBot','MauiBot',
'meanpathbot/','meanpathbot',
'Mechanize','Mechanize',
'Mediatoolkitbot','Mediatoolkitbot',
'MegaIndex\.ru/','MegaIndex.ru',
'merzscope','merzscope',
'Meta_Bot','Meta_Bot',
'mfibot/','mfibot',
'microsoft.*discovery','Microsoft Office Protocol Discovery',
'missigua_locator','missigua_locator',
'MixrankBot','MixrankBot',
'MJ12bot/','MJ12bot',
'mojeek','mojeek',
'MojeekBot/','MojeekBot',
'Mojolicious','Mojolicious',
'MXT/Nutch','MXT/Nutch',
'My[\x20]Nutch[\x20]Spider/','My Nutch Spider',
'myse/Nutch','myse/Nutch',
'Naaraa','Naaraa',
'NerdyBot','NerdyBot',
'netEstate[\x20]NE[\x20]Crawler','netEstate NE Crawler',
'NetResearchServer/','NetResearchServer',
'Nimbostratus-Bot','Nimbostratus-Bot',
'nominet','nominet',
'NRLCorpusBuilder/Nutch','NRLCorpusBuilder/Nutch',
'nutch\-1\.4/','nutch-1.4',
'nutch\-1\.8/','nutch-1.8',
'NutchCVS/','NutchCVS',
'o\.uk[\x20]robot','o uk.robot',
'ocrawler;','ocrawler;',
'ODP[\x20]link[\x20]checker','ODP link checker',
'Offline[\x20]Explorer/','Offline Explorer',
'OmniExplorer_Bot/','OmniExplorer_Bot',
'OrangeBot/','OrangeBot',
'Orliac','Orliac',
'OutclicksBot','OutclicksBot',
'PageBitesHyperBot/','PageBitesHyperBot',
'Pcore','Pcore',
'pdffillerbot/','pdffillerbot',
'peopleman','peopleman',
'PhantomJS','PhantomJS',
'PHP/5\.2\.8','PHP/5.2.8',
'Pinterestbot','Pinterestbot',
'PiplBot','PiplBot',
'Ploetz[\x20]\+[\x20]Zeller','Ploetz + Zeller',
'Plukkie/','Plukkie',
'Princetonbot/','Princetonbot',
'PrivacyAwareBot/','PrivacyAwareBot',
'Prlog/','Prlog',
'proximic','proximic',
'psbot/','psbot',
'psbot\-image','psbot-image',
'python_wk_crawler','python_wk_crawler',
'Python\-urllib/','Python-urllib',
'QCrawl','QCrawl',
'Quick-Crawler','Quick-Crawler',
'ResearchBot','ResearchBot',
'roboto','roboto',
'rogerbot/','rogerbot',
'RSSingBot','RSSingBot',
'RukiCrawler/','RukiCrawler',
'SafeDNS[\x20]search[\x20]bot/','SafeDNS search bot',
'SafeDNSBot','SafeDNSBot',
'SafeSearch[\x20]microdata[\x20]crawler','SafeSearch microdata crawler',
'safesearch','safesearch ( catchall )',
'SBL\-BOT','SBL-BOT',
'scrapy','scrapy',
'Screaming[\x20]Frog[\x20]SEO[\x20]Spider/','Screaming Frog SEO Spider',
'ScreenerBot[\x20]Crawler[\x20]Beta','ScreenerBot Crawler Beta',
'Scrubby','Scrubby',
'Searchie/','Searchie',
'SecurityResearch\.bot','Security Research Bot',
'Seekmo','Seekmo',
'semanticbot','semanticbot',
'SemrushBot/','SemrushBot',
'SemrushBot-SI','SemrushBot-SI',
'seo\-audit\-check\-bot/','seo-audit-check-bot',
'Seobility','Seobility',
'SEOkicks\-Robot','SEOkicks-Robot',
'SEOlyticsCrawler/','SEOlyticsCrawler',
'SEOstats','SEOstats',
'Seosys/Nutch','Seosys/Nutch',
'Seoterritory\.com[\x20]bot','Seoterritory.com.bot',
'serendeputy','serendeputy',
'Shim\-Crawler','Shim-Crawler',
'SiteExplorer/','SiteExplorer',
'siteexplorer\.info','siteexplorer.info',
'siteimprove','siteimprove',
'Slackbot\-LinkExpanding','Slackbot-LinkExpanding',
'SmabblerBot/','SmabblerBot',
'Sogou[\x20]web[\x20]spider/','Sogou web spider',
'special_archiver/','special_archiver',
'Spiderbot/','Spiderbot',
'SpuhexBot','SpuhexBot',
'spyonweb','spyonweb',
'ssearch_bot','ssearch_bot',
'Streamline3Bot','Streamline3Bot',
'SurdotlyBot/','SurdotlyBot',
'SurveyBot/','SurveyBot',
'taiil/Nutch','taiil/Nutch',
'tbot\-nutch','tbot-nutch',
'TeeRaidBot','TeeRaidBot',
'TelegramBot','TelegramBot',
'Test/Nutch','Test/Nutch',
'Test[\x20]Spider','Test Spider',
'TestCrawler','TestCrawler',
'The[\x20]Knowledge[\x20]AI', 'The Knowledge AI',
'tracemyfile','tracemyfile',
'trendiction','trendiction',
'TurnitinBot/','TurnitinBot',
'TurnitinBot','TurnitinBot',
'TweetmemeBot/','TweetmemeBot',
'UCY/Nutch','UCY/Nutch',
'uni-leipzig\.de','uni-leipzig.de',
'Uptimebot/','Uptimebot',
'UptimeRobot/','UptimeRobot',
'URL[\x20]Checker','URL Checker',
'UXCrawlerBot','UXCrawlerBot',
'Validator\.nu/','Validator.nu',
'vBSEO','vBSEO',
'vBulletin[\x20]via[\x20]PHP','vBulletin via PHP',
'vebidoobot','vebidoobot',
'vegi[\x20]bot','vegi bot',
'Velen','Velen',
'viz/Nutch','viz/Nutch',
'VoilaBot','VoilaBot',
'VORTEX/','VORTEX',
'voyager/','voyager',
'W3C_Validator/','W3C_Validator',
'W3C\-checklink/','W3C-checklink',
'WBSearchBot/','WBSearchBot',
'WbSrch/','WbSrch/',
'WeSEE:Ads/PageBot','WeSEE:Ads/PageBot',
'WeSEE:Ads/PictureBot','WeSEE:Ads/PictureBot',
'WeSEE_Bot','WeSEE_Bot',
'Wget/','Wget',
'Who\.is[\x20]Bot','Who.is.Bot',
'wonderbot/','wonderbot',
'woobot/','woobot',
'Wotbox/','Wotbox',
'Xaldon[\x20]WebSpider','Xaldon WebSpider',
'Xenu[\x20]Link[\x20]Sleuth','Xenu Link Sleuth',
'xenu_link_sleuth','xenu_link_sleuth',
'XML[\x20]Sitemaps[\x20]Generator','XML Sitemaps Generator',
'XoviBot/','XoviBot',
'yacybot','yacybot',
'Yahoo[\x20]Link[\x20]Preview','Yahoo Link Preview',
'yak','yak-linkfluence',
'YisouSpider','YisouSpider',
'yoozBot','yoozBot',
'Your\-Website\-Sucks','Your-Website-Sucks',
'zoominfobot','zoominfobot',
'zspider/','zspider',
'ZumBot/','ZumBot',
# The entries below mirror the generic catch-all patterns placed at the end of the search list.
'nbot','nbot',
'ng/1\.','ng/1.',
'ng/2\.','ng/2.',
'libwww\-perl','libwww-perl',
'CFNetwork','CFNetwork',
'urllib','urllib',
'javabee','javabee',
'projectwf\-java\-test\-crawler','projectwf-java-test-crawler',
'java','Java ( catchall )',
'loocalcrawler/nutch','loocalcrawler/nutch',
'nutchosu\-vlib','nutchosu-vlib',
'nutch','nutch ( catchall )',
'perlcrawler','perlcrawler',
'perl','perl',
'(firefox/)([0-9]\.|[0-1][0]\.)','Firefox version 10 and lower - various robots',

# Less common robots (In robot file)
'^Mozilla$','Mozilla ( Rogue Robot )',
'^mozilla\/3\.0\s\(compatible$', 'mozilla/3.0 (compatible - ( Rogue Robot )',
'^mozilla\/4\.0$', 'mozilla/4.0 - ( Rogue Robot )',
'^mozilla\/4\.0\s\(compatible;\)$', 'mozilla/4.0 (compatible;) - ( Rogue Robot )',
'^mozilla\/5\.0$', 'mozilla/5.0 - ( Rogue Robot )',
'^mozilla\/5\.0\s\(compatible;$', 'mozilla/5.0 (compatible; - ( Rogue Robot )',
'^mozilla\/5\.0\s\(en\-us\)$', 'mozilla/5.0 (en-us) - ( Rogue Robot )',
'^mozilla\/5\.0\sfirefox\/3\.0\.5$', 'mozilla/5.0 firefox/3.0.5 - ( Rogue Robot )',
'^Mozilla/6\.0[\x20]\(compatible\)$','Mozilla/6.0 (compatible) - ( Rogue Robot )',
'^Mozilla/(.*)Beta[\x20]\(Windows\)','Mozilla Beta (Windows) - ( Rogue Robot )',
'MSIE[\x20]2','MSIE 2 - ( Rogue Robot )',
'MSIE[\x20]3','MSIE 3 - ( Rogue Robot )',
'MSIE[\x20]4','MSIE 4 - ( Rogue Robot )',
'MSIE[\x20]5','MSIE 5 - ( Rogue Robot )',
'MSIE[\x20]6','MSIE 6 - ( Rogue Robot )',
'MSIE\+6\.0\;','MSIE+6.0; - ( Rogue Robot)',
'Windows[\x20]95','Windows 95 - ( Rogue Robot )',
'Windows[\x20]98','Windows 99 - ( Rogue Robot )',

# these could be removed to speed up processing as they are rarely seen
'a6\-indexer','a6-indexer',
'abcdatos','abcdatos',
'abonti\.com','abonti.com',
'acme\.spider','acme.spider',
'activebookmark','activebookmark',
'adamm_bot','adamm_bot',
'advbot','advbot',
'affectv\.co\.uk','affectv.co.uk',
'ahoythehomepagefinder','ahoythehomepagefinder',
'aleadsoftbot','aleadsoftbot',
'alkaline','alkaline',
'allrati','allrati',
'alltop','alltop',
'almaden','almaden',
'alpha_search_agent','alpha_search_agent',
'anthill','anthill',
'antibot','antibot',
'aport','aport',
'appie','appie',
'applesyndication','applesyndication',
'arachnophilia','arachnophilia',
'arale','arale',
'araneo','araneo',
'architext','architext',
'archive\-de\.com','archive-de.com',
'aretha','aretha',
'argus','argus',
'ariadne','ariadne',
'arianna\.libero\.it','arianna.libero.it',
'arks','arks',
'aspider','aspider',
'aspseek','aspseek',
'asterias','asterias',
'asynchttpclient','asynchttpclient',
'atn\.txt','atn.txt',
'atomz','atomz',
'auresys','auresys',
'awbot','awbot',
'backlinktest\.com','backlinktest.com',
'backrub','backrub',
'bbot','bbot',
'becomebot','becomebot',
'bender','bender',
'betabot','betabot',
'bigbrother','bigbrother',
'biglotron','biglotron',
'BingLocalSearch','BingLocalSearch',
'bittorrent_bot','bittorrent_bot',
'biz360[_+\s]spider','biz360 spider',
'bjaaland','bjaaland',
'blackwidow','blackwidow',
'blindekuh','blindekuh',
'blogbridge[_+\s]service','blogbridge service',
'blogged_crawl','blogged_crawl',
'bloglines','bloglines',
'bloglovin','bloglovin',
'blogpulse','blogpulse',
'blogsearch','blogsearch',
'blogshares','blogshares',
'blogslive','blogslive',
'blogssay','blogssay',
'bloodhound','bloodhound',
'bncf\.firenze\.sbn\.it/raccolta\.txt','bncf\.firenze\.sbn.it/raccolta.txt',
'bobby','bobby',
'bookmark\-manager','bookmark-manager',
'borg\-bot','borg-bot',
'boris','boris',
'brightnet','brightnet',
'bruinbot','bruinbot',
'bspider','bspider',
'bubing','bubing',
'bumblebee','bumblebee',
'butterfly','butterfly',
'buzztracker','buzztracker',
'cactvschemistryspider','cactvschemistryspider',
'calif[^r]','calif[^r]',
'candlelight[_+\s]favorites[_+\s]inspector','candlelight favorites inspector',
'careerbot','careerbot',
'carpathia','carpathia',
'cassandra','cassandra',
'catbot','catbot',
'cbn00glebot','cbn00glebot',
'cerberian_drtrs','cerberian_drtrs',
'cfetch','cfetch',
'cgireader','cgireader',
'chattertrap','chattertrap',
'check_http','check_http',
'checkbot','checkbot',
'checkweb_link_validator','checkweb_link_validator',
'christcrawler','christcrawler',
'churl','churl',
'cienciaficcion','cienciaficcion',
'cipinetbot','cipinetbot',
'imagecoccoc','imagecoccoc',
'coccoc','coccoc',
'coldfusion','coldfusion',
'collective','collective',
'combine','combine',
'commons\-httpclient','commons-httpclient',
'computer_and_automation_research_institute_crawler','computer_and_automation_research_institute_crawler',
'conceptbot','conceptbot',
'contentmatch','contentmatch',
'converamultimediacrawler','converamultimediacrawler',
'coolbot','coolbot',
'copubbot','copubbot',
'core','core',
'covario','covario',
'cruiser','cruiser',
'cscrawler','cscrawler',
'cuasarbot','cuasarbot',
'cursor','cursor',
'cusco','cusco',
'custo','custo',
'cyberspyder','cyberspyder',
'datafountains/dmoz_downloader','datafountains/dmoz_downloader',
'dataprovider\.com','dataprovider.com',
'daviesbot','daviesbot',
'daylifefeedfetcher','daylifefeedfetcher',
'daypopbot','daypopbot',
'deepindex','deepindex',
'desertrealm','desertrealm',
'deweb','deweb',
'dienstspider','dienstspider',
'digger','digger',
'digout4u','digout4u',
'diibot','diibot',
'dipsie\.bot','dipsie.bot',
'direct_hit','direct_hit',
'discobot','discobot',
'dlvr\.it','dlvr.it',
'dnabot','dnabot',
'dnsgroup','dnsgroup',
'doccheckbot','doccheckbot',
'domainappender','domainappender',
'domainchecker','domainchecker',
'domainsdb\.net','domainsdb.net',
'download_express','download_express',
'dragonbot','dragonbot',
'dreamwidth','dreamwidth',
'drupal','drupal',
'dulance','dulance',
'dumbot','dumbot',
'dumm\.de\-bot','dumm.de-bot',
'dwcp','dwcp',
'e\-collector','e-collector',
'earthcom\.info','earthcom.info',
'easydl','easydl',
'ebiness','ebiness',
'eccp','eccp',
'echo!','echo!',
'edgeio\-retriever','edgeio-retriever',
'elfinbot','elfinbot',
'emacs','emacs',
'emcspider','emcspider',
'enteprise','enteprise',
'ernst[:blank:]2\.0','ernst[:blank:]2.0',
'esther','esther',
'ets_v','ets_v',
'eventax','eventax',
'everbeecrawler','everbeecrawler',
'everest\-vulcan','everest-vulcan',
'evliyacelebi','evliyacelebi',
'exactseek','exactseek',
'extreme[_+\s]picture[_+\s]finder','extreme picture finder',
'ezoom','ezoom',
'ezresult','ezresult',
'facebook','facebook',
'facebot','facebot',
'fast\-search\-engine','fast-search-engine',
'matrix_s\.p\.a\._\-_fast_enterprise_crawler','matrix_s.p.a._-_fast_enterprise_crawler',
'fast_enterprise_crawler','fast_enterprise_crawler',
'fastbot','fastbot',
'fastcrawler','fastcrawler',
'favicon','favicon',
'favorg','favorg',
'favorites_sweeper','favorites_sweeper',
'fdse','fdse',
'feedburner','feedburner',
'feedcrawl','feedcrawl',
'feedflow','feedflow',
'feedmyinbox','feedmyinbox',
'feedroll\.com','feedroll.com',
'feedsky','feedsky',
'feedster','feedster',
'feedvalidator','feedvalidator',
'feedzira','feedzira',
'felix','felix',
'ferret','ferret',
'fetchbot','fetchbot',
'fetchrover','fetchrover',
'fever/','fever',
'fido','fido',
'filmkamerabot','filmkamerabot',
'filterdb\.iss\.net','filterdb.iss.net',
'finderlein[_+\s]research[_+\s]crawler','finderlein research crawler',
'findexa_crawler','findexa_crawler',
'finnish','finnish',
'fireball','fireball',
'firmilybot','firmilybot',
'flexum','flexum',
'foaf\-search\.net','foaf-search.net',
'fooky\.com/ScorpionBot','fooky.com/ScorpionBot',
'fouineur','fouineur',
'francoroute','francoroute',
'freecrawl','freecrawl',
'freenews','freenews',
'funnelweb','funnelweb',
'g2crawler','g2crawler',
'gama','gama',
'gazz','gazz',
'gcreep','gcreep',
'geniebot','geniebot',
'genieo','genieo',
'geohasher','geohasher',
'getbot','getbot',
'geturl','geturl',
'gigablastopensource','gigablastopensource',
'global_fetch','global_fetch',
'gnodspider','gnodspider',
'goforit\.com','goforit.com',
'goforitbot','goforitbot',
'golem','golem',
'gonzo','gonzo',
'gougou','gougou',
'gpu_p2p_crawler','gpu_p2p_crawler',
'grabber','grabber',
'grapeshot','grapeshot',
'grapnel','grapnel',
'griffon','griffon',
'gromit','gromit',
'grub','grub',
'gulliver','gulliver',
'gulperbot','gulperbot',
'hambot','hambot',
'hanrss','hanrss',
'harvest','harvest',
'havindex','havindex',
'henrythemiragorobot','henrythemiragorobot',
'heritrix','heritrix',
'hl_ftien_spider','hl_ftien_spider',
'hometown','hometown',
'hoowwwer','hoowwwer',
'hpprint','hpprint',
'htdig','htdig',
'html[_+\s]link[_+\s]validator','html link validator',
'htmlgobble','htmlgobble',
'htmlparser','htmlparser',
'httrack','httrack',
'hundesuche\.com\-bot','hundesuche.com-bot',
'hyperdecontextualizer','hyperdecontextualizer',
'ia_archiver\-web\.archive\.org','ia_archiver-web.archive.org',
'ia_archiver','ia_archiver',
'iajabot','iajabot',
'iaskspider','iaskspider',
'i\-bot','i-bot',
'icarus6j','icarus6j',
'ichiro','ichiro',
'icjobs\.de','icjobs.de',
'ilse','ilse',
'iltrovatore\-setaccio','iltrovatore-setaccio',
'imagelock','imagelock',
'implisensebot','implisensebot',
'inagist','inagist',
'incywincy','incywincy',
'infobot','infobot',
'infociousbot','infociousbot',
'infohelfer','infohelfer',
'infomine','infomine',
'informant','informant',
'infoseeksidewinder','infoseeksidewinder',
'infoseek','infoseek',
'infospider','infospider',
'inspectorwww','inspectorwww',
'insurancobot','insurancobot',
'integromedb\.org','integromedb.org',
'intelliagent','intelliagent',
'internet[_+\s]ninja','internet ninja',
'internetarchive','internetarchive',
'internetseer','internetseer',
'internetsupervision','internetsupervision',
'ips\-agent','ips-agent',
'irobot','irobot',
'iron33','iron33',
'isearch2006','isearch2006',
'israelisearch','israelisearch',
'iupui_research_bot','iupui_research_bot',
'izsearch','izsearch',
'jacobin[\x20]club','jacobin club',
'jakarta','jakarta',
'jbot','jbot',
'jcrawler','jcrawler',
'jeeves','jeeves',
'jennybot','jennybot',
'jobboerse','jobboerse',
'jobot','jobot',
'jobo','jobo',
'joebot','joebot',
'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','jrtwine software check favorites utility',
'js\-kit','js-kit',
'jubii','jubii',
'jumpstation','jumpstation',
'justview','justview',
'kalambot','kalambot',
'kamano\.de_newsfeedverzeichnis','kamano.de_newsfeedverzeichnis',
'kapsi','kapsi',
'katipo','katipo',
'kazoombot','kazoombot',
'kevin','kevin',
'keyoshid','keyoshid',
'kilroy','kilroy',
'kinja\-imagebot','kinja-imagebot',
'kinjabot','kinjabot',
'knowitall','knowitall',
'knowledge\.com','knowledge.com',
'ko[_+\s]yappo[_+\s]robot','ko yappo robot',
'kouaa_krawler','kouaa_krawler',
'krugle','krugle',
'ksibot','ksibot',
'kummhttp','kummhttp',
'kurzor','kurzor',
'labelgrabber\.txt','labelgrabber.txt',
'lanshanbot','lanshanbot',
'larbin','larbin',
'largesmall[\x20]crawler','largesmall crawler',
'legs','legs',
'letscrawl\.com','letscrawl.com',
'libcrawl','libcrawl',
'lilina','lilina',
'link_valet_online','link_valet_online',
'linkbot','linkbot',
'linkdex\.com','linkdex.com',
'linkidator','linkidator',
'linkscan','linkscan',
'linkstats[\x20]bot','linkstats bot',
'linkwalker','linkwalker',
'lipperhey','lipperhey',
'livejournal\.com','livejournal.com',
'lmspider','lmspider',
'loadtimebot','loadtimebot',
'lockon','lockon',
'logo_gif','logo_gif',
'longurl','longurl',
'lssrocketcrawler','lssrocketcrawler',
'ltbot','ltbot',
'ltx71','ltx71',
'lwp\-request','lwp-request',
'lwp\-trivial','lwp-trivial',
'lycos[_+\s]','lycos ',
'macworm','macworm',
'madaali\.de','madaali.de',
'magpierss','magpierss',
'magpie','magpie',
'mapoftheinternet\.com','mapoftheinternet.com',
'marvin','marvin',
'mattie','mattie',
'mediabot','mediabot',
'mediafox','mediafox',
'megaindex','megaindex',
'megite','megite',
'memorybot','memorybot',
'mercator','mercator',
'meshexplorer','meshexplorer',
'metager2\-verification\-bot','metager2-verification-bot',
'metajobbot','metajobbot',
'metaspinner','metaspinner',
'metauri','metauri',
'miadev','miadev',
'microsoft[_+\s]url[_+\s]control','microsoft url control',
'microsoft[\x20]bits','microsoft bits',
'microsoft\-webdav\-miniredir','microsoft-webdav-miniredir',
'mindcrawler','mindcrawler',
'mindupbot','mindupbot',
'mini\-reptile','mini-reptile',
'minirank','minirank',
'misterbot','misterbot',
'miva','miva',
'mizzu_labs','mizzu_labs',
'mnogosearch','mnogosearch',
'moget','moget',
'momspider','momspider',
'monster','monster',
'motor','motor',
'movabletype','movabletype',
'ms[_+\s]search[_+\s]6\.0[_+\s]robot','ms search 6.0 robot',
'ms_search_4\.0_robot','ms_search_4.0_robot',
'msnbot\-udiscovery','msnbot-udiscovery',
'msrabot','msrabot',
'msrbot','msrbot',
'mt::telegraph::agent','mt::telegraph::agent',
'muncher','muncher',
'muscatferret','muscatferret',
'mwdsearch','mwdsearch',
'mydoyouhike','mydoyouhike',
'myweb','myweb',
'nagios','nagios',
'nasa_search','nasa_search',
'ndspider','ndspider',
'nederland\.zoek','nederland.zoek',
'netcarta','netcarta',
'netcraft','netcraft',
'netluchs','netluchs',
'netmechanic','netmechanic',
'netnewswire','netnewswire',
'netscoop','netscoop',
'netsprint','netsprint',
'netvibes','netvibes',
'newrelicpinger','newrelicpinger',
'newscan\-online','newscan-online',
'newsfox','newsfox',
'newsgatoronline','newsgatoronline',
'nextgensearchbot','nextgensearchbot',
'nhse','nhse',
'nicebot','nicebot',
'nimblecrawler','nimblecrawler',
'ning','ning',
'nomad','nomad',
'northstar','northstar',
'noxtrumbot','noxtrumbot',
'npbot','npbot',
'nzexplorer','nzexplorer',
'objectssearch','objectssearch',
'occam','occam',
'ocelli','ocelli',
'octopus','octopus',
'octora_beta_bot','octora_beta_bot',
'onet\.pl[_+\s]sa','onet.pl sa',
'onfolio','onfolio',
'openfind','openfind',
'opentaggerbot','opentaggerbot',
'openwebspider','openwebspider',
'optimizer','optimizer',
'oracle_ultra_search','oracle_ultra_search',
'orb_search','orb_search',
'orbiter','orbiter',
'packrat','packrat',
'pageboy','pageboy',
'panscient','panscient',
'parasite','parasite',
'passwordmaker\.org','passwordmaker.org',
'patric','patric',
'pear_http_request_class','pear_http_request_class',
'peerbot','peerbot',
'pegasus','pegasus',
'perignator','perignator',
'perman','perman',
'petersnews','petersnews',
'phantom','phantom',
'php[_+\s]version[_+\s]tracker','php version tracker',
'phpcrawl','phpcrawl',
'phpdig','phpdig',
'picmole','picmole',
'pictureofinternet','pictureofinternet',
'piltdownman','piltdownman',
'pimptrain','pimptrain',
'ping\.blo\.gs','ping.blo.gs',
'pingdom','pingdom',
'pioneer','pioneer',
'pita','pita',
'pitkow','pitkow',
'pjspider','pjspider',
'plinki','plinki',
'pluckfeedcrawler','pluckfeedcrawler',
'plumtreewebaccessor','plumtreewebaccessor',
'pogodak','pogodak',
'pompos','pompos',
'popdexter','popdexter',
'poppi','poppi',
'port_huron_labs','port_huron_labs',
'portalb','portalb',
'postfavorites','postfavorites',
'postpost','postpost',
'postrank','postrank',
'powermarks','powermarks',
'printfulbot','printfulbot',
'proodlebot','proodlebot',
'protopage','protopage',
'publiclibraryarchive','publiclibraryarchive',
'pyquery','pyquery',
'python','python',
'qihoobot','qihoobot',
'quipply','quipply',
'qwantify','qwantify',
'r6\_','r6\_',
'rambler','rambler',
'ratingburner','ratingburner',
'raven','raven',
'rbse','rbse',
'redalert','redalert',
'regator','regator',
'relevantnoise\.com','relevantnoise.com',
'resumerobot','resumerobot',
'rhcs','rhcs',
'riddler','riddler',
'road_runner','road_runner',
'robbie','robbie',
'robi','robi',
'robocrawl','robocrawl',
'robofox','robofox',
'robozilla','robozilla',
'rojo','rojo',
'rome[\x20]client','rome client',
'roverbot','roverbot',
'rpt\-httpclient','rpt-httpclient',
'rssgraffiti','rssgraffiti',
'rssimagesbot','rssimagesbot',
'ruffle','ruffle',
'rufusbot','rufusbot',
'rules','rules',
'safeads\.xyz','safeads.xyz',
'safetynetrobot','safetynetrobot',
'sage\+\+','sage++',
'sandcrawler','sandcrawler',
'savetheworldheritage','savetheworldheritage',
'sbider','sbider',
'schizozilla','schizozilla',
'scooter','scooter',
'scoutjet','scoutjet',
'scumbot','scumbot',
'search\-info','search-info',
'search_au','search_au',
'searchguild[_+\s]dmoz[_+\s]experiment','searchguild dmoz experiment',
'searchmetricsbot','searchmetricsbot',
'searchprocess','searchprocess',
'seekbot','seekbot',
'semalt','semalt',
'senrigan','senrigan',
'sensis_web_crawler','sensis_web_crawler',
'seodiver','seodiver',
'seokicks\.de','seokicks.de',
'seoscanners','seoscanners',
'sgscout','sgscout',
'shaggy','shaggy',
'shaihulud','shaihulud',
'shareaholicbot','shareaholicbot',
'shoutcast','shoutcast',
'sift','sift',
'simbot','simbot',
'simplepie','simplepie',
'sistrix','sistrix',
'site\-valet','site-valet',
'sitebot','sitebot',
'sitedomain\-bot','sitedomain-bot',
'sitetech','sitetech',
'skimbot','skimbot',
'skymob','skymob',
'slcrawler','slcrawler',
'slurp','slurp',
'slysearch','slysearch',
'smartspider','smartspider',
'smtbot','smtbot',
'snap\.com_beta_crawler','snap.com_beta_crawler',
'snappy','snappy',
'snooper','snooper',
'sohu\-search','sohu-search',
'sohu','sohu ( catchall )',
'solbot','solbot',
'speedy','speedy',
'sphere_scout','sphere_scout',
'spider[_+\s]monkey','spider monkey',
'spiderline','spiderline',
'spiderlytics','spiderlytics',
'spiderman','spiderman',
'spiderview','spiderview',
'spip','spip',
'sproose_crawler','sproose_crawler',
'spry','spry',
'sqworm','sqworm',
'ssearcher','ssearcher',
'steeler','steeler',
'steroid__download','steroid__download',
'stq_bot','stq_bot',
'Stratagems[\x20]Kumo','Stratagems Kumo',
'suchfin\-bot','suchfin-bot',
'suke','suke',
'summify\.com','summify.com',
'sunrise','sunrise',
'suntek','suntek',
'superbot','superbot',
'superfeedr','superfeedr',
'susie','susie',
'sven','sven',
'syndic8','syndic8',
'syndicapi','syndicapi',
'synoobot','synoobot',
'synthesio','synthesio',
't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','t-h-u-n-d-e-r-s-t-o-n-e',
'tach_bw','tach_bw',
'tagyu_agent','tagyu_agent',
'tailrank','tailrank',
'tarantula','tarantula',
'tarspider','tarspider',
'tcl_http_client_package','tcl_http_client_package',
'techbot','techbot',
'technoratibot','technoratibot',
'templeton','templeton',
'teoma','teoma',
'teragramcrawlersurf','teragramcrawlersurf',
'test_crawler','test_crawler',
'testbot','testbot',
'thumbsniper','thumbsniper',
'titan','titan',
'titin','titin',
'tkwww','tkwww',
'tlspider','tlspider',
'topblogsinfo','topblogsinfo',
'topicblogs','topicblogs',
'topix\.net','topix.net',
'trapit','trapit',
'trileet','trileet',
'turtlescanner','turtlescanner',
'turtle','turtle',
'tutorgigbot','tutorgigbot',
'tweetedtimes','tweetedtimes',
'twiceler','twiceler',
'twisted[\x20]pagegetter','twisted pagegetter',
'twitterbot','twitterbot',
'twitterfeed','twitterfeed',
'ubicrawler','ubicrawler',
'ucsd','ucsd',
'udmsearch','udmsearch',
'ultraseek','ultraseek',
'um\-IC','ubermetrics-technologies.com',
'um\-LN','ubermetrics-technologies.com',
'unchaos_bot_hybrid_web_search_engine','unchaos_bot_hybrid_web_search_engine',
'unido\-bot','unido-bot',
'unisterbot','unisterbot',
'universalfeedparser','universalfeedparser',
'unlost_web_crawler','unlost_web_crawler',
'unwindfetchor','unwindfetchor',
'updated','updated',
'urlck','urlck',
'ustc\-semantic\-group','ustc-semantic-group',
'vagabondo\-wap','vagabondo-wap',
'vagabondo','vagabondo',
'valkyrie','valkyrie',
'vermut','vermut',
'versus_crawler_from_eda\.baykan@epfl\.ch','versus_crawler_from_eda.baykan@epfl.ch',
'verticrawl','verticrawl',
'vespa_crawler','vespa_crawler',
'victoria','victoria',
'virus[_+\s]detector','virus_detector',
'visionsearch','visionsearch',
'voidbot','voidbot',
'voltron','voltron',
'vse/','vse',
'vwbot','vwbot',
'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa','w3c_css_validator_jfouffa',
'w3index','w3index',
'w3m2','w3m2',
'wallpaper','wallpaper',
'wanderer','wanderer',
'wapspider','wapspider',
'wapspIRLider','wapspIRLider',
'watchmouse','watchmouse',
'wavefire','wavefire',
'waybackarchive\.org','waybackarchive.org',
'wazzup','wazzup',
'web_downloader','web_downloader',
'webbandit','webbandit',
'webbase','webbase',
'webcatcher','webcatcher',
'webclipping\.com','webclipping.com',
'webcollage','webcollage',
'webcompass','webcompass',
'webcopy','webcopy',
'webcrawl\.net','webcrawl.net',
'webdup','webdup',
'webfetcher','webfetcher',
'webfilter','webfilter',
'webfoot','webfoot',
'webinator','webinator',
'webindexer','webindexer',
'weblayers','weblayers',
'weblinker','weblinker',
'webminer','webminer',
'webmirror','webmirror',
'webmoose','webmoose',
'webquest','webquest',
'webreader','webreader',
'webreaper','webreaper',
'website[_+\s]monitoring[_+\s]bot','website monitoring bot',
'websnarf','websnarf',
'webspider','webspider',
'webvac','webvac',
'webvulncrawl','webvulncrawl',
'webwalker','webwalker',
'webwalk','webwalk',
'webwatch','webwatch',
'wells_search','wells_search',
'wer\-liefert\-was','wer-liefert-was',
'wesee:search','wesee:search',
'wevikabot','wevikabot',
'whatuseek','whatuseek',
'whowhere','whowhere',
'windows\-rss\-platform','windows-rss-platform',
'wired\-digital','wired-digital',
'zyborg','zyborg',
'wisenutbot','wisenutbot',
'wiumi','wiumi',
'wmir','wmir',
'wolp','wolp',
'wombat','wombat',
'wonderer','wonderer',
'woozweb','woozweb',
'wordpress','wordpress',
'worm','worm',
'wume_crawler','wume_crawler',
'wwwc','wwwc',
'wwweasel','wwweasel',
'wz101','wz101',
'xget','xget',
'xirq','xirq',
'xydo','xydo',
'y!j','y!j',
'yahoo![\x20]searchmonkey','yahoo! searchmonkey',
'yahoo!_mindset','yahoo!_mindset',
'yahoo\-blogs','yahoo-blogs',
'yahoo\-mmcrawler','yahoo-mmcrawler',
'yahoo\-newscrawler','yahoo-newscrawler',
'yahoo[\x20]pipes','yahoo pipes',
'yahoo\-verticalcrawler','yahoo-verticalcrawler',
'yahoocachesystem','yahoocachesystem',
'yahooexternalcache','yahooexternalcache',
'yahoofeedseeker','yahoofeedseeker',
'yahooseeker\-testing','yahooseeker-testing',
'yahooseeker','yahooseeker',
'yahooysmcm','yahooysmcm',
'yammer','yammer',
'yanga','yanga',
'yet\-another\-spider','yet-another-spider',
'yeti','yeti',
'yie8','yie8',
'yodaobot','yodaobot',
'yooglifetchagent','yooglifetchagent',
'youdao','youdao',
'yourls','yourls',
'z\-add_link_checker','z-add_link_checker',
'zealbot','zealbot',
'zemanta','zemanta',
'zend_http_client','zend_http_client',
'zeus','zeus',
'zhuaxia','zhuaxia',
'[^a]fish','[^a]fish',
'[\x20]netseer[\x20]',' netseer ',
'^[1-3]$','^[1-3]$',
'^finbot','^finbot',
'^motorola$','^motorola$',
'^msie','^msie',
'^voyager/','^voyager',
'^webindex$','webindex',
'1\-more_scanner','1-more_scanner',

# Generic robot
'robot','robot',
'blog','blog',
'checker','checker',
'crawl','crawl',
'discover','discover',
'feed','feed',
'fetcher','fetcher',
'hunter','hunter',
'link','link',
'scanner','scanner',
'seek','seek',
'sitemap','sitemap',
'spider','spider',
'sucker','sucker',
'survey','survey',
'validator','validator',
'bot[\s_+:,\.\;\/\\\-]','Unknown robot identified by bot\*',
'[\s_+:,\.\;\/\\\-]bot','Unknown robot identified by \*bot',
'curl','Curl',
'php','A PHP script',
'ruby/','Ruby script',
'no_user_agent','empty user agent string',
# Moving oBot towards the end so it does not pick up other *obot robots
'oBot/','oBot',
# Unknown robots identified by hit on robots.txt
'unknown','Unknown robot (identified by hit on robots.txt)'
);


# RobotsAffiliateLib
# This list tries to tell which search engine (company or product) uses a given robot
#-------------------------------------------------------------
# Entry format: the regular expression specifying the robot, followed by a
# string naming the company or product managing that robot.
# No affiliations are registered here, and this information is not used yet.
%RobotsAffiliateLib = ();

1;
