1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
"""Define a case-insensitive regular expression that matches the names of
the most common robots."""
26
27 import re
28
29
# Lowercase names (or name fragments) of known robots. Each entry is matched
# as a literal, case-insensitive substring by ``robot_regex`` below.
robots = [
    'antibot',
    'appie',
    'architext',
    'bingbot',
    'bjaaland',
    'digout4u',
    'echo',
    'fast-webcrawler',
    'ferret',
    'googlebot',
    'gulliver',
    'harvest',
    'htdig',
    'ia_archiver',
    'askjeeves',
    'jennybot',
    'linkwalker',
    'lycos',
    'mercator',
    'moget',
    'muscatferret',
    'myweb',
    'netcraft',
    'nomad',
    'petersnews',
    'scooter',
    'slurp',
    'unlost_web_crawler',
    'voila',
    'voyager',
    'webbase',
    'weblayers',
    'wisenutbot',
    'aport',
    'awbot',
    'baiduspider',
    'bobby',
    'boris',
    'bumblebee',
    'cscrawler',
    'daviesbot',
    'exactseek',
    'ezresult',
    'gigabot',
    'gnodspider',
    'grub',
    'henrythemiragorobot',
    'holmes',
    'internetseer',
    'justview',
    'linkbot',
    'metager-linkchecker',
    'linkchecker',
    'microsoft_url_control',
    'msiecrawler',
    'nagios',
    'perman',
    'pompos',
    'rambler',
    'redalert',
    'shoutcast',
    'slysearch',
    'surveybot',
    'turnitinbot',
    'turtlescanner',
    'turtle',
    'ultraseek',
    'webclipping.com',
    'webcompass',
    'yahoo-verticalcrawler',
    'yandex',
    'zealbot',
    'zyborg',
]

# Case-insensitive alternation of every name in ``robots``.
# Each name is passed through ``re.escape`` so regex metacharacters are taken
# literally; without it the "." in 'webclipping.com' would match any
# character (e.g. "webclippingXcom").
robot_regex = re.compile(
    "|".join(re.escape(name) for name in robots),
    re.IGNORECASE,
)
106