Difference between revisions of "MediaWiki:Robots.txt"

(New page: # # robots.txt for http://www.wikipedia.org/ and friends # # Please note: There are a lot of pages on this site, and there are # some misbehaved spiders out there that go _way_ too fast. I...)
 
(Removing all content from page)
 
#
# robots.txt for http://www.wikipedia.org/ and friends
#
# Please note: There are a lot of pages on this site, and there are
# some misbehaved spiders out there that go _way_ too fast. If you're
# irresponsible, your access to the site may be blocked.
#

# advertising-related bots:
User-agent: Mediapartners-Google*
Disallow: /

# Wikipedia work bots:
User-agent: IsraBot
Disallow:

User-agent: Orthogaffe
Disallow:

# Crawlers that are kind enough to obey, but which we'd rather not have
# unless they're feeding search engines.
User-agent: UbiCrawler
Disallow: /

User-agent: DOC
Disallow: /

User-agent: Zao
Disallow: /

# Some bots are known to be trouble, particularly those designed to copy
# entire sites. Please obey robots.txt.
User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: Microsoft.URL.Control
Disallow: /

User-agent: Xenu
Disallow: /

User-agent: larbin
Disallow: /

User-agent: libwww
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /

#
# The 'grub' distributed client has been *very* poorly behaved.
#
User-agent: grub-client
Disallow: /

#
# Doesn't follow robots.txt anyway, but...
#
User-agent: k2spider
Disallow: /

#
# Hits many times per second, not acceptable
# http://www.nameprotect.com/botinfo.html
User-agent: NPBot
Disallow: /

# A capture bot, downloads gazillions of pages with no public benefit
# http://www.webreaper.net/
User-agent: WebReaper
Disallow: /
 

Latest revision as of 07:31, 19 July 2013
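
The two Disallow forms in the file above behave quite differently: an empty "Disallow:" (as for IsraBot and Orthogaffe) leaves that agent free to crawl everything, while "Disallow: /" shuts the agent out of every path on the site. Here is a minimal sketch of how a compliant crawler evaluates such records, using Python's standard-library urllib.robotparser; the sample page URL is illustrative only, not taken from the original page:

    from urllib import robotparser

    # Two rule styles copied from the file above: one empty Disallow,
    # one site-wide Disallow. parse() strips each line, so the indent
    # inside the string literal is harmless.
    rules = """\
    User-agent: IsraBot
    Disallow:

    User-agent: WebReaper
    Disallow: /
    """

    rp = robotparser.RobotFileParser()
    rp.parse(rules.splitlines())

    # Empty "Disallow:": everything is allowed for IsraBot.
    print(rp.can_fetch("IsraBot", "http://www.wikipedia.org/wiki/Main_Page"))    # True

    # "Disallow: /": every path is blocked for WebReaper.
    print(rp.can_fetch("WebReaper", "http://www.wikipedia.org/wiki/Main_Page"))  # False

Note that the file defines no catch-all "User-agent: *" record, so a crawler not named above is left unrestricted; the per-bot blocks are the entire policy.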