# $Id: robots.txt,v 1.7.2.3 2008/12/10 20:24:38 drumm Exp $
#
# robots.txt
#
# This file is to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.
#
# This file will be ignored unless it is at the root of your host:
#   Used:    http://example.com/robots.txt
#   Ignored: http://example.com/site/robots.txt
#
# For more information about the robots.txt standard, see:
#   http://www.robotstxt.org/wc/robots.html
#
# For syntax checking, see:
#   http://www.sxw.org.uk/computing/robots/check.html
#
# Format notes for maintainers:
#   - One directive per line; "#" starts a comment that runs to end of line.
#   - Records (a User-agent line plus its rules) are separated by blank
#     lines, so do NOT put blank lines inside a record.

# ---------------------------------------------------------------------
# Crawlers that are denied access to the entire site.
# ---------------------------------------------------------------------

User-agent: http://www.almaden.ibm.com/cs/crawler
Disallow: /

User-agent: ArribaPacketRat
Disallow: /

User-agent: autoemailspider
Disallow: /

User-agent: baiduspider
Disallow: /

User-agent: Bilbo
Disallow: /

User-agent: DigExt
Disallow: /

User-agent: dloader(NaverRobot)/1.0
Disallow: /

User-agent: DittoSpyder
Disallow: /

User-agent: DTS Agent
Disallow: /

User-agent: fast
Disallow: /

User-agent: Getleft 1.1b2
Disallow: /

User-agent: girafa
Disallow: /

User-agent: Gigabot/1.0
Disallow: /

User-agent: grub-client
Disallow: /

User-agent: HTMLAB
Disallow: /

User-agent: httrack
Disallow: /

User-agent: ia_archiver
Disallow: /

User-agent: ImageVampire
Disallow: /

User-agent: k2spider
Disallow: /

User-agent: mail sweeper
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: NetCaptor
Disallow: /

User-agent: NITLE Blog Spider/0.01
Disallow: /

User-agent: NPBot
Disallow: /

User-agent: nutch
Disallow: /

User-agent: oBot
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: PSBot
Disallow: /

User-agent: QuepasaCreep v0.9.13
Disallow: /

User-agent: Scooter/3.3
Disallow: /

User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Slurp/2.0
Disallow: /

User-agent: SpiderKU/0.9
Disallow: /

User-agent: Steeler
Disallow: /

User-agent: SurveyBot/2.3
Disallow: /

User-agent: szukacz
Disallow: /

User-agent: Szukacz/1.5
Disallow: /

User-agent: TurnitinBot
Disallow: /

User-agent: Vagabondo/2.1
Disallow: /

User-agent: vischeck_spiderBot/0.1libwww-perl/5.48
Disallow: /

User-agent: vscooter
Disallow: /

User-agent: WebCopier v3.3
Disallow: /

User-agent: WebCopier v3.2a
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: webcrawler
Disallow: /

User-agent: Web Downloader/4.9
Disallow: /

User-agent: Web Downloader/5.8
Disallow: /

User-agent: WebGather 3.0
Disallow: /

User-agent: WebStripper/2.56
Disallow: /

User-agent: WebZIP/3.65
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: Wget
Disallow: /

User-agent: Zao
Disallow: /

User-agent: Zeus 2.6
Disallow: /

# ---------------------------------------------------------------------
# Rules for every other crawler. Everything below, up to the end of the
# file, is a single record: keep it free of blank lines.
# ---------------------------------------------------------------------

User-agent: *
# Directories
Disallow: /database/
Disallow: /includes/
Disallow: /misc/
Disallow: /modules/
Disallow: /sites/
Disallow: /test/
Disallow: /themes/
Disallow: /scripts/
Disallow: /updates/
Disallow: /profiles/
Disallow: /cache/
# Files
Disallow: /xmlrpc.php
Disallow: /cron.php
Disallow: /update.php
Disallow: /install.php
Disallow: /INSTALL.txt
Disallow: /INSTALL.mysql.txt
Disallow: /INSTALL.pgsql.txt
Disallow: /CHANGELOG.txt
Disallow: /MAINTAINERS.txt
Disallow: /LICENSE.txt
Disallow: /UPGRADE.txt
# Paths (clean URLs)
Disallow: /admin/
Disallow: /aggregator
Disallow: /comment/reply/
Disallow: /contact
Disallow: /logout
Disallow: /node/add
Disallow: /search/
Disallow: /search$
Disallow: /user/register
Disallow: /user/password
Disallow: /user/login
# Paths (no clean URLs)
Disallow: /?q=admin/
Disallow: /?q=aggregator
Disallow: /?q=comment/reply/
Disallow: /?q=contact
Disallow: /?q=logout
Disallow: /?q=node/add
Disallow: /?q=search/
Disallow: /?q=user/password
Disallow: /?q=user/register
Disallow: /?q=user/login
# Additional rules ("$" anchors the end of the URL, "*" matches any run
# of characters; both are extensions that not every crawler honours).
Disallow: /node$
Disallow: /node/
Disallow: /user$
Disallow: /user/
Disallow: /profile/
Disallow: /profile$
Disallow: /*sort=
Disallow: /*/feed$
Disallow: /*/track$
# Keep the bulk of the "recent posts" pager pages out of the index:
Disallow: /tracker?
# Gallery specific:
Disallow: /gallery/slideshow/
Disallow: /gallery/new/
Disallow: /gallery/pending/
Disallow: /gallery/suspended/
Disallow: /gallery/curator/
Disallow: /acac/
# Rules are prefix matches, so no trailing "*" is needed here.
Disallow: /index.php?q=gallery
Disallow: /gallery/*/*/slideshow.html*
# Advertisement paths:
Disallow: /ad/
# Events module paths:
Disallow: /event/*/table/
Disallow: /event/*/list/
Disallow: /event/*/month/
Disallow: /event/*/week/
Disallow: /event/*/day/
# Print module paths:
Disallow: /print/
Disallow: /forward?
# Authentication files:
Disallow: /googleef96c09d85df99ab.html
Disallow: /LiveSearchSiteAuth.xml
Disallow: /y_key_2749965f53f94cf5.html
Disallow: /disallows.txt