# 10-09-2007 02:38
#
# robots.txt for http://www.rslinks.org/
#
# Formatting rules this file depends on:
#   * One directive per physical line. A "#" starts a comment that runs to the
#     end of the line, so a directive placed after a comment on the same line
#     is ignored by crawlers.
#   * No blank lines inside a User-agent group. Some older parsers treat a
#     blank line as the end of the group, so sections inside a group are
#     separated with "#" lines instead.

Sitemap: http://www.rslinks.org/sitemap.xml

User-agent: Mediapartners-Google*
Disallow: /

User-agent: Googlebot-Image
Disallow: /

# Block the Internet Archive crawler if you want the privacy.
User-agent: ia_archiver
Disallow: /

#
# All other robots
User-agent: *
# Directories
Disallow: /database/
Disallow: /includes/
Disallow: /misc/
Disallow: /modules/
Disallow: /sites/
Disallow: /themes/
Disallow: /scripts/
Disallow: /updates/
Disallow: /profiles/
Disallow: /myscripts/
Disallow: /go/
#
# Files
Disallow: /xmlrpc.php
Disallow: /cron.php
Disallow: /update.php
Disallow: /install.php
Disallow: /INSTALL.mysql.txt
Disallow: /INSTALL.pgsql.txt
Disallow: /CHANGELOG.txt
Disallow: /MAINTAINERS.txt
Disallow: /LICENSE.txt
Disallow: /UPGRADE.txt
Disallow: /boost_stats.php
#
# Paths (clean URLs) -- modified from default!
Disallow: /admin/
Disallow: /aggregator/
Disallow: /comment/
Disallow: /node/add
Disallow: /node/add/
Disallow: /logout
Disallow: /user/register
Disallow: /user/register/
Disallow: /user/password
Disallow: /user/password/
Disallow: /user/login
Disallow: /login
Disallow: /flag_content
Disallow: /i/
Disallow: /i/r/
Disallow: /i/o/
Disallow: /i/rw/
Disallow: /i/up/
#
# Additional Rules
# (The active /node rule is further down in this group, next to its warning.)
#Disallow: /node
#Disallow: /search
Disallow: /taxonomy/
Disallow: /dlc
#
# Users
# I block my users' pages on most sites because they don't have much content.
# Only leave them unblocked if your user pages have good content on them.
# If you DON'T want your users' pages crawled, use this, otherwise delete it:
Disallow: /user
Disallow: /user/
#
# This prevents Drupal's default non-clean URLs from being indexed.
Disallow: /?q=
#Disallow: /*?
Disallow: /?page
#
# Block tracker pages, including paginated tracker pages (both are matched by
# the same "/tracker?" prefix, so a single rule is enough).
Disallow: /tracker?
Disallow: /*/track$
#
# IMPORTANT: THE FOLLOWING LINE BLOCKS ALL /node* URLs -- only use it if you
# do not have content with URLs like http://example.com/node/10.
# Alternatively use the Global Redirect module.
Disallow: /node
#
# Disallow robots from all but the main feed - requires Pathauto feed aliases
# turned on. Optionally only block Google from the following paths because it
# causes the most havoc with Google.
Disallow: /*/feed$
# Prevent print-friendly duplicate pages from being crawled.
Disallow: /book/export/
# This is important for the Forward module.
Disallow: /forward/
# This prevents duplicate content caused by some modules like the Forums and
# Views modules.
Disallow: /*sort=
# Prevent duplicate content created by the Image module.
Disallow: /*size=
# If you have the Front module this will keep a duplicate front page from
# being indexed.
Disallow: /front_page
# If you use the Views module's frontpage this will keep the duplicate from
# being indexed.
Disallow: /frontpage