# Robots Exclusion Protocol
# Filename: robots.txt (crawlers request the lowercase path /robots.txt)
# Author: Mark Garrison (mark@scarabmedia.com)
# Date: 10/16/2007
#
# Set directories & files to be Disallow/Allow
# NOTE: the path values given to Disallow/Allow are case-sensitive!
# Use * to match any sequence of characters within a path.
# Use $ to anchor the match to the end of a URL string,
# as in disallowing or allowing files of a particular extension.
#
# Each directive must be on its own line: a '#' comments out everything
# that follows it on the same line. Do not put blank lines inside the
# User-agent group below; some parsers treat a blank line as the end of
# the group. Use '#' lines as visual separators instead.
#
# See http://www.robotstxt.org/wc/norobots.html for full specifications
#
User-agent: *
# Ask crawlers to wait 300 seconds between requests (not honored by all bots).
Crawl-delay: 300
#
# Disallows specific to the XcClassifieds Application
Disallow: /data/
Disallow: /Classifieds/CPAdm*.asp
Disallow: /Classifieds/XcTest*.asp
Disallow: /Membership/CPAdm*.asp
Disallow: /Membership/XcTest*.asp
# The path contains a space. Crawlers request it percent-encoded (%20), so
# both the literal and the encoded spellings are listed.
Disallow: /Online Processing/
Disallow: /Online%20Processing/
Disallow: /WSH/
Disallow: /xcDiag/
#
# Standard disallows for Scarab Media hosted websites
Disallow: /cgi-bin/
Disallow: /log/
Disallow: /controlpanel/
Disallow: /*.mdb$
Disallow: /*_setup*.*
Disallow: /*_install*.*
Disallow: /*_admin*.*
Disallow: /*admin*/
Disallow: /*.flv$
#
# Disallow to log and trap bots that don't abide by these directives.
# Must set up the robot-trap utility for this to function properly.
# Otherwise leave commented out.
#Disallow: /robot-trap/
#
# Extended Parameters
# Under proposal; not universally recognized commands.
# Comments are kept on their own lines so the directive values stay clean.
#
# Maximum rate of 1 page every 300 seconds
Request-rate: 1/300
# Only visit between 9:00 AM and 2:00 PM UT (1:00-6:00 AM PST)
Visit-time: 0900-1400