# Jonrhus.com robots.txt $Revision: 1.2 $
#
# Layout: one directive per line, groups separated by a blank line.
# A group is one or more User-agent lines followed by its rules; a crawler
# obeys the group that names it, and falls back to the "*" group otherwise.
# The "*" inside a path (e.g. /*.css) is a wildcard extension; crawlers
# that do not implement it may treat the pattern literally.
#
# Base directive for unknown and bad spiders
#
User-agent: *
User-agent: Baiduspider
User-agent: copyright sheriff
User-agent: DealGates Bot
User-agent: Gaisbot
User-agent: GingerCrawler
User-agent: Nutch
User-agent: page_verifier
User-agent: renlifangbot
User-agent: SurveyBot
User-agent: TurnitinBot
User-agent: VoilaBot
User-agent: Yandex
User-agent: Yeti
User-agent: YoudaoBot
Disallow: /

#
# More specific general directives
#
User-agent: Butterfly
User-agent: Charlotte
User-agent: Exabot
User-agent: envolk
User-agent: Gigabot
User-agent: ScoutJet
User-agent: Speedy
User-agent: Teoma
User-agent: Twiceler
User-agent: YowedoBot
# The empty Disallow permits everything; the rules below then carve out
# the exceptions.
Disallow:
Disallow: /ad
Disallow: /ana
Disallow: /maint
Disallow: /cl
Disallow: /gra
Disallow: /inc
Disallow: /them
Disallow: /web
Disallow: /gor/kajirae_positions/*.jpg

#
# Alexa web archiver directives
#
User-agent: ia_archiver
Disallow:
Disallow: /ad
Disallow: /ana
Disallow: /cl
Disallow: /inc
Disallow: /maint
Disallow: /them
Disallow: /web
Disallow: /gor/kajirae_positions/*.jpg

#
# Googlebot(s) specific directives
#
User-agent: Googlebot
Disallow:
# Path[name]s that SHOULD NOT be crawled
Disallow: /ad
Disallow: /ana
Disallow: /maint
Disallow: /cl
Disallow: /inc
Disallow: /them
Disallow: /web

# MediaBot
User-agent: Googlebot-Image
Disallow:
Disallow: /ad
Disallow: /gra
Disallow: /gor/kaj

# Ad Bot(s)
User-agent: Adsbot-Google
User-agent: Mediapartners-Google
Disallow: /

#
# Yahoo! Slurp(s) specific directives
#
User-agent: Slurp
Disallow:
# Path[name]s that SHOULD NOT be crawled
Disallow: /*.css
Disallow: /ad
Disallow: /ana
Disallow: /maint
Disallow: /cl
Disallow: /inc
Disallow: /them
Disallow: /web
Disallow: /gra
Disallow: /gor/kajirae_positions/*.jpg

#
# MSNbot(s) specific directives
#
# NOTE(review): User-agent matching is case-insensitive, so "MSNBot" and
# "msnbot" name the same crawler; both are kept as originally written.
User-agent: MSNBot
User-agent: msnbot
User-agent: msnbot-products
User-agent: msnbot-newsblogs
Disallow:
# Path[name]s that SHOULD NOT be crawled
Disallow: /ad
Disallow: /ana
Disallow: /maint
Disallow: /cl
Disallow: /inc
Disallow: /them
Disallow: /web

# Media
# NOTE(review): "Disallow: /" already blocks the whole site for this
# crawler, which makes the three rules after it redundant. The
# Googlebot-Image group uses an empty "Disallow:" in this position --
# confirm whether a full block of msnbot-media is intended.
User-agent: msnbot-media
# Path[name]s that SHOULD NOT be crawled
Disallow: /
Disallow: /ad
Disallow: /gra
Disallow: /gor/kaj