extensive robots.txt

This commit is contained in:
Pagwin 2024-12-21 21:07:56 -05:00
parent 5ff58debab
commit e89bc769b9
No known key found for this signature in database
GPG key ID: 81137023740CA260

View file

@ -1,2 +1,55 @@
# No AI companies please
User-agent: GPTBot
Disallow: /
User-agent: ChatGPT-User
Disallow: /
User-agent: ClaudeBot
Disallow: /
User-agent: Google-Extended
Disallow: /
User-agent: CCBot
Disallow: /
User-agent: PerplexityBot
Disallow: /
User-agent: Diffbot
Disallow: /
# Facebook https://developers.facebook.com/docs/sharing/webmasters/web-crawlers
User-agent: meta-externalfetcher
Disallow: /
# Apple doesn't distinguish a crawler specifically for their LLM if they ever make one and I don't care abou Siri results
User-agent: Applebot
Disallow: /
# Other fun things I wanna block which I found from https://www.cyberciti.biz/web-developer/block-openai-bard-bing-ai-crawler-bots-using-robots-txt-file/
User-agent: Omgilibot
Disallow: /
User-agent: Omgili
Disallow: /
# Will probably be ignored but might as well
User-agent: Bytespider
Disallow: /
User-agent: Amazonbot
Disallow: /
User-agent: ImagesiftBot
Disallow: /
User-agent: YouBot
Disallow: /
# generic allowance for search engines and what not
User-Agent: *
Allow: /