extensive robots.txt
This commit is contained in:
parent
5ff58debab
commit
e89bc769b9
1 changed files with 53 additions and 0 deletions
53
robots.txt
53
robots.txt
|
@ -1,2 +1,55 @@
|
|||
# No AI companies please
|
||||
User-agent: GPTBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: ChatGPT-User
|
||||
Disallow: /
|
||||
|
||||
User-agent: ClaudeBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: Google-Extended
|
||||
Disallow: /
|
||||
|
||||
User-agent: CCBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: PerplexityBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: Diffbot
|
||||
Disallow: /
|
||||
|
||||
# Facebook https://developers.facebook.com/docs/sharing/webmasters/web-crawlers
|
||||
|
||||
User-agent: meta-externalfetcher
|
||||
Disallow: /
|
||||
|
||||
# Apple doesn't distinguish a crawler specifically for their LLM, if they ever make one, and I don't care about Siri results
|
||||
User-agent: Applebot
|
||||
Disallow: /
|
||||
|
||||
# Other fun things I wanna block which I found from https://www.cyberciti.biz/web-developer/block-openai-bard-bing-ai-crawler-bots-using-robots-txt-file/
|
||||
|
||||
User-agent: Omgilibot
|
||||
Disallow: /
|
||||
|
||||
User-agent: Omgili
|
||||
Disallow: /
|
||||
|
||||
# Will probably be ignored but might as well
|
||||
User-agent: Bytespider
|
||||
Disallow: /
|
||||
|
||||
User-agent: Amazonbot
|
||||
Disallow: /
|
||||
|
||||
User-agent: ImagesiftBot
|
||||
Disallow: /
|
||||
|
||||
User-agent: YouBot
|
||||
Disallow: /
|
||||
|
||||
# generic allowance for search engines and whatnot
|
||||
User-Agent: *
|
||||
Allow: /
|
||||
|
|
Loading…
Reference in a new issue