From e89bc769b9effe1d866acb392430dd306ccfec8d Mon Sep 17 00:00:00 2001
From: Pagwin
Date: Sat, 21 Dec 2024 21:07:56 -0500
Subject: [PATCH] extensive robots.txt

---
 robots.txt | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/robots.txt b/robots.txt
index f6e6d1d..b054e66 100644
--- a/robots.txt
+++ b/robots.txt
@@ -1,2 +1,55 @@
+# No AI companies please
+User-agent: GPTBot
+Disallow: /
+
+User-agent: ChatGPT-User
+Disallow: /
+
+User-agent: ClaudeBot
+Disallow: /
+
+User-agent: Google-Extended
+Disallow: /
+
+User-agent: CCBot
+Disallow: /
+
+User-agent: PerplexityBot
+Disallow: /
+
+User-agent: Diffbot
+Disallow: /
+
+# Facebook https://developers.facebook.com/docs/sharing/webmasters/web-crawlers
+
+User-agent: meta-externalfetcher
+Disallow: /
+
+# Apple doesn't distinguish a crawler specifically for their LLM if they ever make one and I don't care about Siri results
+User-agent: Applebot
+Disallow: /
+
+# Other fun things I wanna block which I found from https://www.cyberciti.biz/web-developer/block-openai-bard-bing-ai-crawler-bots-using-robots-txt-file/
+
+User-agent: Omgilibot
+Disallow: /
+
+User-agent: Omgili
+Disallow: /
+
+# Will probably be ignored but might as well
+User-agent: Bytespider
+Disallow: /
+
+User-agent: Amazonbot
+Disallow: /
+
+User-agent: ImagesiftBot
+Disallow: /
+
+User-agent: YouBot
+Disallow: /
+
+# generic allowance for search engines and what not
 User-Agent: *
 Allow: /