# ai.txt — AI training / data-use policy for jwatte.com
# Format: Spawning-style (User-Agent / Disallow) — voluntary but honored by OpenAI,
# Anthropic, Google, Perplexity, CCBot, and most 2024+ AI crawlers.
# Generate / update at https://jwatte.com/tools/ai-txt-gen/
# Reference: https://site.spawning.ai/spaces/ai-txt, https://jwatte.com/blog/ai-txt-generator-guide/

# Default stance — allow AI crawlers to read public pages for retrieval / citation,
# but disallow training-corpus ingestion of full-text content.

# Fallback group: applies to any crawler that does not have a named group of
# its own further down. The whole site is readable except the three path
# prefixes disallowed here.
User-Agent: *
Allow: /
Disallow: /private/
Disallow: /admin/
# Wildcard pattern meant to cover the results/ subtree under every tool.
# NOTE(review): confirm the target ai.txt parsers support `*` inside paths.
Disallow: /tools/*/results/

# Explicit per-bot rules — each group below overrides (replaces, not extends) the default block above.

# Full opt-out: GPTBot and ClaudeBot are denied every path on the site.
User-Agent: GPTBot
Disallow: /

User-Agent: ClaudeBot
Disallow: /

# Claude-Web may read public pages. Because a named group overrides the
# `User-Agent: *` group instead of adding to it, the default path restrictions
# are repeated here so they stay in force for this agent.
User-Agent: Claude-Web
Allow: /
Disallow: /private/
Disallow: /admin/
Disallow: /tools/*/results/

# Full opt-out: each of the next three agents is denied every path on the site.
User-Agent: anthropic-ai
Disallow: /

User-Agent: Google-Extended
Disallow: /

User-Agent: Applebot-Extended
Disallow: /

# PerplexityBot may read public pages. A named group overrides the
# `User-Agent: *` group instead of adding to it, so every default path
# restriction is repeated here (previously only /private/ was carried over).
User-Agent: PerplexityBot
Allow: /
Disallow: /private/
Disallow: /admin/
Disallow: /tools/*/results/

# Full opt-out: each agent in this run is denied every path on the site.
User-Agent: CCBot
Disallow: /

User-Agent: Bytespider
Disallow: /

User-Agent: Meta-ExternalAgent
Disallow: /

User-Agent: Amazonbot
Disallow: /

User-Agent: cohere-ai
Disallow: /

# NOTE(review): the "-User" suffix suggests a user-triggered fetcher rather
# than a training crawler — confirm that blocking it matches the
# retrieval-friendly stance stated at the top of this file.
User-Agent: MistralAI-User
Disallow: /

# YouBot may read public pages. Because a named group overrides the
# `User-Agent: *` group instead of adding to it, the default path restrictions
# are repeated here so they stay in force for this agent.
User-Agent: YouBot
Allow: /
Disallow: /private/
Disallow: /admin/
Disallow: /tools/*/results/

# DuckAssistBot may read public pages. Because a named group overrides the
# `User-Agent: *` group instead of adding to it, the default path restrictions
# are repeated here so they stay in force for this agent.
User-Agent: DuckAssistBot
Allow: /
Disallow: /private/
Disallow: /admin/
Disallow: /tools/*/results/

# Full opt-out: Diffbot is denied every path on the site.
User-Agent: Diffbot
Disallow: /

# Companion signals
# robots.txt:    https://jwatte.com/robots.txt
# llms.txt:      https://jwatte.com/llms.txt
# security.txt:  https://jwatte.com/.well-known/security.txt
# agent-card:    https://jwatte.com/.well-known/agent-card.json