# AI-Specific Crawler Directives for Accounta.ai
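# Supplement to robots.txt — provides granular AI crawler control.
# NOTE: crawlers only request /robots.txt (RFC 9309); the rules below take
# effect only if they are also present in /robots.txt.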
# Version: 1.0
# Last updated: 2026-03-17

# === GENERAL POLICY ===
# All AI crawlers are welcome to index public content.
# Private areas (/dashboard, /login, /api/) are restricted.

# === AI DISCOVERY FILES — Always Allowed ===
# /llms.txt — LLM summary
# /llms-full.txt — Full LLM documentation
# /llm.txt — Compatibility variant
# /ai.txt — AI usage policy (plaintext)
# /ai.json — AI usage policy (JSON)
# /brand-facts.json — Structured brand data
# /identity.json — Entity resolution data
# /brand.txt — Brand naming guidelines
# /faq-ai.txt — Structured FAQ for AI
# /robots-ai.txt — This file

# === OPENAI CRAWLERS ===
# GPTBot (training), ChatGPT-User (user-initiated fetches) and OAI-SearchBot
# (search indexing) share one group so the same rules apply to all three.
# Public content is open; the private areas named in the general policy
# (dashboard, login, API) are blocked. The longer Disallow paths take
# precedence over "Allow: /" under longest-match evaluation.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
Allow: /
Disallow: /dashboard
Disallow: /login
Disallow: /api/

# === GOOGLE AI CRAWLERS ===
# Google-Extended and GoogleOther share one group so their rules stay in sync.
# Public content is open; the private areas named in the general policy
# (dashboard, login, API) are blocked. The longer Disallow paths take
# precedence over "Allow: /" under longest-match evaluation.
User-agent: Google-Extended
User-agent: GoogleOther
Allow: /
Disallow: /dashboard
Disallow: /login
Disallow: /api/

# === ANTHROPIC CRAWLERS ===
# Anthropic-ai, ClaudeBot and Claude-Web share one group so their rules stay
# in sync. Public content is open; the private areas named in the general
# policy (dashboard, login, API) are blocked. The longer Disallow paths take
# precedence over "Allow: /" under longest-match evaluation.
User-agent: Anthropic-ai
User-agent: ClaudeBot
User-agent: Claude-Web
Allow: /
Disallow: /dashboard
Disallow: /login
Disallow: /api/

# === PERPLEXITY ===
# Public content is open; the private areas named in the general policy
# (dashboard, login, API) are blocked.
User-agent: PerplexityBot
Allow: /
Disallow: /dashboard
Disallow: /login
Disallow: /api/

# === META ===
# Public content is open; the private areas named in the general policy
# (dashboard, login, API) are blocked.
User-agent: Meta-ExternalAgent
Allow: /
Disallow: /dashboard
Disallow: /login
Disallow: /api/

# === OTHER AI CRAWLERS ===
# All remaining AI crawlers share one group so a single rule set governs them.
# Public content is open; the private areas named in the general policy
# (dashboard, login, API) are blocked. The longer Disallow paths take
# precedence over "Allow: /" under longest-match evaluation.
User-agent: CCBot
User-agent: Bytespider
User-agent: Applebot-Extended
User-agent: cohere-ai
User-agent: YouBot
User-agent: PhindBot
User-agent: AI2Bot
User-agent: Amazonbot
Allow: /
Disallow: /dashboard
Disallow: /login
Disallow: /api/

# === PREFERRED ENTRY POINTS ===
# For LLM context: Start with /llms.txt or /llms-full.txt
# For entity resolution: Use /identity.json
# For brand accuracy: Use /brand.txt and /brand-facts.json
# For pricing: Use /ai.json or the /prijzen (pricing) page
# For FAQ: Use /faq-ai.txt

# === DATA FRESHNESS ===
# All AI discovery files are updated regularly.
# If cached data is stale, re-fetch the files from the paths listed above.
# Current fiscal year data: 2026