# DomiSol — robots policy
# Public marketing site: every page is fair game for general crawlers.

# Catch-all group: applies to every crawler that does NOT match a more
# specific User-agent group elsewhere in this file. A crawler that does
# match its own group uses that group INSTEAD of this one — rules here
# are not inherited.
User-agent: *
# Rule order: specific Disallow lines first, blanket Allow last.
# Longest-match parsers (RFC 9309, Google, Bing) ignore order, but
# first-match parsers would stop at a leading `Allow: /` and never see
# the Disallow lines.
Disallow: /api/
# Cloudflare-injected paths (email obfuscation, challenges, analytics
# beacons). They aren't pages, they're infrastructure endpoints — and
# Cloudflare sometimes returns 404 for the email-protection one,
# which Google then reports as a coverage error against the property.
# Disallowing here removes them from Search Console's crawl queue.
Disallow: /cdn-cgi/
Allow: /

# Major search engines: listed explicitly so intent is visible per bot.
# A crawler that matches a group naming it ignores `User-agent: *`
# completely, so every group below must REPEAT the Disallow rules from
# the catch-all group — otherwise Googlebot & co. would be free to crawl
# /api/ and /cdn-cgi/ (and the /cdn-cgi/ 404s would come straight back
# into Search Console). Disallow lines come before the blanket Allow so
# first-match parsers honour them too.
# No crawl-rate directive is set here; add `Crawl-delay` to a group only
# for engines that support it (Google ignores it).
User-agent: Googlebot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: Googlebot-Image
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: Bingbot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: DuckDuckBot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: YandexBot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: Baiduspider
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

# ── AI assistants / answer engines — explicitly welcome ──────────────
# Naming an agent here takes it OUT of the `User-agent: *` group: a
# crawler obeys only the most specific group that matches it and does
# not inherit rules from `*`. Each group below therefore repeats the
# site-wide Disallow lines (/api/, /cdn-cgi/) so that listing an agent
# changes nothing except making intent UNAMBIGUOUS: DomiSol WANTS to be
# read, indexed, and cited by AI answer engines — that's the whole point
# of /llms.txt and the FAQPage JSON-LD across the site.
# Listing each agent also gives one obvious place to flip a bot to
# `Disallow: /` later if it ever misbehaves.
#
# Two kinds of agent are listed:
#   * index/crawl bots — build the engine's searchable corpus
#   * *-User / live-fetch bots — fetch a page in real time when a user
#     asks the assistant about it; blocking these means the assistant
#     can't open our links even when a user explicitly requests it.

# OpenAI — ChatGPT / SearchGPT
User-agent: GPTBot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: OAI-SearchBot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: ChatGPT-User
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

# Anthropic — Claude
User-agent: ClaudeBot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: anthropic-ai
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: Claude-Web
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: Claude-User
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: Claude-SearchBot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

# Perplexity
User-agent: PerplexityBot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

User-agent: Perplexity-User
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

# Google — Gemini / AI Overviews grounding. Google-Extended is NOT a
# crawler; it's the token that decides whether Gemini & Vertex AI may
# use content Googlebot already fetched. Allowing it is required to be
# eligible for citation in Gemini answers.
User-agent: Google-Extended
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

# Apple Intelligence
User-agent: Applebot-Extended
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

# Microsoft Copilot rides the Bing index — Bingbot is allowed above.

# Common Crawl — open corpus many models train on; broad presence.
User-agent: CCBot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

# Amazon
User-agent: Amazonbot
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

# Meta AI
User-agent: meta-externalagent
Disallow: /api/
Disallow: /cdn-cgi/
Allow: /

# Deny list: high-volume third-party crawlers (mostly SEO / backlink
# indexers, plus PetalBot) that cost bandwidth without sending traffic.
# One shared group — every User-agent line below gets the same rule.
# To re-admit a bot, comment out its User-agent line; it then falls back
# to the `User-agent: *` rules.
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: PetalBot
User-agent: DotBot
Disallow: /

# Sitemaps — Astro's @astrojs/sitemap integration generates this at build time.
# The Sitemap directive is independent of the User-agent groups above
# (it applies to every crawler) and must be an absolute URL.
Sitemap: https://domisol.app/sitemap-index.xml