# Default (wildcard) group. It applies only to crawlers that do NOT match
# one of the bot-specific User-agent groups further down in this file.
# Everything is crawlable except the three path prefixes disallowed below.
User-agent: *
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

# NOTE: The `Disallow: /*/index.html` rule that previously lived here was
# REMOVED on 2026-06-04. It was added during the GCS → Cloudflare Pages
# migration to discourage indexing of the legacy `/foo/index.html` paths
# the old GCS bucket served. The unintended effect: Google already had
# those URLs in its index (from the GCS era) and the block prevented it
# from re-crawling them. Without a re-crawl Google couldn't see the new
# `<link rel="canonical">` pointing at `/foo/`, so those URLs eventually
# dropped from the index entirely — this is the documented root cause of
# the 26-page de-indexation event. The `_redirects` file now 301s every
# `/foo/index.html` → `/foo/` so Google consolidates to the canonical
# trailing-slash URL on re-crawl.

# ── AI / LLM crawler directives ──
# Explicit groups for each crawler we want to grant access. The default
# wildcard group above already allows everything outside the blocked
# paths, but omitting an explicit User-agent can be interpreted as
# "default disallow" by overly-strict implementations, so each bot is
# named here.
#
# IMPORTANT: a crawler that matches a bot-specific group obeys ONLY that
# group — it OVERRIDES the wildcard group rather than adding to it
# (RFC 9309; OpenAI's crawlers in particular follow this). Every group
# below therefore repeats the Disallow rules from the wildcard group; a
# bare `Allow: /` would open /api/, /app/ and /preview/ to that bot.
# Keep these lists in sync with the `User-agent: *` group.
#
# Order: grouped by operator (OpenAI, Google, Perplexity, Anthropic,
# Apple, Microsoft, Common Crawl, ByteDance, DuckDuckGo).

User-agent: GPTBot
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: ChatGPT-User
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: OAI-SearchBot
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: Google-Extended
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: PerplexityBot
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: Perplexity-User
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: ClaudeBot
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: anthropic-ai
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: Claude-Web
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: Applebot-Extended
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: Bingbot
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: CCBot
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: Bytespider
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

User-agent: DuckAssistBot
Allow: /
Disallow: /api/
Disallow: /app/
Disallow: /preview/

# Absolute URL of the XML sitemap. This line is not part of any
# User-agent group above.
Sitemap: https://www.ritn3d.com/sitemap.xml

# AI discoverability
# https://www.ritn3d.com/llms.txt
# https://www.ritn3d.com/llms-full.txt