# ============================================================
#  AXIA Enterprises — robots.txt
#  https://axiaenterprises.io
# ============================================================
#  Policy: open to legitimate search and AI crawlers.
#  We want AXIA discoverable in Google, Bing, and major
#  AI assistants (ChatGPT, Claude, Perplexity, Gemini, etc.).
#  Only internal repository paths (/.git/, /.github/) are disallowed;
#  a short list of SEO scraper bots is blocked site-wide.
# ============================================================

# ---------- Default: allow everything to all crawlers ----------
# Note: a crawler that matches one of the named groups below obeys only
# that group and ignores this wildcard group entirely; nothing here is
# inherited. Any path a named bot must stay out of therefore has to be
# listed in that bot's own group.
# The Disallow lines come before "Allow: /" so that parsers which apply
# the first matching rule (rather than the longest match) still block
# the repository-metadata paths.
User-agent: *
Disallow: /.git/
Disallow: /.github/
Allow: /

# ---------- Major search engines (explicit allow) ----------
# One shared group: consecutive User-agent lines all receive the rules
# that follow. A bot matched here ignores the "User-agent: *" group, so
# the repository-metadata disallows are repeated in this group.
# Disallow lines come first so first-match parsers honor them too.
# Do not insert blank lines between the User-agent lines; some parsers
# treat a blank line as the end of a group.
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-Video
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: YandexBot
User-agent: Baiduspider
Disallow: /.git/
Disallow: /.github/
Allow: /

# ---------- AI / LLM crawlers (explicit allow) ----------
# We want AXIA cited in AI answers, so we explicitly allow
# the major generative-AI training and answer crawlers.
# A bot matched by one of these groups ignores the "User-agent: *"
# group, so every group repeats the repository-metadata disallows.
# Disallow lines come first so first-match parsers honor them too.
# Keep each vendor's User-agent lines consecutive (no blank lines
# between them); some parsers treat a blank line as the end of a group.

# OpenAI (ChatGPT browsing + training)
User-agent: GPTBot
User-agent: OAI-SearchBot
User-agent: ChatGPT-User
Disallow: /.git/
Disallow: /.github/
Allow: /

# Anthropic (Claude)
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: anthropic-ai
Disallow: /.git/
Disallow: /.github/
Allow: /

# Google (Gemini)
# Google-Extended is a control token for Gemini model use, not a
# separate crawler; AI Overviews follow the Googlebot rules instead.
User-agent: Google-Extended
Disallow: /.git/
Disallow: /.github/
Allow: /

# Perplexity
User-agent: PerplexityBot
User-agent: Perplexity-User
Disallow: /.git/
Disallow: /.github/
Allow: /

# Apple Intelligence
User-agent: Applebot
User-agent: Applebot-Extended
Disallow: /.git/
Disallow: /.github/
Allow: /

# Meta AI
User-agent: Meta-ExternalAgent
User-agent: FacebookBot
Disallow: /.git/
Disallow: /.github/
Allow: /

# Mistral
User-agent: MistralAI-User
Disallow: /.git/
Disallow: /.github/
Allow: /

# Cohere
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
Disallow: /.git/
Disallow: /.github/
Allow: /

# Common Crawl (foundation for many open AI datasets)
User-agent: CCBot
Disallow: /.git/
Disallow: /.github/
Allow: /

# You.com
User-agent: YouBot
Disallow: /.git/
Disallow: /.github/
Allow: /

# Bytespider (TikTok / ByteDance)
User-agent: Bytespider
Disallow: /.git/
Disallow: /.github/
Allow: /

# Diffbot
User-agent: Diffbot
Disallow: /.git/
Disallow: /.github/
Allow: /

# Amazon
User-agent: Amazonbot
Disallow: /.git/
Disallow: /.github/
Allow: /

# ---------- Block: aggressive scrapers / SEO spies ----------
# These provide no value to the brand and add server load.
# Each bot below gets its own group whose only rule is "Disallow: /",
# i.e. every path on the site is off-limits to that user agent.
# The groups are deliberately kept separate (one User-agent per group)
# rather than merged, so a bot with a simplistic parser still finds a
# group addressed to it alone.
User-agent: SemrushBot
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: BLEXBot
Disallow: /

# ---------- Sitemap ----------
# Absolute URL of the site's XML sitemap.
Sitemap: https://axiaenterprises.io/sitemap.xml