# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content

User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

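# Mane Match — robots.txt
# Intent: search engines, AI/LLM crawlers, and answer-engine agents are all
# welcome to crawl, index, and reference our public pages freely.
#
# NOTE(review): as served, the Cloudflare-managed section at the top of this
# file overrides that intent for several crawlers. It contains its own
# "User-agent: *" group (Content-Signal: search=yes,ai-train=no) and a
# "Disallow: /" group for each of Amazonbot, Applebot-Extended, Bytespider,
# CCBot, ClaudeBot, CloudflareBrowserRenderingCrawler, Google-Extended, GPTBot,
# and meta-externalagent. A crawler that matches one of those named groups
# follows that group and ignores the "User-agent: *" rules below. That section
# is marked as Cloudflare-managed, so the fix presumably belongs in the
# Cloudflare configuration rather than in this file — TODO confirm.
#
# The site's own rules are kept in one User-agent: * group below so access is
# consistent for every crawler those rules apply to. The only blocked paths are
# /api/ (other than /api/listings), /admin, /dashboard, /messages, and
# /settings — login-walled surfaces with nothing indexable. Everything else —
# listings, breed/discipline pages, public profiles, llms.txt — is fully open.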

# Site rules for every crawler that does not match a more specific group.
# Rules are ordered most-specific first: RFC 9309 parsers pick the longest
# matching path regardless of order, but parsers that stop at the first
# matching rule would otherwise hit "Allow: /" and never apply the Disallow
# lines. This order yields the same result under both strategies.
User-agent: *
# Public, machine-readable resources. /api/listings must stay crawlable even
# though the rest of /api/ is blocked below.
Allow: /api/listings
Allow: /llms.txt
Allow: /sitemap.xml
Allow: /mcp-guide
# Login-walled or non-indexable surfaces.
Disallow: /api/
Disallow: /admin
Disallow: /dashboard
Disallow: /messages
Disallow: /settings
# Everything not matched above is open.
Allow: /

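# AI / LLM / answer-engine crawlers are explicitly welcome (consent to crawl,
# index, and train on public content). The following are covered by the
# User-agent: * group above: ChatGPT-User, OAI-SearchBot, Claude-Web,
# anthropic-ai, GoogleOther, PerplexityBot, Perplexity-User, cohere-ai, YouBot,
# DuckAssistBot, Diffbot, MistralAI-User, and any other compliant crawler that
# has no group of its own in this file.
#
# NOTE(review): GPTBot, ClaudeBot, Google-Extended, Applebot-Extended,
# Meta-ExternalAgent, Bytespider, CCBot, and Amazonbot are meant to be welcome
# too, but each currently has its own "Disallow: /" group in the
# Cloudflare-managed section at the top of this file. A crawler follows its
# own named group instead of User-agent: *, so those bots are blocked from the
# whole site. The managed section also sets Content-Signal ai-train=no, which
# contradicts the consent to train stated here. Reconcile the two before
# relying on this comment.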

# Machine-readable resources:
#   Listings JSON API:  https://mane-match.com/api/listings
#   Single listing:     https://mane-match.com/api/listings/{id}
#   Site summary:       https://mane-match.com/llms.txt
#   MCP endpoint:       POST https://mane-match.com/mcp

Sitemap: https://mane-match.com/sitemap.xml