##############
#   custom   #
##############
#
# robots.txt for www.captivea.com
#
# Format reminders:
#   * One record per line; a group starts at its "User-agent:" line(s) and
#     contains the Allow/Disallow rules that follow it.
#   * A crawler obeys only the most specific group that matches its name.
#     A bot listed by name below therefore ignores the "User-agent: *" rules.
#   * Rule paths are prefix matches from the start of the URL path;
#     "*" matches any run of characters and "$" anchors the end of the URL.
#   * "Sitemap:" is independent of groups and needs to be declared only once.

Sitemap: https://www.captivea.com/sitemap.xml

# ---------------------------------------------------------------------------
# Allow specific AI bots full access
# ---------------------------------------------------------------------------

User-agent: AI2Bot
Allow: /

User-agent: Ai2Bot-Dolma
Allow: /

User-agent: Amazonbot
Allow: /

User-agent: Applebot
Allow: /

User-agent: Applebot-Extended
Allow: /

User-agent: Bytespider
Allow: /

User-agent: CCBot
Allow: /

User-agent: ChatGPT-User
Allow: /

User-agent: Claude-Web
Allow: /

User-agent: ClaudeBot
Allow: /

User-agent: Diffbot
Allow: /

User-agent: FriendlyCrawler
Allow: /

User-agent: GPTBot
Allow: /

User-agent: Google-Extended
Allow: /

User-agent: GoogleOther
Allow: /

User-agent: GoogleOther-Image
Allow: /

User-agent: GoogleOther-Video
Allow: /

User-agent: ICC-Crawler
Allow: /

User-agent: ImagesiftBot
Allow: /

User-agent: Meta-ExternalAgent
Allow: /

User-agent: Meta-ExternalFetcher
Allow: /

User-agent: OAI-SearchBot
Allow: /

User-agent: PerplexityBot
Allow: /

User-agent: PiplBot
Allow: /

User-agent: Timpibot
Allow: /

User-agent: Webzio-Extended
Allow: /

User-agent: YouBot
Allow: /

User-agent: anthropic-ai
Allow: /

User-agent: cohere-ai
Allow: /

# NOTE(review): this name carries a "/2.0" version suffix; parsers that follow
# RFC 9309 match on the product token only ("iaskspider") - confirm the
# target crawler matches this entry.
User-agent: iaskspider/2.0
Allow: /

User-agent: img2dataset
Allow: /

# ---------------------------------------------------------------------------
# Block unwanted bots entirely
# ---------------------------------------------------------------------------

User-agent: Scrapy
Disallow: /

User-agent: VelenPublicWebCrawler
Disallow: /

User-agent: omgilibot
Disallow: /

User-agent: omgili
Disallow: /

# ---------------------------------------------------------------------------
# General rules for all other bots
# ---------------------------------------------------------------------------

User-agent: *

# Allow access to Instagram social pages
Allow: /social_instagram/

# Allow important files
Allow: /sitemap.xml
Allow: /robots.txt

# Admin and login section
# NOTE(review): this is a prefix match, so it also covers every path that
# merely starts with "/web" (including /website/..., /web/login and /web?db=...
# listed below) - confirm that breadth is intended.
Disallow: /web

# Internal pages not useful for SEO
Disallow: /website/info

# User login page
Disallow: /web/login

# URLs with specific Odoo database parameters
Disallow: /web?db=*

# Email management pages
Disallow: /mail

# Calendar-related content
Disallow: /calendar/

# All paths starting with "/page/"
Disallow: /page/

# User profiles
Disallow: /profile/

# Job application forms
Disallow: /jobs/apply/

# Exclude if not optimized
Disallow: /case-studies/

# Exclude if not optimized
Disallow: /thank-you

# Static files (CSS, JS, etc.)
Disallow: /static/

# Client portal, not relevant for SEO
Disallow: /portal/

# Block the shop and all subpages under /shop
# (the second rule is already covered by the first; kept for explicitness)
Disallow: /shop
Disallow: /shop/

# Block URLs with query parameters (duplication)
Disallow: /*?*

# Block URLs containing "&" (multiple parameters)
Disallow: /*&*