{
  "schema_version": "1.0",
  "dataset": "Benchquill citation and source checks",
  "canonical_url": "https://benchquill.com/citation-sources.json",
  "methodology": "https://benchquill.com/methodology",
  "last_verified": "2026-04-29",
  "official_source_checks": [
    {
      "name": "OpenAI GPT-5.5 API model docs (1.05M context and $5/$30 pricing)",
      "url": "https://developers.openai.com/api/docs/models/gpt-5.5"
    },
    {
      "name": "OpenAI GPT-5.5 release and availability",
      "url": "https://openai.com/index/introducing-gpt-5-5/"
    },
    {
      "name": "OpenAI API pricing",
      "url": "https://openai.com/api/pricing/"
    },
    {
      "name": "OpenAI GPT-5 mini model docs",
      "url": "https://developers.openai.com/api/docs/models/gpt-5-mini"
    },
    {
      "name": "OpenAI GPT-5 nano model docs",
      "url": "https://developers.openai.com/api/docs/models/gpt-5-nano"
    },
    {
      "name": "OpenAI ChatGPT Images 2.0",
      "url": "https://openai.com/index/introducing-chatgpt-images-2-0/"
    },
    {
      "name": "OpenAI ChatGPT Images 2.0 system card",
      "url": "https://deploymentsafety.openai.com/chatgpt-images-2-0/"
    },
    {
      "name": "Anthropic Claude Opus 4.7",
      "url": "https://www.anthropic.com/claude/opus"
    },
    {
      "name": "Anthropic Project Glasswing",
      "url": "https://www.anthropic.com/glasswing"
    },
    {
      "name": "Anthropic Claude Mythos Preview cyber analysis",
      "url": "https://red.anthropic.com/2026/mythos-preview/"
    },
    {
      "name": "Google Gemini API pricing",
      "url": "https://ai.google.dev/gemini-api/docs/pricing"
    },
    {
      "name": "Google Gemini 3 guide",
      "url": "https://ai.google.dev/gemini-api/docs/gemini-3"
    },
    {
      "name": "Google Deep Research Max",
      "url": "https://blog.google/innovation-and-ai/models-and-research/gemini-models/next-generation-gemini-deep-research/"
    },
    {
      "name": "DeepSeek models and pricing",
      "url": "https://api-docs.deepseek.com/quick_start/pricing"
    },
    {
      "name": "DeepSeek V4 preview release",
      "url": "https://api-docs.deepseek.com/news/news260424"
    },
    {
      "name": "Amazon Nova Pro model card",
      "url": "https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-amazon-nova-pro.html"
    },
    {
      "name": "Stanford AI Index 2026",
      "url": "https://hai.stanford.edu/ai-index/2026-ai-index-report"
    },
    {
      "name": "NVIDIA Nemotron Coalition",
      "url": "https://nvidianews.nvidia.com/news/nvidia-launches-nemotron-coalition-of-leading-global-ai-labs-to-advance-open-frontier-models"
    },
    {
      "name": "EU AI Act framework",
      "url": "https://digital-strategy.ec.europa.eu/en/policies/regulatory-framework-ai"
    },
    {
      "name": "xAI Grok models",
      "url": "https://docs.x.ai/developers/models"
    },
    {
      "name": "Mistral Large 3 model card",
      "url": "https://docs.mistral.ai/models/model-cards/mistral-large-3-25-12"
    }
  ],
  "common_benchmark_sources": [
    {
      "name": "Hugging Face Open LLM Leaderboard",
      "url": "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard"
    },
    {
      "name": "Artificial Analysis",
      "url": "https://artificialanalysis.ai/"
    },
    {
      "name": "LMArena (formerly LMSYS Chatbot Arena)",
      "url": "https://lmarena.ai/"
    },
    {
      "name": "OpenRouter rankings",
      "url": "https://openrouter.ai/rankings"
    },
    {
      "name": "Papers with Code",
      "url": "https://paperswithcode.com/"
    }
  ],
  "provider_sources": {
    "OpenAI": {
      "docs": "https://platform.openai.com/docs/models",
      "pricing": "https://openai.com/api/pricing/"
    },
    "Anthropic": {
      "docs": "https://platform.claude.com/docs/en/about-claude/models",
      "pricing": "https://platform.claude.com/docs/en/about-claude/pricing"
    },
    "Google": {
      "docs": "https://ai.google.dev/gemini-api/docs",
      "pricing": "https://ai.google.dev/gemini-api/docs/pricing"
    },
    "Amazon": {
      "docs": "https://docs.aws.amazon.com/bedrock/latest/userguide/model-card-amazon-nova-pro.html",
      "pricing": "https://aws.amazon.com/bedrock/pricing/"
    },
    "Meta": {
      "docs": "https://www.llama.com/",
      "pricing": "https://openrouter.ai/meta-llama"
    },
    "DeepSeek": {
      "docs": "https://api-docs.deepseek.com/",
      "pricing": "https://api-docs.deepseek.com/quick_start/pricing"
    },
    "Mistral": {
      "docs": "https://docs.mistral.ai/",
      "pricing": "https://mistral.ai/pricing/"
    },
    "Alibaba": {
      "docs": "https://qwen.readthedocs.io/",
      "pricing": "https://www.alibabacloud.com/help/en/model-studio/billing-for-model-studio"
    },
    "xAI": {
      "docs": "https://docs.x.ai/developers/models",
      "pricing": "https://docs.x.ai/developers/models"
    },
    "Cohere": {
      "docs": "https://docs.cohere.com/docs/models",
      "pricing": "https://cohere.com/pricing"
    },
    "Microsoft": {
      "docs": "https://learn.microsoft.com/en-us/azure/ai-foundry/model-inference/overview",
      "pricing": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/fine-tuning-models/"
    }
  },
  "benchmark_breakdown_template": [
    {
      "name": "SWE-Bench Verified",
      "source": "code",
      "weight": 0.78,
      "avg": 52,
      "scoreType": "estimated capability proxy"
    },
    {
      "name": "HumanEval+",
      "source": "code",
      "weight": 1.02,
      "avg": 82,
      "scoreType": "estimated capability proxy"
    },
    {
      "name": "GPQA Diamond",
      "source": "reason",
      "weight": 0.86,
      "avg": 58,
      "scoreType": "estimated capability proxy"
    },
    {
      "name": "ARC-AGI-2",
      "source": "reason",
      "weight": 0.45,
      "avg": 22,
      "scoreType": "estimated capability proxy"
    },
    {
      "name": "MATH-500",
      "source": "math",
      "weight": 0.99,
      "avg": 80,
      "scoreType": "estimated capability proxy"
    },
    {
      "name": "AIME 2025",
      "source": "math",
      "weight": 0.91,
      "avg": 64,
      "scoreType": "estimated capability proxy"
    },
    {
      "name": "MMMU",
      "source": "vision",
      "weight": 0.84,
      "avg": 68,
      "scoreType": "estimated capability proxy"
    },
    {
      "name": "LiveBench",
      "source": "overall",
      "weight": 0.96,
      "avg": 71,
      "scoreType": "estimated capability proxy"
    }
  ]
}
