{"benchmark":{"version":"1.1.0","updated":"2026-04-23"},"count":8,"rows":[{"runId":"b3adfec5-80c2-4eb6-9ae3-a2a99bc2f94a","modelSlug":"gpt-4-1","modelName":"GPT-4.1","provider":"OpenAI","gatewayId":"openai/gpt-4.1","versionPinned":"openai/gpt-4.1","triggeredAt":"2026-04-24T10:46:06.801288+00:00","score40":32,"rawMean":0.7875,"stdev":0.0102,"passed":true,"questionCount":80,"passThreshold":0.68},{"runId":"75f8b417-705c-4b74-8cb3-8b4959e3c01f","modelSlug":"gpt-5-2-pro","modelName":"GPT 5.2","provider":"OpenAI","gatewayId":"openai/gpt-5.2-pro","versionPinned":"openai/gpt-5.2-pro","triggeredAt":"2026-04-24T10:44:31.464008+00:00","score40":29,"rawMean":0.7375,"stdev":0.0177,"passed":true,"questionCount":80,"passThreshold":0.68},{"runId":"55d6c7e1-e79f-4316-9290-bbb9e9c406d2","modelSlug":"mistral-large-3","modelName":"Mistral Large 3","provider":"Mistral","gatewayId":"mistral/mistral-large-3","versionPinned":"mistral/mistral-large-3","triggeredAt":"2026-04-23T19:57:44.987056+00:00","score40":31,"rawMean":0.7833,"stdev":0.0059,"passed":true,"questionCount":80,"passThreshold":0.68},{"runId":"0da1dcf7-743b-4386-9622-5ca307d241d0","modelSlug":"grok-4-20-reasoning-beta","modelName":"Grok 4.20 Beta Reasoning","provider":"xAI","gatewayId":"xai/grok-4.20-reasoning-beta","versionPinned":"xai/grok-4.20-reasoning-beta","triggeredAt":"2026-04-23T19:56:13.77837+00:00","score40":28,"rawMean":0.7083,"stdev":0.0059,"passed":true,"questionCount":80,"passThreshold":0.68},{"runId":"103ca840-4018-4d83-abfe-35befd61890d","modelSlug":"gpt-5-4","modelName":"GPT 5.4","provider":"OpenAI","gatewayId":"openai/gpt-5.4","versionPinned":"openai/gpt-5.4","triggeredAt":"2026-04-23T17:58:01.583192+00:00","score40":29,"rawMean":0.7208,"stdev":0.0059,"passed":true,"questionCount":80,"passThreshold":0.68},{"runId":"134444bc-2f34-4bbc-98cd-e57b08476b9f","modelSlug":"deepseek-v3-2","modelName":"DeepSeek V3.2","provider":"DeepSeek","gatewayId":"deepseek/deepseek-v3.2","versionPinned":"deepseek/deepseek-v3.2","triggeredAt":"2026-04-23T13:40:39.574296+00:00","score40":28,"rawMean":0.6958,"stdev":0.0118,"passed":true,"questionCount":80,"passThreshold":0.68},{"runId":"cb68647e-0834-4bb9-8ade-e558a139817d","modelSlug":"claude-sonnet-4-6","modelName":"Claude Sonnet 4.6","provider":"Anthropic","gatewayId":"anthropic/claude-sonnet-4.6","versionPinned":"anthropic/claude-sonnet-4.6","triggeredAt":"2026-04-23T13:18:40.757918+00:00","score40":29,"rawMean":0.7333,"stdev":0.0118,"passed":true,"questionCount":80,"passThreshold":0.68},{"runId":"72fca688-ddca-4842-ab8d-1c98d7bee171","modelSlug":"claude-opus-4-7","modelName":"Claude Opus 4.7","provider":"Anthropic","gatewayId":"anthropic/claude-opus-4.7","versionPinned":"anthropic/claude-opus-4.7","triggeredAt":"2026-04-23T13:14:59.759818+00:00","score40":34,"rawMean":0.8417,"stdev":0.0059,"passed":true,"questionCount":80,"passThreshold":0.68}]}