{"datasetCard":{"datasetName":"InsureBench Wft-Basis v1.1","questionCount":80,"questionTypes":"Meerkeuze, stellingen en korte casus","domain":"Wft-Basis","basedOn":"CDFD-eindtermen 2025 t/m 2026","questionSources":"Eigen vragen, herschreven oefenvragen en gevalideerde oefenset op basis van CDFD-eindtermen.","legalReferenceDate":"2026-01-01","lastReviewDate":null,"reviewStatus":"externe review in voorbereiding","reviewerRoles":[],"distributionPolicy":"Per CDFD-taak, toetsterm, vraagtype en moeilijkheid.","difficultyPolicy":"Laag, midden en hoog.","publicationPolicy":"Vragenbank privé; alleen aggregaties, scores en methodologie worden gepubliceerd.","contaminationPolicy":"Private set met held-out beleid en canary-vragen; exacte vragen worden niet publiek gedeeld.","resultsLicense":"CC-BY-4.0","modelSelectionPolicy":"We testen publiek beschikbare tekstmodellen van grote aanbieders die via de gekozen gateway beschikbaar zijn op de testdatum."},"questionDistribution":[{"domain":"Onbekend","questionCount":80,"weight":1}],"questionTypeDistribution":[{"type":"KB","questionCount":80,"weight":1}],"difficultyDistribution":[{"difficulty":"midden","questionCount":80,"weight":1}],"officialCdfdComparison":{"validFrom":"2026-04-01","sourceUrl":"https://cdfd.nl/initieel-examen-basis/","rows":[{"feature":"Tijdsduur","official":"120 minuten","insurebench":"n.v.t. (LLM)"},{"feature":"Aantal vragen","official":"42","insurebench":"80"},{"feature":"Aantal punten","official":"63","insurebench":"40 (omgerekend)"},{"feature":"Slaaggrens","official":"68%","insurebench":"68%"},{"feature":"KB-vragen","official":"21 vragen, 21 punten","insurebench":"Zie vraagtypenverdeling"},{"feature":"PG-vragen","official":"2 vragen, 4 punten","insurebench":"Zie vraagtypenverdeling"},{"feature":"VC-vragen","official":"19 vragen, 38 punten","insurebench":"Zie vraagtypenverdeling"}]},"modelVersionPolicy":{"providerAliasesText":"Modelnamen kunnen bij providers als alias werken. InsureBench rapporteert daarom de gebruikte API-naam, gatewayroute, testdatum, promptversie en datasetversie. Bij gesloten modellen blijft een beperkte onzekerheid bestaan over interne providerwijzigingen.","fields":[{"field":"Provider","why":"Herkomst"},{"field":"Gateway","why":"Route"},{"field":"Exacte API-modelnaam","why":"Reproduceerbaarheid"},{"field":"Provider release date","why":"Context"},{"field":"Testdatum","why":"Momentopname"},{"field":"Endpointtype","why":"Direct of router"},{"field":"Model alias of snapshot","why":"Belangrijk verschil"},{"field":"Kan provider stil wijzigen","why":"Ja / nee / onbekend"},{"field":"Seed ondersteund","why":"Ja / nee"},{"field":"Tools uitgeschakeld","why":"Ja"},{"field":"System prompt","why":"Leeg of exact vermeld"},{"field":"Parserversie","why":"Controle"},{"field":"Prompt template hash","why":"Controle"},{"field":"Benchmarkversie","why":"Controle"},{"field":"Datasetversie","why":"Controle"}]},"runProtocol":{"parserVersion":"answer-parser-v2","promptTemplateHash":"5a732017979c","benchmarkVersion":"1.1.0","datasetVersion":"InsureBench Wft-Basis v1.1","retryPolicy":"Gateway timeouts en tijdelijke providerfouten worden begrensd opnieuw geprobeerd; blijvende fouten tellen zichtbaar mee.","timeoutPolicy":"Per model kan een request-timeout gelden; de hard cap blijft onder de serverless limiet.","refusalPolicy":"Een weigering telt als 0 punten en wordt als refusal zichtbaar gemaakt.","rateLimitPolicy":"Rate limits pauzeren de run waar nodig; publieke API-routes hebben eigen rate limiting.","loggingPolicy":"Alleen geaggregeerde runstatus, foutcategorieën en kostenmetadata worden publiek gemaakt."},"latestRunErrorStats":[{"runId":"a5f529ee-5108-4814-b882-49cd18c2f1a0","modelName":"GPT-4.1","provider":"OpenAI","triggeredAt":"2026-04-28T18:19:36.831+00:00","noLetterCount":0,"parsingFailedCount":0,"refusalCount":0,"timeoutCount":0,"retryReleaseCount":0,"failedJobCount":0,"parserVersion":"answer-parser-v2","promptTemplateHash":"5a732017979c"},{"runId":"0bacf64f-e049-40a7-9c45-87cbc1aaec68","modelName":"Claude Haiku 4.5","provider":"Anthropic","triggeredAt":"2026-04-28T13:34:39.069+00:00","noLetterCount":0,"parsingFailedCount":0,"refusalCount":0,"timeoutCount":0,"retryReleaseCount":0,"failedJobCount":0,"parserVersion":"answer-parser-v2","promptTemplateHash":"5a732017979c"},{"runId":"93a149c0-3db8-4451-acb1-1d46ce8562a2","modelName":"Claude Opus 4.7","provider":"Anthropic","triggeredAt":"2026-04-28T12:17:05.794+00:00","noLetterCount":0,"parsingFailedCount":0,"refusalCount":0,"timeoutCount":0,"retryReleaseCount":0,"failedJobCount":0,"parserVersion":"answer-parser-v2","promptTemplateHash":"5a732017979c"},{"runId":"59734658-b01d-4e15-b087-2c9ef4e873af","modelName":"Mistral: Mistral Nemo","provider":"Mistralai","triggeredAt":"2026-05-01T08:16:19.209347+00:00","noLetterCount":0,"parsingFailedCount":0,"refusalCount":0,"timeoutCount":0,"retryReleaseCount":0,"failedJobCount":0,"parserVersion":"answer-parser-v2","promptTemplateHash":"5a732017979c"},{"runId":"3330420c-965c-4f4b-98a2-8cfe387cddd5","modelName":"GPT-4o","provider":"OpenAI","triggeredAt":"2026-04-29T06:36:02.194+00:00","noLetterCount":0,"parsingFailedCount":0,"refusalCount":0,"timeoutCount":0,"retryReleaseCount":0,"failedJobCount":0,"parserVersion":"answer-parser-v2","promptTemplateHash":"5a732017979c"},{"runId":"cd3161ad-62ac-4339-b67e-bfc902440688","modelName":"Gemini 2.5 Flash","provider":"Google","triggeredAt":"2026-04-29T06:29:03.132+00:00","noLetterCount":0,"parsingFailedCount":0,"refusalCount":0,"timeoutCount":0,"retryReleaseCount":0,"failedJobCount":0,"parserVersion":"answer-parser-v2","promptTemplateHash":"5a732017979c"},{"runId":"c07fda62-d2d1-4757-9391-e26334813ed6","modelName":"Mistral Large 3","provider":"Mistral","triggeredAt":"2026-04-28T18:19:47.815+00:00","noLetterCount":0,"parsingFailedCount":0,"refusalCount":0,"timeoutCount":0,"retryReleaseCount":0,"failedJobCount":0,"parserVersion":"answer-parser-v2","promptTemplateHash":"5a732017979c"},{"runId":"0da1dcf7-743b-4386-9622-5ca307d241d0","modelName":"Grok 4.20 Beta Reasoning","provider":"xAI","triggeredAt":"2026-04-23T19:56:13.77837+00:00","noLetterCount":38,"parsingFailedCount":0,"refusalCount":0,"timeoutCount":38,"retryReleaseCount":0,"failedJobCount":0,"parserVersion":"answer-parser-v2","promptTemplateHash":"5a732017979c"}]}