charts: 6929
Data license: CC-BY
This data as json
id | slug | type | config | createdAt | updatedAt | lastEditedAt | publishedAt | lastEditedByUserId | publishedByUserId | isIndexable | title | subtitle | note | title_plus_variant | configWithDefaults |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
6929 | ai-performance-coding-math-knowledge-tests | LineChart | { "id": 6929, "map": { "columnSlug": "736553" }, "note": "Performance on these benchmarks should not be compared directly as they use different performance metrics and test different skills.", "slug": "ai-performance-coding-math-knowledge-tests", "title": "Top performing AI systems in coding, math, and language-based knowledge tests", "yAxis": { "max": 100, "min": 0, "facetDomain": "independent" }, "$schema": "https://files.ourworldindata.org/schemas/grapher-schema.004.json", "version": 48, "subtitle": "Coding performance is measured with the [APPS benchmark](#dod:ai-APPS); math performance with the [MATH benchmark](#dod:ai-MATH); and language-based knowledge tests with the [MMLU benchmark](#dod:ai-MMLU).", "originUrl": "https://ourworldindata.org/artificial-intelligence", "dimensions": [ { "property": "y", "variableId": 852091 }, { "property": "y", "variableId": 852092 }, { "display": { "name": "Math", "includeInTable": true }, "property": "y", "variableId": 852089 }, { "display": { "name": "Knowledge tests", "includeInTable": true }, "property": "y", "variableId": 852090 } ], "entityType": "subject", "isPublished": true, "hideTimeline": true, "addCountryMode": "disabled", "comparisonLines": [ { "label": "Math and knowledge tests: approximate score of expert human", "yEquals": "90" }, { "label": "Math: average score of 5 university students", "yEquals": "68" }, { "label": "Knowledge tests: average score of non-expert humans", "yEquals": "35" } ], "entityTypePlural": "subjects", "hideRelativeToggle": false, "selectedEntityNames": [ "State of the art" ], "selectedFacetStrategy": "none", "facettingLabelByYVariables": "skill or knowledge area", "hideAnnotationFieldsInTitle": { "time": true, "entity": true } } |
2023-07-04 14:22:59 | 2024-04-08 12:13:54 | 2024-03-26 17:35:28 | 2023-09-18 13:05:31 | 72 | 72 | 1 | Top performing AI systems in coding, math, and language-based knowledge tests | Coding performance is measured with the [APPS benchmark](#dod:ai-APPS); math performance with the [MATH benchmark](#dod:ai-MATH); and language-based knowledge tests with the [MMLU benchmark](#dod:ai-MMLU). | Performance on these benchmarks should not be compared directly as they use different performance metrics and test different skills. | Top performing AI systems in coding, math, and language-based knowledge tests () | { "$schema": "https://files.ourworldindata.org/schemas/grapher-schema.004.json", "map": { "projection": "World", "hideTimeline": false, "colorScale": { "baseColorScheme": "default", "equalSizeBins": true, "binningStrategy": "ckmeans", "customNumericColorsActive": false, "colorSchemeInvert": false, "binningStrategyBinCount": 5 }, "timeTolerance": 0, "toleranceStrategy": "closest", "tooltipUseCustomLabels": false, "time": "latest" }, "maxTime": "latest", "baseColorScheme": "default", "yAxis": { "removePointsOutsideDomain": false, "scaleType": "linear", "canChangeScaleType": false, "facetDomain": "shared" }, "tab": "chart", "matchingEntitiesOnly": false, "hasChartTab": true, "hideLegend": false, "hideLogo": false, "hideTimeline": true, "colorScale": { "baseColorScheme": "default", "equalSizeBins": true, "binningStrategy": "ckmeans", "customNumericColorsActive": false, "colorSchemeInvert": false, "binningStrategyBinCount": 5 }, "scatterPointLabelStrategy": "year", "selectedFacetStrategy": "none", "isPublished": true, "invertColorScheme": false, "version": 48, "logo": "owid", "entityType": "subject", "facettingLabelByYVariables": "skill or knowledge area", "addCountryMode": "disabled", "compareEndPointsOnly": false, "type": "LineChart", "hasMapTab": false, "stackMode": "absolute", "minTime": "earliest", "hideAnnotationFieldsInTitle": { "entity": false, "time": false, "changeInPrefix": false }, "xAxis": { "removePointsOutsideDomain": false, "scaleType": "linear", "canChangeScaleType": false, "facetDomain": "shared" }, "hideConnectedScatterLines": false, "showNoDataArea": true, "zoomToSelection": false, "showYearLabels": false, "hideLinesOutsideTolerance": false, "hideTotalValueLabel": false, "hideScatterLabels": false, "sortBy": "total", "sortOrder": "desc", "hideFacetControl": true, "entityTypePlural": "subjects", "missingDataStrategy": "auto", "id": 6929, "note": "Performance on these benchmarks should not be compared directly as they use different performance metrics and test different skills.", "slug": "ai-performance-coding-math-knowledge-tests", "title": "Top performing AI systems in coding, math, and language-based knowledge tests", "subtitle": "Coding performance is measured with the [APPS benchmark](#dod:ai-APPS); math performance with the [MATH benchmark](#dod:ai-MATH); and language-based knowledge tests with the [MMLU benchmark](#dod:ai-MMLU).", "originUrl": "https://ourworldindata.org/artificial-intelligence", "dimensions": [ { "property": "y", "variableId": 852091 }, { "property": "y", "variableId": 852092 }, { "display": { "name": "Math", "includeInTable": true }, "property": "y", "variableId": 852089 }, { "display": { "name": "Knowledge tests", "includeInTable": true }, "property": "y", "variableId": 852090 } ], "comparisonLines": [ { "label": "Math and knowledge tests: approximate score of expert human", "yEquals": "90" }, { "label": "Math: average score of 5 university students", "yEquals": "68" }, { "label": "Knowledge tests: average score of non-expert humans", "yEquals": "35" } ], "hideRelativeToggle": false, "selectedEntityNames": [ "State of the art" ] } |