diff --git a/CHANGELOG.md b/CHANGELOG.md index fcc445622..680541ff5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ -## Unreleased +## v0.3.64 (14 February 2025) - [Reference documentation](https://inspect.ai-safety-institute.org.uk/reference/) for Python API and CLI commands. - Add support for [clustered standard errors](https://inspect.ai-safety-institute.org.uk/scorers.html#clustered-standard-errors) via a new `cluster` parameter for the `stderr()` metric. diff --git a/CITATION.cff b/CITATION.cff index e8d88bc98..6bb07f5ae 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,13 +1,13 @@ cff-version: 1.2.0 -title: 'Inspect AI: Framework for Large Language Model Evaluations' +title: "Inspect AI: Framework for Large Language Model Evaluations" message: >- If you cite this software, please do so using the metadata from this file. type: software authors: - - name: UK AI Safety Institute - website: 'https://www.aisi.gov.uk/' -repository-code: 'https://github.com/UKGovernmentBEIS/inspect_ai' -url: 'https://inspect.ai-safety-institute.org.uk/' + - name: UK AI Security Institute + website: "https://www.aisi.gov.uk/" +repository-code: "https://github.com/UKGovernmentBEIS/inspect_ai" +url: "https://inspect.ai-safety-institute.org.uk/" license: MIT date-released: "2024-05-10" diff --git a/LICENSE b/LICENSE index 5147fac72..72fc87742 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 UK AI Safety Institute +Copyright (c) 2024 UK AI Security Institute Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index fbfc1bb64..5029d314f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -[](https://aisi.gov.uk/) +[](https://aisi.gov.uk/) -Welcome to Inspect, a framework for large language model evaluations created by the [UK AI Safety Institute](https://aisi.gov.uk/). +Welcome to Inspect, a framework for large language model evaluations created by the [UK AI Security Institute](https://aisi.gov.uk/). Inspect provides many built-in components, including facilities for prompt engineering, tool usage, multi-turn dialog, and model graded evaluations. Extensions to Inspect (e.g. to support new elicitation and scoring techniques) can be provided by other Python packages. diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 06ab50e67..2cf434d5e 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -35,7 +35,7 @@ website: title: "Inspect AI" background: light search: true - logo: images/aisi-logo.png + logo: images/aisi-logo.svg logo-href: https://www.aisi.gov.uk/ left: - text: "User Guide" @@ -102,7 +102,7 @@ website: page-footer: left: - - text: UK AI Safety Institute + - text: UK AI Security Institute href: https://aisi.gov.uk/ center: - text: Code @@ -116,8 +116,8 @@ website: right: - icon: twitter - href: https://twitter.com/AISafetyInst - aria-label: UK AI Safety Institute Twitter + href: https://x.com/sciTechgovuk + aria-label: UK AI Security Institute Twitter - icon: github href: https://github.com/UKGovernmentBEIS/inspect_ai/ aria-label: Inspect on GitHub diff --git a/docs/images/aisi-logo.png b/docs/images/aisi-logo.png deleted file mode 100644 index 131a7e149..000000000 Binary files a/docs/images/aisi-logo.png and /dev/null differ diff --git a/docs/images/aisi-logo.svg b/docs/images/aisi-logo.svg new file mode 100644 index 000000000..59b03e2cb --- /dev/null +++ b/docs/images/aisi-logo.svg @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/index.qmd b/docs/index.qmd index a91efa307..dc41f4532 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -2,9 +2,9 @@ title: Inspect subtitle: An open-source framework for large language model evaluations citation: - id: "UK_AI_Safety_Institute_Inspect_AI_Framework_2024" + id: "UK_AI_Security_Institute_Inspect_AI_Framework_2024" title: "Inspect AI: Framework for Large Language Model Evaluations" - author: "UK AI Safety Institute" + author: "UK AI Security Institute" issued: 2024-05 url: "https://github.com/UKGovernmentBEIS/inspect_ai" type: "software" @@ -12,7 +12,7 @@ citation: ## Welcome -Welcome to Inspect, a framework for large language model evaluations created by the [UK AI Safety Institute](https://aisi.gov.uk). +Welcome to Inspect, a framework for large language model evaluations created by the [UK AI Security Institute](https://aisi.gov.uk). Inspect provides many built-in components, including facilities for prompt engineering, tool usage, multi-turn dialog, and model graded evaluations. Extensions to Inspect (e.g. to support new elicitation and scoring techniques) can be provided by other Python packages. diff --git a/docs/llms.txt b/docs/llms.txt index 4cfce7a1e..df78d3d38 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -1,6 +1,6 @@ # Inspect AI -> Inspect AI is a Python framework for large language model evaluations created by the [UK AI Safety Institute](https://aisi.gov.uk). Inspect provides many built-in components, including facilities for prompt engineering, tool usage, multi-turn dialog, and model graded evaluations. Extensions to Inspect (e.g. to support new elicitation and scoring techniques) can be provided by other Python packages. +> Inspect AI is a Python framework for large language model evaluations created by the [UK AI Security Institute](https://aisi.gov.uk). Inspect provides many built-in components, including facilities for prompt engineering, tool usage, multi-turn dialog, and model graded evaluations. Extensions to Inspect (e.g. to support new elicitation and scoring techniques) can be provided by other Python packages. ## Docs diff --git a/docs/scorers.qmd b/docs/scorers.qmd index 7dd6b9cbf..523afee5e 100644 --- a/docs/scorers.qmd +++ b/docs/scorers.qmd @@ -535,14 +535,6 @@ Inspect includes some simple built in metrics for calculating accuracy, mean, et #### Clustered Standard Errors -::: {.callout-note appearance="simple"} -The clustered standard errors feature described below is currently available only in the development version of Inspect. To install the development version from GitHub: - -``` bash -pip install git+https://github.com/UKGovernmentBEIS/inspect_ai -``` -::: - The `stderr()` metric supports computing [clustered standard errors](https://en.wikipedia.org/wiki/Clustered_standard_errors) via the `cluster` parameter. Most scorers already include `stderr()` as a built-in metric, so to compute clustered standard errors you'll want to specify custom `metrics` for your task (which will override the scorer's built in metrics). For example, let's say you wanted to cluster on a "category" variable defined in `Sample` metadata: @@ -662,14 +654,6 @@ def mean_score() -> ScoreReducer: ## Workflow {#sec-scorer-workflow} -::: {.callout-note appearance="simple"} -The `inspect score` command and `score()` function as described below are currently available only in the development version of Inspect. To install the development version from GitHub: - -``` bash -pip install git+https://github.com/UKGovernmentBEIS/inspect_ai -``` -::: - ### Unscored Evals By default, model output in evaluations is automatically scored. However, you can defer scoring by using the `--no-score` option. For example: diff --git a/pyproject.toml b/pyproject.toml index d027291ab..8c9c74550 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,7 +83,7 @@ ignore = ["W002", "W009"] [project] name = "inspect_ai" description = "Framework for large language model evaluations" -authors = [{ name = "UK AI Safety Institute" }] +authors = [{ name = "UK AI Security Institute" }] readme = "README.md" requires-python = ">=3.10" license = { text = "MIT License" } diff --git a/src/inspect_ai/_util/constants.py b/src/inspect_ai/_util/constants.py index 55fe40ad6..efb534e26 100644 --- a/src/inspect_ai/_util/constants.py +++ b/src/inspect_ai/_util/constants.py @@ -1,7 +1,7 @@ from pathlib import Path from typing import Literal -PKG_AUTHOR = "UK AI Safety Institute" +PKG_AUTHOR = "UK AI Security Institute" PKG_AUTHOR_DIR = "UK-AISI" PKG_NAME = Path(__file__).parent.parent.stem PKG_PATH = Path(__file__).parent.parent diff --git a/tools/vscode/LICENSE b/tools/vscode/LICENSE index f6816e614..5edfd1c6a 100644 --- a/tools/vscode/LICENSE +++ b/tools/vscode/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 UK AI Safety Institute +Copyright (c) 2024 UK AI Security Institute Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/tools/vscode/package.json b/tools/vscode/package.json index 7d7e237a5..ec57769f5 100644 --- a/tools/vscode/package.json +++ b/tools/vscode/package.json @@ -5,7 +5,7 @@ "publisher": "ukaisi", "icon": "assets/logo/inspect.png", "author": { - "name": "UK AI Safety Institute" + "name": "UK AI Security Institute" }, "version": "0.3.53", "license": "MIT",