Merge branch 'main' into javiermtorres/issue-492-runtime-resources

mozilla-ai · Jan 23, 2025 · c2f1932 · c2f1932
2 parents 878b6e8 + b6493a1
commit c2f1932
Show file tree

Hide file tree

Showing 47 changed files with 511 additions and 219 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -10,6 +10,7 @@
     },
     "python.testing.pytestEnabled": true,
     "python.analysis.extraPaths": [
-        "./lumigator/python/mzai/schemas"
+        "./lumigator/python/mzai/schemas",
+        "./lumigator/python/mzai/jobs"
     ]
 }
diff --git a/Dockerfile b/Dockerfile
@@ -5,6 +5,7 @@ COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
 ADD . /mzai
 
 WORKDIR /mzai/lumigator/python/mzai/backend
+ENV PYTHONPATH=/mzai/lumigator/python/mzai/backend:/mzai/lumigator/python/mzai/jobs
 
 FROM base AS main_image
 

diff --git a/Makefile b/Makefile
@@ -136,7 +136,7 @@ test-sdk-unit:
 
 test-sdk-integration:
 	cd lumigator/python/mzai/sdk/tests; \
-	uv run pytest -o python_files="integration/test_*.py integration/*/test_*.py"
+	uv run pytest -s -o python_files="integration/test_*.py integration/*/test_*.py"
 
 test-sdk-integration-containers:
 ifeq ($(CONTAINERS_RUNNING),)
@@ -159,7 +159,8 @@ test-backend-unit:
 	RAY_HEAD_NODE_HOST=localhost \
 	RAY_DASHBOARD_PORT=8265 \
 	SQLALCHEMY_DATABASE_URL=sqlite:////tmp/local.db \
-	uv run pytest -o python_files="backend/tests/unit/*/test_*.py"
+	PYTHONPATH=../jobs:$$PYTHONPATH \
+	uv run pytest -s -o python_files="backend/tests/unit/*/test_*.py backend/tests/unit/test_*.py"
 
 test-backend-integration:
 	cd lumigator/python/mzai/backend/; \
@@ -176,6 +177,7 @@ test-backend-integration:
 	EVALUATOR_WORK_DIR=../jobs/evaluator \
 	EVALUATOR_LITE_PIP_REQS=../jobs/evaluator_lite/requirements.txt \
 	EVALUATOR_LITE_WORK_DIR=../jobs/evaluator_lite \
+	PYTHONPATH=../jobs:$$PYTHONPATH \
 	uv run pytest -s -o python_files="backend/tests/integration/*/test_*.py"
 
 test-backend-integration-containers:

diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -116,7 +116,6 @@ services:
       - AWS_DEFAULT_REGION
       - AWS_ENDPOINT_URL
       - S3_BUCKET
-      - PYTHONPATH=/mzai/lumigator/python/mzai/backend
       - EVALUATOR_PIP_REQS=/mzai/lumigator/python/mzai/jobs/evaluator/requirements.txt
       - EVALUATOR_WORK_DIR=/mzai/lumigator/python/mzai/jobs/evaluator
       # TODO: the following two rows should be renamed to EVALUATOR_*

diff --git a/docs/assets/ui_guide_steps/datasets_page.png b/docs/assets/ui_guide_steps/datasets_page.png
diff --git a/docs/assets/ui_guide_steps/datasets_table.png b/docs/assets/ui_guide_steps/datasets_table.png
diff --git a/docs/assets/ui_guide_steps/experiment_fields.png b/docs/assets/ui_guide_steps/experiment_fields.png
diff --git a/docs/assets/ui_guide_steps/experiments_completed.png b/docs/assets/ui_guide_steps/experiments_completed.png
diff --git a/docs/assets/ui_guide_steps/experiments_page.png b/docs/assets/ui_guide_steps/experiments_page.png
diff --git a/docs/assets/ui_guide_steps/experiments_running.png b/docs/assets/ui_guide_steps/experiments_running.png
diff --git a/docs/assets/ui_guide_steps/model_select.png b/docs/assets/ui_guide_steps/model_select.png
diff --git a/docs/assets/ui_guide_steps/results_overall_comparison.png b/docs/assets/ui_guide_steps/results_overall_comparison.png
diff --git a/docs/assets/ui_guide_steps/results_row_by_row.png b/docs/assets/ui_guide_steps/results_row_by_row.png
diff --git a/docs/source/conceptual-guides/new-endpoint.md b/docs/source/conceptual-guides/new-endpoint.md
@@ -1,4 +1,4 @@
-# Creating a new endpoint
+# Creating a New Endpoint
 
 The examples in the [Understanding Lumigator Endpoints](https://mozilla-ai.github.io/lumigator/conceptual-guides/endpoints.html)
 guide show the main pieces of code involved in writing a new endpoint. Let us take a toy example,

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -48,6 +48,7 @@
 myst_enable_extensions = [
     "colon_fence",
 ]
+myst_heading_anchors = 3
 
 templates_path = ["_templates"]
 source_suffix = [".rst", ".md"]

diff --git a/docs/source/get-started/quickstart.md b/docs/source/get-started/quickstart.md
@@ -215,10 +215,11 @@ user@host:~/lumigator$ curl -s "http://localhost:8000/api/v1/health/jobs/$SUBMIS
 :::{tab-item} Python SDK
 :sync: tab2
 ```python
+from pprint import pprint
 job_id = responses[0].id
 
 job = lm_client.jobs.wait_for_job(job_id)  # Create the coroutine object
-result = await job  # Await the coroutine to get the result
+pprint(job)
 ```
 :::
 

diff --git a/docs/source/get-started/suggested-models.md b/docs/source/get-started/suggested-models.md
@@ -160,7 +160,7 @@ computer. There's nothing to install or configure.
 There are no summarization-specific parameters for this model.
 
 
-# Reference models
+# Reference Models
 
 Before you jump into evaluating datasets, you should consider the following importance of quality ground-truth and annotations.
 

diff --git a/docs/source/get-started/troubleshooting.md b/docs/source/get-started/troubleshooting.md
@@ -0,0 +1,19 @@
+# Troubleshooting
+
+This section describes some common issues and how you can solve them.
+If the problem you are experiencing is not listed here, feel free to
+[browse the list of open issues](https://github.com/mozilla-ai/lumigator/issues)
+to check if it has already been discussed or otherwise open a
+[bug report](https://github.com/mozilla-ai/lumigator/issues/new?template=bug_report.yaml).
+
+## Could not install packages due to an OSError: [Errno 28] No space left on device
+
+This problem usually occurs when the disk space allocated by Docker is completely
+filled up. There are two ways to tackle this:
+
+- increase the amount of disk space available to the Docker engine. You can do it
+by opening Settings in Docker Desktop, opening the `Resources` section, and then
+increasing the value specified under `Disk usage limit`
+
+- you can remove the Docker images left dangling by our build process by running the
+`docker image prune` command on the command line.
diff --git a/docs/source/get-started/ui-guide.md b/docs/source/get-started/ui-guide.md
@@ -0,0 +1,55 @@
+# Using Lumigator UI
+Lumigator comes with a web-based UI that allows you to interact with the Lumigator API. It is designed to be easy to use and to provide a quick way to get started with Lumigator.
+
+## Getting Started
+Follow the [installation guide](installation.md#local-deployment) to get Lumigator up and running. The UI can then be accessed by visiting [localhost](http://localhost) in your web browser. You should be able to see a screen with the sections **Datasets** and **Experiments**. Let's go through each of them in detail.
+
+## Upload a Dataset
+The main purpose of Lumigator is to help you select a model that works well for your particular data. It does this by evaluating multiple models on a sample of your data: your dataset.
+
+The first step is to upload your dataset. This can be done by clicking on the **Provide Dataset** button in the **Datasets** section. This will open a dialog box where you can select the dataset file to be uploaded from your local machine. The dataset file should be in `csv` format with columns examples and (optionally) ground_truth.
+
+You can get started by uploading the [sample dataset](https://github.com/mozilla-ai/lumigator/blob/8acc92460937cb82bbef9f01a964a584d2469c3f/lumigator/python/mzai/sample_data/dialogsum_exc.csv) provided in the [Lumigator repository](https://github.com/mozilla-ai/lumigator).
+
+![Datasets Page](../../assets/ui_guide_steps/datasets_page.png)
+
+Once the dataset is uploaded, it can be viewed as a row in the table in the **Datasets** section.
+
+![Datasets Table](../../assets/ui_guide_steps/datasets_table.png)
+
+## Create and Run an Experiment
+Next we move on to the **Experiments** section. Start by clicking on the **Create Experiment** button.
+
+![Experiments Page](../../assets/ui_guide_steps/experiments_page.png)
+
+This will open a sidebar where you specify the use case, the dataset, and further experiment details. The maximum samples field defines the number of rows from the dataset to be used for the experiment; Lumigator runs inference and evaluation on those rows to produce the [relevant metrics](quickstart.md#retrieve-the-results).
+
+Below that, you have the option to select the models to be used for the experiment. Multiple models may be simultaneously selected.
+
+| ![Experiment Fields](../../assets/ui_guide_steps/experiment_fields.png) | ![Model Selection](../../assets/ui_guide_steps/model_select.png) |
+| --- | --- |
+
+```{note}
+Reminder: In order to be able to use API based models, you must have provided the respective API keys (`OPENAI_API_KEY` or `MISTRAL_API_KEY`) as environment variables before starting Lumigator. See more in the [installation guide](installation.md).
+```
+
+Finally click the **Run Experiment** button.
+
+![Experiments Running](../../assets/ui_guide_steps/experiments_running.png)
+
+The **Experiments** page gets populated with the experiment details and the status of the experiment, which would initially be `RUNNING` and changes to `SUCCEEDED` once the experiment is complete or `FAILED` if the experiment fails.
+
+![Experiments Completed](../../assets/ui_guide_steps/experiments_completed.png)
+
+## View Results
+For each experiment, you can view the sidebar, which contains a **View Results** button. Clicking this button shows a table with evaluation metrics comparing the previously selected models.
+
+![Results Overall Comparison](../../assets/ui_guide_steps/results_overall_comparison.png)
+
+You can further view the row by row predictions for each individual model by expanding under a given model. This gives you a detailed view of how the model predictions compare to the ground truth for each instance in the dataset.
+
+![Results Row by Row Comparison](../../assets/ui_guide_steps/results_row_by_row.png)
+
+---
+
+Congratulations! With that, you have successfully run your first experiment using the Lumigator UI! You can continue to run more experiments by uploading your custom datasets and selecting from among the different models available.
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -37,7 +37,9 @@ Hugging Face and local stores or accessed through APIs. It consists of:
 
    get-started/installation
    get-started/quickstart
+   get-started/ui-guide
    get-started/suggested-models
+   get-started/troubleshooting
 
 .. toctree::
    :maxdepth: 2

diff --git a/docs/source/user-guides/inference.md b/docs/source/user-guides/inference.md
@@ -91,6 +91,21 @@ Refer to the `.env.template` file in the repository for more details.
     user@host:~/lumigator$ uv run python inference.py
     ```
 
+## Model Specification
+
+Different models can be chosen for summarization. The information about those models can be retrieved via the `http://<lumigator-host>:8000/api/v1/models/summarization` endpoint. It contains the following information for each model:
+
+* `name`: an identification name for the model
+* `uri`: a URI specifying how to use the model. The following protocols are supported:
+  * `hf://`: direct model usage in an [HF pipeline](https://huggingface.co/docs/transformers/en/main_classes/pipelines)
+  * `llamafile://`: model set up with [`llamafile`](https://github.com/Mozilla-Ocho/llamafile) on its default host and port
+  * `oai://`: OpenAI or compatible external API; needs a value for the environment variable OPENAI_API_KEY with a valid key
+  * `mistral://`: Mistral or compatible external API; needs a value for the environment variable MISTRAL_API_KEY with a valid key
+* `website_url`: a link to a web page with more information about the model
+* `description`: a short description about the model
+* `info`: a map containing information about the model like parameter count or model size
+* `tasks`: a list of supported tasks, with parameters and capabilities appropriate to the task and their default or set values
+
 ## Verify
 
 Review the contents of the `results.json` file to ensure that the inference job ran

diff --git a/lumigator/frontend/src/App.vue b/lumigator/frontend/src/App.vue
@@ -221,6 +221,13 @@ onMounted(async () => {
     p {
       font-size: $l-menu-font-size;
     }
+
+    &.error {
+      color: $l-grey-100;
+      h4 {
+        font-weight: $l-font-weight-semibold;
+      }
+    }
   }
 
   .mode-icon {

diff --git a/lumigator/frontend/src/components/molecules/LDatasetTable.vue b/lumigator/frontend/src/components/molecules/LDatasetTable.vue
@@ -94,45 +94,48 @@ defineProps({
 	}
 })
 
-const emit = defineEmits(['l-delete-dataset', 'l-dataset-selected', 'l-experiment'])
+const emit = defineEmits([
+  'l-delete-dataset',
+  'l-dataset-selected',
+  'l-experiment',
+  'l-download-dataset']);
 
 const { showSlidingPanel  } = useSlidePanel();
 const style = computed(() => {
   return showSlidingPanel.value ?
     'min-width: 40vw' : 'min-width: min(80vw, 1200px)'
-})
+});
 
 const focusedItem = ref(null);
-const tableVisible = ref(true)
+const tableVisible = ref(true);
 const optionsMenu = ref();
 const options = ref([
-	{
-		label: 'Use in Experiment',
+  {
+    label: 'Use in Experiment',
     icon: 'pi pi-experiments',
     disabled: false,
-     command: () => {
+    command: () => {
       emit('l-experiment', focusedItem.value)
     }
-	},
-	{
-		separator: true,
-	},
-	{
-		label: 'View',
-		icon: 'pi pi-external-link'
-	},
-	{
-		label: 'Download',
-		icon: 'pi pi-download'
-	},
-	{
-		label: 'Delete',
+  },
+  {
+    separator: true,
+  },
+  {
+    label: 'Download',
+    icon: 'pi pi-download',
+    command: () => {
+      emit('l-download-dataset', focusedItem.value)
+    }
+  },
+  {
+    label: 'Delete',
     icon: 'pi pi-trash',
     command: () => {
       emit('l-delete-dataset', focusedItem.value)
     }
-	},
-])
+  },
+]);
 
 const ptConfigOptionsMenu = ref({
   list: 'l-dataset-table__options-menu',

diff --git a/lumigator/frontend/src/components/molecules/LModelCards.vue b/lumigator/frontend/src/components/molecules/LModelCards.vue
@@ -35,7 +35,7 @@
           @click.stop
         />
       </div>
-       <div
+      <div
         v-if="modelsRequiringAPIKey.length"
         class="l-models-list__options-container-section"
       >

diff --git a/lumigator/frontend/src/components/organisms/LDatasetDetails.vue b/lumigator/frontend/src/components/organisms/LDatasetDetails.vue
@@ -16,7 +16,7 @@
           severity="secondary"
           variant="text"
           rounded
-          @click="emit('l-download-dataset')"
+          @click="emit('l-download-dataset', selectedDataset)"
         />
         <Button
           severity="secondary"

diff --git a/lumigator/frontend/src/components/pages/LDatasets.vue b/lumigator/frontend/src/components/pages/LDatasets.vue
@@ -27,6 +27,7 @@
         :table-data="datasets"
         @l-dataset-selected="onDatasetSelected($event)"
         @l-experiment="onExperimentDataset($event)"
+        @l-download-dataset="onDownloadDataset($event)"
         @l-delete-dataset="deleteConfirmation($event)"
       />
     </div>
@@ -43,7 +44,7 @@
         @l-experiment="onExperimentDataset($event)"
         @l-details-closed="onClearSelection()"
         @l-delete-dataset="deleteConfirmation($event)"
-        @l-download-dataset="onDownloadDataset()"
+        @l-download-dataset="onDownloadDataset($event)"
       />
     </Teleport>
   </div>
@@ -62,8 +63,6 @@ import LDatasetEmpty from '@/components/molecules/LDatasetEmpty.vue';
 import LDatasetDetails from '@/components/organisms/LDatasetDetails.vue';
 import { useConfirm } from "primevue/useconfirm";
 import { useToast } from "primevue/usetoast";
-import datasetsService from "@/services/datasets/datasetsService.js";
-import {downloadContent} from "@/helpers/index.js";
 
 const datasetStore = useDatasetStore();
 const { datasets, selectedDataset } = storeToRefs(datasetStore);
@@ -110,7 +109,8 @@ function deleteConfirmation(dataset) {
   });
 }
 
-function onDownloadDataset() {
+function onDownloadDataset(dataset) {
+  selectedDataset.value = dataset;
   datasetStore.loadDatasetFile();
 }
 

diff --git a/lumigator/frontend/src/services/datasets/datasetsService.js b/lumigator/frontend/src/services/datasets/datasetsService.js
@@ -30,7 +30,7 @@ async function postDataset(formData) {
     });
     return response.data
   } catch (error) {
-    return error
+    return error.response
   }
 }
 

diff --git a/lumigator/frontend/src/stores/datasets/store.js b/lumigator/frontend/src/stores/datasets/store.js
@@ -2,10 +2,12 @@ import { ref } from 'vue';
 import { defineStore } from 'pinia'
 import datasetsService from "@/services/datasets/datasetsService";
 import {downloadContent} from "@/helpers/index.js";
+import { useToast } from "primevue/usetoast";
 
 export const useDatasetStore = defineStore('dataset', () => {
   const datasets = ref([]);
   const selectedDataset = ref(null);
+  const toast = useToast();
 
 
   async function loadDatasets() {
@@ -28,7 +30,12 @@ export const useDatasetStore = defineStore('dataset', () => {
     formData.append('format', 'job'); // Specification @localhost:8000/docs
     const uploadConfirm = await datasetsService.postDataset(formData)
     if (uploadConfirm.status) {
-      console.log('⚠️ Error', uploadConfirm.message);
+      toast.add({
+        severity: 'error',
+        summary: `${uploadConfirm.data.detail}`,
+        messageicon: 'pi pi-exclamation-triangle',
+        group: 'br',
+      })
     }
     await loadDatasets();
   }

diff --git a/lumigator/python/mzai/backend/backend/models.yaml b/lumigator/python/mzai/backend/backend/models.yaml
@@ -32,17 +32,6 @@
         no_repeat_ngram_size: 3
         num_beams: 4
 
-- name: mistralai/Mistral-7B-Instruct-v0.3
-  uri: hf://mistralai/Mistral-7B-Instruct-v0.3
-  website_url: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
-  description: Mistral-7B-Instruct-v0.3 is an instruct fine-tuned version of the Mistral-7B-v0.3.
-  info:
-    parameter_count: 7.25B
-    tensor_type: BF16
-    model_size: 14.5GB
-  tasks:
-    - summarization:
-
 - name: gpt-4o-mini
   uri: oai://gpt-4o-mini
   website_url: https://platform.openai.com/docs/models#gpt-4o-mini
-Original file line number
+Diff line change
@@ Expand Up / @@ -35,7 +35,7 @@ @@
               @click.stop
             />
           </div>
-           <div
+          <div
             v-if="modelsRequiringAPIKey.length"
             class="l-models-list__options-container-section"
           >
@@ Expand Down @@