RL Experiments #216

Draft · wants to merge 51 commits into main

Commits (51)
cf4c617  👽️ update code to integrate new bench device class (Jan 19, 2024)
bef9c3e  Merge branch 'main' into integrate_bench_update (flowerthrower, Jan 22, 2024)
f80e490  point dependency to pre-release mqt.bench (Jan 22, 2024)
f10565e  working gnn with molecule fake data (Jan 25, 2024)
931c3f5  circuit to graph (Jan 25, 2024)
b219cdb  generate graph training data (Jan 25, 2024)
c10fe73  working single graph classifier (Jan 26, 2024)
3857259  training notebook with old data (Jan 27, 2024)
4e2d5c2  working with precompiled circuits (Jan 31, 2024)
0454427  Merge branch 'cda-tum:main' into graph_classifier (flowerthrower, Jan 31, 2024)
8a997cb  dropout and normalization (Jan 31, 2024)
868458e  restructure and variable activation function (Feb 1, 2024)
6082a84  add variable readout (Feb 1, 2024)
b3b3861  add readout method to classifier (Feb 1, 2024)
b49c6ed  easier modifyable gnn (Feb 4, 2024)
489674b  first pyzx steps (Feb 4, 2024)
787e49b  👽️ update code to integrate new bench device class (Jan 19, 2024)
41f386c  point dependency to pre-release mqt.bench (Jan 22, 2024)
b09f9c5  imporve coverage (Jan 27, 2024)
d0f0faa  🐛 fix test (Feb 5, 2024)
f9e44b8  drop deprecated function (Feb 5, 2024)
6c68c90  small adjustements (Feb 8, 2024)
2571dcb  zx calculus graph (Feb 14, 2024)
2c6e2e7  cleanup (Feb 14, 2024)
e7014e1  🎨 pre-commit fixes (pre-commit-ci[bot], Feb 14, 2024)
76574cb  Merge branch 'main' into graph_classifier (flowerthrower, Feb 14, 2024)
68b2d42  Merge remote-tracking branch 'origin/integrate_bench_update' into gra… (Feb 14, 2024)
e3c985e  notebook (Feb 14, 2024)
186085c  Merge remote-tracking branch 'origin/graph_classifier' into graph_cla… (Feb 14, 2024)
f72f4d6  calm down mypy (Feb 14, 2024)
7291a08  make GNN class available for zx graphs (Feb 14, 2024)
8a8432e  clean up examples (Feb 16, 2024)
a42686e  🎨 pre-commit fixes (pre-commit-ci[bot], Feb 16, 2024)
3c63e7a  make graph features optional (Feb 16, 2024)
7c73dd5  initial working cnn for circuit (Feb 21, 2024)
fb29468  fix multi register same index error (Feb 22, 2024)
d4d5476  setup cnn lstm (Mar 4, 2024)
0c8b4a8  fully working circuit sequence CxWxH (Mar 6, 2024)
66dff33  some comments (Mar 6, 2024)
e7160ef  add features parameter to environment (Mar 8, 2024)
b7407a5  fix None parameter type (Mar 8, 2024)
530e1da  Merge branch 'main' into patrick/rl (Apr 18, 2024)
7900db5  🎨 pre-commit fixes (pre-commit-ci[bot], Apr 18, 2024)
da11497  add gnn for RL (Apr 19, 2024)
91dce53  prepare for rl-zoo (flowerthrower, Apr 19, 2024)
98a3809  🎨 pre-commit fixes (pre-commit-ci[bot], Apr 19, 2024)
48522fe  working interaction graph representation (flowerthrower, Apr 26, 2024)
911ad02  clone original torch_layers (flowerthrower, Apr 26, 2024)
53e9ef0  expose gnn config (flowerthrower, May 3, 2024)
df91f88  working classifier for each fom (flowerthrower, May 13, 2024)
2ce7405  enable tests, fix compatible versions (flowerthrower, Sep 27, 2024)
1 change: 1 addition & 0 deletions .gitignore
@@ -51,3 +51,4 @@ venv.bak/

.ruff_cache/
.mypy_cache/
+model_expected_fidelity_ionq_harmony/*
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -67,9 +67,9 @@ repos:
hooks:
- id: ruff
args: ["--fix", "--show-fixes"]
-types_or: [python, pyi, jupyter]
+types_or: [python, pyi]
- id: ruff-format
-types_or: [python, pyi, jupyter]
+types_or: [python, pyi]

# Also run Black on examples in the documentation
- repo: https://github.com/adamchainz/blacken-docs
289 changes: 287 additions & 2 deletions evaluations/supervised_ml_models/evaluation.ipynb
@@ -1,5 +1,291 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "83ad31f5",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import torch\n",
"from joblib import parallel_config\n",
"from sklearn.model_selection import GridSearchCV\n",
"from torch_geometric.data import Dataset\n",
"\n",
"from mqt.predictor import ml\n",
"from mqt.predictor.ml import GNNClassifier\n",
"\n",
"predictor = ml.Predictor()\n",
"figure_of_merit = \"critical_depth\"\n",
"color1 = \"#21918c\"\n",
"color2 = \"#440154\""
]
},
{
"cell_type": "markdown",
"id": "90a08fc4",
"metadata": {},
"source": [
"Create training data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae61e305",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"source_path = Path(\"/home/ubuntu/mqt/mqt-predictor/src/mqt/predictor/ml/training_data/training_circuits/\")\n",
"target_path = Path(\"/home/ubuntu/mqt/mqt-predictor/src/mqt/predictor/ml/training_data/training_circuits_compiled\")\n",
"\n",
"# uncomment only on first run\n",
"\n",
"# with parallel_config(backend=\"threading\", n_jobs=-1):\n",
"# training_data, name_list, scores_list = predictor.generate_trainingdata_from_qasm_files(\n",
"# figure_of_merit, path_uncompiled_circuits=source_path, path_compiled_circuits=target_path\n",
"# )\n",
"# ml.helper.save_training_data(training_data, name_list, scores_list, figure_of_merit)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d9d2c7e5",
"metadata": {},
"outputs": [],
"source": [
"training_data = predictor.get_prepared_training_data(\n",
" figure_of_merit=figure_of_merit, save_non_zero_indices=True, graph_only=True\n",
")\n",
"\n",
"X_train = training_data.X_train\n",
"X_test = training_data.X_test\n",
"y_train = training_data.y_train\n",
"y_test = training_data.y_test\n",
"indices_train = training_data.indices_train\n",
"indices_test = training_data.indices_test\n",
"names_list = training_data.names_list\n",
"scores_list = training_data.scores_list\n",
"\n",
"scores_filtered = [scores_list[i] for i in indices_test]\n",
"names_filtered = [names_list[i] for i in indices_test]\n",
"\n",
"performance = []"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1068c12f",
"metadata": {},
"outputs": [],
"source": [
"def plot_histogram(y1, y2, y1_label=\"y1\", y2_label=\"y2\") -> None:\n",
" plt.figure(figsize=(10, 5))\n",
" counts1, bins1, patches1 = plt.hist(y1, bins=30, alpha=0.5, label=y1_label)\n",
" counts2, bins2, patches2 = plt.hist(y2, bins=30, alpha=0.5, label=y2_label)\n",
" plt.xlabel(\"Value\")\n",
" plt.ylabel(\"Frequency\")\n",
" plt.legend(loc=\"upper right\")\n",
"\n",
" # Annotate y1 histogram bars\n",
" for count, bn, _patch in zip(counts1, bins1, patches1, strict=False):\n",
" plt.text(bn, count, str(int(count)), color=\"black\", ha=\"center\", va=\"bottom\")\n",
"\n",
" # Annotate y2 histogram bars\n",
" for count, bn, _patch in zip(counts2, bins2, patches2, strict=False):\n",
" plt.text(bn, count, str(int(count)), color=\"black\", ha=\"center\", va=\"bottom\")\n",
"\n",
" plt.show()\n",
"\n",
"\n",
"plot_histogram(y_train, y_test, \"y_train\", \"y_test\")"
]
},
{
"cell_type": "markdown",
"id": "790e1994",
"metadata": {},
"source": [
"# Accuracy of a classifier that only learns probability distribution"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a8c68f6",
"metadata": {},
"outputs": [],
"source": [
"class_counts = np.bincount(y_train)\n",
"relative_frequencies = class_counts / len(y_train)\n",
"\n",
"num_iterations = 1000\n",
"scores = []\n",
"\n",
"for _ in range(num_iterations):\n",
" # Sample instances according to the relative frequencies in y_train\n",
" samples = np.random.choice(7, size=len(y_test), p=relative_frequencies)\n",
"\n",
" pred = torch.tensor(samples)\n",
" labels = torch.tensor(y_test)\n",
" correct = pred.eq(labels).sum().item()\n",
" total = len(y_test)\n",
" scores.append(int(correct) / total)\n",
"\n",
"# Calculate the average score\n",
"average_score = sum(scores) / num_iterations\n",
"print(average_score)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74b29595",
"metadata": {},
"outputs": [],
"source": [
"class MyDataset(Dataset):\n",
" def __init__(self, data_list, scores_list, zx=False, max_num_of_circuits=100) -> None:\n",
" super().__init__(\".\")\n",
" self.data = []\n",
" counts = {}\n",
"\n",
" for graphs, score in zip(data_list, scores_list, strict=False):\n",
" X = graphs[1] if zx else graphs[0]\n",
" if X is None:\n",
" continue\n",
" y = torch.tensor(score)\n",
" y[y == -1] = 0 # NOTE: score 0,1 might be easier to predict -> sigmoid\n",
" label = y.argmax().item()\n",
" counts[label] = counts.get(label, 0) + 1\n",
" if counts[label] > max_num_of_circuits:\n",
" continue\n",
" X.y = y.float()\n",
" self.data.append(X)\n",
" return\n",
"\n",
" @property\n",
" def raw_file_names(self):\n",
" return []\n",
"\n",
" @property\n",
" def processed_file_names(self):\n",
" return []\n",
"\n",
" def len(self):\n",
" return len(self.data)\n",
"\n",
" def get(self, idx):\n",
" return self.data[idx]\n",
"\n",
" def process(self) -> None:\n",
" pass\n",
"\n",
"\n",
"# Either use the ZX graph or the standard graph from qiskit\n",
"use_zx_graph = False\n",
"\n",
"# Create the dataset\n",
"train_dataset = MyDataset(X_train, [scores_list[i] for i in indices_train], zx=use_zx_graph)\n",
"test_dataset = MyDataset(X_test, [scores_list[i] for i in indices_test], zx=use_zx_graph)"
]
},
{
"cell_type": "markdown",
"id": "fcfd40a6",
"metadata": {},
"source": [
"# GNN Classifier"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d6644cee",
"metadata": {},
"outputs": [],
"source": [
"clf = GNNClassifier(\n",
" num_node_categories=43, # distinct gate types (incl. 'id' and 'meas')\n",
" num_edge_categories=2, # wire features (control/target)\n",
" output_dim=7, # number of classes (devices)\n",
" zx=use_zx_graph,\n",
")\n",
"\n",
"param_grid = [\n",
" {\n",
" \"model\": [\"TransformerConv\"],\n",
" # --------------------------------\n",
" \"optimizer\": [\"adam\"],\n",
" \"learning_rate\": [1e-3],\n",
" \"batch_size\": [16],\n",
" \"epochs\": [50],\n",
" # --------------------------------\n",
" \"node_embedding_dim\": [None],\n",
" \"edge_embedding_dim\": [None],\n",
" \"num_layers\": [3],\n",
" \"hidden_dim\": [4],\n",
" \"dropout\": [0.0],\n",
" \"batch_norm\": [False],\n",
" \"activation\": [\"relu\"],\n",
" \"readout\": [\"feat-attention\"],\n",
" \"heads\": [2],\n",
" \"concat\": [True],\n",
" \"beta\": [False],\n",
" \"bias\": [True],\n",
" \"root_weight\": [True],\n",
" },\n",
" {\n",
" \"model\": [\"GAT\", \"GCN\"],\n",
" # --------------------------------\n",
" \"optimizer\": [\"adam\"],\n",
" \"learning_rate\": [1e-3],\n",
" \"batch_size\": [16],\n",
" \"epochs\": [25],\n",
" # --------------------------------\n",
" \"node_embedding_dim\": [None],\n",
" \"edge_embedding_dim\": [None],\n",
" \"num_layers\": [3],\n",
" \"hidden_dim\": [4, 8],\n",
" \"dropout\": [0.0],\n",
" \"batch_norm\": [False],\n",
" \"activation\": [\"relu\"],\n",
" \"readout\": [\"node-attention\"],\n",
" \"jk\": [None],\n",
" },\n",
"]\n",
"with parallel_config(backend=\"threading\", n_jobs=-1):\n",
" clf = GridSearchCV(clf, param_grid, cv=3, n_jobs=-1, verbose=3).fit(train_dataset)\n",
"\n",
"y_pred = np.array(list(clf.predict(test_dataset)))\n",
"res, rel_scores = predictor.calc_performance_measures(scores_filtered, y_pred, y_test)\n",
"predictor.plot_eval_histogram(res, filename=\"RandomForestClassifier\", color=color1)\n",
"rel_goodness = np.round(np.mean(rel_scores), 4)\n",
"rel_goodness_std = np.round(np.std(rel_scores), 4)\n",
"print(\"Best Accuracy: \", clf.best_score_)\n",
"top3 = (res.count(1) + res.count(2) + res.count(3)) / len(res)\n",
"print(\"Top 3: \", top3)\n",
"print(\"Rel Goodness: \", rel_goodness)\n",
"print(\"Rel Goodness Std: \", rel_goodness_std)\n",
"performance.append((\"Random Forest\", clf.best_score_, top3, max(res), rel_goodness, rel_goodness_std))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2df04b00",
"metadata": {},
"outputs": [],
"source": [
"clf.best_params_"
]
},
{
"cell_type": "markdown",
"id": "5189fbc1",
@@ -15,7 +301,6 @@
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from sklearn import svm, tree\n",
"from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier\n",
@@ -27,7 +312,7 @@
"from mqt.predictor import ml\n",
"\n",
"predictor = ml.Predictor()\n",
"figure_of_merit = \"expected_fidelity\"\n",
"figure_of_merit = \"critical_depth\"\n",
"\n",
"training_data = predictor.get_prepared_training_data(figure_of_merit=figure_of_merit, save_non_zero_indices=True)\n",
"\n",