diff --git a/.github/workflows/check-links-self.yaml b/.github/workflows/check-links-self.yaml
new file mode 100644
index 0000000..2c4b225
--- /dev/null
+++ b/.github/workflows/check-links-self.yaml
@@ -0,0 +1,19 @@
+# Caller workflow: delegates link checking for this repository to the shared STRIDES workflow.
+name: 'Check Links'
+
+# Triggers: manual dispatch, every push, and every pull request (no branch filters).
+on:
+  workflow_dispatch:
+  push:
+  pull_request:
+
+jobs:
+  link_check:
+    name: 'Link Check'
+    # Job-level `uses` calls a reusable workflow; the actual steps live in STRIDES/NIHCloudLab.
+    # NOTE(review): `@main` is a mutable ref - consider pinning to a tag or commit SHA.
+    uses: STRIDES/NIHCloudLab/.github/workflows/check-links.yaml@main
+    with:
+      # Empty string is passed for the ignore list.
+      # NOTE(review): the format this input expects is defined in the called workflow - confirm there.
+      repo_link_ignore_list: ""
diff --git a/.github/workflows/check_links.yml b/.github/workflows/check_links.yml
deleted file mode 100644
index 5b3d487..0000000
--- a/.github/workflows/check_links.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: Check Links
-
-on:
- push:
- branches:
- - '*'
- pull_request:
- branches:
- - '*'
-jobs:
- check-links:
- runs-on: ubuntu-latest
-
- steps:
- - name: Checkout repository
- uses: actions/checkout@v2
-
- - name: Set up Node.js
- uses: actions/setup-node@v3
- with:
- node-version: 16
-
- - name: Install dependencies
- run: |
- npm install -g markdown-link-check
-
- - name: Check links in Markdown files
- run: find . -name '*.md' -print0 | xargs -0 -n1 markdown-link-check -q -c .markdown-link-check.json
diff --git a/.github/workflows/notebook-lint-self.yaml b/.github/workflows/notebook-lint-self.yaml
new file mode 100644
index 0000000..9688b9b
--- /dev/null
+++ b/.github/workflows/notebook-lint-self.yaml
@@ -0,0 +1,24 @@
+# Caller workflow: delegates notebook linting for this repository to the shared STRIDES workflow.
+name: 'Lint Notebook'
+
+# Triggers: every push (no branch filter) and manual dispatch.
+on:
+  push:
+  workflow_dispatch:
+
+# Token scopes made available to the called workflow.
+# NOTE(review): presumably the linter pushes changes back (contents: write) and uses
+# OIDC (id-token: write) - confirm against notebook-lint.yaml and drop any scope it does not need.
+permissions:
+  contents: write
+  id-token: write
+
+jobs:
+  lint:
+    name: 'Linting'
+    # Job-level `uses` calls a reusable workflow; the actual steps live in STRIDES/NIHCloudLab.
+    # NOTE(review): `@main` is a mutable ref - consider pinning to a tag or commit SHA.
+    uses: STRIDES/NIHCloudLab/.github/workflows/notebook-lint.yaml@main
+    with:
+      # `.` - presumably the repository root as the directory to lint; verify against the called workflow.
+      directory: .
diff --git a/README.md b/README.md
index b1d9668..4b1cd7d 100644
--- a/README.md
+++ b/README.md
@@ -60,8 +60,7 @@ Microsoft has several genomics-related offerings that will be useful to many Clo
## **Genome Wide Association Studies**
Genome-wide association studies (GWAS) are large-scale investigations that analyze the genomes of many individuals to identify common genetic variants associated with traits, diseases, or other phenotypes.
-- This [NIH CFDE written tutorial](https://training.nih-cfde.org/en/latest/Bioinformatic-Analyses/GWAS-in-the-cloud
-) walks you through running a simple GWAS on AWS, thus we converted it to Azure in [this notebook](/notebooks/GWAS). Note that the CFDE page has a few other bioinformatics related tutorials like BLAST and Illumina read simulation.
+- This [NIH CFDE written tutorial](https://training.nih-cfde.org/en/latest/Bioinformatic-Analyses/GWAS-in-the-cloud/) walks you through running a simple GWAS on AWS; we converted it to run on Azure in [this notebook](/notebooks/GWAS). Note that the CFDE page also has a few other bioinformatics-related tutorials, such as BLAST and Illumina read simulation.
- This blog post [illustrates some of the costs associated](https://techcommunity.microsoft.com/t5/azure-high-performance-computing/azure-to-accelerate-genome-wide-analysis-study/ba-p/2644120) with running GWAS on Azure
## **NCBI BLAST+**
diff --git a/notebooks/GWAS/GWAS_coat_color.ipynb b/notebooks/GWAS/GWAS_coat_color.ipynb
index 5f2d245..7397b38 100644
--- a/notebooks/GWAS/GWAS_coat_color.ipynb
+++ b/notebooks/GWAS/GWAS_coat_color.ipynb
@@ -58,11 +58,7 @@
"cell_type": "code",
"execution_count": null,
"id": "8ec900bd",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"%%bash\n",
@@ -75,11 +71,7 @@
"cell_type": "code",
"execution_count": null,
"id": "4d43ae73",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"%%bash\n",
@@ -101,11 +93,7 @@
"cell_type": "code",
"execution_count": null,
"id": "b3ba3eef",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"%%bash\n",
@@ -117,14 +105,7 @@
"cell_type": "code",
"execution_count": null,
"id": "ae20d01c",
- "metadata": {
- "gather": {
- "logged": 1686580882939
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#add to your path\n",
@@ -136,11 +117,7 @@
"cell_type": "code",
"execution_count": null,
"id": "b219074a",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"! mamba install -y -c bioconda plink vcftools"
@@ -158,14 +135,7 @@
"cell_type": "code",
"execution_count": null,
"id": "e91c7a01",
- "metadata": {
- "gather": {
- "logged": 1686579597925
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"cd GWAS"
@@ -175,14 +145,7 @@
"cell_type": "code",
"execution_count": null,
"id": "9b770f7f",
- "metadata": {
- "gather": {
- "logged": 1686579600325
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"ls"
@@ -192,11 +155,7 @@
"cell_type": "code",
"execution_count": null,
"id": "6570875d",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"! vcftools --gzvcf pruned_coatColor_maf_geno.vcf.gz --plink --out coatColor"
@@ -215,14 +174,7 @@
"cell_type": "code",
"execution_count": null,
"id": "6c868a67",
- "metadata": {
- "gather": {
- "logged": 1686581972147
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#unzip vcf\n",
@@ -233,14 +185,7 @@
"cell_type": "code",
"execution_count": null,
"id": "8e11f991",
- "metadata": {
- "gather": {
- "logged": 1686581979545
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#create list of minor alleles\n",
@@ -251,11 +196,7 @@
"cell_type": "code",
"execution_count": null,
"id": "8cff47e3",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"! head minor_alleles"
@@ -273,14 +214,7 @@
"cell_type": "code",
"execution_count": null,
"id": "dafa14a6",
- "metadata": {
- "gather": {
- "logged": 1686582023237
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#calculate missingness per locus\n",
@@ -291,14 +225,7 @@
"cell_type": "code",
"execution_count": null,
"id": "5cf5f51b",
- "metadata": {
- "gather": {
- "logged": 1686582030150
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#take a look at lmiss, which is the per locus rates of missingness\n",
@@ -309,14 +236,7 @@
"cell_type": "code",
"execution_count": null,
"id": "915bb263",
- "metadata": {
- "gather": {
- "logged": 1686582034753
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#peek at imiss which is the individual rates of missingness\n",
@@ -335,11 +255,7 @@
"cell_type": "code",
"execution_count": null,
"id": "3b8f2d7f",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"! plink --file coatColor --allow-no-sex --dog --make-bed --noweb --out coatColor.binary"
@@ -357,11 +273,7 @@
"cell_type": "code",
"execution_count": null,
"id": "f926ef9b",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"! plink --bfile coatColor.binary --make-pheno coatColor.pheno \"yellow\" --assoc --reference-allele minor_alleles --allow-no-sex --adjust --dog --noweb --out coatColor"
@@ -380,11 +292,7 @@
"cell_type": "code",
"execution_count": null,
"id": "b94e1e2a",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"%%bash\n",
@@ -413,14 +321,7 @@
"cell_type": "code",
"execution_count": null,
"id": "60feed89",
- "metadata": {
- "gather": {
- "logged": 1686582094642
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#make a new library and install qqman their\n",
@@ -441,14 +342,7 @@
"cell_type": "code",
"execution_count": null,
"id": "a7e8cd2b",
- "metadata": {
- "gather": {
- "logged": 1686584355516
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#make sure you are still CD in GWAS, when you change kernel it may reset to home\n",
@@ -459,14 +353,7 @@
"cell_type": "code",
"execution_count": null,
"id": "7946a3a7",
- "metadata": {
- "gather": {
- "logged": 1686584356532
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"require(qqman, lib = Sys.getenv(\"R_LIBS_USER\"))"
@@ -476,14 +363,7 @@
"cell_type": "code",
"execution_count": null,
"id": "0d28ef2c",
- "metadata": {
- "gather": {
- "logged": 1686584364339
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"data=read.table(\"coatColor.assoc\", header=TRUE)"
@@ -493,14 +373,7 @@
"cell_type": "code",
"execution_count": null,
"id": "8e5207be",
- "metadata": {
- "gather": {
- "logged": 1686584368241
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"data=data[!is.na(data$P),]"
@@ -510,14 +383,7 @@
"cell_type": "code",
"execution_count": null,
"id": "6330b1e0",
- "metadata": {
- "gather": {
- "logged": 1686584371278
- },
- "vscode": {
- "languageId": "r"
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"manhattan(data, p = \"P\", col = c(\"blue4\", \"orange3\"),\n",
@@ -561,32 +427,7 @@
"source": []
}
],
- "metadata": {
- "kernel_info": {
- "name": "ir"
- },
- "kernelspec": {
- "display_name": "R",
- "language": "R",
- "name": "ir"
- },
- "language_info": {
- "codemirror_mode": "r",
- "file_extension": ".r",
- "mimetype": "text/x-r-source",
- "name": "R",
- "pygments_lexer": "r",
- "version": "4.3.2"
- },
- "microsoft": {
- "ms_spell_check": {
- "ms_spell_check_language": "en"
- }
- },
- "nteract": {
- "version": "nteract-front-end@1.0.0"
- }
- },
+ "metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
diff --git a/notebooks/GenAI/notebooks/AzureAIStudio_index_structured_notebook.ipynb b/notebooks/GenAI/notebooks/AzureAIStudio_index_structured_notebook.ipynb
index 0401fbc..55d9b90 100644
--- a/notebooks/GenAI/notebooks/AzureAIStudio_index_structured_notebook.ipynb
+++ b/notebooks/GenAI/notebooks/AzureAIStudio_index_structured_notebook.ipynb
@@ -59,10 +59,7 @@
"cell_type": "code",
"execution_count": null,
"id": "adb68924-26c9-4e6e-9880-2ea371c8d188",
- "metadata": {
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"pip install -U \"langchain\" \"openai\" \"langchain-openai\" \"langchain-community\""
@@ -72,10 +69,7 @@
"cell_type": "code",
"execution_count": null,
"id": "5292c962-637e-4169-ad77-2e62da44596f",
- "metadata": {
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"pip install azure-search-documents --pre --upgrade"
@@ -631,10 +625,7 @@
"cell_type": "code",
"execution_count": null,
"id": "d151e32a-ad57-4cf4-9928-8f8933e98959",
- "metadata": {
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"import json\n",
@@ -746,10 +737,7 @@
"cell_type": "code",
"execution_count": null,
"id": "69ff5e34-c60f-4e99-a3f3-29950c6e617b",
- "metadata": {
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"response = client.chat.completions.create(\n",
@@ -820,36 +808,7 @@
]
}
],
- "metadata": {
- "kernel_info": {
- "name": "python38-azureml"
- },
- "kernelspec": {
- "display_name": "Python 3.8 - AzureML",
- "language": "python",
- "name": "python38-azureml"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.5"
- },
- "microsoft": {
- "ms_spell_check": {
- "ms_spell_check_language": "en"
- }
- },
- "nteract": {
- "version": "nteract-front-end@1.0.0"
- }
- },
+ "metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
diff --git a/notebooks/GenAI/notebooks/AzureAIStudio_index_structured_with_console.ipynb b/notebooks/GenAI/notebooks/AzureAIStudio_index_structured_with_console.ipynb
index e035608..ab9557f 100644
--- a/notebooks/GenAI/notebooks/AzureAIStudio_index_structured_with_console.ipynb
+++ b/notebooks/GenAI/notebooks/AzureAIStudio_index_structured_with_console.ipynb
@@ -2,13 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"# Indexing Delimited Files on Azure AI Search using Console and Notebook"
]
@@ -52,26 +46,14 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"### Install packages"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"Use Python3 (ipykernel) kernel"
]
@@ -79,21 +61,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1707424158923
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"pip install langchain openai"
@@ -101,13 +69,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"Import libraries"
]
@@ -115,21 +77,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1707412445314
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"import os\n",
@@ -139,13 +87,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"### Connect to an index\n",
"This is the index you created via [these instructions](https://github.com/STRIDES/NIHCloudLabAzure/blob/main/docs/create_index_from_csv.md).\n",
@@ -155,21 +97,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1707412411658
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"endpoint=\"\"\n",
@@ -180,18 +108,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#connect to vector store \n",
@@ -203,13 +120,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"### Connect to your model\n",
"First, make sure you have a [model deployed](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-openai), and if not, deploy a model.\n",
@@ -219,21 +130,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1707412412208
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#connect to model\n",
@@ -249,26 +146,14 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"### Query the Vector Store"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"First, enter your question. Feel free to experiment with different variations or prompts."
]
@@ -276,18 +161,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"query = \" \\\n",
@@ -300,13 +174,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"Now we feed the query and the input embeddings to our LLM and return the results. "
]
@@ -314,18 +182,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#run query output on model\n",
@@ -373,41 +230,7 @@
"source": []
}
],
- "metadata": {
- "kernel_info": {
- "name": "python38-azureml"
- },
- "kernelspec": {
- "display_name": "Python 3.8 - AzureML",
- "language": "python",
- "name": "python38-azureml"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.5"
- },
- "microsoft": {
- "host": {
- "AzureML": {
- "notebookHasBeenCompleted": true
- }
- },
- "ms_spell_check": {
- "ms_spell_check_language": "en"
- }
- },
- "nteract": {
- "version": "nteract-front-end@1.0.0"
- }
- },
+ "metadata": {},
"nbformat": 4,
"nbformat_minor": 4
}
diff --git a/notebooks/GenAI/notebooks/AzureAIStudio_langchain.ipynb b/notebooks/GenAI/notebooks/AzureAIStudio_langchain.ipynb
index 65d8b58..5f8c6b3 100644
--- a/notebooks/GenAI/notebooks/AzureAIStudio_langchain.ipynb
+++ b/notebooks/GenAI/notebooks/AzureAIStudio_langchain.ipynb
@@ -2,26 +2,14 @@
"cells": [
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"# Use Azure AI Studio LLMs from a notebook using Langchain"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"## Overview\n",
"Models you deploy to your Azure AI Studio can be accessed via API calls. [Langchain](https://python.langchain.com/docs/get_started/introduction) is a development framework for applications power by language models. \n",
@@ -55,13 +43,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"### Install packages"
]
@@ -69,18 +51,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"# pip install other packages as needed\n",
@@ -89,13 +60,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"### Load libraries"
]
@@ -103,21 +68,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1699910512197
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"import os\n",
@@ -148,13 +99,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"Try loading in your deployed model and running a simple prompt"
]
@@ -162,21 +107,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1699909844481
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"import os\n",
@@ -191,13 +122,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"Run the prompt against the LLM"
]
@@ -205,21 +130,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1699909895704
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"message = HumanMessage(\n",
@@ -230,26 +141,14 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"### Use the LLM to summarize a scientific document"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"Now let's load in a scientific document to run a query against. Read more about document loaders from langchain [here](https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf). Note that we are both loading, and splitting our document. You can read more about the default document chunking/splitting procedures [here](https://python.langchain.com/docs/modules/data_connection/document_transformers/#get-started-with-text-splitters)."
]
@@ -257,21 +156,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1699909908495
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"loader = WebBaseLoader(\"https://pubmed.ncbi.nlm.nih.gov/37883540/\")\n",
@@ -280,13 +165,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"Define that we want to use [stuffing](https://python.langchain.com/docs/modules/chains/document/stuff) to summarize the document."
]
@@ -294,21 +173,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1699909918382
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"chain = load_summarize_chain(model, chain_type=\"stuff\")\n",
@@ -333,41 +198,7 @@
]
}
],
- "metadata": {
- "kernel_info": {
- "name": "python310-sdkv2"
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.5"
- },
- "microsoft": {
- "host": {
- "AzureML": {
- "notebookHasBeenCompleted": true
- }
- },
- "ms_spell_check": {
- "ms_spell_check_language": "en"
- }
- },
- "nteract": {
- "version": "nteract-front-end@1.0.0"
- }
- },
+ "metadata": {},
"nbformat": 4,
"nbformat_minor": 4
}
diff --git a/notebooks/GenAI/notebooks/AzureAIStudio_sql_chatbot.ipynb b/notebooks/GenAI/notebooks/AzureAIStudio_sql_chatbot.ipynb
index 454c4d6..322487e 100644
--- a/notebooks/GenAI/notebooks/AzureAIStudio_sql_chatbot.ipynb
+++ b/notebooks/GenAI/notebooks/AzureAIStudio_sql_chatbot.ipynb
@@ -67,13 +67,7 @@
{
"cell_type": "markdown",
"id": "79baaa6a-b851-45b5-9002-68af981fb145",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"### Install packages "
]
@@ -467,9 +461,7 @@
"cell_type": "code",
"execution_count": null,
"id": "c37e37ca-f6f0-4f5b-a94f-5906a25d6681",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"py_connectionString=f\"mssql+pyodbc://{username}:{password}@{server_name}.database.windows.net/{database}?driver={driver}\"\n",
@@ -689,36 +681,7 @@
]
}
],
- "metadata": {
- "kernel_info": {
- "name": "python310-sdkv2"
- },
- "kernelspec": {
- "display_name": "Python 3.10 - SDK v2",
- "language": "python",
- "name": "python310-sdkv2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.11"
- },
- "microsoft": {
- "ms_spell_check": {
- "ms_spell_check_language": "en"
- }
- },
- "nteract": {
- "version": "nteract-front-end@1.0.0"
- }
- },
+ "metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
diff --git a/notebooks/GenAI/notebooks/AzureOpenAI_embeddings.ipynb b/notebooks/GenAI/notebooks/AzureOpenAI_embeddings.ipynb
index 5958f36..e483057 100644
--- a/notebooks/GenAI/notebooks/AzureOpenAI_embeddings.ipynb
+++ b/notebooks/GenAI/notebooks/AzureOpenAI_embeddings.ipynb
@@ -2,13 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"# Access Azure OpenAI LLMs from a notebook "
]
@@ -49,13 +43,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"### Install packages"
]
@@ -63,11 +51,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "gather": {
- "logged": 1696341373678
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"pip install \"openai\" \"requests\""
@@ -75,48 +59,22 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"### Run a query on a local csv file by creating local embeddings"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"Import required libraries"
]
},
{
"cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1696365118786
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"import os\n",
@@ -129,26 +87,14 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"You also need to [deploy a new model](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal#deploy-a-model). You need to select and deploy `text-embedding-ada-0021`. If you get an error downstream about your model not being ready, give it up to five minutes for everything to sync. "
]
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"For simplicity, we just use a microsoft example here, but you could theoretically use any csv file as long as you match the expected format of the downstream code. This example is a recent earning report given by the CEO of Microsoft. "
]
@@ -156,21 +102,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1696367383849
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"# read the data file to be embedded\n",
@@ -181,21 +113,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1696367387035
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"# set keys and configure Azure OpenAI\n",
@@ -235,21 +153,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1696367395456
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"# calculate word embeddings\n",
@@ -260,13 +164,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"Query the embeddings. After each query you put into the little box, you need to rerun this cell to reset the query. "
]
@@ -274,21 +172,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1696346882392
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"# read in the embeddings .csv \n",
@@ -315,26 +199,14 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"### Query your own data"
]
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"In the README, we show how to add your own data. Go to **Azure AI Search** click on the your service you created to fill in the AI search variable needed below. When you have done this, type in a query, and then similar to what we show above you will get a response."
]
@@ -342,17 +214,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"endpoint = os.getenv(\"AZURE_OPENAI_ENDPOINT\")\n",
@@ -367,13 +229,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"Now run the query, note that the query is defined in the block below within the `message` parameter."
]
@@ -381,20 +237,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1696353881797
- },
- "jupyter": {
- "outputs_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"dotenv.load_dotenv()\n",
@@ -435,13 +278,7 @@
},
{
"cell_type": "markdown",
- "metadata": {
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"source": [
"## Conclusion\n",
"In this notebook you learned how to feed a PDF document directly to an LLM that you deployed in the Azure console and summarize the document."
@@ -463,41 +300,7 @@
"source": []
}
],
- "metadata": {
- "kernel_info": {
- "name": "python310-sdkv2"
- },
- "kernelspec": {
- "display_name": "Python 3.10 - SDK v2",
- "language": "python",
- "name": "python310-sdkv2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.11"
- },
- "microsoft": {
- "host": {
- "AzureML": {
- "notebookHasBeenCompleted": true
- }
- },
- "ms_spell_check": {
- "ms_spell_check_language": "en"
- }
- },
- "nteract": {
- "version": "nteract-front-end@1.0.0"
- }
- },
+ "metadata": {},
"nbformat": 4,
"nbformat_minor": 4
}
diff --git a/notebooks/GenAI/notebooks/Pubmed_RAG_chatbot.ipynb b/notebooks/GenAI/notebooks/Pubmed_RAG_chatbot.ipynb
index 72c005d..a9f0760 100644
--- a/notebooks/GenAI/notebooks/Pubmed_RAG_chatbot.ipynb
+++ b/notebooks/GenAI/notebooks/Pubmed_RAG_chatbot.ipynb
@@ -133,10 +133,7 @@
"cell_type": "code",
"execution_count": null,
"id": "d8c1803b-829a-4256-a88c-1f4b57372ba2",
- "metadata": {
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"#uncomment if update is needed\n",
@@ -147,18 +144,7 @@
"cell_type": "code",
"execution_count": null,
"id": "abddde62-f269-454e-bea6-538bd4267277",
- "metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#Authenticate to use azure cli\n",
@@ -185,23 +171,7 @@
"cell_type": "code",
"execution_count": null,
"id": "43cdc419-25e5-4ba8-b836-a13b2ad77a26",
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1701806019922
- },
- "jupyter": {
- "outputs_hidden": false,
- "source_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- },
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"location = '' #(e.g.,eastus2)\n",
@@ -335,11 +305,7 @@
"cell_type": "code",
"execution_count": null,
"id": "7b395e34-062d-4f77-afee-3601d471954a",
- "metadata": {
- "gather": {
- "logged": 1701794361537
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"#download the metadata file\n",
@@ -358,12 +324,7 @@
"cell_type": "code",
"execution_count": null,
"id": "c26b0f29-2b07-43a6-800d-4aa5e957fe52",
- "metadata": {
- "gather": {
- "logged": 1701794425470
- },
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"#import the file as a dataframe\n",
@@ -386,11 +347,7 @@
"cell_type": "code",
"execution_count": null,
"id": "ff77b2aa-ed1b-4d27-8163-fdaa7a304582",
- "metadata": {
- "gather": {
- "logged": 1701794430114
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"first_50.head()"
@@ -841,9 +798,7 @@
"cell_type": "code",
"execution_count": null,
"id": "4a11c98f-463b-48fd-84f7-f2b99f87d992",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"from langchain_community.document_loaders import AzureBlobStorageContainerLoader\n",
@@ -1136,10 +1091,7 @@
"cell_type": "code",
"execution_count": null,
"id": "27eff5e6-ff27-4ea8-9e6a-c0c5c05a245a",
- "metadata": {
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"vector_store.add_texts(texts=texts, metadatas=metadatas)"
@@ -1148,9 +1100,7 @@
{
"cell_type": "markdown",
"id": "07b3bc6b-8c43-476f-a662-abda830dc2da",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"source": [
"### Creating an inference script "
]
@@ -1185,9 +1135,7 @@
{
"cell_type": "markdown",
"id": "6f0ad48d-c6c8-421a-a48b-88e979d15b57",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"source": [
"```python\n",
"from langchain_community.retrievers import PubMedRetriever\n",
@@ -1212,9 +1160,7 @@
{
"cell_type": "markdown",
"id": "decbb901-f811-4b8e-a956-4c8c7f914ae2",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"source": [
"```python\n",
"class bcolors:\n",
@@ -1275,9 +1221,7 @@
{
"cell_type": "markdown",
"id": "8cadb1af-2c46-4ab1-92f9-6e0861f83324",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"source": [
"```python\n",
"llm = AzureChatOpenAI(\n",
@@ -1302,9 +1246,7 @@
{
"cell_type": "markdown",
"id": "21c61724-23d3-4b49-8c72-cbd208bdb5df",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"source": [
"```python\n",
"retriever= PubMedRetriever()\n",
@@ -1338,9 +1280,7 @@
{
"cell_type": "markdown",
"id": "c0316dc5-6274-4a5e-92e4-3d266ed6a4df",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"source": [
"```python\n",
"prompt_template = \"\"\"\n",
@@ -1508,9 +1448,7 @@
"cell_type": "code",
"execution_count": null,
"id": "ba97df23-6893-438d-8a67-cb7dbf83e407",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"#retreive info to allow langchain to connect to Azure Search AI\n",
@@ -1539,9 +1477,7 @@
{
"cell_type": "markdown",
"id": "bbe127e6-c0b1-4e07-ad56-38c30a9bf858",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"source": [
"You should see similar results on the terminal. In this example we ask the chatbot to summarize one of our documents!"
]
@@ -1622,43 +1558,7 @@
"source": []
}
],
- "metadata": {
- "environment": {
- "kernel": "python3",
- "name": "common-cpu.m113",
- "type": "gcloud",
- "uri": "gcr.io/deeplearning-platform-release/base-cpu:m113"
- },
- "kernel_info": {
- "name": "python3"
- },
- "kernelspec": {
- "display_name": "Python 3.8 - AzureML",
- "language": "python",
- "name": "python38-azureml"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.5"
- },
- "microsoft": {
- "ms_spell_check": {
- "ms_spell_check_language": "en"
- }
- },
- "nteract": {
- "version": "nteract-front-end@1.0.0"
- },
- "toc-autonumbering": false
- },
+ "metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
diff --git a/notebooks/GenAI/notebooks/sharepoint_RAG_bot.ipynb b/notebooks/GenAI/notebooks/sharepoint_RAG_bot.ipynb
index 9ab1076..d5f2629 100644
--- a/notebooks/GenAI/notebooks/sharepoint_RAG_bot.ipynb
+++ b/notebooks/GenAI/notebooks/sharepoint_RAG_bot.ipynb
@@ -79,9 +79,7 @@
"cell_type": "code",
"execution_count": null,
"id": "cde8da16-327b-415f-9886-00aab1b4884a",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"pip install \"openai\" \"requests\" \"python-dotenv\""
@@ -147,9 +145,7 @@
"cell_type": "code",
"execution_count": null,
"id": "de4f3e7d-2bf5-485b-8c43-6bf4bb69e85f",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
@@ -404,9 +400,7 @@
"cell_type": "code",
"execution_count": null,
"id": "5333cf56-ab5d-44a1-8568-8121f14f3de4",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"#INDEX\n",
@@ -437,9 +431,7 @@
"cell_type": "code",
"execution_count": null,
"id": "7bd80f81-c6e1-4466-878c-93efad687b9e",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"#INDEXER\n",
@@ -488,9 +480,7 @@
"cell_type": "code",
"execution_count": null,
"id": "28c14d13-4c21-4c9f-bb38-96d952747427",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"import requests\n",
@@ -560,6 +550,7 @@
{
"cell_type": "code",
"execution_count": null,
+ "id": "b84b0c55",
"metadata": {},
"outputs": [],
"source": [
@@ -689,25 +680,7 @@
]
}
],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3.10 - SDK v2",
- "language": "python",
- "name": "python310-sdkv2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.11"
- }
- },
+ "metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
diff --git a/notebooks/SpleenLiverSegmentation/SpleenSeg_Pretrained-4_27.ipynb b/notebooks/SpleenLiverSegmentation/SpleenSeg_Pretrained-4_27.ipynb
index 93d2ce3..5955865 100644
--- a/notebooks/SpleenLiverSegmentation/SpleenSeg_Pretrained-4_27.ipynb
+++ b/notebooks/SpleenLiverSegmentation/SpleenSeg_Pretrained-4_27.ipynb
@@ -820,30 +820,7 @@
]
}
],
- "metadata": {
- "environment": {
- "name": "pytorch-gpu.1-9.m75",
- "type": "gcloud",
- "uri": "gcr.io/deeplearning-platform-release/pytorch-gpu.1-9:m75"
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.5"
- }
- },
+ "metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
diff --git a/notebooks/pangolin/pangolin_pipeline.ipynb b/notebooks/pangolin/pangolin_pipeline.ipynb
index c9d56f3..84b09c8 100644
--- a/notebooks/pangolin/pangolin_pipeline.ipynb
+++ b/notebooks/pangolin/pangolin_pipeline.ipynb
@@ -68,10 +68,7 @@
"cell_type": "code",
"execution_count": null,
"id": "a19b662e",
- "metadata": {
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"! curl -L -O https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh\n",
@@ -115,10 +112,7 @@
"cell_type": "code",
"execution_count": null,
"id": "fd936fd6",
- "metadata": {
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"! mamba install ipyrad iqtree -c conda-forge -c bioconda -y"
@@ -250,9 +244,7 @@
{
"cell_type": "markdown",
"id": "2db37b4e",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"source": [
"### Run pangolin to identify lineages and output alignment\n",
"Here we call pangolin, give it our input sequences and the number of threads. We also tell it to output the alignment. The full list of pangolin parameters can be found in the [docs](https://cov-lineages.org/resources/pangolin/usage.html)."
@@ -364,31 +356,7 @@
]
}
],
- "metadata": {
- "environment": {
- "kernel": "python3",
- "name": "r-cpu.4-1.m87",
- "type": "gcloud",
- "uri": "gcr.io/deeplearning-platform-release/r-cpu.4-1:m87"
- },
- "kernelspec": {
- "display_name": "Python 3.10 - SDK v2",
- "language": "python",
- "name": "python310-sdkv2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.11"
- }
- },
+ "metadata": {},
"nbformat": 4,
"nbformat_minor": 5
}
diff --git a/notebooks/rnaseq-myco-tutorial-main/RNAseq_pipeline.ipynb b/notebooks/rnaseq-myco-tutorial-main/RNAseq_pipeline.ipynb
index 3bf4e27..0eb598c 100644
--- a/notebooks/rnaseq-myco-tutorial-main/RNAseq_pipeline.ipynb
+++ b/notebooks/rnaseq-myco-tutorial-main/RNAseq_pipeline.ipynb
@@ -84,9 +84,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"! curl -L -O https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh\n",
@@ -95,12 +93,8 @@
},
{
"cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "gather": {
- "logged": 1682515170386
- }
- },
+ "execution_count": null,
+ "metadata": {},
"outputs": [],
"source": [
"#add to your path\n",
@@ -127,10 +121,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"! mamba install -c conda-forge -c bioconda -c defaults -y sra-tools pigz pbzip2 fastp fastqc multiqc salmon"
@@ -145,7 +136,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -200,20 +191,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1682517580413
- },
- "jupyter": {
- "outputs_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"ls data/raw_fastq"
@@ -229,17 +207,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "jupyter": {
- "outputs_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"! fastp -i data/raw_fastq/SRR13349122_1.fastq -I data/raw_fastq/SRR13349122_2.fastq -o data/trimmed/SRR13349122_1_trimmed.fastq -O data/trimmed/SRR13349122_2_trimmed.fastq\n",
@@ -284,11 +252,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "gather": {
- "logged": 1682517201690
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"! multiqc -f data/fastqc -f\n",
@@ -322,10 +286,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"%%bash\n",
@@ -336,20 +297,7 @@
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": false,
- "gather": {
- "logged": 1682518630201
- },
- "jupyter": {
- "outputs_hidden": false
- },
- "nteract": {
- "transient": {
- "deleting": false
- }
- }
- },
+ "metadata": {},
"outputs": [],
"source": [
"ls data/quants/"
@@ -458,36 +406,7 @@
"source": []
}
],
- "metadata": {
- "kernel_info": {
- "name": "python3"
- },
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.8.5"
- },
- "microsoft": {
- "ms_spell_check": {
- "ms_spell_check_language": "en"
- }
- },
- "nteract": {
- "version": "nteract-front-end@1.0.0"
- }
- },
+ "metadata": {},
"nbformat": 4,
"nbformat_minor": 4
}