From 0c60ea798492a5f4e3cbdb755ec876bbe3ed25b6 Mon Sep 17 00:00:00 2001 From: Etienne Boileau Date: Wed, 7 Feb 2024 17:15:48 +0100 Subject: [PATCH] UPD Alembic clean start, FIX data model, WIP test fail --- server/migrations/dumps/scimodom_v.1.sql | 405 ------------------ .../versions/1c61e979733b_euf_v1_7.py | 30 -- .../versions/2702dc292599_add_annotation.py | 84 ---- .../versions/46cf79e27b16_scimodom_v_1.py | 29 -- .../4cf76bf1e874_simplify_annotation.py | 133 ------ .../5800126056c3_add_project_source.py | 48 --- .../665ece797465_scimodom_schema_v2.py | 36 -- .../migrations/versions/71054e891d6e_setup.py | 362 ++++++++++++++++ .../versions/79fa0c30513f_drop_annotation.py | 50 --- .../versions/a2107e9c03fc_dataset_id_str.py | 119 ----- .../versions/abbad4ef0a9c_fix_minor_v1.py | 118 ----- .../b60acf426325_upd_genomic_annotation.py | 64 --- .../be9340f7ff6e_scimodom_schema_v3.py | 52 --- server/src/scimodom/api/queries.py | 35 +- server/src/scimodom/database/database.py | 12 +- server/src/scimodom/database/models.py | 311 ++++++++------ server/src/scimodom/services/annotation.py | 11 +- server/src/scimodom/services/dataset.py | 122 +++--- server/src/scimodom/services/importer.py | 48 ++- server/src/scimodom/services/project.py | 36 +- server/src/scimodom/utils/models.py | 73 ++-- 21 files changed, 735 insertions(+), 1443 deletions(-) delete mode 100644 server/migrations/dumps/scimodom_v.1.sql delete mode 100644 server/migrations/versions/1c61e979733b_euf_v1_7.py delete mode 100644 server/migrations/versions/2702dc292599_add_annotation.py delete mode 100644 server/migrations/versions/46cf79e27b16_scimodom_v_1.py delete mode 100644 server/migrations/versions/4cf76bf1e874_simplify_annotation.py delete mode 100644 server/migrations/versions/5800126056c3_add_project_source.py delete mode 100644 server/migrations/versions/665ece797465_scimodom_schema_v2.py create mode 100644 server/migrations/versions/71054e891d6e_setup.py delete mode 100644 server/migrations/versions/79fa0c30513f_drop_annotation.py delete mode 100644 server/migrations/versions/a2107e9c03fc_dataset_id_str.py delete mode 100644 server/migrations/versions/abbad4ef0a9c_fix_minor_v1.py delete mode 100644 server/migrations/versions/b60acf426325_upd_genomic_annotation.py delete mode 100644 server/migrations/versions/be9340f7ff6e_scimodom_schema_v3.py diff --git a/server/migrations/dumps/scimodom_v.1.sql b/server/migrations/dumps/scimodom_v.1.sql deleted file mode 100644 index 96a8b6e5..00000000 --- a/server/migrations/dumps/scimodom_v.1.sql +++ /dev/null @@ -1,405 +0,0 @@ --- MariaDB dump 10.19 Distrib 10.5.19-MariaDB, for debian-linux-gnu (x86_64) --- --- Host: localhost Database: scimodom --- ------------------------------------------------------ --- Server version 10.5.19-MariaDB-0+deb11u2 - -/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; -/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; -/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; -/*!40101 SET NAMES utf8mb4 */; -/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; -/*!40103 SET TIME_ZONE='+00:00' */; -/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; -/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; -/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; -/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; - --- --- Table structure for table `assembly` --- - -DROP TABLE IF EXISTS `assembly`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `assembly` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `name` varchar(128) NOT NULL, - `taxa_id` int(11) NOT NULL, - PRIMARY KEY (`id`), - KEY `taxa_id` (`taxa_id`), - CONSTRAINT `assembly_ibfk_1` FOREIGN KEY (`taxa_id`) REFERENCES `ncbi_taxa` (`id`) -) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `assembly` --- - -LOCK TABLES `assembly` WRITE; -/*!40000 ALTER TABLE `assembly` DISABLE KEYS */; -INSERT INTO `assembly` VALUES (1,'GRCh38',9606),(2,'GRCm38',10090); -/*!40000 ALTER TABLE `assembly` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `association` --- - -DROP TABLE IF EXISTS `association`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `association` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `dataset_id` int(11) NOT NULL, - `selection_id` int(11) NOT NULL, - PRIMARY KEY (`id`), - UNIQUE KEY `dataset_id` (`dataset_id`,`selection_id`), - KEY `selection_id` (`selection_id`), - CONSTRAINT `association_ibfk_1` FOREIGN KEY (`dataset_id`) REFERENCES `dataset` (`id`), - CONSTRAINT `association_ibfk_2` FOREIGN KEY (`selection_id`) REFERENCES `selection` (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `association` --- - -LOCK TABLES `association` WRITE; -/*!40000 ALTER TABLE `association` DISABLE KEYS */; -/*!40000 ALTER TABLE `association` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `data` --- - -DROP TABLE IF EXISTS `data`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `data` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `dataset_id` int(11) NOT NULL, - `chrom` varchar(128) NOT NULL, - `start` int(11) NOT NULL, - `end` int(11) NOT NULL, - `name` varchar(32) NOT NULL, - `score` int(11) NOT NULL, - `strand` varchar(1) NOT NULL, - `thick_start` int(11) NOT NULL, - `thick_end` int(11) NOT NULL, - `item_rgb` varchar(128) NOT NULL, - `coverage` int(11) NOT NULL, - `frequency` int(11) NOT NULL, - `ref_base` varchar(1) NOT NULL, - PRIMARY KEY (`id`), - KEY `dataset_id` (`dataset_id`), - CONSTRAINT `data_ibfk_1` FOREIGN KEY (`dataset_id`) REFERENCES `dataset` (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `data` --- - -LOCK TABLES `data` WRITE; -/*!40000 ALTER TABLE `data` DISABLE KEYS */; -/*!40000 ALTER TABLE `data` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `dataset` --- - -DROP TABLE IF EXISTS `dataset`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `dataset` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `project_id` int(11) NOT NULL, - `title` varchar(255) NOT NULL, - `file_format` varchar(32) NOT NULL, - `modification_type` varchar(32) NOT NULL, - `taxa_id` int(11) NOT NULL, - `assembly_id` int(11) NOT NULL, - `lifted` tinyint(1) NOT NULL, - `annotation_source` varchar(128) NOT NULL, - `annotation_version` varchar(128) NOT NULL, - `sequencing_platform` varchar(255) DEFAULT NULL, - `basecalling` text DEFAULT NULL, - `bioinformatics_workflow` text DEFAULT NULL, - `experiment` text DEFAULT NULL, - `external_source` varchar(255) DEFAULT NULL, - PRIMARY KEY (`id`), - KEY `project_id` (`project_id`), - KEY `taxa_id` (`taxa_id`), - KEY `assembly_id` (`assembly_id`), - CONSTRAINT `dataset_ibfk_1` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`), - CONSTRAINT `dataset_ibfk_2` FOREIGN KEY (`taxa_id`) REFERENCES `ncbi_taxa` (`id`), - CONSTRAINT `dataset_ibfk_3` FOREIGN KEY (`assembly_id`) REFERENCES `assembly` (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `dataset` --- - -LOCK TABLES `dataset` WRITE; -/*!40000 ALTER TABLE `dataset` DISABLE KEYS */; -/*!40000 ALTER TABLE `dataset` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `method` --- - -DROP TABLE IF EXISTS `method`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `method` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `cls` varchar(32) NOT NULL, - `meth` varchar(128) NOT NULL, - PRIMARY KEY (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `method` --- - -LOCK TABLES `method` WRITE; -/*!40000 ALTER TABLE `method` DISABLE KEYS */; -/*!40000 ALTER TABLE `method` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `modification` --- - -DROP TABLE IF EXISTS `modification`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `modification` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `rna` varchar(32) NOT NULL, - `modomics_id` varchar(128) NOT NULL, - PRIMARY KEY (`id`), - KEY `modomics_id` (`modomics_id`), - CONSTRAINT `modification_ibfk_1` FOREIGN KEY (`modomics_id`) REFERENCES `modomics` (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `modification` --- - -LOCK TABLES `modification` WRITE; -/*!40000 ALTER TABLE `modification` DISABLE KEYS */; -/*!40000 ALTER TABLE `modification` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `modomics` --- - -DROP TABLE IF EXISTS `modomics`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `modomics` ( - `id` varchar(128) NOT NULL, - `name` varchar(255) NOT NULL, - `short_name` varchar(32) NOT NULL, - `moiety` varchar(32) NOT NULL, - PRIMARY KEY (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `modomics` --- - -LOCK TABLES `modomics` WRITE; -/*!40000 ALTER TABLE `modomics` DISABLE KEYS */; -INSERT INTO `modomics` VALUES ('00551A','2\'-O-[(5\'-phospho)ribosyl]adenosine-5\'-monophosphate','pAr(p)','nucleotide'),('00551G','2\'-O-ribosylguanosine (phosphate)-5\'-monophosphate','pGr(p)','nucleotide'),('00A','2\'-O-ribosyladenosine (phosphate)','Ar(p)','nucleoside'),('00G','2\'-O-ribosylguanosine (phosphate)','Gr(p)','nucleoside'),('01551A','1,2\'-O-dimethyladenosine-5\'-monophosphate','pm1Am','nucleotide'),('01551G','1,2\'-O-dimethylguanosine-5\'-monophosphate','pm1Gm','nucleotide'),('019551A','1,2\'-O-dimethylinosine-5\'-monophosphate','pm1Im','nucleotide'),('019A','1,2\'-O-dimethylinosine','m1Im','nucleoside'),('01A','1,2\'-O-dimethyladenosine','m1Am','nucleoside'),('01G','1,2\'-O-dimethylguanosine','m1Gm','nucleoside'),('022551G','N2,N2,2\'-O-trimethylguanosine-5\'-monophospate','pm2,2Gm','nucleotide'),('022G','N2,N2,2\'-O-trimethylguanosine','m2,2Gm','nucleoside'),('02551G','N2,2\'-O-dimethylguanosine-5\'-monophospate','pm2Gm','nucleotide'),('02551U','2-thio-2\'-O-methyluridine-5\'-monophosphate','ps2Um','nucleotide'),('027551G','N2,7,2\'-O-trimethylguanosine-5\'-monophosphate','pm2,7Gm','nucleotide'),('027G','N2,7,2\'-O-trimethylguanosine','m2,7Gm','nucleoside'),('02G','N2,2\'-O-dimethylguanosine','m2Gm','nucleoside'),('02U','2-thio-2\'-O-methyluridine','s2Um','nucleoside'),('03551U','3,2\'-O-dimethyluridine-5\'-monophosphate','pm3Um','nucleotide'),('03U','3,2\'-O-dimethyluridine','m3Um','nucleoside'),('042551C','[(2~{R},3~{R},4~{R},5~{R})-5-(4-acetamido-2-oxidanylidene-pyrimidin-1-yl)-4-methoxy-3-oxidanyl-oxolan-2-yl]methyl dihydrogen phosphate','pac4Cm','nucleotide'),('042C','N4-acetyl-2\'-O-methylcytidine','ac4Cm','nucleoside'),('044551C','N4,N4,2\'-O-trimethylcytidine-5\'-monophospate','pm4,4Cm','nucleotide'),('044C','N4,N4,2\'-O-trimethylcytidine','m4,4Cm','nucleoside'),('04551C','4N,O2\'-methylcytidine-5\'-monophosphate','pm4Cm','nucleotide'),('04C','N4,2\'-O-dimethylcytidine','m4Cm','nucleoside'),('0503551U','2\'-O-methyluridine 5-oxyacetic acid methyl ester-5\'-monophosphate','pmcmo5Um','nucleotide'),('0503U','2\'-O-methyluridine 5-oxyacetic acid methyl ester','mcmo5Um','nucleoside'),('051551C','2\'-O-methyl-5-hydroxymethylcytidine-5\'-monophosphate','phm5Cm','nucleotide'),('051551U','5-carboxymethylaminomethyl-2\'-O-methyluridine-5\'-monophosphate','pcmnm5Um','nucleotide'),('051C','2\'-O-methyl-5-hydroxymethylcytidine','hm5Cm','nucleoside'),('051U','5-carboxymethylaminomethyl-2\'-O-methyluridine','cmnm5Um','nucleoside'),('0521551U','5-methoxycarbonylmethyl-2\'-O-methyluridine-5\'-monophosphate','pmcm5Um','nucleotide'),('0521U','5-methoxycarbonylmethyl-2\'-O-methyluridine','mcm5Um','nucleoside'),('0522551U','5-(carboxyhydroxymethyl)-2\'-O-methyluridine methyl ester-5\'-monophosphate','pmchm5Um','nucleotide'),('0522U','5-(carboxyhydroxymethyl)-2\'-O-methyluridine methyl ester','mchm5Um','nucleoside'),('053551U','5-carbamoylmethyl-2\'-O-methyluridine-5\'-monophosphate','pncm5Um','nucleotide'),('053U','5-carbamoylmethyl-2\'-O-methyluridine','ncm5Um','nucleoside'),('0551A','2\'-O-methyladenosine 5\'-(dihydrogen phosphate)','pAm','nucleotide'),('0551C','O2\'-methylcytidine-5\'-monophosphate','pCm','nucleotide'),('0551G','O2\'-methylguanosine-5\'-monophosphate','pGm','nucleotide'),('0551U','O2\'-methyluridine 5\'-monophosphate','pUm','nucleotide'),('05551C','5,2\'-O-dimethylcytidine-5\'-monophosphate','pm5Cm','nucleotide'),('05551U','2\',5-dimethyluridine-5\'-monophosphate','pm5Um','nucleotide'),('0583551U','5-(isopentenylaminomethyl)-2\'-O-methyluridine-5\'-monophosphate','pinm5Um','nucleotide'),('0583U','5-(isopentenylaminomethyl)-2\'-O-methyluridine','inm5Um','nucleoside'),('05C','5,2\'-O-dimethylcytidine','m5Cm','nucleoside'),('05U','5,2\'-O-dimethyluridine','m5Um','nucleoside'),('06551A','N6,2\'-O-dimethyladenosine-5\'-monophospate','pm6Am','nucleotide'),('066551A','N6,N6,2\'-O-trimethyladenosine-5\'-monophospate','pm6,6Am','nucleotide'),('066A','N6,N6,2\'-O-trimethyladenosine','m6,6Am','nucleoside'),('06A','N6,2\'-O-dimethyladenosine','m6Am','nucleoside'),('071551C','5-formyl-2\'-O-methylcytidine-5\'-monophosphate','pf5Cm','nucleotide'),('071C','5-formyl-2\'-O-methylcytidine','f5Cm','nucleoside'),('09551A','2\'-O-methylinosine-5\'-monophosphate','pIm','nucleotide'),('09551U','2\'-O-methylpseudouridine-5\'-monophosphate','pYm','nucleotide'),('09A','2\'-O-methylinosine','Im','nucleoside'),('09U','2\'-O-methylpseudouridine','Ym','nucleoside'),('0A','2\'-O-methyladenosine','Am','nucleoside'),('0C','2\'-O-methylcytidine','Cm','nucleoside'),('0G','2\'-O-methylguanosine','Gm','nucleoside'),('0U','2\'-O-methyluridine','Um','nucleoside'),('0X','unknown nucleoside 2\'-O-methylated','Xm','nucleoside'),('100000G','7-cyano-7-carbaguanine','preQ0base','base'),('10000G','queuine','Qbase','base'),('100551G','7-cyano-7-deazaguanosine-5\'-monophosphate','ppreQ0','nucleotide'),('100G','7-cyano-7-deazaguanosine','preQ0','nucleoside'),('101000G','7-aminomethyl-7-carbaguanine','preQ1base','base'),('101551G','7-aminomethyl-7-deazaguanosine-5\'-monophosphate','ppreQ1','nucleotide'),('101G','7-aminomethyl-7-deazaguanosine','preQ1','nucleoside'),('102551G','epoxyqueuosine-5\'-monophosphate','poQ','nucleotide'),('102G','epoxyqueuosine','oQ','nucleoside'),('103551G','archaeosine-5\'-monophosphate','pG+','nucleotide'),('103G','archaeosine','G+','nucleoside'),('104551G','galactosyl-queuosine-5\'-monophosphate','pgalQ','nucleotide'),('104G','galactosyl-queuosine','galQ','nucleoside'),('10551G','queuosine-5\'-monophospate','pQ','nucleotide'),('105551G','glutamyl-queuosine-5\'-monophosphate','pgluQ','nucleotide'),('105G','glutamyl-queuosine','gluQ','nucleoside'),('106551G','mannosyl-queuosine-5\'-monophosphate','pmanQ','nucleotide'),('106G','mannosyl-queuosine','manQ','nucleoside'),('10G','queuosine','Q','nucleoside'),('1309551U','3-(3-amino-3-carboxypropyl)pseudouridine-5\'-monophosphate','pm1acp3Y','nucleotide'),('1309U','1-methyl-3-(3-amino-3-carboxypropyl)pseudouridine','m1acp3Y','nucleoside'),('1551A','6-hydro-1-methyladenosine-5\'-monophosphate','pm1A','nucleotide'),('1551G','1N-methylguanosine-5\'-monophosphate','pm1G','nucleotide'),('1551N','alpha-methylmonophosphate 5\' cap','mpN','nucleotide'),('1553N','gamma-methyltriphosphate 5\' cap','mpppN','nucleotide'),('19551A','1-methylinosine-5\'-monophosphate','pm1I','nucleotide'),('19551U','1-methylpseudouridine-5\'-monophosphate','pm1Y','nucleotide'),('19A','1-methylinosine','m1I','nucleoside'),('19U','1-methylpseudouridine','m1Y','nucleoside'),('1A','1-methyladenosine','m1A','nucleoside'),('1G','1-methylguanosine','m1G','nucleoside'),('20510551U','5-aminomethyl-2-selenouridine-5\'-monophosphate','pnm5se2U','nucleotide'),('20510U','5-aminomethyl-2-selenouridine','nm5se2U','nucleoside'),('20511551U','5-methylaminomethyl-2-selenouridine-5\'-monophosphate','pmnm5se2U','nucleotide'),('20511U','5-methylaminomethyl-2-selenouridine','mnm5se2U','nucleoside'),('2051551U','5-carboxymethylaminomethyl-2-selenouridine-5\'-monophosphate','pcmnm5se2U','nucleotide'),('2051U','5-carboxymethylaminomethyl-2-selenouridine','cmnm5se2U','nucleoside'),('20551C','agmatidine-5\'-monophosphate','pC+','nucleotide'),('20551U','1-(5-O-phosphono-beta-D-ribofuranosyl)-2-selanylpyrimidin-4(1H)-one','pse2U','nucleotide'),('20C','agmatidine','C+','nucleoside'),('20U','2-selenouridine','se2U','nucleoside'),('21161551A','2- methylthiomethylenethio-N6-isopentenyl-adenosine-5\'-monophosphate','pmsms2i6A','nucleotide'),('21161A','2- methylthiomethylenethio-N6-isopentenyl-adenosine','msms2i6A','nucleoside'),('21510551U','5-aminomethyl-2-geranylthiouridine-5\'-monophosphate','pnm5ges2U','nucleotide'),('21510U','5-aminomethyl-2-geranylthiouridine','nm5ges2U','nucleoside'),('21511551U','5-methylaminomethyl-2-geranylthiouridine-5\'-monophosphate','pmnm5ges2U','nucleotide'),('21511U','5-methylaminomethyl-2-geranylthiouridine','mnm5ges2U','nucleoside'),('2151551U','5-carboxymethylaminomethyl-2-geranylthiouridine-5\'-monophosphate','pcmnm5ges2U','nucleotide'),('2151U','5-carboxymethylaminomethyl-2-geranylthiouridine','cmnm5ges2U','nucleoside'),('21551C','2-lysidine-5\'-monophosphate','pk2C','nucleotide'),('21551U','2-geranylthiouridine-5\'-monophosphate','pges2U','nucleotide'),('2160551A','2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine-5\'-monophosphate','pms2io6A','nucleotide'),('2160A','2-methylthio-N6-(cis-hydroxyisopentenyl) adenosine','ms2io6A','nucleoside'),('2161551A','2-methylthio-N6-isopentenyl-adenosine-5\'-monophosphate','pms2i6A','nucleotide'),('2161A','2-methylthio-N6-isopentenyladenosine','ms2i6A','nucleoside'),('2162551A','2-methylthio-N6-threonylcarbamoyladenosine-5\'-monophosphate','pms2t6A','nucleotide'),('2162A','2-methylthio-N6-threonylcarbamoyladenosine','ms2t6A','nucleoside'),('2163551A','2-methylthio-N6-hydroxynorvalylcarbamoyladenosine-5\'-monophosphate','pms2hn6A','nucleotide'),('2163A','2-methylthio-N6-hydroxynorvalylcarbamoyladenosine','ms2hn6A','nucleoside'),('2164551A','2-methylthio cyclic N6-threonylcarbamoyladenosine-5\'-monophosphate','pms2ct6A','nucleotide'),('2164A','2-methylthio cyclic N6-threonylcarbamoyladenosine','ms2ct6A','nucleoside'),('2165551A','hydroxy-N6-threonylcarbamoyladenosine-5\'-monophosphate','pht6A','nucleotide'),('2165A','hydroxy-N6-threonylcarbamoyladenosine','ht6A','nucleoside'),('21C','2-lysidine','k2C','nucleoside'),('21U','2-geranylthiouridine','ges2U','nucleoside'),('22551G','N2-dimethylguanosine-5\'-monophosphate','pm2,2G','nucleotide'),('227551G','N2,N2,7-trimethylguanosine-5\'-monophosphate','pm2,2,7G','nucleotide'),('2279553N','N2,N2,7-trimethylguanosine cap (cap TMG)','m2,2,7GpppN','nucleotide'),('227G','N2,N2,7-trimethylguanosine','m2,2,7G','nucleoside'),('22G','N2,N2-dimethylguanosine','m2,2G','nucleoside'),('2510551U','5-aminomethyl-2-thiouridine-5\'-monophosphate','pnm5s2U','nucleotide'),('2510U','5-aminomethyl-2-thiouridine','nm5s2U','nucleoside'),('2511551U','5-methylaminomethyl-2-thiouridine-5\'-monophosphate','pmnm5s2U','nucleotide'),('2511U','5-methylaminomethyl-2-thiouridine','mnm5s2U','nucleoside'),('251551U','5-carboxymethylaminomethyl-2-thiouridine-5\'-monophosphate','pcmnm5s2U','nucleotide'),('251U','5-carboxymethylaminomethyl-2-thiouridine','cmnm5s2U','nucleoside'),('2521551U','5-(O-methylaceto)-2-thio-2-deoxy-uridine-5\'-monophosphate','pmcm5s2U','nucleotide'),('2521U','5-methoxycarbonylmethyl-2-thiouridine','mcm5s2U','nucleoside'),('253551U','5-carbamoylmethyl-2-thiouridine-5\'-monophosphate','pncm5s2U','nucleotide'),('253U','5-carbamoylmethyl-2-thiouridine','ncm5s2U','nucleoside'),('2540551U','5-carboxymethyl-2-thiouridine-5\'-monophosphate','pcm5s2U','nucleotide'),('2540U','5-carboxymethyl-2-thiouridine','cm5s2U','nucleoside'),('254551U','5-taurinomethyl-2-thiouridine-5\'-monophosphate','ptm5s2U','nucleotide'),('254U','5-taurinomethyl-2-thiouridine','tm5s2U','nucleoside'),('2551A','2-methyladenosine-5\'-monophosphate','pm2A','nucleotide'),('2551C','2-thiocytidine-5\'-monophosphate','ps2C','nucleotide'),('2551G','2N-methylguanosine-5\'-monophosphate','pm2G','nucleotide'),('2551N','alpha-dimethylmonophosphate 5\' cap','mmpN','nucleotide'),('2551U','1-(beta-D-ribofuranosyl)-2-thio-uracil-5\'-phosphate','ps2U','nucleotide'),('25551U','5-methyl-2-thiouridine-5\'-monophosphate','pm5s2U','nucleotide'),('255N','5\' nicotinamide adenine dinucleotide','NADpN','nucleotide'),('2583551U','5-(isopentenylaminomethyl)-2-thiouridine-5\'-monophosphate','pinm5s2U','nucleotide'),('2583U','5-(isopentenylaminomethyl)-2-thiouridine','inm5s2U','nucleoside'),('25U','5-methyl-2-thiouridine','m5s2U','nucleoside'),('27551G','N2,7-dimethylguanosine-5\'-monophosphate','pm2,7G','nucleotide'),('279553N','N2,7-dimethylguanosine cap (cap DMG)','m2,7GpppN','nucleotide'),('27G','N2,7-dimethylguanosine','m2,7G','nucleoside'),('28551A','2,8-dimethyladenosine-5\'-monophosphate','pm2,8A','nucleotide'),('28A','2,8-dimethyladenosine','m2,8A','nucleoside'),('2A','2-methyladenosine','m2A','nucleoside'),('2C','2-thiocytidine','s2C','nucleoside'),('2G','N2-methylguanosine','m2G','nucleoside'),('2U','2-thiouridine','s2U','nucleoside'),('30551U','3-(3-amino-3-carboxypropyl)uridine-5\'-monophosphate','pacp3U','nucleotide'),('308551U','3-(3-amino-3-carboxypropyl)-5,6-dihydrouridine-5\'-monophosphate','pacp3D','nucleotide'),('308U','3-(3-amino-3-carboxypropyl)-5,6-dihydrouridine','acp3D','nucleoside'),('309U','3-(3-amino-3-carboxypropyl)pseudouridine','acp3Y','nucleoside'),('30U','3-(3-amino-3-carboxypropyl)uridine','acp3U','nucleoside'),('33551A','adenosine-3\',5\'-diphosphate','pAp','nucleotide'),('33551G','guanosine-3\',5\'-diphosphate','pGp','nucleotide'),('3377551U','uridine-5\'-monophosphate-2\',3\'-cyclic phosphate','pU2\'3\'cp','nucleotide'),('3377A','adenosine-5\'-phosphate-2\',3\'-cyclic phosphate','pA2\'3\'cp','nucleotide'),('3377C','cytidine-5\'-phosphate-2\',3\'-cyclic phosphate','pC2\'3\'cp','nucleotide'),('3377G','guanoside-5\'-phosphate-2\',3\'-cyclic phosphate','pG2\'3\'cp','nucleotide'),('3377N','2\'3\'-cyclic phosphate end','N2\'3\'cp','nucleotide'),('342551G','methylwyosine-5\'-monophosphate','pmimG','nucleotide'),('342G','methylwyosine','mimG','nucleoside'),('34551G','wyosine-5\'-monophospate','pimG','nucleotide'),('3470551G','undermodified hydroxywybutosine-5\'-monophospate','pOHyWx','nucleotide'),('3470G','undermodified hydroxywybutosine','OHyWx','nucleoside'),('347551G','7-aminocarboxypropylwyosine-5\'-monophosphate','pyW-72','nucleotide'),('347G','7-aminocarboxypropylwyosine','yW-72','nucleoside'),('3480551G','methylated undermodified hydroxywybutosine-5\'-monophosphate','pOHyWy','nucleotide'),('3480G','methylated undermodified hydroxywybutosine','OHyWy','nucleoside'),('34830551G','hydroxywybutosine-5\'-monophosphate','pOHyW','nucleotide'),('34830G','hydroxywybutosine','OHyW','nucleoside'),('34831551G','wybutosine[C15(S)]-5\'-monophosphate','pyW','nucleotide'),('34832G','peroxywybutosine','o2yW','nucleoside'),('3483551G','wybutosine-5\'-monophosphate','pyW','nucleotide'),('3483G','wybutosine','yW','nucleoside'),('348551G','7-aminocarboxypropylwyosine methyl ester-5\'-monophosphate','pyW-58','nucleotide'),('348G','7-aminocarboxypropylwyosine methyl ester','yW-58','nucleoside'),('34G','wyosine','imG','nucleoside'),('3551C','3-Methylcytidine- 5\'-monophosphate','pm3C','nucleotide'),('3551U','3-methyluridine-5\'-monophosphate','pm3U','nucleotide'),('39551U','3-methylpseudouridine-5\'-monophosphate','pm3Y','nucleotide'),('39U','3-methylpseudouridine','m3Y','nucleoside'),('3C','3-methylcytidine','m3C','nucleoside'),('3U','3-methyluridine','m3U','nucleoside'),('4155N','5\' (3\' -dephosphoacetyl-CoA)','acCoApN','nucleotide'),('42551C','N(4)-acetylcytidine-5\'-monophosphate','pac4C','nucleotide'),('42551G','isowyosine-5\'-monophosphate','pimG2','nucleotide'),('4255N','5\' (3\' -dephosphomalonyl-CoA)','malonyl-CoApN','nucleotide'),('42C','N4-acetylcytidine','ac4C','nucleoside'),('42G','isowyosine','imG2','nucleoside'),('4355N','5\' (3\' -dephosphosuccinyl-CoA)','succinyl-CoApN','nucleotide'),('44551C','[(2~{R},3~{S},4~{R},5~{R})-5-[4-(dimethylamino)-2-oxidanylidene-pyrimidin-1-yl]-3,4-bis(oxidanyl)oxolan-2-yl]methyl dihydrogen phosphate','pm4,4C','nucleotide'),('44C','N4,N4-dimethylcytidine','m4,4C','nucleoside'),('4551C','4-methyl, cytidine-5\'-monophosphate','pm4C','nucleotide'),('4551G','4-demethylwyosine-5\'-monophosphate','pimG-14','nucleotide'),('4553N','adenosine triphosphate 5\' cap (cap A)','ApppN','nucleotide'),('4554N','adenosine tetraphosphate 5\' cap (cap Ap4N)','AppppN','nucleotide'),('4555N','adenosine pentaphosphate 5\' cap (cap Ap5N)','ApppppN','nucleotide'),('455N','5\' (3\' -dephospho-CoA)','CoApN','nucleotide'),('47551G','7-aminocarboxypropyl-demethylwyosine-5\'-monophosphate','pyW-86','nucleotide'),('47G','7-aminocarboxypropyl-demethylwyosine','yW-86','nucleoside'),('4C','N4-methylcytidine','m4C','nucleoside'),('4G','4-demethylwyosine','imG-14','nucleoside'),('501551U','5-methoxyuridine-5\'-monophosphate','pmo5U','nucleotide'),('501U','5-methoxyuridine','mo5U','nucleoside'),('502551U','5-(carboxymethoxy) uridine-5\'-monophosphate','pcmo5U','nucleotide'),('502U','uridine 5-oxyacetic acid','cmo5U','nucleoside'),('503551U','uridine 5-oxyacetic acid methyl ester-5\'-monophospate','pmcmo5U','nucleotide'),('503U','uridine 5-oxyacetic acid methyl ester','mcmo5U','nucleoside'),('50551C','5-hydroxycytidine-5\'-monophosphate','pho5C','nucleotide'),('50551U','5-hydroxyuridine-5\'-monophosphate','pho5U','nucleotide'),('50C','5-hydroxycytidine','ho5C','nucleoside'),('50U','5-hydroxyuridine','ho5U','nucleoside'),('510551U','5-aminomethyluridine-5\'-monophosphate','pnm5U','nucleotide'),('510U','5-aminomethyluridine','nm5U','nucleoside'),('511551U','(2R,4S)-1-[(4R)-3,4-dihydroxytetrahydrofuran-2-YL]-5-[(methylamino)methyl]-1,2,3,4-tetrahydropyrimidine-2,4-diol-5\'-monophosphate','pmnm5U','nucleotide'),('511U','5-methylaminomethyluridine','mnm5U','nucleoside'),('51551C','5-(hydroxymethyl)cytidine 5\'-(dihydrogen phosphate)','phm5C','nucleotide'),('51551U','5-carboxymethylaminomethyluridine-5\'-monophosphate','pcmnm5U','nucleotide'),('51C','5-hydroxymethylcytidine','hm5C','nucleoside'),('51U','5-carboxymethylaminomethyluridine','cmnm5U','nucleoside'),('520551U','5-carboxyhydroxymethyluridine-5\'-monophosphate','pchm5U','nucleotide'),('520U','5-carboxyhydroxymethyluridine','chm5U','nucleoside'),('521551U','5-methoxycarbonylmethyluridine-5\'-monophosphate','pmcm5U','nucleotide'),('521U','5-methoxycarbonylmethyluridine','mcm5U','nucleoside'),('522551U','5-(carboxyhydroxymethyl)uridine methyl ester-5\'-monophosphate','pmchm5U','nucleotide'),('522U','5-(carboxyhydroxymethyl)uridine methyl ester','mchm5U','nucleoside'),('52551U','5-carboxymethyluridine-5\'-monophosphate','pcm5U','nucleotide'),('52U','5-carboxymethyluridine','cm5U','nucleoside'),('531551U','5-carbamoylhydroxymethyluridine-5\'-monophosphate','pnchm5U','nucleotide'),('531U','5-carbamoylhydroxymethyluridine','nchm5U','nucleoside'),('53551U','5-carbamoylmethyluridine-5\'-monophosphate','pncm5U','nucleotide'),('53U','5-carbamoylmethyluridine','ncm5U','nucleoside'),('54551U','5-taurinomethyluridine-5\'-monophosphate','ptm5U','nucleotide'),('54U','5-taurinomethyluridine','tm5U','nucleoside'),('550N','5\' hydroxyl end','5\'-OH-N','nucleoside'),('551A','adenosine-5\'-monophosphate','pA','nucleotide'),('551C','cytidine-5\'-monophosphate','pC','nucleotide'),('551G','guanosine-5\'-monophosphate','pG','nucleotide'),('551G551N','guanosine added to any ribonucleotide','pG(pN)','nucleotide'),('551N','unknown 5\' monophosphate ribonucleotide','pN','nucleotide'),('551U','uridine-5\'-monophosphate','pU','nucleotide'),('552G','guanosine-5\'-diphosphate','ppG','nucleotide'),('552N','5\' diphosphate end','ppN','nucleotide'),('553A','adenosine-5\'-triphosphate','pppA','nucleotide'),('553G','guanosine-5\'-triphosphate','pppG','nucleotide'),('553N','5\' triphosphate end','pppN','nucleotide'),('5551C','5-methylcytidine-5\'-monophosphate','pm5C','nucleotide'),('5551U','5-methyluridine-5\'-monophosphate','pm5U','nucleotide'),('55551U','5-cyanomethyluridine-5\'-monophosphate','pcnm5U','nucleotide'),('55U','5-cyanomethyluridine','cnm5U','nucleoside'),('583551U','5-(isopentenylaminomethyl)uridine-5\'-monophosphate','pinm5U','nucleotide'),('583U','5-(isopentenylaminomethyl)uridine','inm5U','nucleoside'),('58551U','5-methyldihydrouridine-5\'-monophosphate','pm5D','nucleotide'),('58U','5-methyldihydrouridine','m5D','nucleoside'),('5C','5-methylcytidine','m5C','nucleoside'),('5U','5-methyluridine','m5U','nucleoside'),('60551A','N6-(cis-hydroxyisopentenyl)adenosine-5\'-monophospate','pio6A','nucleotide'),('60A','N6-(cis-hydroxyisopentenyl)adenosine','io6A','nucleoside'),('61551A','N6-isopentenyl-adenosine-5\'-monophosphate','pi6A','nucleotide'),('61A','N6-isopentenyladenosine','i6A','nucleoside'),('621551A','2-methylthio-N6-methyladenosine-5\'-monophosphate','pms2m6A','nucleotide'),('621A','2-methylthio-N6-methyladenosine','ms2m6A','nucleoside'),('62551A','N6-threonylcarbamoyladenosine-5\'-monophosphate','pt6A','nucleotide'),('62A','N6-threonylcarbamoyladenosine','t6A','nucleoside'),('63551A','N6-hydroxynorvalylcarbamoyladenosine-5\'-monophosphate','phn6A','nucleotide'),('63A','N6-hydroxynorvalylcarbamoyladenosine','hn6A','nucleoside'),('64551A','N6-acetyladenosine-5\'-monophospate','pac6A','nucleotide'),('64553N','N6-methyl-adenosine triphosphate 5\' cap (cap A)','m6ApppN','nucleotide'),('64554N','N6-methyl-adenosine tetraphosphate 5\' cap (cap Ap4N)','m6AppppN','nucleotide'),('64555N','N6-methyl-adenosine pentaphosphate 5\' cap (cap Ap5N)','m6ApppppN','nucleotide'),('64A','N6-acetyladenosine','ac6A','nucleoside'),('6551A','N6-methyladenosine-5\'-monophosphate','pm6A','nucleotide'),('65551A','N6-glycinylcarbamoyladenosine-5\'-monophosphate','pg6A','nucleotide'),('65A','N6-glycinylcarbamoyladenosine','g6A','nucleoside'),('662551A','N6-methyl-N6-threonylcarbamoyladenosine-5\'-monophosphate','pm6t6A','nucleotide'),('662A','N6-methyl-N6-threonylcarbamoyladenosine','m6t6A','nucleoside'),('66551A','6N-dimethyladenosine-5\'-monophosphate','pm6,6A','nucleotide'),('66A','N6,N6-dimethyladenosine','m6,6A','nucleoside'),('67551A','N6-formyladenosine-5\'-monophospate','pf6A','nucleotide'),('67A','N6-formyladenosine','f6A','nucleoside'),('68551A','N6-hydroxymethyladenosine-5\'-monophosphate','phm6A','nucleotide'),('68A','N6-hydroxymethyladenosine','hm6A','nucleoside'),('69551A','cyclic N6-threonylcarbamoyladenosine-5\'-monophosphate','pct6A','nucleotide'),('69A','cyclic N6-threonylcarbamoyladenosine','ct6A','nucleoside'),('6A','N6-methyladenosine','m6A','nucleoside'),('71551C','5-formylcytidine 5\'-(dihydrogen phosphate)','pf5C','nucleotide'),('71C','5-formylcytidine','f5C','nucleoside'),('74551U','4-thiouridine-5\'-monophosphate','ps4U','nucleotide'),('74U','4-thiouridine','s4U','nucleoside'),('7551G','7N-methyl-8-hydroguanosine-5\'-monophosphate','pm7G','nucleotide'),('79553N','N7-methyl-guanosine cap (cap 0)','m7GpppN','nucleotide'),('79554N','N7-methyl-guanosine tetraphosphate 5\' cap (cap m7Gp4N)','m7GppppN','nucleotide'),('7G','7-methylguanosine','m7G','nucleoside'),('8551A','8-methyladenosine-5\'-monophosphate','pm8A','nucleotide'),('8551U','5,6-dihydrouridine-5\'-monophosphate','pD','nucleotide'),('8A','8-methyladenosine','m8A','nucleoside'),('8U','dihydrouridine','D','nucleoside'),('9551A','inosine-5\'-monophosphate','pI','nucleotide'),('9551U','pseudouridine-5\'-monophosphate','pY','nucleotide'),('9553N','guanosine triphosphate 5\' cap (cap G)','GpppN','nucleotide'),('9A','inosine','I','nucleoside'),('9U','pseudouridine','Y','nucleoside'),('?A','unknown modified adenosine','xA','nucleoside'),('?C','unknown modified cytidine','xC','nucleoside'),('?G','unknown modified guanosine','xG','nucleoside'),('?U','unknown modified uridine','xU','nucleoside'),('A','adenosine','A','nucleoside'),('C','cytidine','C','nucleoside'),('G','guanosine','G','nucleoside'),('N','unknown ribonucleoside residue','N','nucleoside'),('U','uridine','U','nucleoside'),('X','unknown modification','xX','nucleoside'); -/*!40000 ALTER TABLE `modomics` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `ncbi_taxa` --- - -DROP TABLE IF EXISTS `ncbi_taxa`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `ncbi_taxa` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `name` varchar(128) NOT NULL, - `taxonomy_id` int(11) NOT NULL, - `short_name` varchar(128) NOT NULL, - PRIMARY KEY (`id`), - KEY `taxonomy_id` (`taxonomy_id`), - CONSTRAINT `ncbi_taxa_ibfk_1` FOREIGN KEY (`taxonomy_id`) REFERENCES `taxonomy` (`id`) -) ENGINE=InnoDB AUTO_INCREMENT=10091 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `ncbi_taxa` --- - -LOCK TABLES `ncbi_taxa` WRITE; -/*!40000 ALTER TABLE `ncbi_taxa` DISABLE KEYS */; -INSERT INTO `ncbi_taxa` VALUES (562,'Escherichia coli',6,'E. coli'),(3702,'Arabidopsis thaliana',5,'A. thaliana'),(4932,'Saccharomyces cerevisiae',4,'S. cerevisiae'),(6239,'Caenorhabditis elegans',3,'C. elegans'),(7227,'Drosophila melanogaster',2,'D. melanogaster'),(9606,'Homo sapiens',1,'H. sapiens'),(10090,'Mus musculus',1,'M. musculus'); -/*!40000 ALTER TABLE `ncbi_taxa` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `organism` --- - -DROP TABLE IF EXISTS `organism`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `organism` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `cto` varchar(255) NOT NULL, - `taxa_id` int(11) NOT NULL, - PRIMARY KEY (`id`), - KEY `taxa_id` (`taxa_id`), - CONSTRAINT `organism_ibfk_1` FOREIGN KEY (`taxa_id`) REFERENCES `ncbi_taxa` (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `organism` --- - -LOCK TABLES `organism` WRITE; -/*!40000 ALTER TABLE `organism` DISABLE KEYS */; -/*!40000 ALTER TABLE `organism` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `project` --- - -DROP TABLE IF EXISTS `project`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `project` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `title` varchar(255) NOT NULL, - `summary` text NOT NULL, - `contact_name` varchar(128) NOT NULL, - `contact_institution` varchar(255) NOT NULL, - `contact_email` varchar(320) NOT NULL, - `date_published` datetime NOT NULL, - `date_added` datetime NOT NULL, - `doi` varchar(255) DEFAULT NULL, - `pmid` int(11) DEFAULT NULL, - PRIMARY KEY (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `project` --- - -LOCK TABLES `project` WRITE; -/*!40000 ALTER TABLE `project` DISABLE KEYS */; -/*!40000 ALTER TABLE `project` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `selection` --- - -DROP TABLE IF EXISTS `selection`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `selection` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `modification_id` int(11) NOT NULL, - `technology_id` int(11) NOT NULL, - `organism_id` int(11) NOT NULL, - PRIMARY KEY (`id`), - UNIQUE KEY `modification_id` (`modification_id`,`technology_id`,`organism_id`), - KEY `technology_id` (`technology_id`), - KEY `organism_id` (`organism_id`), - CONSTRAINT `selection_ibfk_1` FOREIGN KEY (`modification_id`) REFERENCES `modification` (`id`), - CONSTRAINT `selection_ibfk_2` FOREIGN KEY (`technology_id`) REFERENCES `technology` (`id`), - CONSTRAINT `selection_ibfk_3` FOREIGN KEY (`organism_id`) REFERENCES `organism` (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `selection` --- - -LOCK TABLES `selection` WRITE; -/*!40000 ALTER TABLE `selection` DISABLE KEYS */; -/*!40000 ALTER TABLE `selection` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `taxonomy` --- - -DROP TABLE IF EXISTS `taxonomy`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `taxonomy` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `domain` varchar(32) NOT NULL, - `kingdom` varchar(32) DEFAULT NULL, - `phylum` varchar(32) DEFAULT NULL, - PRIMARY KEY (`id`) -) ENGINE=InnoDB AUTO_INCREMENT=8 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `taxonomy` --- - -LOCK TABLES `taxonomy` WRITE; -/*!40000 ALTER TABLE `taxonomy` DISABLE KEYS */; -INSERT INTO `taxonomy` VALUES (1,'Eukarya','Animalia','Chordata'),(2,'Eukarya','Animalia','Arthropoda'),(3,'Eukarya','Animalia','Nematoda'),(4,'Eukarya','Fungi',NULL),(5,'Eukarya','Plantae',NULL),(6,'Bacteria',NULL,NULL),(7,'Vira',NULL,NULL); -/*!40000 ALTER TABLE `taxonomy` ENABLE KEYS */; -UNLOCK TABLES; - --- --- Table structure for table `technology` --- - -DROP TABLE IF EXISTS `technology`; -/*!40101 SET @saved_cs_client = @@character_set_client */; -/*!40101 SET character_set_client = utf8 */; -CREATE TABLE `technology` ( - `id` int(11) NOT NULL AUTO_INCREMENT, - `tech` varchar(255) NOT NULL, - `method_id` int(11) NOT NULL, - PRIMARY KEY (`id`), - KEY `method_id` (`method_id`), - CONSTRAINT `technology_ibfk_1` FOREIGN KEY (`method_id`) REFERENCES `method` (`id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci; -/*!40101 SET character_set_client = @saved_cs_client */; - --- --- Dumping data for table `technology` --- - -LOCK TABLES `technology` WRITE; -/*!40000 ALTER TABLE `technology` DISABLE KEYS */; -/*!40000 ALTER TABLE `technology` ENABLE KEYS */; -UNLOCK TABLES; -/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; - -/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; -/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; -/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; -/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; -/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; -/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; -/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; - --- Dump completed on 2023-08-09 14:20:49 diff --git a/server/migrations/versions/1c61e979733b_euf_v1_7.py b/server/migrations/versions/1c61e979733b_euf_v1_7.py deleted file mode 100644 index 5d07f340..00000000 --- a/server/migrations/versions/1c61e979733b_euf_v1_7.py +++ /dev/null @@ -1,30 +0,0 @@ -"""euf_v1.7 - -Revision ID: 1c61e979733b -Revises: 4cf76bf1e874 -Create Date: 2023-12-14 12:20:56.045474 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import mysql - -# revision identifiers, used by Alembic. -revision = "1c61e979733b" -down_revision = "4cf76bf1e874" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column("data", "ref_base") - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column( - "data", sa.Column("ref_base", mysql.VARCHAR(length=1), nullable=False) - ) - # ### end Alembic commands ### diff --git a/server/migrations/versions/2702dc292599_add_annotation.py b/server/migrations/versions/2702dc292599_add_annotation.py deleted file mode 100644 index b473290a..00000000 --- a/server/migrations/versions/2702dc292599_add_annotation.py +++ /dev/null @@ -1,84 +0,0 @@ -"""add_annotation - -Revision ID: 2702dc292599 -Revises: a2107e9c03fc -Create Date: 2023-12-05 13:49:00.435404 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = "2702dc292599" -down_revision = "a2107e9c03fc" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "annotation_version", - sa.Column("version_num", sa.String(length=12), nullable=False), - sa.PrimaryKeyConstraint("version_num"), - ) - op.create_table( - "annotation", - sa.Column("id", sa.Integer(), nullable=False), - sa.Column("release", sa.Integer(), nullable=False), - sa.Column("taxa_id", sa.Integer(), nullable=False), - sa.Column("version", sa.String(length=12), nullable=False), - sa.ForeignKeyConstraint( - ["taxa_id"], - ["ncbi_taxa.id"], - ), - sa.PrimaryKeyConstraint("id"), - ) - op.create_table( - "genomic_annotation", - sa.Column("id", sa.Integer(), nullable=False), - sa.Column("annotation_id", sa.Integer(), nullable=False), - sa.Column("chrom", sa.String(length=128), nullable=False), - sa.Column("start", sa.Integer(), nullable=False), - sa.Column("end", sa.Integer(), nullable=False), - sa.Column("strand", sa.String(length=1), nullable=False), - sa.Column("gene_name", sa.String(length=32), nullable=False), - sa.Column("gene_id", sa.String(length=32), nullable=False), - sa.Column("gene_biotype", sa.String(length=32), nullable=False), - sa.ForeignKeyConstraint( - ["annotation_id"], - ["annotation.id"], - ), - sa.PrimaryKeyConstraint("id"), - ) - op.create_table( - "genomic_region", - sa.Column("id", sa.Integer(), nullable=False), - sa.Column("data_id", sa.Integer(), nullable=False), - sa.Column("annotation_id", sa.Integer(), nullable=False), - sa.Column("feature", sa.String(length=32), nullable=False), - sa.ForeignKeyConstraint( - ["annotation_id"], - ["annotation.id"], - ), - sa.ForeignKeyConstraint( - ["data_id"], - ["data.id"], - ), - sa.PrimaryKeyConstraint("id"), - ) - op.add_column("data", sa.Column("annotation_id", sa.Integer(), nullable=True)) - op.create_foreign_key(None, "data", "annotation", ["annotation_id"], ["id"]) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint(None, "data", type_="foreignkey") - op.drop_column("data", "annotation_id") - op.drop_table("genomic_region") - op.drop_table("genomic_annotation") - op.drop_table("annotation") - op.drop_table("annotation_version") - # ### end Alembic commands ### diff --git a/server/migrations/versions/46cf79e27b16_scimodom_v_1.py b/server/migrations/versions/46cf79e27b16_scimodom_v_1.py deleted file mode 100644 index 19e45740..00000000 --- a/server/migrations/versions/46cf79e27b16_scimodom_v_1.py +++ /dev/null @@ -1,29 +0,0 @@ -"""scimodom_v.1 - -Revision ID: 46cf79e27b16 -Revises: -Create Date: 2023-08-09 14:43:22.072547 - -""" -from os.path import join, dirname - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = "46cf79e27b16" -down_revision = None -branch_labels = None -depends_on = None - -DUMP_FOLDER = join(dirname(dirname(__file__)), 'dumps') - - -def upgrade() -> None: - with open(f"{DUMP_FOLDER}/scimodom_v.1.sql") as file: - op.execute(file.read()) - - -def downgrade() -> None: - pass diff --git a/server/migrations/versions/4cf76bf1e874_simplify_annotation.py b/server/migrations/versions/4cf76bf1e874_simplify_annotation.py deleted file mode 100644 index 8c474659..00000000 --- a/server/migrations/versions/4cf76bf1e874_simplify_annotation.py +++ /dev/null @@ -1,133 +0,0 @@ -"""simplify_annotation - -Revision ID: 4cf76bf1e874 -Revises: b60acf426325 -Create Date: 2023-12-08 16:50:38.788583 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import mysql - -# revision identifiers, used by Alembic. -revision = "4cf76bf1e874" -down_revision = "b60acf426325" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table("genomic_region") - op.drop_constraint("data_ibfk_2", "data", type_="foreignkey") - op.drop_column("data", "annotation_id") - op.add_column( - "genomic_annotation", sa.Column("data_id", sa.Integer(), nullable=False) - ) - op.add_column( - "genomic_annotation", sa.Column("feature", sa.String(length=32), nullable=False) - ) - op.alter_column( - "genomic_annotation", - "gene_name", - existing_type=mysql.VARCHAR(length=32), - type_=sa.String(length=128), - existing_nullable=True, - ) - op.alter_column( - "genomic_annotation", - "gene_id", - existing_type=mysql.VARCHAR(length=32), - type_=sa.String(length=128), - existing_nullable=True, - ) - op.create_foreign_key(None, "genomic_annotation", "data", ["data_id"], ["id"]) - op.drop_column("genomic_annotation", "end") - op.drop_column("genomic_annotation", "strand") - op.drop_column("genomic_annotation", "start") - op.drop_column("genomic_annotation", "chrom") - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column( - "genomic_annotation", - sa.Column("chrom", mysql.VARCHAR(length=128), nullable=False), - ) - op.add_column( - "genomic_annotation", - sa.Column( - "start", - mysql.INTEGER(display_width=11), - autoincrement=False, - nullable=False, - ), - ) - op.add_column( - "genomic_annotation", - sa.Column("strand", mysql.VARCHAR(length=1), nullable=False), - ) - op.add_column( - "genomic_annotation", - sa.Column( - "end", mysql.INTEGER(display_width=11), autoincrement=False, nullable=False - ), - ) - op.drop_constraint(None, "genomic_annotation", type_="foreignkey") - op.alter_column( - "genomic_annotation", - "gene_id", - existing_type=sa.String(length=128), - type_=mysql.VARCHAR(length=32), - existing_nullable=True, - ) - op.alter_column( - "genomic_annotation", - "gene_name", - existing_type=sa.String(length=128), - type_=mysql.VARCHAR(length=32), - existing_nullable=True, - ) - op.drop_column("genomic_annotation", "feature") - op.drop_column("genomic_annotation", "data_id") - op.add_column( - "data", - sa.Column( - "annotation_id", - mysql.INTEGER(display_width=11), - autoincrement=False, - nullable=True, - ), - ) - op.create_foreign_key( - "data_ibfk_2", "data", "annotation", ["annotation_id"], ["id"] - ) - op.create_table( - "genomic_region", - sa.Column( - "id", mysql.INTEGER(display_width=11), autoincrement=True, nullable=False - ), - sa.Column( - "data_id", - mysql.INTEGER(display_width=11), - autoincrement=False, - nullable=False, - ), - sa.Column( - "annotation_id", - mysql.INTEGER(display_width=11), - autoincrement=False, - nullable=False, - ), - sa.Column("feature", mysql.VARCHAR(length=32), nullable=False), - sa.ForeignKeyConstraint( - ["annotation_id"], ["annotation.id"], name="genomic_region_ibfk_1" - ), - sa.ForeignKeyConstraint(["data_id"], ["data.id"], name="genomic_region_ibfk_2"), - sa.PrimaryKeyConstraint("id"), - mysql_collate="utf8mb4_general_ci", - mysql_default_charset="utf8mb4", - mysql_engine="InnoDB", - ) - # ### end Alembic commands ### diff --git a/server/migrations/versions/5800126056c3_add_project_source.py b/server/migrations/versions/5800126056c3_add_project_source.py deleted file mode 100644 index f6f83c54..00000000 --- a/server/migrations/versions/5800126056c3_add_project_source.py +++ /dev/null @@ -1,48 +0,0 @@ -"""add_project_source - -Revision ID: 5800126056c3 -Revises: 46cf79e27b16 -Create Date: 2023-08-10 11:47:56.778569 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import mysql - -# revision identifiers, used by Alembic. -revision = "5800126056c3" -down_revision = "46cf79e27b16" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "project_source", - sa.Column("id", sa.Integer(), nullable=False), - sa.Column("project_id", sa.Integer(), nullable=False), - sa.Column("doi", sa.String(length=255), nullable=True), - sa.Column("pmid", sa.Integer(), nullable=True), - sa.ForeignKeyConstraint( - ["project_id"], - ["project.id"], - ), - sa.PrimaryKeyConstraint("id"), - ) - op.drop_column("project", "pmid") - op.drop_column("project", "doi") - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column("project", sa.Column("doi", mysql.VARCHAR(length=255), nullable=True)) - op.add_column( - "project", - sa.Column( - "pmid", mysql.INTEGER(display_width=11), autoincrement=False, nullable=True - ), - ) - op.drop_table("project_source") - # ### end Alembic commands ### diff --git a/server/migrations/versions/665ece797465_scimodom_schema_v2.py b/server/migrations/versions/665ece797465_scimodom_schema_v2.py deleted file mode 100644 index b5c272b2..00000000 --- a/server/migrations/versions/665ece797465_scimodom_schema_v2.py +++ /dev/null @@ -1,36 +0,0 @@ -"""scimodom_schema_v2 - -Revision ID: 665ece797465 -Revises: abbad4ef0a9c -Create Date: 2023-09-20 10:52:26.191121 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = "665ece797465" -down_revision = "abbad4ef0a9c" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "assembly_version", - sa.Column("version_num", sa.String(length=12), nullable=False), - sa.PrimaryKeyConstraint("version_num"), - ) - op.add_column( - "assembly", sa.Column("version", sa.String(length=12), nullable=False) - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column("assembly", "version") - op.drop_table("assembly_version") - # ### end Alembic commands ### diff --git a/server/migrations/versions/71054e891d6e_setup.py b/server/migrations/versions/71054e891d6e_setup.py new file mode 100644 index 00000000..4e2cb42d --- /dev/null +++ b/server/migrations/versions/71054e891d6e_setup.py @@ -0,0 +1,362 @@ +"""setup + +Revision ID: 71054e891d6e +Revises: +Create Date: 2024-02-07 14:14:31.681427 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "71054e891d6e" +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + "annotation_version", + sa.Column("version_num", sa.String(length=12), nullable=False), + sa.PrimaryKeyConstraint("version_num", name=op.f("pk_annotation_version")), + ) + op.create_table( + "assembly_version", + sa.Column("version_num", sa.String(length=12), nullable=False), + sa.PrimaryKeyConstraint("version_num", name=op.f("pk_assembly_version")), + ) + op.create_table( + "method", + sa.Column("id", sa.String(length=8), autoincrement=False, nullable=False), + sa.Column("cls", sa.String(length=32), nullable=False), + sa.Column("meth", sa.String(length=128), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("pk_method")), + ) + op.create_table( + "modomics", + sa.Column("id", sa.String(length=128), autoincrement=False, nullable=False), + sa.Column("name", sa.String(length=255), nullable=False), + sa.Column("short_name", sa.String(length=32), nullable=False), + sa.Column("moiety", sa.String(length=32), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("pk_modomics")), + ) + op.create_table( + "project_contact", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("contact_name", sa.String(length=128), nullable=False), + sa.Column("contact_institution", sa.String(length=255), nullable=False), + sa.Column("contact_email", sa.String(length=320), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("pk_project_contact")), + ) + op.create_table( + "taxonomy", + sa.Column("id", sa.String(length=8), autoincrement=False, nullable=False), + sa.Column("domain", sa.String(length=32), nullable=False), + sa.Column("kingdom", sa.String(length=32), nullable=True), + sa.Column("phylum", sa.String(length=32), nullable=True), + sa.PrimaryKeyConstraint("id", name=op.f("pk_taxonomy")), + ) + op.create_table( + "modification", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("modomics_id", sa.String(length=128), nullable=False), + sa.Column("rna", sa.String(length=32), nullable=False), + sa.ForeignKeyConstraint( + ["modomics_id"], + ["modomics.id"], + name=op.f("fk_modification_modomics_id_modomics"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_modification")), + sa.UniqueConstraint( + "modomics_id", "rna", name=op.f("uq_modification_modomics_id") + ), + ) + op.create_table( + "ncbi_taxa", + sa.Column("id", sa.Integer(), autoincrement=False, nullable=False), + sa.Column("name", sa.String(length=128), nullable=False), + sa.Column("short_name", sa.String(length=128), nullable=False), + sa.Column("taxonomy_id", sa.String(length=8), nullable=False), + sa.ForeignKeyConstraint( + ["taxonomy_id"], + ["taxonomy.id"], + name=op.f("fk_ncbi_taxa_taxonomy_id_taxonomy"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_ncbi_taxa")), + ) + op.create_table( + "project", + sa.Column("id", sa.String(length=8), autoincrement=False, nullable=False), + sa.Column("title", sa.String(length=255), nullable=False), + sa.Column("summary", sa.Text(), nullable=False), + sa.Column("contact_id", sa.Integer(), nullable=False), + sa.Column("date_published", sa.DateTime(), nullable=False), + sa.Column("date_added", sa.DateTime(), nullable=False), + sa.ForeignKeyConstraint( + ["contact_id"], + ["project_contact.id"], + name=op.f("fk_project_contact_id_project_contact"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_project")), + ) + op.create_table( + "technology", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("method_id", sa.String(length=8), nullable=False), + sa.Column("tech", sa.String(length=255), nullable=False), + sa.ForeignKeyConstraint( + ["method_id"], ["method.id"], name=op.f("fk_technology_method_id_method") + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_technology")), + sa.UniqueConstraint("method_id", "tech", name=op.f("uq_technology_method_id")), + ) + op.create_index(op.f("ix_technology_tech"), "technology", ["tech"], unique=False) + op.create_table( + "annotation", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("release", sa.Integer(), nullable=False), + sa.Column("taxa_id", sa.Integer(), nullable=False), + sa.Column("version", sa.String(length=12), nullable=False), + sa.ForeignKeyConstraint( + ["taxa_id"], ["ncbi_taxa.id"], name=op.f("fk_annotation_taxa_id_ncbi_taxa") + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_annotation")), + sa.UniqueConstraint( + "release", "taxa_id", "version", name=op.f("uq_annotation_release") + ), + ) + op.create_table( + "assembly", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("name", sa.String(length=128), nullable=False), + sa.Column("taxa_id", sa.Integer(), nullable=False), + sa.Column("version", sa.String(length=12), nullable=False), + sa.ForeignKeyConstraint( + ["taxa_id"], ["ncbi_taxa.id"], name=op.f("fk_assembly_taxa_id_ncbi_taxa") + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_assembly")), + sa.UniqueConstraint( + "name", "taxa_id", "version", name=op.f("uq_assembly_name") + ), + ) + op.create_table( + "dataset", + sa.Column("id", sa.String(length=12), autoincrement=False, nullable=False), + sa.Column("project_id", sa.String(length=8), nullable=False), + sa.Column("title", sa.String(length=255), nullable=False), + sa.Column("lifted", sa.Boolean(), nullable=False), + sa.Column("file_format", sa.String(length=32), nullable=False), + sa.Column("modification_type", sa.String(length=32), nullable=False), + sa.Column("sequencing_platform", sa.String(length=255), nullable=True), + sa.Column("basecalling", sa.Text(), nullable=True), + sa.Column("bioinformatics_workflow", sa.Text(), nullable=True), + sa.Column("experiment", sa.Text(), nullable=True), + sa.Column("external_source", sa.String(length=255), nullable=True), + sa.ForeignKeyConstraint( + ["project_id"], ["project.id"], name=op.f("fk_dataset_project_id_project") + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_dataset")), + ) + op.create_index( + op.f("ix_dataset_project_id"), "dataset", ["project_id"], unique=False + ) + op.create_table( + "organism", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("taxa_id", sa.Integer(), nullable=False), + sa.Column("cto", sa.String(length=255), nullable=False), + sa.ForeignKeyConstraint( + ["taxa_id"], ["ncbi_taxa.id"], name=op.f("fk_organism_taxa_id_ncbi_taxa") + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_organism")), + sa.UniqueConstraint("taxa_id", "cto", name=op.f("uq_organism_taxa_id")), + ) + op.create_index(op.f("ix_organism_cto"), "organism", ["cto"], unique=False) + op.create_index(op.f("ix_organism_taxa_id"), "organism", ["taxa_id"], unique=False) + op.create_table( + "project_source", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("project_id", sa.String(length=8), nullable=False), + sa.Column("doi", sa.String(length=255), nullable=True), + sa.Column("pmid", sa.Integer(), nullable=True), + sa.ForeignKeyConstraint( + ["project_id"], + ["project.id"], + name=op.f("fk_project_source_project_id_project"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_project_source")), + ) + op.create_table( + "data", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("association_id", sa.String(length=12), nullable=False), + sa.Column("chrom", sa.String(length=128), nullable=False), + sa.Column("start", sa.Integer(), nullable=False), + sa.Column("end", sa.Integer(), nullable=False), + sa.Column("name", sa.String(length=32), nullable=False), + sa.Column("score", sa.Integer(), nullable=False), + sa.Column("strand", sa.String(length=1), nullable=False), + sa.Column("thick_start", sa.Integer(), nullable=False), + sa.Column("thick_end", sa.Integer(), nullable=False), + sa.Column("item_rgb", sa.String(length=128), nullable=False), + sa.Column("coverage", sa.Integer(), nullable=False), + sa.Column("frequency", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["association_id"], + ["dataset.id"], + name=op.f("fk_data_association_id_dataset"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_data")), + ) + op.create_index( + "idx_data_sort", + "data", + ["chrom", "start", "score", "coverage", "frequency"], + unique=False, + ) + op.create_index( + op.f("ix_data_association_id"), "data", ["association_id"], unique=False + ) + op.create_table( + "genomic_annotation", + sa.Column("id", sa.String(length=128), autoincrement=False, nullable=False), + sa.Column("annotation_id", sa.Integer(), nullable=False), + sa.Column("name", sa.String(length=128), nullable=True), + sa.Column("biotype", sa.String(length=255), nullable=True), + sa.ForeignKeyConstraint( + ["annotation_id"], + ["annotation.id"], + name=op.f("fk_genomic_annotation_annotation_id_annotation"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_genomic_annotation")), + ) + op.create_index( + op.f("ix_genomic_annotation_biotype"), + "genomic_annotation", + ["biotype"], + unique=False, + ) + op.create_index( + op.f("ix_genomic_annotation_name"), "genomic_annotation", ["name"], unique=False + ) + op.create_table( + "selection", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("modification_id", sa.Integer(), nullable=False), + sa.Column("technology_id", sa.Integer(), nullable=False), + sa.Column("organism_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["modification_id"], + ["modification.id"], + name=op.f("fk_selection_modification_id_modification"), + ), + sa.ForeignKeyConstraint( + ["organism_id"], + ["organism.id"], + name=op.f("fk_selection_organism_id_organism"), + ), + sa.ForeignKeyConstraint( + ["technology_id"], + ["technology.id"], + name=op.f("fk_selection_technology_id_technology"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_selection")), + ) + op.create_index( + "idx_select", + "selection", + ["modification_id", "technology_id", "organism_id"], + unique=True, + ) + op.create_table( + "association", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("dataset_id", sa.String(length=12), nullable=False), + sa.Column("selection_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["dataset_id"], + ["dataset.id"], + name=op.f("fk_association_dataset_id_dataset"), + ), + sa.ForeignKeyConstraint( + ["selection_id"], + ["selection.id"], + name=op.f("fk_association_selection_id_selection"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_association")), + ) + op.create_index( + "idx_assoc", "association", ["dataset_id", "selection_id"], unique=True + ) + op.create_table( + "data_annotation", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("gene_id", sa.String(length=128), nullable=False), + sa.Column("data_id", sa.Integer(), nullable=False), + sa.Column("feature", sa.String(length=32), nullable=False), + sa.ForeignKeyConstraint( + ["data_id"], ["data.id"], name=op.f("fk_data_annotation_data_id_data") + ), + sa.ForeignKeyConstraint( + ["gene_id"], + ["genomic_annotation.id"], + name=op.f("fk_data_annotation_gene_id_genomic_annotation"), + ), + sa.PrimaryKeyConstraint("id", name=op.f("pk_data_annotation")), + sa.UniqueConstraint( + "gene_id", "data_id", "feature", name=op.f("uq_data_annotation_gene_id") + ), + ) + op.create_index( + op.f("ix_data_annotation_data_id"), "data_annotation", ["data_id"], unique=False + ) + op.create_index( + op.f("ix_data_annotation_feature"), "data_annotation", ["feature"], unique=False + ) + op.create_index( + op.f("ix_data_annotation_gene_id"), "data_annotation", ["gene_id"], unique=False + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index(op.f("ix_data_annotation_gene_id"), table_name="data_annotation") + op.drop_index(op.f("ix_data_annotation_feature"), table_name="data_annotation") + op.drop_index(op.f("ix_data_annotation_data_id"), table_name="data_annotation") + op.drop_table("data_annotation") + op.drop_index("idx_assoc", table_name="association") + op.drop_table("association") + op.drop_index("idx_select", table_name="selection") + op.drop_table("selection") + op.drop_index(op.f("ix_genomic_annotation_name"), table_name="genomic_annotation") + op.drop_index( + op.f("ix_genomic_annotation_biotype"), table_name="genomic_annotation" + ) + op.drop_table("genomic_annotation") + op.drop_index(op.f("ix_data_association_id"), table_name="data") + op.drop_index("idx_data_sort", table_name="data") + op.drop_table("data") + op.drop_table("project_source") + op.drop_index(op.f("ix_organism_taxa_id"), table_name="organism") + op.drop_index(op.f("ix_organism_cto"), table_name="organism") + op.drop_table("organism") + op.drop_index(op.f("ix_dataset_project_id"), table_name="dataset") + op.drop_table("dataset") + op.drop_table("assembly") + op.drop_table("annotation") + op.drop_index(op.f("ix_technology_tech"), table_name="technology") + op.drop_table("technology") + op.drop_table("project") + op.drop_table("ncbi_taxa") + op.drop_table("modification") + op.drop_table("taxonomy") + op.drop_table("project_contact") + op.drop_table("modomics") + op.drop_table("method") + op.drop_table("assembly_version") + op.drop_table("annotation_version") + # ### end Alembic commands ### diff --git a/server/migrations/versions/79fa0c30513f_drop_annotation.py b/server/migrations/versions/79fa0c30513f_drop_annotation.py deleted file mode 100644 index f5ec6bdd..00000000 --- a/server/migrations/versions/79fa0c30513f_drop_annotation.py +++ /dev/null @@ -1,50 +0,0 @@ -"""drop_annotation - -Revision ID: 79fa0c30513f -Revises: 1c61e979733b -Create Date: 2023-12-14 14:30:30.442283 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import mysql - -# revision identifiers, used by Alembic. -revision = "79fa0c30513f" -down_revision = "1c61e979733b" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "dataset", - "annotation_source", - existing_type=mysql.VARCHAR(length=128), - nullable=True, - ) - op.alter_column( - "dataset", - "annotation_version", - existing_type=mysql.VARCHAR(length=128), - nullable=True, - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "dataset", - "annotation_version", - existing_type=mysql.VARCHAR(length=128), - nullable=False, - ) - op.alter_column( - "dataset", - "annotation_source", - existing_type=mysql.VARCHAR(length=128), - nullable=False, - ) - # ### end Alembic commands ### diff --git a/server/migrations/versions/a2107e9c03fc_dataset_id_str.py b/server/migrations/versions/a2107e9c03fc_dataset_id_str.py deleted file mode 100644 index e1c026ff..00000000 --- a/server/migrations/versions/a2107e9c03fc_dataset_id_str.py +++ /dev/null @@ -1,119 +0,0 @@ -"""dataset_id_str - -Revision ID: a2107e9c03fc -Revises: be9340f7ff6e -Create Date: 2023-09-25 14:57:25.722852 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import mysql - -# revision identifiers, used by Alembic. -revision = "a2107e9c03fc" -down_revision = "be9340f7ff6e" -branch_labels = None -depends_on = None - -# CHANGED: DROP CONSTRAINT, ALTER COLUMN, CREATE CONSTRAINT - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - - op.drop_constraint( - constraint_name="association_ibfk_1", - table_name="association", - type_="foreignkey", - ) - op.drop_constraint( - constraint_name="data_ibfk_1", table_name="data", type_="foreignkey" - ) - - op.alter_column( - "dataset", - "id", - existing_type=mysql.INTEGER(display_width=11), - type_=sa.String(length=12), - existing_nullable=False, - ) - op.alter_column( - "data", - "dataset_id", - existing_type=mysql.INTEGER(display_width=11), - type_=sa.String(length=12), - existing_nullable=False, - ) - op.alter_column( - "association", - "dataset_id", - existing_type=mysql.INTEGER(display_width=11), - type_=sa.String(length=12), - existing_nullable=False, - ) - - op.create_foreign_key( - constraint_name="association_ibfk_1", - source_table="association", - referent_table="dataset", - local_cols=["dataset_id"], - remote_cols=["id"], - ) - op.create_foreign_key( - constraint_name="data_ibfk_1", - source_table="data", - referent_table="dataset", - local_cols=["dataset_id"], - remote_cols=["id"], - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint( - constraint_name="association_ibfk_1", - table_name="association", - type_="foreignkey", - ) - op.drop_constraint( - constraint_name="data_ibfk_1", table_name="data", type_="foreignkey" - ) - - op.alter_column( - "dataset", - "id", - existing_type=sa.String(length=12), - type_=mysql.INTEGER(display_width=11), - existing_nullable=False, - ) - op.alter_column( - "data", - "dataset_id", - existing_type=sa.String(length=12), - type_=mysql.INTEGER(display_width=11), - existing_nullable=False, - ) - op.alter_column( - "association", - "dataset_id", - existing_type=sa.String(length=12), - type_=mysql.INTEGER(display_width=11), - existing_nullable=False, - ) - - op.create_foreign_key( - constraint_name="association_ibfk_1", - source_table="association", - referent_table="dataset", - local_cols=["dataset_id"], - remote_cols=["id"], - ) - op.create_foreign_key( - constraint_name="data_ibfk_1", - source_table="data", - referent_table="dataset", - local_cols=["dataset_id"], - remote_cols=["id"], - ) - # ### end Alembic commands ### diff --git a/server/migrations/versions/abbad4ef0a9c_fix_minor_v1.py b/server/migrations/versions/abbad4ef0a9c_fix_minor_v1.py deleted file mode 100644 index 5bb51c9e..00000000 --- a/server/migrations/versions/abbad4ef0a9c_fix_minor_v1.py +++ /dev/null @@ -1,118 +0,0 @@ -"""fix_minor_v1 - -Revision ID: abbad4ef0a9c -Revises: 5800126056c3 -Create Date: 2023-09-19 16:30:54.231393 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import mysql - -# revision identifiers, used by Alembic. -revision = "abbad4ef0a9c" -down_revision = "5800126056c3" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - - op.drop_constraint( - constraint_name="project_source_ibfk_1", - table_name="project_source", - type_="foreignkey", - ) - op.drop_constraint( - constraint_name="dataset_ibfk_1", table_name="dataset", type_="foreignkey" - ) - - op.alter_column( - "project", - "id", - existing_type=mysql.INTEGER(display_width=11), - type_=sa.String(length=8), - existing_nullable=False, - ) - op.alter_column( - "project_source", - "project_id", - existing_type=mysql.INTEGER(display_width=11), - type_=sa.String(length=8), - existing_nullable=False, - ) - op.alter_column( - "dataset", - "project_id", - existing_type=mysql.INTEGER(display_width=11), - type_=sa.String(length=8), - existing_nullable=False, - ) - - op.create_foreign_key( - constraint_name="project_source_ibfk_1", - source_table="project_source", - referent_table="project", - local_cols=["project_id"], - remote_cols=["id"], - ) - op.create_foreign_key( - constraint_name="dataset_ibfk_1", - source_table="dataset", - referent_table="project", - local_cols=["project_id"], - remote_cols=["id"], - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - # CHANGED: DROP CONSTRAINT, ALTER COLUMN, CREATE CONSTRAINT - op.drop_constraint( - constraint_name="project_source_ibfk_1", - table_name="project_source", - type_="foreignkey", - ) - op.drop_constraint( - constraint_name="dataset_ibfk_1", table_name="dataset", type_="foreignkey" - ) - - op.alter_column( - "project", - "id", - existing_type=sa.String(length=8), - type_=mysql.INTEGER(display_width=11), - existing_nullable=False, - ) - op.alter_column( - "project_source", - "project_id", - existing_type=sa.String(length=8), - type_=mysql.INTEGER(display_width=11), - existing_nullable=False, - ) - op.alter_column( - "dataset", - "project_id", - existing_type=sa.String(length=8), - type_=mysql.INTEGER(display_width=11), - existing_nullable=False, - ) - - op.create_foreign_key( - constraint_name="project_source_ibfk_1", - source_table="project_source", - referent_table="project", - local_cols=["project_id"], - remote_cols=["id"], - ) - op.create_foreign_key( - constraint_name="dataset_ibfk_1", - source_table="dataset", - referent_table="project", - local_cols=["project_id"], - remote_cols=["id"], - ) - # ### end Alembic commands ### diff --git a/server/migrations/versions/b60acf426325_upd_genomic_annotation.py b/server/migrations/versions/b60acf426325_upd_genomic_annotation.py deleted file mode 100644 index 121c1e39..00000000 --- a/server/migrations/versions/b60acf426325_upd_genomic_annotation.py +++ /dev/null @@ -1,64 +0,0 @@ -"""upd_genomic_annotation - -Revision ID: b60acf426325 -Revises: 2702dc292599 -Create Date: 2023-12-06 12:58:29.174047 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import mysql - -# revision identifiers, used by Alembic. -revision = "b60acf426325" -down_revision = "2702dc292599" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "genomic_annotation", - "gene_name", - existing_type=mysql.VARCHAR(length=32), - nullable=True, - ) - op.alter_column( - "genomic_annotation", - "gene_id", - existing_type=mysql.VARCHAR(length=32), - nullable=True, - ) - op.alter_column( - "genomic_annotation", - "gene_biotype", - existing_type=mysql.VARCHAR(length=32), - type_=sa.String(length=255), - nullable=True, - ) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.alter_column( - "genomic_annotation", - "gene_biotype", - existing_type=sa.String(length=255), - type_=mysql.VARCHAR(length=32), - nullable=False, - ) - op.alter_column( - "genomic_annotation", - "gene_id", - existing_type=mysql.VARCHAR(length=32), - nullable=False, - ) - op.alter_column( - "genomic_annotation", - "gene_name", - existing_type=mysql.VARCHAR(length=32), - nullable=False, - ) - # ### end Alembic commands ### diff --git a/server/migrations/versions/be9340f7ff6e_scimodom_schema_v3.py b/server/migrations/versions/be9340f7ff6e_scimodom_schema_v3.py deleted file mode 100644 index 729a3651..00000000 --- a/server/migrations/versions/be9340f7ff6e_scimodom_schema_v3.py +++ /dev/null @@ -1,52 +0,0 @@ -"""scimodom_schema_v3 - -Revision ID: be9340f7ff6e -Revises: 665ece797465 -Create Date: 2023-09-20 12:33:22.376640 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import mysql - -# revision identifiers, used by Alembic. -revision = "be9340f7ff6e" -down_revision = "665ece797465" -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - "project_contact", - sa.Column("id", sa.Integer(), nullable=False), - sa.Column("contact_name", sa.String(length=128), nullable=False), - sa.Column("contact_institution", sa.String(length=255), nullable=False), - sa.Column("contact_email", sa.String(length=320), nullable=False), - sa.PrimaryKeyConstraint("id"), - ) - op.add_column("project", sa.Column("contact_id", sa.Integer(), nullable=False)) - op.create_foreign_key(None, "project", "project_contact", ["contact_id"], ["id"]) - op.drop_column("project", "contact_email") - op.drop_column("project", "contact_name") - op.drop_column("project", "contact_institution") - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.add_column( - "project", - sa.Column("contact_institution", mysql.VARCHAR(length=255), nullable=False), - ) - op.add_column( - "project", sa.Column("contact_name", mysql.VARCHAR(length=128), nullable=False) - ) - op.add_column( - "project", sa.Column("contact_email", mysql.VARCHAR(length=320), nullable=False) - ) - op.drop_constraint(None, "project", type_="foreignkey") - op.drop_column("project", "contact_id") - op.drop_table("project_contact") - # ### end Alembic commands ### diff --git a/server/src/scimodom/api/queries.py b/server/src/scimodom/api/queries.py index f2f65bf2..2f5d855c 100644 --- a/server/src/scimodom/api/queries.py +++ b/server/src/scimodom/api/queries.py @@ -450,10 +450,13 @@ def get_comparison(step): Data.name, Data.score, Data.strand, - Data.dataset_id, + Association.dataset_id, + # Data.dataset_id, Data.coverage, Data.frequency, - ).where(Data.dataset_id.in_(dataset_ids_a)) + ) + .join_from(Data, Association, Data.inst_association) + .where(Association.dataset_id.in_(dataset_ids_a)) # .order_by(Data.chrom.asc(), Data.start.asc()) ) a_records = get_session().execute(query).all() @@ -469,17 +472,23 @@ def get_comparison(step): else: b_records = [] for idx in dataset_ids_b: - query = select( - Data.chrom, - Data.start, - Data.end, - Data.name, - Data.score, - Data.strand, - Data.dataset_id, - Data.coverage, - Data.frequency, - ).where(Data.dataset_id == idx) + query = ( + select( + Data.chrom, + Data.start, + Data.end, + Data.name, + Data.score, + Data.strand, + Association.dataset_id, + # Data.dataset_id, + Data.coverage, + Data.frequency, + ) + .join_from(Data, Association, Data.inst_association) + .where(Association.dataset_id == idx) + # .where(Data.dataset_id == idx) + ) b_records.append(get_session().execute(query).all()) op, strand = query_operation.split("S") diff --git a/server/src/scimodom/database/database.py b/server/src/scimodom/database/database.py index 1ad7843a..03d62c4d 100644 --- a/server/src/scimodom/database/database.py +++ b/server/src/scimodom/database/database.py @@ -1,7 +1,7 @@ import os from typing import Callable, Optional -from sqlalchemy import create_engine +from sqlalchemy import MetaData, create_engine from sqlalchemy.orm import sessionmaker, DeclarativeBase, Session from sqlalchemy.engine import Engine @@ -10,7 +10,15 @@ class Base(DeclarativeBase): - pass + metadata = MetaData( + naming_convention={ + "ix": "ix_%(column_0_label)s", + "uq": "uq_%(table_name)s_%(column_0_name)s", + "ck": "ck_%(table_name)s_%(constraint_name)s", + "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s", + "pk": "pk_%(table_name)s", + } + ) def make_session(database_uri: str) -> tuple[Engine, sessionmaker[Session]]: diff --git a/server/src/scimodom/database/models.py b/server/src/scimodom/database/models.py index 85408564..8a88eb7f 100644 --- a/server/src/scimodom/database/models.py +++ b/server/src/scimodom/database/models.py @@ -1,7 +1,7 @@ from datetime import datetime from typing import List, Optional -from sqlalchemy import String, Text, DateTime, ForeignKey, UniqueConstraint +from sqlalchemy import String, Text, DateTime, Index, ForeignKey, UniqueConstraint from sqlalchemy.orm import Mapped, mapped_column, relationship from scimodom.database.database import Base @@ -12,9 +12,11 @@ class Modomics(Base): __tablename__ = "modomics" - id: Mapped[str] = mapped_column(String(128), primary_key=True) # MODOMICS code - name: Mapped[str] = mapped_column(String(255), nullable=False) # NVARCHAR ? - short_name: Mapped[str] = mapped_column(String(32), nullable=False) # NVARCHAR ? + id: Mapped[str] = mapped_column( + String(128), primary_key=True, autoincrement=False + ) # MODOMICS code + name: Mapped[str] = mapped_column(String(255), nullable=False) + short_name: Mapped[str] = mapped_column(String(32), nullable=False) moiety: Mapped[str] = mapped_column(String(32), nullable=False) modifications: Mapped[List["Modification"]] = relationship( @@ -28,11 +30,15 @@ class Modification(Base): __tablename__ = "modification" id: Mapped[int] = mapped_column(primary_key=True) - rna: Mapped[str] = mapped_column(String(32), nullable=False) modomics_id: Mapped[str] = mapped_column(ForeignKey("modomics.id")) + rna: Mapped[str] = mapped_column(String(32), nullable=False) + + __table_args__ = (UniqueConstraint(modomics_id, rna),) inst_modomics: Mapped["Modomics"] = relationship(back_populates="modifications") - selections: Mapped[List["Selection"]] = relationship(back_populates="modifications") + selections: Mapped[List["Selection"]] = relationship( + back_populates="inst_modification" + ) class DetectionMethod(Base): @@ -40,7 +46,7 @@ class DetectionMethod(Base): __tablename__ = "method" - id: Mapped[int] = mapped_column(primary_key=True) + id: Mapped[str] = mapped_column(String(8), primary_key=True, autoincrement=False) cls: Mapped[str] = mapped_column(String(32), nullable=False) meth: Mapped[str] = mapped_column(String(128), nullable=False) @@ -55,15 +61,93 @@ class DetectionTechnology(Base): __tablename__ = "technology" id: Mapped[int] = mapped_column(primary_key=True) - tech: Mapped[str] = mapped_column(String(255), nullable=False) method_id: Mapped[int] = mapped_column(ForeignKey("method.id")) + tech: Mapped[str] = mapped_column(String(255), nullable=False, index=True) + + __table_args__ = (UniqueConstraint(method_id, tech),) inst_method: Mapped["DetectionMethod"] = relationship(back_populates="technologies") - selections: Mapped[List["Selection"]] = relationship(back_populates="technologies") + selections: Mapped[List["Selection"]] = relationship( + back_populates="inst_technology" + ) + + +class Taxonomy(Base): + """Taxonomic rank (up to phylum)""" + + __tablename__ = "taxonomy" + + id: Mapped[str] = mapped_column(String(8), primary_key=True, autoincrement=False) + domain: Mapped[str] = mapped_column(String(32), nullable=False) + kingdom: Mapped[str] = mapped_column(String(32), nullable=True) + phylum: Mapped[str] = mapped_column(String(32), nullable=True) + + taxa: Mapped[List["Taxa"]] = relationship(back_populates="inst_taxonomy") + + +class Taxa(Base): + """NCBI Taxonomy i.e. species records""" + + __tablename__ = "ncbi_taxa" + + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=False) # NCBI Taxid + name: Mapped[str] = mapped_column(String(128), nullable=False) + short_name: Mapped[str] = mapped_column(String(128), nullable=False) + taxonomy_id: Mapped[int] = mapped_column(ForeignKey("taxonomy.id")) + + inst_taxonomy: Mapped["Taxonomy"] = relationship(back_populates="taxa") + organisms: Mapped[List["Organism"]] = relationship(back_populates="inst_taxa") + assemblies: Mapped[List["Assembly"]] = relationship(back_populates="inst_taxa") + annotations: Mapped[List["Annotation"]] = relationship(back_populates="inst_taxa") + datasets: Mapped[List["Dataset"]] = relationship(back_populates="inst_taxa") + + +class Organism(Base): + """Organism (cell, tissue, organ) per species (taxa_id)""" + + __tablename__ = "organism" + + id: Mapped[int] = mapped_column(primary_key=True) + taxa_id: Mapped[int] = mapped_column(ForeignKey("ncbi_taxa.id"), index=True) + cto: Mapped[str] = mapped_column(String(255), nullable=False, index=True) + + __table_args__ = (UniqueConstraint(taxa_id, cto),) + + inst_taxa: Mapped["Taxa"] = relationship(back_populates="organisms") + selections: Mapped[List["Selection"]] = relationship(back_populates="inst_organism") + + +class Selection(Base): + """Association: Modification, DetectionTechnology, Organism""" + + __tablename__ = "selection" + + id: Mapped[int] = mapped_column(primary_key=True) + modification_id: Mapped[int] = mapped_column(ForeignKey("modification.id")) + technology_id: Mapped[int] = mapped_column(ForeignKey("technology.id")) + organism_id: Mapped[int] = mapped_column(ForeignKey("organism.id")) + + __table_args__ = ( + Index( + "idx_select", "modification_id", "technology_id", "organism_id", unique=True + ), + ) + + inst_modification: Mapped["Modification"] = relationship( + back_populates="selections" + ) + inst_technology: Mapped["DetectionTechnology"] = relationship( + back_populates="selections" + ) + inst_organism: Mapped["Organism"] = relationship(back_populates="selections") + + associations: Mapped[List["Association"]] = relationship( + back_populates="inst_selection" + ) class Assembly(Base): - """Assembly""" + """Assembly releases""" __tablename__ = "assembly" @@ -74,8 +158,9 @@ class Assembly(Base): String(12), nullable=False ) # current is assembly_version.version_num + __table_args__ = (UniqueConstraint(name, taxa_id, version),) + inst_taxa: Mapped["Taxa"] = relationship(back_populates="assemblies") - datasets: Mapped[List["Dataset"]] = relationship(back_populates="inst_assembly") class AssemblyVersion(Base): @@ -98,6 +183,8 @@ class Annotation(Base): String(12), nullable=False ) # current is annotation_version.version_num + __table_args__ = (UniqueConstraint(release, taxa_id, version),) + inst_taxa: Mapped["Taxa"] = relationship(back_populates="annotations") annotations: Mapped[List["GenomicAnnotation"]] = relationship( back_populates="inst_annotation" @@ -117,81 +204,21 @@ class GenomicAnnotation(Base): __tablename__ = "genomic_annotation" - id: Mapped[int] = mapped_column(primary_key=True) - data_id: Mapped[int] = mapped_column(ForeignKey("data.id")) + id: Mapped[str] = mapped_column( + String(128), primary_key=True, autoincrement=False + ) # Ensembl ID annotation_id: Mapped[int] = mapped_column(ForeignKey("annotation.id")) - feature: Mapped[str] = mapped_column(String(32), nullable=False) - gene_name: Mapped[str] = mapped_column(String(128), nullable=True) - gene_id: Mapped[str] = mapped_column(String(128), nullable=True) - gene_biotype: Mapped[str] = mapped_column(String(255), nullable=True) + name: Mapped[str] = mapped_column( + String(128), nullable=True, index=True + ) # Ensembl gene name + biotype: Mapped[str] = mapped_column( + String(255), nullable=True, index=True + ) # Ensembl gene biotype - inst_data: Mapped["Data"] = relationship(back_populates="annotations") inst_annotation: Mapped["Annotation"] = relationship(back_populates="annotations") - -class Taxonomy(Base): - """Taxonomic rank (up to phylum) for the TreeSelect component""" - - __tablename__ = "taxonomy" - - id: Mapped[int] = mapped_column(primary_key=True) - domain: Mapped[str] = mapped_column(String(32), nullable=False) - kingdom: Mapped[str] = mapped_column(String(32), nullable=True) - phylum: Mapped[str] = mapped_column(String(32), nullable=True) - - taxa: Mapped[List["Taxa"]] = relationship(back_populates="inst_taxonomy") - - -class Taxa(Base): - """NCBI Taxonomy""" - - __tablename__ = "ncbi_taxa" - - id: Mapped[int] = mapped_column(primary_key=True) # NCBI Taxid - name: Mapped[str] = mapped_column(String(128), nullable=False) - short_name: Mapped[str] = mapped_column(String(128), nullable=False) - taxonomy_id: Mapped[int] = mapped_column(ForeignKey("taxonomy.id")) - - assemblies: Mapped[List["Assembly"]] = relationship(back_populates="inst_taxa") - annotations: Mapped[List["Annotation"]] = relationship(back_populates="inst_taxa") - inst_taxonomy: Mapped["Taxonomy"] = relationship(back_populates="taxa") - organisms: Mapped[List["Organism"]] = relationship(back_populates="inst_taxa") - datasets: Mapped[List["Dataset"]] = relationship(back_populates="inst_taxa") - - -class Organism(Base): - """Organism (cell, tissue, organ)""" - - __tablename__ = "organism" - - id: Mapped[int] = mapped_column(primary_key=True) - cto: Mapped[str] = mapped_column(String(255), nullable=False) - taxa_id: Mapped[int] = mapped_column(ForeignKey("ncbi_taxa.id")) - - inst_taxa: Mapped["Taxa"] = relationship(back_populates="organisms") - selections: Mapped[List["Selection"]] = relationship(back_populates="organisms") - - -class Selection(Base): - """Association: Modification, DetectionTechnology, Organism""" - - __tablename__ = "selection" - - id: Mapped[int] = mapped_column(primary_key=True) - modification_id: Mapped[int] = mapped_column(ForeignKey("modification.id")) - technology_id: Mapped[int] = mapped_column(ForeignKey("technology.id")) - organism_id: Mapped[int] = mapped_column(ForeignKey("organism.id")) - - __table_args__ = (UniqueConstraint(modification_id, technology_id, organism_id),) - - modifications: Mapped["Modification"] = relationship(back_populates="selections") - technologies: Mapped["DetectionTechnology"] = relationship( - back_populates="selections" - ) - organisms: Mapped["Organism"] = relationship(back_populates="selections") - - associations: Mapped[List["Association"]] = relationship( - back_populates="selections" + annotations: Mapped[List["DataAnnotation"]] = relationship( + back_populates="inst_genomic" ) @@ -203,8 +230,8 @@ class Project(Base): __tablename__ = "project" id: Mapped[str] = mapped_column( - String(8), primary_key=True - ) # SMID - NOT INCREMENT, BUT WHAT? + String(8), primary_key=True, autoincrement=False + ) # SMID title: Mapped[str] = mapped_column(String(255), nullable=False) summary: Mapped[str] = mapped_column(Text) # TEXT ? contact_id: Mapped[int] = mapped_column(ForeignKey("project_contact.id")) @@ -215,7 +242,7 @@ class Project(Base): inst_contact: Mapped["ProjectContact"] = relationship(back_populates="projects") - sources: Mapped["ProjectSource"] = relationship(back_populates="inst_project") + sources: Mapped[List["ProjectSource"]] = relationship(back_populates="inst_project") datasets: Mapped[List["Dataset"]] = relationship(back_populates="inst_project") @@ -239,48 +266,31 @@ class ProjectSource(Base): id: Mapped[int] = mapped_column(primary_key=True) project_id: Mapped[str] = mapped_column(ForeignKey("project.id")) # SMID - doi: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) # NVARCHAR ? + doi: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) pmid: Mapped[Optional[int]] = mapped_column(nullable=True) - inst_project: Mapped[List["Project"]] = relationship(back_populates="sources") + inst_project: Mapped["Project"] = relationship(back_populates="sources") -# bedRMod metadata - redundant taxid, assembly at upload, lifted is None (==assembly) or final assembly -# for RNA type/mod, technology, and tissue/cell/organ, use an association table/model -# allowing, in principle e.g. future change, to have a given bedRMod/dataset to have 1+ RNA type/mod (and technology and/or e.g. cell type) -# although at upload we'd only allow 1+ RNA type/mod class Dataset(Base): """Dataset metadata""" __tablename__ = "dataset" - id: Mapped[str] = mapped_column(String(12), primary_key=True) # EUFID - project_id: Mapped[str] = mapped_column(ForeignKey("project.id")) # SMID + id: Mapped[str] = mapped_column( + String(12), primary_key=True, autoincrement=False + ) # EUFID + project_id: Mapped[str] = mapped_column( + ForeignKey("project.id"), index=True + ) # SMID title: Mapped[str] = mapped_column(String(255), nullable=False) - # header information - bedRMod - # read from header or selected from dropdown options (SMID/project, RNA type/mod, technology, organism/cto, assembly) - # we can add a "validator" to check against the file header/records for required fields that are selected at upload - # e.g. RNA mod, taxid, assembly + # TODO + lifted: Mapped[Optional[bool]] = mapped_column(default=False, nullable=False) + # bedRMod header file_format: Mapped[str] = mapped_column(String(32), nullable=False) modification_type: Mapped[str] = mapped_column( String(32), nullable=False - ) # DNA or RNA - in principle the latter only... - taxa_id: Mapped[int] = mapped_column( - ForeignKey("ncbi_taxa.id") - ) # redundant - Selection or allow for "double checking" - assembly_id: Mapped[int] = mapped_column(ForeignKey("assembly.id")) - # does this work NULL FOREIGN KEY ? or nullable=False, but leave Optional, and fill with assembly i.e. this becomes effectively assembly - # lifted: Mapped[Optional[int]] = mapped_column(ForeignKey("assembly.id"), nullable=True) - # in fact, for a given DB version, assembly is fixed, i.e. we know it if we know taxid - # so keep assembly as the one selected (matched with header), and flag (Boolean) if it was lifted or not - # when "upgrading" the DB, all data is lifted from old to new assembly, and flag is set to True for all -> dump old DB with stamp - # so assembly is just really recorded for data tracing - lifted: Mapped[Optional[bool]] = mapped_column(default=False, nullable=False) - annotation_source: Mapped[str] = mapped_column(String(128), nullable=True) - annotation_version: Mapped[str] = mapped_column( - String(128), nullable=True - ) # VARCHAR or INTEGER - can we fix this at upload ? - # all optional - from header only + ) # DNA or RNA sequencing_platform: Mapped[str] = mapped_column(String(255), nullable=True) basecalling: Mapped[str] = mapped_column(Text, nullable=True) bioinformatics_workflow: Mapped[str] = mapped_column(Text, nullable=True) @@ -288,26 +298,10 @@ class Dataset(Base): external_source: Mapped[str] = mapped_column(String(255), nullable=True) inst_project: Mapped["Project"] = relationship(back_populates="datasets") - inst_assembly: Mapped["Assembly"] = relationship(back_populates="datasets") - inst_taxa: Mapped["Taxa"] = relationship(back_populates="datasets") - - associations: Mapped[List["Association"]] = relationship(back_populates="datasets") - records: Mapped[List["Data"]] = relationship(back_populates="inst_dataset") - - -class Association(Base): - """Association: Dataset, Selection""" - __tablename__ = "association" - - id: Mapped[int] = mapped_column(primary_key=True) - dataset_id: Mapped[str] = mapped_column(ForeignKey("dataset.id")) - selection_id: Mapped[int] = mapped_column(ForeignKey("selection.id")) - - __table_args__ = (UniqueConstraint(dataset_id, selection_id),) - - datasets: Mapped["Dataset"] = relationship(back_populates="associations") - selections: Mapped["Selection"] = relationship(back_populates="associations") + associations: Mapped[List["Association"]] = relationship( + back_populates="inst_dataset" + ) class Data(Base): @@ -316,12 +310,12 @@ class Data(Base): __tablename__ = "data" id: Mapped[int] = mapped_column(primary_key=True) - dataset_id: Mapped[str] = mapped_column(ForeignKey("dataset.id")) # EUFID + association_id: Mapped[str] = mapped_column(ForeignKey("dataset.id"), index=True) # bedRMod fields - order must match bedRMod columns? chrom: Mapped[str] = mapped_column(String(128), nullable=False) start: Mapped[int] = mapped_column(nullable=False) end: Mapped[int] = mapped_column(nullable=False) - name: Mapped[str] = mapped_column(String(32), nullable=False) # as is ? + name: Mapped[str] = mapped_column(String(32), nullable=False) score: Mapped[int] = mapped_column(nullable=False) strand: Mapped[str] = mapped_column(String(1), nullable=False) thick_start: Mapped[int] = mapped_column(nullable=False) @@ -330,7 +324,50 @@ class Data(Base): coverage: Mapped[int] = mapped_column(nullable=False) frequency: Mapped[int] = mapped_column(nullable=False) - inst_dataset: Mapped["Dataset"] = relationship(back_populates="records") - annotations: Mapped[List["GenomicAnnotation"]] = relationship( + __table_args__ = ( + Index("idx_data_sort", "chrom", "start", "score", "coverage", "frequency"), + ) + + annotations: Mapped[List["DataAnnotation"]] = relationship( back_populates="inst_data" ) + + inst_association: Mapped["Association"] = relationship(back_populates="data") + + +class Association(Base): + """Association: Dataset, Selection""" + + __tablename__ = "association" + + id: Mapped[int] = mapped_column(primary_key=True) + dataset_id: Mapped[str] = mapped_column(ForeignKey("dataset.id")) + selection_id: Mapped[int] = mapped_column(ForeignKey("selection.id")) + + __table_args__ = (Index("idx_assoc", "dataset_id", "selection_id", unique=True),) + + inst_dataset: Mapped["Dataset"] = relationship(back_populates="associations") + inst_selection: Mapped["Selection"] = relationship(back_populates="associations") + + data: Mapped[List["Data"]] = relationship(back_populates="inst_association") + + +class DataAnnotation(Base): + """Association: GenomicAnnotation, Data""" + + __tablename__ = "data_annotation" + + id: Mapped[int] = mapped_column(primary_key=True) + gene_id: Mapped[str] = mapped_column( + ForeignKey("genomic_annotation.id"), index=True + ) + data_id: Mapped[int] = mapped_column(ForeignKey("data.id"), index=True) + feature: Mapped[str] = mapped_column(String(32), nullable=False, index=True) + + # __table_args__ = (Index("idx_data_ann", "gene_id", "data_id", "feature", unique=True),) + __table_args__ = (UniqueConstraint(gene_id, data_id, feature),) + + inst_genomic: Mapped["GenomicAnnotation"] = relationship( + back_populates="annotations" + ) + inst_data: Mapped["Data"] = relationship(back_populates="annotations") diff --git a/server/src/scimodom/services/annotation.py b/server/src/scimodom/services/annotation.py index 61a0df81..5cf11490 100644 --- a/server/src/scimodom/services/annotation.py +++ b/server/src/scimodom/services/annotation.py @@ -408,8 +408,9 @@ def annotate_data(self): msg = "... done! Now inserting into DB." logger.debug(msg) - annotated = [ - records_factory("GenomicAnnotation", r)._asdict() for r in annotated - ] - self._session.execute(insert(GenomicAnnotation), annotated) - self._session.commit() + # TODO + # annotated = [ + # records_factory("GenomicAnnotation", r)._asdict() for r in annotated + # ] + # self._session.execute(insert(GenomicAnnotation), annotated) + # self._session.commit() diff --git a/server/src/scimodom/services/dataset.py b/server/src/scimodom/services/dataset.py index a49e6958..e989dc1b 100644 --- a/server/src/scimodom/services/dataset.py +++ b/server/src/scimodom/services/dataset.py @@ -123,16 +123,22 @@ def _get_selection(self) -> None: .join_from( Selection, Modification, - Selection.modification_id == Modification.id, + Selection.inst_modification, + # Selection.modification_id == Modification.id, ) .join_from( Selection, DetectionTechnology, - Selection.technology_id == DetectionTechnology.id, + Selection.inst_technology, + # Selection.technology_id == DetectionTechnology.id, ) - .join_from(Selection, Organism, Selection.organism_id == Organism.id) + .join_from(Selection, Organism, Selection.inst_organism) + # .join_from(Selection, Organism, Selection.organism_id == Organism.id) .join_from( - Modification, Modomics, Modification.modomics_id == Modomics.id + Modification, + Modomics, + Modification.inst_modomics + # Modification, Modomics, Modification.modomics_id == Modomics.id ) .where(Selection.id == selection_id) ) @@ -170,8 +176,10 @@ def _validate_entry(self) -> None: """Validate new dataset using SMID, title, assembly, and selection.""" for selection_id, selection in self._selection_ids.items(): query = ( - select(func.distinct(Dataset.id)) - .outerjoin(Association, Dataset.id == Association.dataset_id) + select(func.distinct(Dataset.id)).join( + Association, Association.inst_dataset, isouter=True + ) + # .outerjoin(Association, Dataset.id == Association.dataset_id) .where( Association.selection_id == selection_id, Dataset.project_id == self._smid, @@ -202,49 +210,6 @@ def _validate_assembly(self) -> None: self._lifted = True print("Some message... do something when?") - def _create_eufid(self) -> None: - """Create new dataset using EUFimporter class.""" - query = select(Dataset.id) - eufids = self._session.execute(query).scalars().all() - self._eufid = utils.gen_short_uuid(self.EUFID_LENGTH, eufids) - - importer = EUFImporter( - self._session, - self._filen, - self._handle, - self._smid, - self._eufid, - self._title, - self._taxa_id, - self._assembly_id, - self._lifted, - data_path=self._data_path, - ) - importer.parseEUF() - - modifications = {s[0] for s in self._selection_ids.values()} - modifications = {m.lower() for m in modifications} - modifications_from_file = importer.get_modifications_from_file() - modifications_from_file = {m.lower() for m in modifications_from_file} - symdiff = modifications.symmetric_difference(modifications_from_file) - if symdiff: - msg = ( - f"Selection for modification and modifications read from {self._filen} " - f"differ: {symdiff}. Aborting transaction!" - ) - raise Exception(msg) - - selection_str = " and ".join( - [", ".join(map(str, s)) for s in self._selection_ids.values()] - ) - msg = ( - f"Adding dataset {self._eufid} to project {self._smid} with title = {self._title}, " - f"and the following selection: {selection_str}." - ) - logger.info(msg) - # confirm ? - importer.close() - def _add_association(self) -> None: """Create new association entry for dataset. @@ -253,7 +218,63 @@ def _add_association(self) -> None: for selection_id in self._selection_ids.keys(): association = Association(dataset_id=self._eufid, selection_id=selection_id) self._session.add(association) - self._session.commit() + self._session.flush() + + def _create_eufid(self) -> None: + """Create new dataset using EUFimporter class.""" + + try: + query = select(Dataset.id) + eufids = self._session.execute(query).scalars().all() + self._eufid = utils.gen_short_uuid(self.EUFID_LENGTH, eufids) + + self._add_association() + + importer = EUFImporter( + self._session, + self._filen, + self._handle, + self._smid, + self._eufid, + self._title, + self._taxa_id, + self._assembly_id, + self._lifted, + # TODO assume modification is unique for any combination of RNA, tech, and cto... + # i.e. a dataset can have 1+ modification, but only 1 RNA type, 1 technology, and 1 cto + # so we cannot have e.g. twice m6A + {k: v[0] for k, v in self._selection_ids.items()}, + data_path=self._data_path, + ) + importer.parseEUF() + + # TODO + # modifications = {s[0] for s in self._selection_ids.values()} + # modifications = {m.lower() for m in modifications} + # modifications_from_file = importer.get_modifications_from_file() + # modifications_from_file = {m.lower() for m in modifications_from_file} + # symdiff = modifications.symmetric_difference(modifications_from_file) + # if symdiff: + # msg = ( + # f"Selection for modification and modifications read from {self._filen} " + # f"differ: {symdiff}. Aborting transaction!" + # ) + # raise Exception(msg) + + selection_str = " and ".join( + [", ".join(map(str, s)) for s in self._selection_ids.values()] + ) + msg = ( + f"Adding dataset {self._eufid} to project {self._smid} with title = {self._title}, " + f"and the following selection: {selection_str}." + ) + logger.info(msg) + except: + self._session.rollback() + raise + else: + # confirm ? + importer.close() def create_dataset(self) -> str: """Dataset constructor. @@ -266,7 +287,6 @@ def create_dataset(self) -> str: self._validate_assembly() self._create_eufid() - self._add_association() return self._eufid diff --git a/server/src/scimodom/services/importer.py b/server/src/scimodom/services/importer.py index d8ff06a3..8571d296 100644 --- a/server/src/scimodom/services/importer.py +++ b/server/src/scimodom/services/importer.py @@ -65,6 +65,8 @@ class EUFImporter: :type assembly_id: int :param lifted: Is Assembly ID (version) different from DB assembly version? (dataset marked for liftover) :type lifted: bool + :param association: Association ids and modification short names + :type association: dict :param data_path: DATA_PATH (AnnotationService) :type data_path: str | Path | None :param SPECS: Default specs @@ -114,6 +116,7 @@ def __init__( taxa_id: int, assembly_id: int, lifted: bool, + association: dict, data_path: str | Path | None = None, ) -> None: """Initializer method.""" @@ -145,6 +148,7 @@ def __init__( self._taxa_id = taxa_id self._assembly_id = assembly_id self._lifted = lifted + self._association = association self._data_path = data_path if self._data_path is None: @@ -247,6 +251,7 @@ def _add_missing_header_fields(self, assembly: str) -> None: self._header["project_id"] = self._smid self._header["title"] = self._title self._header["file_format"] = self._version + # TODO if not self._header["taxa_id"] == self._taxa_id: msg = ( f"Organism={self._header['taxa_id']} from {self._filen} differs " @@ -254,18 +259,18 @@ def _add_missing_header_fields(self, assembly: str) -> None: f"Data import will continue with {self._taxa_id}..." ) logger.warning(msg) - self._header["taxa_id"] = self._taxa_id - query = queries.query_column_where( - "Assembly", "name", filters={"id": self._assembly_id} - ) - assembly_name = self._session.execute(query).scalar() - msg = ( - f"Overwriting header: assembly={assembly} from {self._filen} " - f"with {assembly_name} given at upload. Data import will continue..." - ) - logger.warning(msg) - # assign id now - self._header["assembly_id"] = self._assembly_id + # self._header["taxa_id"] = self._taxa_id + # query = queries.query_column_where( + # "Assembly", "name", filters={"id": self._assembly_id} + # ) + # assembly_name = self._session.execute(query).scalar() + # msg = ( + # f"Overwriting header: assembly={assembly} from {self._filen} " + # f"with {assembly_name} given at upload. Data import will continue..." + # ) + # logger.warning(msg) + # # assign id now + # self._header["assembly_id"] = self._assembly_id self._header["lifted"] = self._lifted def _munge_header(self, lines: list[str]) -> str: @@ -309,7 +314,8 @@ def _get_header(header: str) -> str: raise SpecsError(f" Missing or misformatted header: {h} ") return s[0] - skip_header = ["fileformat", "assembly"] + skip_header = ["fileformat", "organism", "assembly"] + # skip_header = ["fileformat", "assembly"] self._header = { mapped_header: self._dtypes["Dataset"][mapped_header].__call__( _get_header(header) @@ -438,7 +444,16 @@ def _munge_values(self, values: list[str]) -> dict: c: self._dtypes["Data"][c].__call__(cvalues[i]) for i, c in enumerate(self._specs["columns"].values()) } - data["dataset_id"] = self._eufid + # TODO + # clean validation done in DataService + # data["dataset_id"] = self._eufid + try: + data["association_id"] = self._association[data["name"]] + except: + raise ValueError( + f"Skipping line {self._lino}, unrecognized modification {data['name']}" + ) + # format chrom field match = chrom_pattern.match(data["chrom"]) if match: @@ -663,7 +678,10 @@ def _munge_values(self, values: list[str]) -> dict: itertools.islice(self._specs["columns"].values(), self._num_col) ) } - data["dataset_id"] = self._dataset_id + # TODO + data["association_id"] = 1 # DOES IT MATTER??? + # data["dataset_id"] = self._dataset_id + # AD HOC * fill remaining columns based on query order * # ignore columns that are not queried! if self._num_col < 11: diff --git a/server/src/scimodom/services/project.py b/server/src/scimodom/services/project.py index 88390007..b5f5371c 100644 --- a/server/src/scimodom/services/project.py +++ b/server/src/scimodom/services/project.py @@ -195,24 +195,24 @@ def _add_selection(self) -> None: self._session.commit() organism_id = organism.id - # assembly - # TODO: liftover (here or at data upload) - name = d_organism["assembly"] - query = queries.query_column_where( - Assembly, "id", filters={"name": name, "taxa_id": taxa_id} - ) - assembly_id = self._session.execute(query).scalar() - if not assembly_id: - # add new version for new entry, presumably a lower assembly - # that will not be used (i.e. data must be lifted) - query = select(Assembly.version) - version_nums = self._session.execute(query).scalars().all() - version_num = utils.gen_short_uuid( - self.ASSEMBLY_NUM_LENGTH, version_nums - ) - assembly = Assembly(name=name, taxa_id=taxa_id, version=version_num) - self._session.add(assembly) - self._session.commit() + # # assembly + # # TODO: liftover (here or at data upload) + # name = d_organism["assembly"] + # query = queries.query_column_where( + # Assembly, "id", filters={"name": name, "taxa_id": taxa_id} + # ) + # assembly_id = self._session.execute(query).scalar() + # if not assembly_id: + # # add new version for new entry, presumably a lower assembly + # # that will not be used (i.e. data must be lifted) + # query = select(Assembly.version) + # version_nums = self._session.execute(query).scalars().all() + # version_num = utils.gen_short_uuid( + # self.ASSEMBLY_NUM_LENGTH, version_nums + # ) + # assembly = Assembly(name=name, taxa_id=taxa_id, version=version_num) + # self._session.add(assembly) + # self._session.commit() # selection query = queries.query_column_where( diff --git a/server/src/scimodom/utils/models.py b/server/src/scimodom/utils/models.py index f12f18f8..e09598ee 100644 --- a/server/src/scimodom/utils/models.py +++ b/server/src/scimodom/utils/models.py @@ -53,7 +53,8 @@ class Subtract(NamedTuple): name: _dtypes["name"] score: _dtypes["score"] strand: _dtypes["strand"] - dataset_id: _dtypes["dataset_id"] + # TODO + dataset_id: get_types("Dataset")["id"] # noqa: F821 coverage: _dtypes["coverage"] frequency: _dtypes["frequency"] @@ -88,7 +89,8 @@ class Intersect(NamedTuple): name: _dtypes["name"] score: _dtypes["score"] strand: _dtypes["strand"] - dataset_id: _dtypes["dataset_id"] + # TODO + dataset_id: get_types("Dataset")["id"] # noqa: F821 coverage: _dtypes["coverage"] frequency: _dtypes["frequency"] chrom_b: _dtypes["chrom"] @@ -97,7 +99,8 @@ class Intersect(NamedTuple): name_b: _dtypes["name"] score_b: _dtypes["score"] strand_b: _dtypes["strand"] - dataset_id_b: _dtypes["dataset_id"] + # TODO + dataset_id_b: get_types("Dataset")["id"] # noqa: F821 coverage_b: _dtypes["coverage"] frequency_b: _dtypes["frequency"] @@ -132,7 +135,8 @@ class Closest(NamedTuple): name: _dtypes["name"] score: _dtypes["score"] strand: _dtypes["strand"] - dataset_id: _dtypes["dataset_id"] + # TODO + dataset_id: get_types("Dataset")["id"] # noqa: F821 coverage: _dtypes["coverage"] frequency: _dtypes["frequency"] chrom_b: _dtypes["chrom"] @@ -141,41 +145,42 @@ class Closest(NamedTuple): name_b: _dtypes["name"] score_b: _dtypes["score"] strand_b: _dtypes["strand"] - dataset_id_b: _dtypes["dataset_id"] + # TODO + dataset_id_b: get_types("Dataset")["id"] # noqa: F821 coverage_b: _dtypes["coverage"] frequency_b: _dtypes["frequency"] distance: int -class GenomicAnnotation(NamedTuple): - """Named tuple for GenomicAnnotation records. - - :param chrom: Chromosome - :type chrom: Inferred from Data (str) - :param start: start - :type start: Inferred from Data (int) - :param end: end - :type end: Inferred from Data (int) - :param score: score - :type score: Inferred from Data (int) - :param strand: strand - :type strand: Inferred from Data (str) - :param dataset_id: dataset_id - :type dataset_id: Inferred from Data (str) - :param coverage: coverage - :type coverage: Inferred from Data (int) - :param frequency: frequency - :type frequency: Inferred from Data (int) - """ - - _dtypes = get_types("GenomicAnnotation") - - data_id: _dtypes["data_id"] - annotation_id: _dtypes["annotation_id"] - feature: _dtypes["feature"] - gene_name: _dtypes["gene_name"] # | None - gene_id: _dtypes["gene_id"] # | None - gene_biotype: _dtypes["gene_biotype"] # | None +# class GenomicAnnotation(NamedTuple): +# """Named tuple for GenomicAnnotation records. + +# :param chrom: Chromosome +# :type chrom: Inferred from Data (str) +# :param start: start +# :type start: Inferred from Data (int) +# :param end: end +# :type end: Inferred from Data (int) +# :param score: score +# :type score: Inferred from Data (int) +# :param strand: strand +# :type strand: Inferred from Data (str) +# :param dataset_id: dataset_id +# :type dataset_id: Inferred from Data (str) +# :param coverage: coverage +# :type coverage: Inferred from Data (int) +# :param frequency: frequency +# :type frequency: Inferred from Data (int) +# """ + +# _dtypes = get_types("GenomicAnnotation") + +# data_id: _dtypes["data_id"] +# annotation_id: _dtypes["annotation_id"] +# feature: _dtypes["feature"] +# gene_name: _dtypes["gene_name"] # | None +# gene_id: _dtypes["gene_id"] # | None +# gene_biotype: _dtypes["gene_biotype"] # | None def records_factory(instance_str: str, vals: Sequence[Any]):