diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..173b448 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +token* +node_modules \ No newline at end of file diff --git a/README.md b/README.md index 8d11a34..907c51e 100644 --- a/README.md +++ b/README.md @@ -10,16 +10,35 @@ 4. Drag your token files into the codespace root directory, they should be named token 6. Run the following commands in the terminal ```bash -run.sh +sh run.sh ``` +6.1. To check that everything is OK, run the following commands: +```bash +docker ps -a +docker logs [insert CONTAINER ID from previous command] +``` + 7. Open a new terminal window and run the following command to test the synthclient ```bash -curl -X POST -H "Content-Type: application/json" -d @test.json https://api.securedna.org/synth/v1/synth +sh simple_test.sh ``` -8. Try a more complex example by running the following command +8. Try a more complex example by running the following command. This example shows how to use the synthclient to process a FASTA file and calculate some statistics. ```bash npx ts-node test.ts ``` + +The output should look like this: +``` +=== Processing file: test.fasta +[ + { + organism: 'Ricin', + percentageHit: 11.182795698924732, + longestHitregion: 96, + longestUniqueHitRegion: 96 + } +] +``` 9. Enjoy! diff --git a/adv_test.ts b/adv_test.ts new file mode 100644 index 0000000..86ed6e2 --- /dev/null +++ b/adv_test.ts @@ -0,0 +1,339 @@ +const fs = require('node:fs/promises'); +const path = require ('node:path'); + + +let influenzaLargestCounter = 0; +let influenzaCounted = []; + +/** A request to the /v1/screen endpoint. */ +interface ApiRequest { + /** + * The input FASTA. This field MUST be included. + */ + fasta: string, + /** + * The screening region. This field MUST be included. + * See below for more details. + */ + region: "us" | "prc" | "eu" | "all"; + /** + * An optional arbitrary string that will be returned in the + * response. 
This field MAY be included. + * + * Note that this string may be logged in our backend, so be careful + * about including sensitive information (such as customer names). + */ + provider_reference?: string | null, +} + + +/** The top-level response. */ +export interface ApiResponse { + /** Whether synthesis should be allowed to proceed. */ + synthesis_permission: "granted" | "denied"; + /** If provided in the input, `provider_reference` will be + * returned here. `null` otherwise. + */ + + provider_reference?: string | null; + /** + * If `synthesis_permission` is `"denied"` due to one or more + * screening hits, this list will contain those hits, grouped + * by which record they occurred in. + */ + hits_by_record?: FastaRecordHits[]; + /** Any non-fatal warnings will be in this list. */ + warnings?: ErrorOrWarning[]; + /** + * Will contain fatal errors if `synthesis_permission + * is `"denied"` due to an error. + */ + errors?: ErrorOrWarning[]; +} + +/** Screening hits, grouped by which record they occurred in. */ +export interface FastaRecordHits { + /** The record header, possibly empty. */ + fasta_header: string; + /** Line range in FASTA input this record covers. */ + line_number_range: [number, number]; + /** The length of the record sequence. */ + sequence_length: number; + /** + * The hits that occurred in this record, grouped by similarity. + */ + hits_by_hazard: HazardHits[]; +} + +/** A list of hits grouped by similarity. */ +export interface HazardHits { + /** Whether this hit group matched nucleotides or amino acids. */ + type: "nuc" | "aa"; + /** + * Whether this hit group matched a hazard wild type + * (observed genome) or predicted functional variant + * (mutation SecureDNA believes would still be hazardous). + * This field is always `null` for `type: "nuc"` hit groups. + */ + is_wild_type: boolean | null; + /** + * A list of regions in the sequence that matched this + * hazard group. 
+ */ + + + hit_regions: HitRegion[]; + /** The most likely organism match for this hazard group. */ + most_likely_organism: Organism; + /** + * All possible hazard matches for this hazard group, + * including `most_likely_organism`. + */ + organisms: Organism[]; +} +/** A region of a record sequence that matched one or more hazards. */ +export interface HitRegion { + /** The matching subsequence. */ + seq: string; + /** The start of `seq` in the record sequence, in bp. */ + seq_range_start: number; + /** The (exclusive) end of `seq` in the record sequence, in bp. */ + seq_range_end: number; +} +/** Organism metadata. */ +export interface Organism { + /** The SecureDNA name for this organism. */ + name: string; + /** The high-level classification of this organism. */ + organism_type: "Virus" | "Toxin" | "Bacterium" | "Fungus"; + /** A list of NCBI accession numbers for this organism. */ + ans: string[]; + /** + * A list of SecureDNA tags for this organism. + * A table of current tags is included below, + * but more may be added in the future. + */ + tags: string[]; +} +/** An error or warning. */ +export type ErrorOrWarning = { + /** + * The diagnostic code. + * A list of current diagnostic codes is provided + * below, but more may be added in the future. + */ + diagnostic: string; + /** Additional information about the cause of this error. */ + additional_info: string; + /** + * If applicable, a line number range in the + * * input FASTA that caused this error or warning. 
/** URL of the screening endpoint that `screen` posts to. */
const SCREEN_ENDPOINT = 'http://localhost:80/v1/screen';

/** Half-open range `[start, end)` of sequence positions. */
interface Interval {
  start: number;
  end: number;
}

/** One row of the per-organism report printed by `screen`. */
interface OrganismSummary {
  organism: string;
  percentageHit: number;
  longestHitregion: number;
  longestUniqueHitRegion: number;
}

/**
 * Counts the sequence characters of a FASTA document.
 *
 * Header lines (those starting with `>`) and all whitespace are ignored, so
 * multi-record input and input without a leading header are both measured
 * correctly.
 */
function countSequenceLength(fasta: string): number {
  const whitespace = /\s/g;
  let total = 0;
  for (const line of fasta.split('\n')) {
    if (line.startsWith('>')) {
      continue;
    }
    total += line.replace(whitespace, '').length;
  }
  return total;
}

/**
 * Returns a new, start-sorted list in which every pair of overlapping or
 * touching intervals has been fused. The input is not modified.
 */
function mergeIntervals(intervals: readonly Interval[]): Interval[] {
  const sorted = [...intervals].sort((a, b) => a.start - b.start);
  const merged: Interval[] = [];
  for (const { start, end } of sorted) {
    const last = merged[merged.length - 1];
    if (last !== undefined && start <= last.end) {
      // `last` is a private copy, so widening it cannot leak into the caller's data.
      last.end = Math.max(last.end, end);
    } else {
      merged.push({ start, end });
    }
  }
  return merged;
}

/**
 * Returns the parts of `base` that no interval of `covered` overlaps.
 * Both arguments must already be merged and sorted (see `mergeIntervals`).
 */
function subtractIntervals(base: readonly Interval[], covered: readonly Interval[]): Interval[] {
  const remaining: Interval[] = [];
  for (const { start, end } of base) {
    let cursor = start;
    for (const other of covered) {
      if (other.end <= cursor) {
        continue; // entirely before the still-uncovered part
      }
      if (other.start >= end) {
        break; // sorted input: nothing further can overlap
      }
      if (other.start > cursor) {
        remaining.push({ start: cursor, end: other.start });
      }
      cursor = Math.max(cursor, other.end);
      if (cursor >= end) {
        break;
      }
    }
    if (cursor < end) {
      remaining.push({ start: cursor, end });
    }
  }
  return remaining;
}

/** Length of the longest interval in the list, or 0 for an empty list. */
function longestLength(intervals: readonly Interval[]): number {
  return intervals.reduce((best, { start, end }) => Math.max(best, end - start), 0);
}

/**
 * Builds the per-organism report from a screening response.
 *
 * Hits are grouped by `most_likely_organism.name`. For every organism the
 * report contains the share of the input that is covered by its hits, its
 * longest merged hit region, and its longest region that is covered by no
 * other organism. Rows are ordered by longest hit region, descending.
 *
 * @param response parsed body returned by the screening endpoint
 * @param totalSequenceLength number of sequence characters in the submitted FASTA
 */
function summarizeHits(response: ApiResponse, totalSequenceLength: number): OrganismSummary[] {
  const rawByOrganism: Record<string, Interval[]> = {};

  // Hit coordinates are relative to their own record, so each record is
  // shifted into its own slice of a shared coordinate space before pooling.
  let recordOffset = 0;
  for (const record of response.hits_by_record ?? []) {
    for (const hazard of record.hits_by_hazard) {
      const organism = hazard.most_likely_organism.name;
      if (rawByOrganism[organism] === undefined) {
        rawByOrganism[organism] = [];
      }
      for (const region of hazard.hit_regions) {
        rawByOrganism[organism].push({
          start: region.seq_range_start + recordOffset,
          end: region.seq_range_end + recordOffset,
        });
      }
    }
    // The extra 1 keeps a hit ending a record from fusing with one starting the next.
    recordOffset += record.sequence_length + 1;
  }

  const organisms = Object.keys(rawByOrganism);
  const mergedByOrganism: Record<string, Interval[]> = {};
  for (const organism of organisms) {
    mergedByOrganism[organism] = mergeIntervals(rawByOrganism[organism]);
  }

  const summaries = organisms.map((organism): OrganismSummary => {
    const own = mergedByOrganism[organism];

    const foreignRaw: Interval[] = [];
    for (const other of organisms) {
      if (other !== organism) {
        foreignRaw.push(...mergedByOrganism[other]);
      }
    }
    // A position counts as unique only when no other organism covers it.
    const unique = subtractIntervals(own, mergeIntervals(foreignRaw));

    const totalHitLength = own.reduce((sum, { start, end }) => sum + (end - start), 0);
    return {
      organism,
      percentageHit: totalSequenceLength > 0 ? (totalHitLength / totalSequenceLength) * 100 : 0,
      longestHitregion: longestLength(own),
      longestUniqueHitRegion: longestLength(unique),
    };
  });

  return summaries.sort((a, b) => b.longestHitregion - a.longestHitregion);
}

/**
 * Screens one FASTA document and prints per-organism hit statistics.
 *
 * The document is posted to `SCREEN_ENDPOINT` with region `all`; the report
 * produced by `summarizeHits` is written with `console.log`. Errors listed in
 * the response body are written with `console.error` before the report.
 *
 * @param name label of the input (the file name), used in diagnostics only
 * @param fasta the FASTA text to screen
 */
async function screen(name: string, fasta: string): Promise<void> {
  const request: ApiRequest = {
    fasta,
    region: 'all',
    provider_reference: 'test',
  };

  const res = await fetch(SCREEN_ENDPOINT, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(request),
  });
  const json: ApiResponse = await res.json();

  if (json.errors !== undefined && json.errors.length > 0) {
    console.error(`Screening of ${name} reported errors:`, json.errors);
  }

  console.log(summarizeHits(json, countSequenceLength(fasta)));
}

/**
 * Screens every non-hidden file in `./fasta`, one after another.
 *
 * Any failure (directory listing, file read, or screening) is logged and
 * stops the run; it is not rethrown.
 */
async function processFastaFiles(): Promise<void> {
  try {
    const fastaDirectory = './fasta';
    const files: string[] = await fs.readdir(fastaDirectory);

    for (const file of files) {
      if (file.startsWith('.')) {
        continue; // skip dotfiles such as .DS_Store
      }
      const filePath = path.join(fastaDirectory, file);
      const content: string = await fs.readFile(filePath, 'utf8');

      console.log("=== Processing file: ", file);
      await screen(file, content);
    }
  } catch (error) {
    console.error('Error processing FASTA files:', error);
  }
}

processFastaFiles();
+gtgcaactggtggagtccgggggaggcttagttcagcctggggggtccctgagactctcc +tgcgcagcctctggattcgcctacagtagtttttggatgcactgggtccgccaagctcca +gggaggggtctggtgtgggtctcacgtattaatcctgatgggagaatcacagtctacgcg +gacgccgtaaagggccgattcaccatctccagagacaacgccaagaacacgctctatctc +caaatgaacaacctgagagccgaggacacggctgtttattactgtgcaagagggacacga +tttctggagttgacttctaggggacaaatggaccagtggggccagggaaccctggtcact +gtctcctcagcctccaccaagggcccatcggtcttccccctggcaccctcctccaagagc +acctctgggggcacagcggccgtctcaaagcccCGTgggaacactatagtcatctggttt +tacgccgttgcgacctggttaTTCttcgggagtacgagtggctggagttttaccctcgaa +gacaacaatGTCtttccgaagcagtatccgatcattaatttcacaactgcaggagcgATC +gttcaatcatataccaacttcattcgggcggtaagaggacgccttGCCacaggtgcagac +gtgcggcacgagatccccgttctcccgaatcggGCGggccttcccatcaatcagcgcttc +atactggtggagttgagcaacTACgccgaactaagtgtcacgctcgcactcgacgttaca +aatgcttacATGgttggttatcgagcaggtaactctgccttttgagacgaaCACCTGGGC +TGCCTGGTCAAGGACTACTTCCCCGAACCGGTGACGGTGTCGTGGAACTCAGGCGCCCTG +ACCAGCGGCGTGCACACCTTCCCGGCTGTCCTACAGTCCTCAGGACTCTACTCCCTCAGC +AGCGTGGTGACCGTGCCCTCCAGCAGCTTGGGCACCCAGACCTACATCTGCAACGTGAAT +CACAAGCCCAGCAACACCAAGGTGGACAAGAAAGTTGAGCCCAAATCTTGTGACAAAACT +CACACATGCCCACCGTGCCCAGCACCTGAACTCCTGGGGGGACCGTCAGTCTTCCTCTTC +CCCCCAAAACCCAAGGACACCCTCATGATCTCCCGGACCCCTGAGGTCACATGCGTGGTG +GTGGACGTGAGCCACGAAGACCCTGAGGTCAAGTTCAACTGGTACGTGGACGGCGTGGAG +GTGCATAATGCCAAGACAAAGCCGCGGGAGGAGCAGTACAACAGCACGTACCGTGTGGTC +AGCGTCCTCACCGTC diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..152aece --- /dev/null +++ b/package-lock.json @@ -0,0 +1,34 @@ +{ + "name": "securedna-synthclien-template", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "devDependencies": { + "@types/node": "^7.10.14", + "typescript": "^5.5.4" + } + }, + "node_modules/@types/node": { + "version": "7.10.14", + "resolved": "https://registry.npmjs.org/@types/node/-/node-7.10.14.tgz", + "integrity": "sha512-29GS75BE8asnTno3yB6ubOJOO0FboExEqNJy4bpz0GSmW/8wPTNL4h9h63c6s1uTrOopCmJYe/4yJLh5r92ZUA==", + "dev": true, + 
"license": "MIT" + }, + "node_modules/typescript": { + "version": "5.6.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.3.tgz", + "integrity": "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..6f33e13 --- /dev/null +++ b/package.json @@ -0,0 +1,6 @@ +{ + "devDependencies": { + "@types/node": "^7.10.14", + "typescript": "^5.5.4" + } +} diff --git a/run.sh b/run.sh index 2ca06f3..3a0745e 100644 --- a/run.sh +++ b/run.sh @@ -2,7 +2,7 @@ docker run --name synthclient \ --env SECUREDNA_SYNTHCLIENT_TOKEN_FILE="/certs/token.st" \ --env SECUREDNA_SYNTHCLIENT_KEYPAIR_FILE="/certs/token.priv" \ --env SECUREDNA_SYNTHCLIENT_KEYPAIR_PASSPHRASE_FILE="/certs/token.passphrase" \ ---volume /workspaces/sdna/:/certs/:z \ +--volume ./:/certs/:z \ --detach \ -p 80:80 \ ghcr.io/securedna/synthclient \ diff --git a/simple_test.sh b/simple_test.sh new file mode 100644 index 0000000..abc1f3e --- /dev/null +++ b/simple_test.sh @@ -0,0 +1,10 @@ +curl "localhost:80/v1/screen" \ +--header "Content-Type: application/json" \ +--no-progress-meter \ +--data-raw ' +{ +"fasta": ">NC_007373.1\nGAATCGCAATTAACAATAACTAAAGAGAAAAAAGAAGAACTC", +"region": "all", +"provider_reference": "documentation" +} +' | jq \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..98840de --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,9 @@ +{ + + "compilerOptions": { + "module": "commonjs", + /* List of folders to include type definitions from. */ + "lib": ["es2017.object", "es2019.Array", "DOM"] + }, + +} \ No newline at end of file