From f996724839b0ae19f589b90ebab94c1fed7c1d52 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 20 Dec 2023 10:50:39 +0100 Subject: [PATCH 01/27] Prepare next release 2.12.2-SNAPSHOT --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 347042c9..0c0fb358 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 2.12.1 + 2.12.2-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index f0b0019e..44fad6f3 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.1 + 2.12.2-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 81a8b17b..34ead415 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.1 + 2.12.2-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index e2438f80..87cfcc16 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.1 + 2.12.2-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 97688342..5d3d9743 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.1 + 2.12.2-SNAPSHOT pom Biodata @@ -38,7 +38,7 @@ - 4.12.0 + 4.12.1-SNAPSHOT 2.11.4 4.4 1.7.7 From 6c11e006bd51aa10ba5e1379d96752cd567ed7bc Mon Sep 17 00:00:00 2001 From: imedina Date: Mon, 11 Mar 2024 03:12:56 +0000 Subject: [PATCH 02/27] Add new SNP classes to store dbSNP --- .../org/opencb/biodata/models/core/Snp.java | 147 ++++++++++++++++++ .../biodata/models/core/SnpAnnotation.java | 90 +++++++++++ 2 files changed, 237 insertions(+) create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/Snp.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/SnpAnnotation.java diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/Snp.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/Snp.java new file mode 100644 index 00000000..c2d079ba --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/Snp.java @@ -0,0 +1,147 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core; + +import java.util.List; + +public class Snp { + private String id; + private String chromosome; + private int position; + private String reference; + private List alleles; + private String type; + private String source; + private String version; + private SnpAnnotation annotation; + + public Snp() { + } + + public Snp(String id, String chromosome, int position, String reference, List alleles, String type, + String source, String version, SnpAnnotation annotation) { + this.id = id; + this.chromosome = chromosome; + this.position = position; + this.reference = reference; + this.alleles = alleles; + this.type = type; + this.source = source; + this.version = version; + this.annotation = annotation; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Snp{"); + sb.append("id='").append(id).append('\''); + sb.append(", chromosome='").append(chromosome).append('\''); + sb.append(", position=").append(position); + sb.append(", reference='").append(reference).append('\''); + sb.append(", alleles=").append(alleles); + sb.append(", type='").append(type).append('\''); + sb.append(", source='").append(source).append('\''); + sb.append(", version='").append(version).append('\''); + sb.append(", annotation=").append(annotation); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public Snp setId(String id) { + this.id = id; + return this; + } + + public String getChromosome() { + return chromosome; + } + + public Snp setChromosome(String chromosome) { + this.chromosome = chromosome; + return this; + } + + public int getPosition() { + return position; + } + + public Snp setPosition(int position) { + this.position = position; + return this; + } + + public String getReference() { + return reference; + } + + public Snp setReference(String reference) { + this.reference = reference; + return this; + } + + public List getAlleles() { + return alleles; + } + + public Snp setAlleles(List alleles) { + this.alleles = alleles; + return this; + } + + public String getType() { + return type; + } + + public Snp setType(String type) { + this.type = type; + return this; + } + + public String getSource() { + return source; + } + + public Snp setSource(String source) { + this.source = source; + return this; + } + + public String getVersion() { + return version; + } + + public Snp setVersion(String version) { + this.version = version; + return this; + } + + public SnpAnnotation getAnnotation() { + return annotation; + } + + public Snp setAnnotation(SnpAnnotation annotation) { + this.annotation = annotation; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/SnpAnnotation.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/SnpAnnotation.java new file mode 100644 index 00000000..215341ad --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/SnpAnnotation.java @@ -0,0 +1,90 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core; + +import org.opencb.biodata.models.variant.avro.EvidenceEntry; +import org.opencb.biodata.models.variant.avro.PopulationFrequency; + +import java.util.List; + +public class SnpAnnotation { + + private List flags; + private String gene; + private List populationFrequencies; + private List traitAssociation; + + public SnpAnnotation() { + } + + public SnpAnnotation(List flags, String gene, List populationFrequencies, List traitAssociation) { + this.flags = flags; + this.gene = gene; + this.populationFrequencies = populationFrequencies; + this.traitAssociation = traitAssociation; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("SnpAnnotation{"); + sb.append("flags=").append(flags); + sb.append(", gene='").append(gene).append('\''); + sb.append(", populationFrequencies=").append(populationFrequencies); + sb.append(", traitAssociation=").append(traitAssociation); + sb.append('}'); + return sb.toString(); + } + + public List getFlags() { + return flags; + } + + public SnpAnnotation setFlags(List flags) { + this.flags = flags; + return this; + } + + public String getGene() { + return gene; + } + + public SnpAnnotation setGene(String gene) { + this.gene = gene; + return this; + } + + public List getPopulationFrequencies() { + return populationFrequencies; + } + + public SnpAnnotation setPopulationFrequencies(List populationFrequencies) { + this.populationFrequencies = populationFrequencies; + return this; + } + + public List getTraitAssociation() { + return traitAssociation; + } + + public SnpAnnotation setTraitAssociation(List traitAssociation) { + this.traitAssociation = traitAssociation; + return this; + } +} From ef752abc04d234ee16e4a68169c32d8d55d28fcb Mon Sep 17 00:00:00 2001 From: imedina Date: Tue, 12 Mar 2024 02:30:43 +0000 Subject: [PATCH 03/27] Update SnpAnnotation data model --- .../biodata/models/core/SnpAnnotation.java | 27 ++++++------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/SnpAnnotation.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/SnpAnnotation.java index 215341ad..16fab718 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/SnpAnnotation.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/SnpAnnotation.java @@ -19,37 +19,26 @@ package org.opencb.biodata.models.core; -import org.opencb.biodata.models.variant.avro.EvidenceEntry; import org.opencb.biodata.models.variant.avro.PopulationFrequency; import java.util.List; +import java.util.Map; public class SnpAnnotation { private List flags; private String gene; private List populationFrequencies; - private List traitAssociation; + private Map additionalAttributes; public SnpAnnotation() { } - public SnpAnnotation(List flags, String gene, List populationFrequencies, List traitAssociation) { + public SnpAnnotation(List flags, String gene, List populationFrequencies, Map additionalAttributes) { this.flags = flags; this.gene = gene; this.populationFrequencies = populationFrequencies; - this.traitAssociation = traitAssociation; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("SnpAnnotation{"); - sb.append("flags=").append(flags); - sb.append(", gene='").append(gene).append('\''); - sb.append(", populationFrequencies=").append(populationFrequencies); - sb.append(", traitAssociation=").append(traitAssociation); - sb.append('}'); - return sb.toString(); + this.additionalAttributes = additionalAttributes; } public List getFlags() { @@ -79,12 +68,12 @@ public SnpAnnotation setPopulationFrequencies(List populati return this; } - public List getTraitAssociation() { - return traitAssociation; + public Map getAdditionalAttributes() { + return additionalAttributes; } - public SnpAnnotation setTraitAssociation(List traitAssociation) { - this.traitAssociation = traitAssociation; + public SnpAnnotation setAdditionalAttributes(Map additionalAttributes) { + this.additionalAttributes = additionalAttributes; return this; } } From 9ea6c147b64917ff95fed4b8335bd89eae109176 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 12 Mar 2024 11:28:15 +0100 Subject: [PATCH 04/27] models: rename alleles to alternates, #TASK-5813, #TASK-5789 --- .../java/org/opencb/biodata/models/core/Snp.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/Snp.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/Snp.java index c2d079ba..8f8cc712 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/Snp.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/Snp.java @@ -26,7 +26,7 @@ public class Snp { private String chromosome; private int position; private String reference; - private List alleles; + private List alternates; private String type; private String source; private String version; @@ -35,13 +35,13 @@ public class Snp { public Snp() { } - public Snp(String id, String chromosome, int position, String reference, List alleles, String type, + public Snp(String id, String chromosome, int position, String reference, List alternates, String type, String source, String version, SnpAnnotation annotation) { this.id = id; this.chromosome = chromosome; this.position = position; this.reference = reference; - this.alleles = alleles; + this.alternates = alternates; this.type = type; this.source = source; this.version = version; @@ -55,7 +55,7 @@ public String toString() { sb.append(", chromosome='").append(chromosome).append('\''); sb.append(", position=").append(position); sb.append(", reference='").append(reference).append('\''); - sb.append(", alleles=").append(alleles); + sb.append(", alternates=").append(alternates); sb.append(", type='").append(type).append('\''); sb.append(", source='").append(source).append('\''); sb.append(", version='").append(version).append('\''); @@ -100,12 +100,12 @@ public Snp setReference(String reference) { return this; } - public List getAlleles() { - return alleles; + public List getAlternates() { + return alternates; } - public Snp setAlleles(List alleles) { - this.alleles = alleles; + public Snp setAlternates(List alternates) { + this.alternates = alternates; return this; } From b79bbdca15b81d4f30f1f0f3216c76160f726a60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 28 Mar 2024 11:45:19 +0100 Subject: [PATCH 05/27] tools: increase MAX_REGION_COVERAGE; and fix sonnar issues, #TASK-5162 --- .../opencb/biodata/tools/alignment/BamManager.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/alignment/BamManager.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/alignment/BamManager.java index 895767e9..54b84cc0 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/alignment/BamManager.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/alignment/BamManager.java @@ -49,6 +49,7 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; /** @@ -62,7 +63,7 @@ public class BamManager implements AutoCloseable { public static final int DEFAULT_WINDOW_SIZE = 1; public static final int MAX_NUM_RECORDS = 50000; - public static final int MAX_REGION_COVERAGE = 100000; + public static final int MAX_REGION_COVERAGE = 500000; public static final String COVERAGE_BIGWIG_EXTENSION = ".bw"; private Logger logger; @@ -191,7 +192,10 @@ public Path calculateBigWigCoverage(Path bigWigPath, int windowSize) throws IOEx return bigWigPath; } - + /** + * @deprecated (since getFileHeader().getTextHeader() is deprecated !) + */ + @Deprecated public String header() { return samReader.getFileHeader().getTextHeader(); } @@ -338,7 +342,7 @@ public List getChunks(Region region) { BAMIndex index = samReader.indexing().getIndex(); return index.getSpanOverlapping(sequenceIndex, start, end).getChunks(); } - return null; + return Collections.emptyList(); } public List getBreakpoints(Region region) throws IOException { @@ -378,7 +382,7 @@ public List getBreakpoints(Region region) throws IOException { } } } - return null; + return Collections.emptyList(); } /** @@ -445,7 +449,7 @@ public AlignmentGlobalStats stats(Region region, AlignmentFilters fil return calculateGlobalStats(iterator(region, filters, options)); } - private AlignmentGlobalStats calculateGlobalStats(BamIterator iterator) throws IOException { + private AlignmentGlobalStats calculateGlobalStats(BamIterator iterator) { AlignmentGlobalStats alignmentGlobalStats = new AlignmentGlobalStats(); SamRecordAlignmentGlobalStatsCalculator calculator = new SamRecordAlignmentGlobalStatsCalculator(); while (iterator.hasNext()) { From 941cb51e9721d38da344c10afaa440c3e2aacbbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 25 Apr 2024 15:53:04 +0100 Subject: [PATCH 06/27] tools: Fix normalization of variants wiht ins seq. #TASK-6122 --- .../opencb/biodata/tools/variant/VariantNormalizer.java | 2 ++ .../biodata/tools/variant/VariantNormalizerTest.java | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java index 990d61e8..e3bb3d6e 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java @@ -1394,6 +1394,8 @@ private Variant newVariant(Variant variant, VariantKeyFields keyFields, Structur normalizedVariant.getSv().setCiStartRight(sv.getCiStartRight()); normalizedVariant.getSv().setCiEndLeft(sv.getCiEndLeft()); normalizedVariant.getSv().setCiEndRight(sv.getCiEndRight()); + normalizedVariant.getSv().setLeftSvInsSeq(sv.getLeftSvInsSeq()); + normalizedVariant.getSv().setRightSvInsSeq(sv.getRightSvInsSeq()); // Variant will never have CopyNumber, because the Alternate is normalized from to normalizedVariant.getSv().setCopyNumber(keyFields.getCopyNumber()); diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java index 95265190..a4a62f06 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java @@ -784,6 +784,14 @@ public void testNormalizeSvToIndel() throws NonStandardCompliantSampleField { } + @Test + public void testNormalizeWithInsSeq() throws NonStandardCompliantSampleField { + Variant variant = new Variant("1:799984<800001<800022:-:ACCACACCCACACAACACACA...TGTGGTGTGTGTGGTGTG"); + Variant normVar = new VariantNormalizer().normalize(Collections.singletonList(variant), false).get(0); + assertEquals(variant, normVar); + assertEquals(variant.toString(), normVar.toString()); + } + @Test public void testNormalizeBND() throws NonStandardCompliantSampleField { normalizeBnd(newVariant(101, 100, "", ".[9:10["), newVariant(100, 99, "A", "A[chr9:10[")); From 5678bd3ed24a641d74b1aac180db27c68f3ebe77 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 30 Apr 2024 10:27:48 +0200 Subject: [PATCH 07/27] Prepare release 2.12.2 --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 0c0fb358..3a98861e 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 2.12.2-SNAPSHOT + 2.12.2 ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index 44fad6f3..fe80d521 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2-SNAPSHOT + 2.12.2 ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 34ead415..79a1954e 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2-SNAPSHOT + 2.12.2 ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index 87cfcc16..45d41e63 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2-SNAPSHOT + 2.12.2 ../pom.xml diff --git a/pom.xml b/pom.xml index 5d3d9743..a051029f 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2-SNAPSHOT + 2.12.2 pom Biodata @@ -38,7 +38,7 @@ - 4.12.1-SNAPSHOT + 4.12.0 2.11.4 4.4 1.7.7 From c139406bbaeba0a77a4c1d545d75572613476882 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 30 Apr 2024 10:28:15 +0200 Subject: [PATCH 08/27] Prepare next release 2.12.3-SNAPSHOT --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 3a98861e..3c1b2425 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 2.12.2 + 2.12.3-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index fe80d521..24182d67 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 2.12.3-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 79a1954e..1eb260d6 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 2.12.3-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index 45d41e63..a283a477 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 2.12.3-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index a051029f..c4ba58fc 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 2.12.3-SNAPSHOT pom Biodata @@ -38,7 +38,7 @@ - 4.12.0 + 4.12.1-SNAPSHOT 2.11.4 4.4 1.7.7 From c52673185b0aa2cf31f8915689519d935e39a271 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Thu, 30 May 2024 17:52:46 +0200 Subject: [PATCH 09/27] cicd: Update action version to test for compatibility with test and release process #TASK-6264 --- .github/workflows/test-analysis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-analysis.yml b/.github/workflows/test-analysis.yml index d460871a..e9df51ec 100644 --- a/.github/workflows/test-analysis.yml +++ b/.github/workflows/test-analysis.yml @@ -10,11 +10,11 @@ jobs: name: Test and push Sonar analysis runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: '0' - name: Set up JDK 11 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: distribution: 'temurin' java-version: '11' From a4eb6dc3ee7760a3289e51dea0e25af96ade962f Mon Sep 17 00:00:00 2001 From: pfurio Date: Mon, 10 Jun 2024 12:26:59 +0200 Subject: [PATCH 10/27] models: deprecate status name, #TASK-5964 --- .../interpretation/Interpretation.java | 64 +++++++++++-------- .../opencb/biodata/models/common/Status.java | 19 +++--- 2 files changed, 48 insertions(+), 35 deletions(-) diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java index 99fbff5d..36f9c7e0 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/interpretation/Interpretation.java @@ -28,29 +28,29 @@ public class Interpretation { - private String id; - private String uuid; - private String description; - private String clinicalAnalysisId; + protected String id; + protected String uuid; + protected String name; + protected String description; + protected String clinicalAnalysisId; /** * Interpretation algorithm tool used to generate this interpretation. */ - private ClinicalAnalyst analyst; - private InterpretationMethod method; + protected ClinicalAnalyst analyst; + protected InterpretationMethod method; - private List primaryFindings; - private List secondaryFindings; + protected List primaryFindings; + protected List secondaryFindings; - private List comments; + protected List comments; - private InterpretationStats stats; + protected InterpretationStats stats; - private boolean locked; - private Status status; - private String creationDate; - private String modificationDate; - private int version; + protected boolean locked; + protected String creationDate; + protected String modificationDate; + protected int version; /** * Users can add custom information in this field. @@ -61,12 +61,23 @@ public class Interpretation { public Interpretation() { } + @Deprecated public Interpretation(String id, String uuid, String description, String clinicalAnalysisId, ClinicalAnalyst analyst, InterpretationMethod method, List primaryFindings, List secondaryFindings, List comments, InterpretationStats stats, Status status, String creationDate, String modificationDate, boolean locked, int version, Map attributes) { + this(id, uuid, id, description, clinicalAnalysisId, analyst, method, primaryFindings, secondaryFindings, + comments, stats, locked, creationDate, modificationDate, version, attributes); + } + + public Interpretation(String id, String uuid, String name, String description, String clinicalAnalysisId, + ClinicalAnalyst analyst, InterpretationMethod method, List primaryFindings, + List secondaryFindings, List comments, + InterpretationStats stats, boolean locked, String creationDate, String modificationDate, + int version, Map attributes) { this.id = id; this.uuid = uuid; + this.name = name; this.description = description; this.clinicalAnalysisId = clinicalAnalysisId; this.analyst = analyst; @@ -75,10 +86,9 @@ public Interpretation(String id, String uuid, String description, String clinica this.secondaryFindings = secondaryFindings; this.comments = comments; this.stats = stats; - this.status = status; + this.locked = locked; this.creationDate = creationDate; this.modificationDate = modificationDate; - this.locked = locked; this.version = version; this.attributes = attributes; } @@ -88,6 +98,7 @@ public String toString() { final StringBuilder sb = new StringBuilder("Interpretation{"); sb.append("id='").append(id).append('\''); sb.append(", uuid='").append(uuid).append('\''); + sb.append(", name='").append(name).append('\''); sb.append(", description='").append(description).append('\''); sb.append(", clinicalAnalysisId='").append(clinicalAnalysisId).append('\''); sb.append(", analyst=").append(analyst); @@ -96,7 +107,6 @@ public String toString() { sb.append(", secondaryFindings=").append(secondaryFindings); sb.append(", comments=").append(comments); sb.append(", stats=").append(stats); - sb.append(", status=").append(status); sb.append(", creationDate='").append(creationDate).append('\''); sb.append(", modificationDate='").append(modificationDate).append('\''); sb.append(", locked='").append(locked).append('\''); @@ -124,6 +134,15 @@ public Interpretation setUuid(String uuid) { return this; } + public String getName() { + return name; + } + + public Interpretation setName(String name) { + this.name = name; + return this; + } + public String getDescription() { return description; } @@ -196,15 +215,6 @@ public Interpretation setStats(InterpretationStats stats) { return this; } - public Status getStatus() { - return status; - } - - public Interpretation setStatus(Status status) { - this.status = status; - return this; - } - public String getCreationDate() { return creationDate; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java b/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java index 7563ce9e..65ffd889 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/common/Status.java @@ -19,17 +19,22 @@ public class Status { protected String id; - protected String name; protected String description; protected String date; public Status() { - this("", "", "", ""); + this("", "", ""); } + public Status(String id, String description, String date) { + this.id = id; + this.description = description; + this.date = date; + } + + @Deprecated public Status(String id, String name, String description, String date) { this.id = id; - this.name = name; this.description = description; this.date = date; } @@ -38,7 +43,6 @@ public Status(String id, String name, String description, String date) { public String toString() { final StringBuilder sb = new StringBuilder("Status{"); sb.append("id='").append(id).append('\''); - sb.append(", name='").append(name).append('\''); sb.append(", description='").append(description).append('\''); sb.append(", date='").append(date).append('\''); sb.append('}'); @@ -53,7 +57,6 @@ public boolean equals(Object o) { Status status = (Status) o; if (!id.equals(status.id)) return false; - if (name != null ? !name.equals(status.name) : status.name != null) return false; if (description != null ? !description.equals(status.description) : status.description != null) return false; return date != null ? date.equals(status.date) : status.date == null; } @@ -61,7 +64,6 @@ public boolean equals(Object o) { @Override public int hashCode() { int result = id.hashCode(); - result = 31 * result + (name != null ? name.hashCode() : 0); result = 31 * result + (description != null ? description.hashCode() : 0); result = 31 * result + (date != null ? date.hashCode() : 0); return result; @@ -76,12 +78,13 @@ public Status setId(String id) { return this; } + @Deprecated public String getName() { - return name; + return id; } + @Deprecated public Status setName(String name) { - this.name = name; return this; } From 50a2be94aed6556d7cb83eee48f3a4170fd4d20f Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 26 Jun 2024 20:07:24 +0200 Subject: [PATCH 11/27] cicd: Modify pull request approve #TASK-6399 --- .github/workflows/pull-request-approved.yml | 34 +++++++++++++--- .github/workflows/scripts/xetabase-branch.sh | 42 ++++++++++++++++++++ 2 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/scripts/xetabase-branch.sh diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index eb410c9c..8a60928f 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -1,15 +1,37 @@ name: Pull request approve workflow +run-name: 'Pull request approve workflow ${{ github.event.pull_request.head.ref }} -> ${{ github.event.pull_request.base.ref }} by @${{ github.actor }}' on: pull_request_review: types: [ submitted ] jobs: - build: - uses: opencb/java-common-libs/.github/workflows/build-java-app-workflow.yml@develop + calculate-xetabase-branch: + name: Calculate Xetabase branch + runs-on: ubuntu-22.04 + outputs: + xetabase_branch: ${{ steps.get_xetabase_branch.outputs.xetabase_branch }} + steps: + - name: Clone java-common-libs + uses: actions/checkout@v4 + with: + fetch-depth: '10' + - id: get_xetabase_branch + name: "Get current branch for Xetabase from target branch" + run: | + chmod +x ./.github/workflows/scripts/xetabase-branch.sh + ls ./.github/workflows/scripts/ + ls ./.github/workflows/ + bash --version + xetabase_branch=$(./.github/workflows/scripts/xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) + echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} + echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT test: - name: "Test analysis" - uses: ./.github/workflows/test-analysis.yml - needs: build - secrets: inherit + name: "Run all tests before merging" + needs: calculate-xetabase-branch + uses: opencb/java-common-libs/.github/workflows/test-xetabase-workflow.yml@TASK-6399 + with: + branch: ${{ needs.calculate-xetabase-branch.outputs.xetabase_branch }} + task: ${{ github.event.pull_request.head.ref }} + secrets: inherit \ No newline at end of file diff --git a/.github/workflows/scripts/xetabase-branch.sh b/.github/workflows/scripts/xetabase-branch.sh new file mode 100644 index 00000000..af17f7f1 --- /dev/null +++ b/.github/workflows/scripts/xetabase-branch.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Function to calculate the corresponding branch of Xetabase project +get_xetabase_branch() { + # Input parameter (branch name) + input_branch="$1" + + # Check if the branch name is "develop" in that case return the same branch name + if [[ "$input_branch" == "develop" ]]; then + echo "develop" + return 0 + fi + + # Check if the branch name starts with "release-" and follows the patterns "release-a.b.x" or "release-a.b.c.x" + if [[ "$input_branch" =~ ^release-([0-9]+)\.([0-9]+)\.x$ ]] || [[ "$input_branch" =~ ^release-([0-9]+)\.([0-9]+)\.([0-9]+)\.x$ ]]; then + # Extract the MAJOR part of the branch name + MAJOR=${BASH_REMATCH[1]} + # Calculate the XETABASE_MAJOR by subtracting 3 from MAJOR + XETABASE_MAJOR=$((MAJOR - 3)) + # Check if the XETABASE_MAJOR is negative + if (( XETABASE_MAJOR < 0 )); then + echo "Error: 'MAJOR' digit after subtraction results in a negative number." + return 1 + fi + # Construct and echo the new branch name + echo "release-$XETABASE_MAJOR.${input_branch#release-$MAJOR.}" + return 0 + fi + + # If the branch name does not match any of the expected patterns + echo "Error: The branch name is not correct." + return 1 +} + +# Check if the script receives exactly one argument +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +# Call the function with the input branch name +get_xetabase_branch "$1" From 10aa6c8019c3a4e876d2477f1c1e67c493c5b2b7 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 1 Jul 2024 17:36:40 +0200 Subject: [PATCH 12/27] cicd: Modify pull request approve #TASK-6399 --- .github/workflows/pull-request-approved.yml | 7 ++----- .../{xetabase-branch.sh => get-xetabase-branch.sh} | 10 +++++++++- 2 files changed, 11 insertions(+), 6 deletions(-) rename .github/workflows/scripts/{xetabase-branch.sh => get-xetabase-branch.sh} (80%) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index 8a60928f..a0d481fa 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -19,11 +19,8 @@ jobs: - id: get_xetabase_branch name: "Get current branch for Xetabase from target branch" run: | - chmod +x ./.github/workflows/scripts/xetabase-branch.sh - ls ./.github/workflows/scripts/ - ls ./.github/workflows/ - bash --version - xetabase_branch=$(./.github/workflows/scripts/xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) + chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh + xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT diff --git a/.github/workflows/scripts/xetabase-branch.sh b/.github/workflows/scripts/get-xetabase-branch.sh similarity index 80% rename from .github/workflows/scripts/xetabase-branch.sh rename to .github/workflows/scripts/get-xetabase-branch.sh index af17f7f1..e971f990 100644 --- a/.github/workflows/scripts/xetabase-branch.sh +++ b/.github/workflows/scripts/get-xetabase-branch.sh @@ -5,6 +5,14 @@ get_xetabase_branch() { # Input parameter (branch name) input_branch="$1" + # If the branch begins with 'TASK' and exists in the opencga-enterprise repository, I return it + if [[ $input_branch == TASK* ]]; then + if [ "$(git ls-remote https://github.com/zetta-genomics/opencga-enterprise.git "$input_branch" )" ] ; then + echo "$GIT_BRANCH"; + exit 0; + fi + fi + # Check if the branch name is "develop" in that case return the same branch name if [[ "$input_branch" == "develop" ]]; then echo "develop" @@ -16,7 +24,7 @@ get_xetabase_branch() { # Extract the MAJOR part of the branch name MAJOR=${BASH_REMATCH[1]} # Calculate the XETABASE_MAJOR by subtracting 3 from MAJOR - XETABASE_MAJOR=$((MAJOR - 3)) + XETABASE_MAJOR=$((MAJOR - 1)) # Check if the XETABASE_MAJOR is negative if (( XETABASE_MAJOR < 0 )); then echo "Error: 'MAJOR' digit after subtraction results in a negative number." From b37690016e70f7cf0b2abe72d13bbdbeba733f88 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 8 Jul 2024 15:01:10 +0200 Subject: [PATCH 13/27] exclude distlib dependency --- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pom.xml b/pom.xml index c4ba58fc..6166063d 100644 --- a/pom.xml +++ b/pom.xml @@ -198,6 +198,12 @@ com.databricks SnpEff ${SnpEff.version} + + + distlib + distlib + + com.google.guava From aacf0bbe1bacc7ce0f5cd0decb7986236348616a Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 8 Jul 2024 15:51:26 +0200 Subject: [PATCH 14/27] exclude distlib dependency --- biodata-tools/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index a283a477..fdb21cbf 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -53,6 +53,12 @@ com.databricks SnpEff + + + distlib + distlib + + org.rocksdb From 234ebdb3cb48496360151d0ef1995c369176ce58 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 17 Jul 2024 10:42:13 +0200 Subject: [PATCH 15/27] Prepare new development branch release-3.2.x --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 12a6b4f5..d6d1acdd 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 3.2.0 + 3.3.0-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index bfef3117..c3332c2e 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.0 + 3.3.0-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 887ce92d..7325a383 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.0 + 3.3.0-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index 03bfa0e2..e8d90ada 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.0 + 3.3.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index c1c07dc3..faf0f65b 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.0 + 3.3.0-SNAPSHOT pom Biodata @@ -38,7 +38,7 @@ - 5.2.0 + 5.3.0-SNAPSHOT 2.14.3 4.4 From b4c2afb5bd4659e3d90a11446935797d595685bb Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 17 Jul 2024 10:42:33 +0200 Subject: [PATCH 16/27] Prepare new development branch release-3.2.x --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index d6d1acdd..bb85a556 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 3.3.0-SNAPSHOT + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index c3332c2e..238588ca 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.3.0-SNAPSHOT + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 7325a383..f9cf2628 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.3.0-SNAPSHOT + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index e8d90ada..c7c32361 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.3.0-SNAPSHOT + 3.2.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index faf0f65b..cee50cbd 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.3.0-SNAPSHOT + 3.2.1-SNAPSHOT pom Biodata @@ -38,7 +38,7 @@ - 5.3.0-SNAPSHOT + 5.2.1-SNAPSHOT 2.14.3 4.4 From 75345478c664b4fdae9803ab8138e370313717b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 17 Jul 2024 10:57:11 +0100 Subject: [PATCH 17/27] tools: Centralise SV normalization at VariantKeyFields.sv #TASK-6558 --- .../tools/variant/VariantNormalizer.java | 110 +++++-- .../variant/VariantNormalizerGenericTest.java | 12 +- .../tools/variant/VariantNormalizerTest.java | 309 +++++++++++++----- 3 files changed, 306 insertions(+), 125 deletions(-) diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java index e3bb3d6e..1b8992c0 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java @@ -306,19 +306,18 @@ public List normalize(List batch, boolean reuse) throws NonSta Integer start = variant.getStart(); Integer end = variant.getEnd(); String chromosome = variant.getChromosome(); - StructuralVariation sv = variant.getSv(); if (variant.getStudies() == null || variant.getStudies().isEmpty()) { List keyFieldsList; if (isSymbolic(variant)) { - keyFieldsList = normalizeSymbolic(start, end, reference, alternate, sv); + keyFieldsList = normalizeSymbolic(start, end, reference, alternate, variant.getSv()); } else { keyFieldsList = normalize(chromosome, start, reference, alternate); } // Iterate keyFields sorting by position, so the generated variants are ordered. Do not modify original order! for (VariantKeyFields keyFields : sortByPosition(keyFieldsList)) { OriginalCall call = new OriginalCall(variant.toString(), keyFields.getNumAllele()); - Variant normalizedVariant = newVariant(variant, keyFields, sv); + Variant normalizedVariant = newVariant(variant, keyFields); if (keyFields.getPhaseSet() != null) { StudyEntry studyEntry = new StudyEntry(); studyEntry.setSamples( @@ -346,7 +345,7 @@ public List normalize(List batch, boolean reuse) throws NonSta List keyFieldsList; List originalKeyFieldsList; if (isSymbolic(variant)) { - keyFieldsList = normalizeSymbolic(start, end, reference, alternates, sv); + keyFieldsList = normalizeSymbolic(start, end, reference, alternates, variant.getSv()); } else { keyFieldsList = normalize(chromosome, start, reference, alternates); } @@ -400,6 +399,9 @@ public List normalize(List batch, boolean reuse) throws NonSta variant.setEnd(keyFields.getEnd()); variant.setReference(keyFields.getReference()); variant.setAlternate(keyFields.getAlternate()); + if (keyFields.getSv() != null) { + variant.setSv(keyFields.getSv()); + } variant.reset(); // Variant is being reused, must ensure the SV field si appropriately created // if (isSymbolic(variant)) { @@ -415,7 +417,7 @@ public List normalize(List batch, boolean reuse) throws NonSta } samples = entry.getSamples(); } else { - normalizedVariant = newVariant(variant, keyFields, sv); + normalizedVariant = newVariant(variant, keyFields); normalizedEntry = new StudyEntry(); normalizedEntry.setStudyId(entry.getStudyId()); @@ -624,6 +626,36 @@ public List normalizeSymbolic(final Integer start, final Integ Integer copyNumber = sv == null ? null : sv.getCopyNumber(); keyFields = normalizeSymbolic(start, end, reference, alternate, alternates, copyNumber, numAllelesIdx); } + + if (alternate.equals(VariantBuilder.DUP_TANDEM_ALT)) { + if (keyFields.getSv() == null) { + keyFields.setSv(new StructuralVariation()); + } + keyFields.getSv().setType(StructuralVariantType.TANDEM_DUPLICATION); + } + + if (sv != null) { + StructuralVariation normalizedSv = keyFields.getSv(); + if (normalizedSv == null) { + normalizedSv = new StructuralVariation(); + } + // CI positions may change during the normalization. Update them. + normalizedSv.setCiStartLeft(sv.getCiStartLeft()); + normalizedSv.setCiStartRight(sv.getCiStartRight()); + normalizedSv.setCiEndLeft(sv.getCiEndLeft()); + normalizedSv.setCiEndRight(sv.getCiEndRight()); + normalizedSv.setLeftSvInsSeq(sv.getLeftSvInsSeq()); + normalizedSv.setRightSvInsSeq(sv.getRightSvInsSeq()); + + if (keyFields.getSv() == null) { + if (normalizedSv.getCiStartLeft() != null || normalizedSv.getCiStartRight() != null + || normalizedSv.getCiEndLeft() != null || normalizedSv.getCiEndRight() != null + || normalizedSv.getLeftSvInsSeq() != null || normalizedSv.getRightSvInsSeq() != null) { + keyFields.setSv(normalizedSv); + } + } + } + list.add(keyFields); } @@ -695,7 +727,7 @@ private static VariantKeyFields normalizeMateBreakend( } VariantKeyFields keyFields = new VariantKeyFields(newStart, newStart - 1, numAllelesIdx, newReference, newAlternate); - keyFields.getSv().setBreakend(breakend); + keyFields.setBreakend(breakend); return keyFields; } @@ -718,20 +750,23 @@ private VariantKeyFields normalizeSymbolic( + "contain 0 or 1 nt, but no more. Please, check."); } - Integer cn = VariantBuilder.getCopyNumberFromAlternate(alternate); // if (cn != null) { // // Alternate with the form , being xxx the number of copies, must be normalized into "" // newAlternate = ""; // } String newAlternate; + Integer newCn; if (alternate.equals("") && copyNumber != null) { // Alternate must be of the form , being xxx the number of copies newAlternate = ""; + newCn = copyNumber; } else { newAlternate = alternate; + newCn = VariantBuilder.getCopyNumberFromAlternate(alternate); } + return new VariantKeyFields(newStart, end, numAllelesIdx, newReference, newAlternate, - null, cn, false); + null, newCn, false); } @@ -1380,34 +1415,24 @@ private int[] getGenotypesReorderingMap(int numAllele, int[] alleleMap) { } } - - private Variant newVariant(Variant variant, VariantKeyFields keyFields, StructuralVariation sv) { + private Variant newVariant(Variant variant, VariantKeyFields keyFields) { Variant normalizedVariant = new Variant(variant.getChromosome(), keyFields.getStart(), keyFields.getEnd(), keyFields.getReference(), keyFields.getAlternate()) .setId(variant.getId()) .setNames(variant.getNames()) .setStrand(variant.getStrand()); - if (sv != null) { - if (normalizedVariant.getSv() != null) { - // CI positions may change during the normalization. Update them. - normalizedVariant.getSv().setCiStartLeft(sv.getCiStartLeft()); - normalizedVariant.getSv().setCiStartRight(sv.getCiStartRight()); - normalizedVariant.getSv().setCiEndLeft(sv.getCiEndLeft()); - normalizedVariant.getSv().setCiEndRight(sv.getCiEndRight()); - normalizedVariant.getSv().setLeftSvInsSeq(sv.getLeftSvInsSeq()); - normalizedVariant.getSv().setRightSvInsSeq(sv.getRightSvInsSeq()); - - // Variant will never have CopyNumber, because the Alternate is normalized from to - normalizedVariant.getSv().setCopyNumber(keyFields.getCopyNumber()); - VariantType cnvSubtype = VariantBuilder.getCopyNumberSubtype(keyFields.getCopyNumber()); - if (cnvSubtype != null) { - normalizedVariant.setType(cnvSubtype); - } - } + if (keyFields.getSv() != null) { + normalizedVariant.setSv(keyFields.getSv()); } - normalizedVariant.setAnnotation(variant.getAnnotation()); + if (keyFields.getCopyNumber() != null) { + VariantType cnvSubtype = VariantBuilder.getCopyNumberSubtype(keyFields.getCopyNumber()); + if (cnvSubtype != null) { + normalizedVariant.setType(cnvSubtype); + } + } + return normalizedVariant; // normalizedVariant.setAnnotation(variant.getAnnotation()); // if (isSymbolic(variant)) { @@ -1527,8 +1552,10 @@ public VariantKeyFields(int start, int end, int numAllele, String reference, Str this.alternate = alternate; this.originalKeyFields = originalKeyFields == null ? this : originalKeyFields; this.referenceBlock = referenceBlock; - this.sv = new StructuralVariation(); - setCopyNumber(copyNumber); + this.sv = null; + if (copyNumber != null) { + setCopyNumber(copyNumber); + } } @@ -1604,7 +1631,28 @@ public Integer getCopyNumber() { } public VariantKeyFields setCopyNumber(Integer copyNumber) { - sv.setCopyNumber(copyNumber); + if (sv == null) { + if (copyNumber != null) { + sv = new StructuralVariation(); + sv.setCopyNumber(copyNumber); + sv.setType(VariantBuilder.getCNVSubtype(copyNumber)); + } + } else { + sv.setCopyNumber(copyNumber); + sv.setType(VariantBuilder.getCNVSubtype(copyNumber)); + } + return this; + } + + public VariantKeyFields setBreakend(Breakend breakend) { + if (sv == null) { + if (breakend != null) { + sv = new StructuralVariation(); + sv.setBreakend(breakend); + } + } else { + sv.setBreakend(breakend); + } return this; } diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java index f097d1e1..e59ad530 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerGenericTest.java @@ -230,7 +230,7 @@ protected Variant newVariant(int position, String ref, String altsCsv) { return newVariant(position, position, ref, Arrays.asList(altsCsv.split(",")), "2"); } - protected Variant newVariant(int start, int end, String ref, String altsCsv) { + protected Variant newVariant(int start, Integer end, String ref, String altsCsv) { return newVariant(start, end, ref, Arrays.asList(altsCsv.split(",")), "2"); } @@ -238,12 +238,16 @@ protected Variant newVariant(int position, String ref, List altsList, St return newVariant(position, position, ref, altsList, studyId); } - protected Variant newVariant(int start, int end, String ref, List altsList, String studyId) { + protected Variant newVariant(int start, Integer end, String ref, List altsList, String studyId) { return newVariantBuilder(start, end, ref, altsList, studyId).build(); } - protected VariantBuilder newVariantBuilder(int position, int end, String ref, List altsList, String studyId) { - return Variant.newBuilder("1", position, end, ref, String.join(",", altsList)) + protected VariantBuilder newVariantBuilder(int position, Integer end, String ref, List altsList, String studyId) { + return newVariantBuilder(position, end, ref, String.join(",", altsList), studyId); + } + + protected VariantBuilder newVariantBuilder(int position, Integer end, String ref, String alts, String studyId) { + return Variant.newBuilder("1", position, end, ref, alts) .setStudyId(studyId) .setSampleDataKeys("GT") .setSamples(new ArrayList<>()) diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java index a4a62f06..4253d940 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java @@ -9,6 +9,7 @@ import org.opencb.biodata.models.variant.exceptions.NonStandardCompliantSampleField; import java.util.*; +import java.util.function.Consumer; import java.util.stream.Collectors; import static org.junit.Assert.*; @@ -582,9 +583,7 @@ public void testMultiSNP() throws NonStandardCompliantSampleField { public void testNormalizeMultiAllelicPL() throws NonStandardCompliantSampleField { Variant variant = generateVariantWithFormat("X:100:A:T", "GT:GL", "S01", "0/0", "1,2,3", "S02", "0", "1,2"); - List normalize1 = normalizer.normalize(Collections.singletonList(variant), false); - assertEquals("1,2,3", normalize1.get(0).getStudies().get(0).getSampleData("S01", "GL")); - assertEquals("1,2", normalize1.get(0).getStudies().get(0).getSampleData("S02", "GL")); + normalizeUnmodified(variant); Variant variant2 = generateVariantWithFormat("X:100:A:T,C", "GT:GL", "S01", "0/0", "1,2,3,4,5,6", "S02", "A", "1,2,3"); List normalize2 = normalizer.normalize(Collections.singletonList(variant2), false); @@ -614,14 +613,138 @@ public void testCNVsNormalization() throws Exception { .addSample("HG00096", "0|0") .build(); - List normalizedVariantList = normalizer.normalize(Collections.singletonList(variant), true); - assertEquals(1, normalizedVariantList.size()); - assertEquals(new StructuralVariation(86, 150, 150, 211, 0, null, null, - StructuralVariantType.COPY_NUMBER_LOSS, null), normalizedVariantList.get(0).getSv()); - // Normalize CNV alternate - assertEquals("", normalizedVariantList.get(0).getAlternate()); - assertEquals("1:86<100<150-150<200<211:C:", normalizedVariantList.get(0).getStudies().get(0).getFiles().get(0).getCall().getVariantId()); - assertEquals(0, normalizedVariantList.get(0).getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 150, 211, 0, null, null, + StructuralVariantType.COPY_NUMBER_LOSS, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-150<200<211:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testCNVsNormalizationNoNumber() throws Exception { + Variant variant = newVariantBuilder(100, 200, "C", Collections.singletonList(""), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 150, 211, null, null, null, null, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-150<200<211:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testCNVsNormalizationNoNumberNoCipos() throws Exception { + Variant variant = newVariantBuilder(100, 200, "C", Collections.singletonList(""), "2") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(null, null, null, null, null, null, null, null, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:100-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testCNVsNormalizationUnmodified() throws Exception { + Variant variant = newVariantBuilder(101, 200, "-", Collections.singletonList(""), "2") + .addSample("HG00096", "0|0") + .build(); + + normalizeUnmodified(variant); + } + + @Test + public void testINSsNormalizationWithCIEND() throws Exception { + Variant variant = newVariantBuilder(100, null, "C", Collections.singletonList(""), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addFileData("LEFT_SVINSSEQ", "AAAA") + .addFileData("RIGHT_SVINSSEQ", "CCCC") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, null, null, null, "AAAA", "CCCC", null, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-50<100<111:C:AAAA...CCCC", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals("1:86<101<150:-:AAAA...CCCC", normalizedVariant.toString()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + @Test + public void testDUPTANDEMNormalization() throws Exception { + Variant variant = newVariantBuilder(100, 200, "C", Collections.singletonList(""), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addSample("HG00096", "0|0") + .build(); + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 150, 211, null, null, null, StructuralVariantType.TANDEM_DUPLICATION, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals("1:86<100<150-150<200<211:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals("1:86<101<150-150<200<211:-:", normalizedVariant.toString()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + + public void normalizeUnmodified(Variant variant) throws NonStandardCompliantSampleField { + normalizer.setGenerateReferenceBlocks(false); + + int hashCode = variant.hashCode(); + List list = normalizer.normalize(Collections.singletonList(variant), false); + assertEquals(1, list.size()); + Variant normVar = list.get(0); + + assertEquals(variant.toString(), normVar.toString()); + assertEquals("Ensure input variant is not modified", hashCode, variant.hashCode()); + assertEquals("Ensure norm variant is not modified", hashCode, normVar.hashCode()); + + list = normalizer.normalize(Collections.singletonList(variant), true); + assertEquals(1, list.size()); + normVar = list.get(0); + + assertEquals(variant.toString(), normVar.toString()); + assertEquals("Ensure input variant is not modified", hashCode, variant.hashCode()); + assertEquals("Ensure norm variant is not modified", hashCode, normVar.hashCode()); + + } + + public void normalizeOne(Variant variant, Consumer consumer) throws NonStandardCompliantSampleField { + normalizer.setGenerateReferenceBlocks(false); + + int hashCode = variant.hashCode(); + List list = normalizer.normalize(Collections.singletonList(variant), false); + assertEquals(1, list.size()); + consumer.accept(list.get(0)); + + int hashCode2 = variant.hashCode(); + + // Check that the original variant has not been modified, and check again, but reusing the input variant + assertEquals("Ensure input variant is not modified", hashCode, hashCode2); + + + list = normalizer.normalize(Collections.singletonList(variant), true); + assertEquals(1, list.size()); + assertSame(variant, list.get(0)); + consumer.accept(variant); + consumer.accept(list.get(0)); + + int hashCode3 = variant.hashCode(); + assertNotEquals(hashCode3, hashCode); } @Test @@ -670,23 +793,40 @@ public void testVNCNormalizationMultiallelic() throws NonStandardCompliantSample @Test public void testCNVsNormalizationCopyNumber() throws NonStandardCompliantSampleField { Variant variant; - List normalizedVariantList; variant = newVariantBuilder(100, 200, "C", Arrays.asList(""), "2") .setSampleDataKeys("GT", "CN") .addSample("HG00096", "0|1","3") .build(); - normalizedVariantList = normalizer.normalize(Collections.singletonList(variant), true); - assertEquals(1, normalizedVariantList.size()); - Variant normalizedVariant = normalizedVariantList.get(0); - assertEquals(new StructuralVariation(null, null, null, null, 3, null, null, - StructuralVariantType.COPY_NUMBER_GAIN, null), normalizedVariant.getSv()); - // Normalize CNV alternate - assertEquals("", normalizedVariant.getAlternate()); - assertEquals(101, normalizedVariant.getStart().intValue()); - assertEquals("", normalizedVariant.getReference()); - assertEquals("1:100-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); - assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + normalizeOne(variant, normalizedVariant->{ + assertEquals(new StructuralVariation(null, null, null, null, 3, null, null, + StructuralVariantType.COPY_NUMBER_GAIN, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals(101, normalizedVariant.getStart().intValue()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("1:100-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); + } + @Test + public void testCNVsNormalizationCopyNumberWithCipos() throws NonStandardCompliantSampleField { + Variant variant; + variant = newVariantBuilder(100, 200, "C", Arrays.asList(""), "2") + .addFileData("CIPOS", "-10,50") + .setSampleDataKeys("GT", "CN") + .addSample("HG00096", "0|1","3") + .build(); + normalizeOne(variant, normalizedVariant->{ + assertEquals(new StructuralVariation(90, 150, null, null, 3, null, null, + StructuralVariantType.COPY_NUMBER_GAIN, null), normalizedVariant.getSv()); + // Normalize CNV alternate + assertEquals("", normalizedVariant.getAlternate()); + assertEquals(101, normalizedVariant.getStart().intValue()); + assertEquals("", normalizedVariant.getReference()); + assertEquals("1:90<100<150-200:C:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); } @Test @@ -725,38 +865,33 @@ public void testNormalizeSV() throws NonStandardCompliantSampleField { @Test public void testNormalizeDEL() throws NonStandardCompliantSampleField { - Variant variant = newVariant(100, 200, "N", Collections.singletonList(""), STUDY_ID); - List normalized = normalizer.normalize(Collections.singletonList(variant), false); - - assertEquals(1, normalized.size()); - assertEquals(101, normalized.get(0).getStart().intValue()); - assertEquals(200, normalized.get(0).getEnd().intValue()); - assertEquals(new StructuralVariation(), normalized.get(0).getSv()); - System.out.println(normalized.get(0).toJson()); + normalizeOne(variant, normalized -> { + assertEquals(101, normalized.getStart().intValue()); + assertEquals(200, normalized.getEnd().intValue()); + assertEquals(new StructuralVariation(), normalized.getSv()); +// System.out.println(normalized.toJson()); + }); } @Test public void testNormalizeINS() throws NonStandardCompliantSampleField { - String seq = "ACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTG"; Variant variant = newVariantBuilder(100, 100, "N", Collections.singletonList(""), STUDY_ID) .addFileData("SVINSSEQ", seq) .build(); - List list = new VariantNormalizer().normalize(Collections.singletonList(variant), false); - - assertEquals(1, list.size()); - Variant normalized = list.get(0); - assertEquals(101, normalized.getStart().intValue()); - assertEquals(100, normalized.getEnd().intValue()); - assertEquals(seq.length(), normalized.getLength().intValue()); - assertEquals(seq.length(), normalized.getLengthAlternate().intValue()); - assertEquals(0, normalized.getLengthReference().intValue()); - assertEquals("", normalized.getReference()); - assertEquals(seq, normalized.getAlternate()); - assertEquals(new StructuralVariation(), normalized.getSv()); - assertEquals("1:100-100:N:", normalized.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); - assertEquals(0, normalized.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + normalizeOne(variant, normalizedVariant -> { + assertEquals(101, normalizedVariant.getStart().intValue()); + assertEquals(100, normalizedVariant.getEnd().intValue()); + assertEquals(seq.length(), normalizedVariant.getLength().intValue()); + assertEquals(seq.length(), normalizedVariant.getLengthAlternate().intValue()); + assertEquals(0, normalizedVariant.getLengthReference().intValue()); + assertEquals("", normalizedVariant.getReference()); + assertEquals(seq, normalizedVariant.getAlternate()); + assertEquals(new StructuralVariation(), normalizedVariant.getSv()); + assertEquals("1:100-100:N:", normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getVariantId()); + assertEquals(0, normalizedVariant.getStudies().get(0).getFiles().get(0).getCall().getAlleleIndex().intValue()); + }); } @Test @@ -768,66 +903,60 @@ public void testNormalizeSvToIndel() throws NonStandardCompliantSampleField { assertEquals(Variant.SV_THRESHOLD + 1, variant.getLengthAlternate().intValue()); assertNotNull(variant.getSv()); - Variant normVar = new VariantNormalizer().normalize(Collections.singletonList(variant), false).get(0); - assertEquals(VariantType.INDEL, normVar.getType()); - assertEquals(Variant.SV_THRESHOLD, normVar.getLengthAlternate().intValue()); - assertNull(normVar.getSv()); - - // Check that the original variant has not been modified, and check again, but reusing the input variant - assertEquals(VariantType.INSERTION, variant.getType()); - assertEquals(Variant.SV_THRESHOLD + 1, variant.getLengthAlternate().intValue()); - assertNotNull(variant.getSv()); - Variant normVarReuse = new VariantNormalizer().normalize(Collections.singletonList(variant), true).get(0); - assertEquals(VariantType.INDEL, normVarReuse.getType()); - assertEquals(Variant.SV_THRESHOLD, normVarReuse.getLengthAlternate().intValue()); - assertNull(normVarReuse.getSv()); - + normalizeOne(variant, normVar -> { + assertEquals(VariantType.INDEL, normVar.getType()); + assertEquals(Variant.SV_THRESHOLD, normVar.getLengthAlternate().intValue()); + assertNull(normVar.getSv()); + }); } @Test public void testNormalizeWithInsSeq() throws NonStandardCompliantSampleField { Variant variant = new Variant("1:799984<800001<800022:-:ACCACACCCACACAACACACA...TGTGGTGTGTGTGGTGTG"); - Variant normVar = new VariantNormalizer().normalize(Collections.singletonList(variant), false).get(0); - assertEquals(variant, normVar); - assertEquals(variant.toString(), normVar.toString()); + normalizeUnmodified(variant); } @Test public void testNormalizeBND() throws NonStandardCompliantSampleField { - normalizeBnd(newVariant(101, 100, "", ".[9:10["), newVariant(100, 99, "A", "A[chr9:10[")); - normalizeBnd(newVariant(100, 99, "", "[22:10[."), newVariant(100, 99, "A", "[chr22:10[A")); - normalizeBnd(newVariant(101, 100, "", ".]9:10]"), newVariant(100, 99, "A", "A]chr9:10]")); - normalizeBnd(newVariant(100, 99, "", "]22:10]."), newVariant(100, 99, "A", "]chr22:10]A")); - normalizeBnd(newVariant(100, 99, "", "]22:10]NNN"), newVariant(100, 99, "A", "]chr22:10]NNNA")); + normalizeBnd(newVariant(101, 100, "", ".[9:10["), newVariant(100, null, "A", "A[chr9:10[")); + normalizeBnd(newVariant(100, 99, "", "[22:10[."), newVariant(100, null, "A", "[chr22:10[A")); + normalizeBnd(newVariant(101, 100, "", ".]9:10]"), newVariant(100, null, "A", "A]chr9:10]")); + normalizeBnd(newVariant(100, 99, "", "]22:10]."), newVariant(100, null, "A", "]chr22:10]A")); + normalizeBnd(newVariant(100, 99, "", "]22:10]NNN"), newVariant(100, null, "A", "]chr22:10]NNNA")); - normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, 99, "A", "[1:10[TA")); - normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, 99, "AC", "[1:10[TAC")); + normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, null, "A", "[1:10[TA")); + normalizeBnd(newVariant(100, 99, "", "[1:10[T"), newVariant(100, null, "AC", "[1:10[TAC")); - normalizeBnd(newVariant(100, 99, "TAC", "[1:10[AC"), newVariant(100, 99, "TAC", "[1:10[AC")); - normalizeBnd(newVariant(100, 99, "TAC", "TA[1:10["), newVariant(100, 99, "TAC", "TA[1:10[")); + normalizeBnd(newVariant(100, 99, "TAC", "[1:10[AC"), newVariant(100, null, "TAC", "[1:10[AC")); + normalizeBnd(newVariant(100, 99, "TAC", "TA[1:10["), newVariant(100, null, "TAC", "TA[1:10[")); + + normalizeBnd(newVariantBuilder(101, 100, "", ".[9:10[", "s1").setCiStart(95, 105).build(), + newVariantBuilder(100, null, "A", "A[chr9:10[", "s1").setCiStart(95,105).setCiEnd(95,105).build()); } private void normalizeBnd(Variant expectedVariant, Variant variant) throws NonStandardCompliantSampleField { - System.out.println("---"); +// System.out.println("---"); boolean expectsNormalization = !expectedVariant.equals(variant); - System.out.println(" - Actual"); - System.out.println(" " + variant.toString()); - System.out.println(" " + variant.toJson()); - System.out.println(" - Expected"); - System.out.println(" " + expectedVariant.toString()); - System.out.println(" " + expectedVariant.toJson()); - System.out.println(" - Normalized (same = " + !expectsNormalization + ")"); - List normalized = normalizer.normalize(Collections.singletonList(variant), false); - - for (Variant v : normalized) { - System.out.println(" " + v.toString()); - System.out.println(" " + v.toJson()); - if (expectsNormalization) { - assertNotNull(v.getStudies().get(0).getFiles().get(0).getCall()); - v.getStudies().get(0).getFiles().get(0).setCall(null); - } - assertEquals(expectedVariant, v); +// System.out.println(" - Actual"); +// System.out.println(" " + variant.toString()); +// System.out.println(" " + variant.toJson()); +// System.out.println(" - Expected"); +// System.out.println(" " + expectedVariant.toString()); +// System.out.println(" " + expectedVariant.toJson()); +// System.out.println(" - Normalized (same = " + !expectsNormalization + ")"); + if (expectsNormalization) { + normalizeOne(variant, normVar -> { + System.out.println(" " + normVar.toString()); + System.out.println(" " + normVar.toJson()); + OriginalCall call = normVar.getStudies().get(0).getFiles().get(0).getCall(); + assertNotNull(call); + normVar.getStudies().get(0).getFiles().get(0).setCall(null); + assertEquals(expectedVariant, normVar); + normVar.getStudies().get(0).getFiles().get(0).setCall(call); + }); + } else { + normalizeUnmodified(variant); } } From a6abc515055de2f42805197c0859852441b91317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 17 Jul 2024 11:03:56 +0100 Subject: [PATCH 18/27] tools: Remove sv.ciEnd from INSERTION and BREAKEND variants. #TASK-6558 --- .../biodata/tools/variant/VariantNormalizer.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java index 1b8992c0..e902ce99 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java @@ -642,8 +642,18 @@ public List normalizeSymbolic(final Integer start, final Integ // CI positions may change during the normalization. Update them. normalizedSv.setCiStartLeft(sv.getCiStartLeft()); normalizedSv.setCiStartRight(sv.getCiStartRight()); - normalizedSv.setCiEndLeft(sv.getCiEndLeft()); - normalizedSv.setCiEndRight(sv.getCiEndRight()); + + // Structural variants that affect a single point (INSERTIONS or Breakends) should not have CIEND. + // At this point, we're removing the CIEND from the normalized variant. + // Do not remove the value from the INFO field (if any). + // The END is the same as the start (which, in base-1 means that "end == start -1" , so "end < start") + if (keyFields.getEnd() < keyFields.getStart()) { + normalizedSv.setCiEndLeft(null); + normalizedSv.setCiEndRight(null); + } else { + normalizedSv.setCiEndLeft(sv.getCiEndLeft()); + normalizedSv.setCiEndRight(sv.getCiEndRight()); + } normalizedSv.setLeftSvInsSeq(sv.getLeftSvInsSeq()); normalizedSv.setRightSvInsSeq(sv.getRightSvInsSeq()); From 58bee081bda9506b26f224aad484ad8ad4c38a8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Thu, 18 Jul 2024 12:38:37 +0100 Subject: [PATCH 19/27] tools: Normalize sv for non-symbolic variants. #TASK-6558 --- .../tools/variant/VariantNormalizer.java | 160 +++++++++++------- .../tools/variant/VariantNormalizerTest.java | 26 +++ .../variant/merge/VariantMergerTest.java | 3 +- 3 files changed, 126 insertions(+), 63 deletions(-) diff --git a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java index e902ce99..3e16977f 100644 --- a/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java +++ b/biodata-tools/src/main/java/org/opencb/biodata/tools/variant/VariantNormalizer.java @@ -301,19 +301,16 @@ public List normalize(List batch, boolean reuse) throws NonSta normalizedVariants.add(variant); continue; } - String reference = variant.getReference(); //Save original values, as they can be changed + //Save original values, as they can be changed + String reference = variant.getReference(); String alternate = variant.getAlternate(); Integer start = variant.getStart(); Integer end = variant.getEnd(); String chromosome = variant.getChromosome(); if (variant.getStudies() == null || variant.getStudies().isEmpty()) { - List keyFieldsList; - if (isSymbolic(variant)) { - keyFieldsList = normalizeSymbolic(start, end, reference, alternate, variant.getSv()); - } else { - keyFieldsList = normalize(chromosome, start, reference, alternate); - } + List keyFieldsList = normalizeAlleles(variant); + // Iterate keyFields sorting by position, so the generated variants are ordered. Do not modify original order! for (VariantKeyFields keyFields : sortByPosition(keyFieldsList)) { OriginalCall call = new OriginalCall(variant.toString(), keyFields.getNumAllele()); @@ -331,25 +328,16 @@ public List normalize(List batch, boolean reuse) throws NonSta normalizedVariants.add(normalizedVariant); } } else { - for (StudyEntry entry : variant.getStudies()) { - List originalAlternates = new ArrayList<>(1 + entry.getSecondaryAlternates().size()); - List alternates = new ArrayList<>(1 + entry.getSecondaryAlternates().size()); - alternates.add(alternate); - originalAlternates.add(alternate); - for (String secondaryAlternatesAllele : entry.getSecondaryAlternatesAlleles()) { - alternates.add(secondaryAlternatesAllele); - originalAlternates.add(secondaryAlternatesAllele); - } + if (variant.getStudies().size() != 1) { + throw new IllegalStateException("Only one study per variant is supported when normalizing variants. Found " + + variant.getStudies().size() + " studies. Variant: " + variant); + } else { + StudyEntry entry = variant.getStudies().get(0); + List alternates = getAllAlternates(variant); // FIXME: assumes there wont be multinucleotide positions with CNVs and short variants mixed - List keyFieldsList; - List originalKeyFieldsList; - if (isSymbolic(variant)) { - keyFieldsList = normalizeSymbolic(start, end, reference, alternates, variant.getSv()); - } else { - keyFieldsList = normalize(chromosome, start, reference, alternates); - } - originalKeyFieldsList = keyFieldsList + List keyFieldsList = normalizeAlleles(variant); + List originalKeyFieldsList = keyFieldsList .stream() .filter(k -> !k.isReferenceBlock()) .map(k -> k.originalKeyFields) @@ -372,8 +360,8 @@ public List normalize(List batch, boolean reuse) throws NonSta originalCall = entry.getFiles().get(0).getCall().getVariantId(); } else { StringBuilder sb = new StringBuilder(variant.toString()); - for (int i = 1; i < originalAlternates.size(); i++) { - sb.append(",").append(originalAlternates.get(i)); + for (int i = 1; i < alternates.size(); i++) { + sb.append(",").append(alternates.get(i)); } originalCall = sb.toString(); } @@ -600,17 +588,54 @@ private Collection sortByPosition(List keyFi // } // } + protected List normalizeAlleles(Variant variant) { + List alternates = getAllAlternates(variant); + + List keyFieldsList; + if (isSymbolic(variant)) { + keyFieldsList = normalizeSymbolic(variant.getStart(), variant.getEnd(), variant.getReference(), alternates, variant.getSv()); + } else { + keyFieldsList = normalize(variant.getChromosome(), variant.getStart(), variant.getReference(), alternates, variant.getSv()); + } + return keyFieldsList; + } + + private static List getAllAlternates(Variant variant) { + List alternates; + if (variant.getStudies() != null && !variant.getStudies().isEmpty()) { + StudyEntry entry = variant.getStudies().get(0); + String alternate = variant.getAlternate(); + alternates = new ArrayList<>(1 + entry.getSecondaryAlternates().size()); + alternates.add(alternate); + for (AlternateCoordinate secondaryAlternate : entry.getSecondaryAlternates()) { + if (secondaryAlternate.getStart() != null && !secondaryAlternate.getStart().equals(variant.getStart())) { + throw new IllegalStateException("Unable to normalize variant where secondary alternates do not start at the same position. " + + "Variant: " + variant + " , secondaryAlternate: " + secondaryAlternate); + } + if (secondaryAlternate.getEnd() != null && !secondaryAlternate.getEnd().equals(variant.getEnd())) { + throw new IllegalStateException("Unable to normalize variant where secondary alternates do not end at the same position. " + + "Variant: " + variant + " (end=" + variant.getEnd() + ") , secondaryAlternate: " + secondaryAlternate); + } + alternates.add(secondaryAlternate.getAlternate()); + } + } else { + alternates = Collections.singletonList(variant.getAlternate()); + } + return Collections.unmodifiableList(alternates); + } + + @Deprecated // Test purposes only public List normalizeSymbolic(Integer start, Integer end, String reference, String alternate, StructuralVariation sv) { return normalizeSymbolic(start, end, reference, Collections.singletonList(alternate), sv); } - @Deprecated + @Deprecated // Test purposes only public List normalizeSymbolic(final Integer start, final Integer end, final String reference, final List alternates) { return normalizeSymbolic(start, end, reference, alternates, null); } - public List normalizeSymbolic(final Integer start, final Integer end, final String reference, + protected List normalizeSymbolic(final Integer start, final Integer end, final String reference, final List alternates, StructuralVariation sv) { List list = new ArrayList<>(alternates.size()); @@ -634,37 +659,7 @@ public List normalizeSymbolic(final Integer start, final Integ keyFields.getSv().setType(StructuralVariantType.TANDEM_DUPLICATION); } - if (sv != null) { - StructuralVariation normalizedSv = keyFields.getSv(); - if (normalizedSv == null) { - normalizedSv = new StructuralVariation(); - } - // CI positions may change during the normalization. Update them. - normalizedSv.setCiStartLeft(sv.getCiStartLeft()); - normalizedSv.setCiStartRight(sv.getCiStartRight()); - - // Structural variants that affect a single point (INSERTIONS or Breakends) should not have CIEND. - // At this point, we're removing the CIEND from the normalized variant. - // Do not remove the value from the INFO field (if any). - // The END is the same as the start (which, in base-1 means that "end == start -1" , so "end < start") - if (keyFields.getEnd() < keyFields.getStart()) { - normalizedSv.setCiEndLeft(null); - normalizedSv.setCiEndRight(null); - } else { - normalizedSv.setCiEndLeft(sv.getCiEndLeft()); - normalizedSv.setCiEndRight(sv.getCiEndRight()); - } - normalizedSv.setLeftSvInsSeq(sv.getLeftSvInsSeq()); - normalizedSv.setRightSvInsSeq(sv.getRightSvInsSeq()); - - if (keyFields.getSv() == null) { - if (normalizedSv.getCiStartLeft() != null || normalizedSv.getCiStartRight() != null - || normalizedSv.getCiEndLeft() != null || normalizedSv.getCiEndRight() != null - || normalizedSv.getLeftSvInsSeq() != null || normalizedSv.getRightSvInsSeq() != null) { - keyFields.setSv(normalizedSv); - } - } - } + normalizeSvField(sv, keyFields); list.add(keyFields); } @@ -672,6 +667,40 @@ public List normalizeSymbolic(final Integer start, final Integ return list; } + private static void normalizeSvField(StructuralVariation sv, VariantKeyFields keyFields) { + if (sv != null) { + StructuralVariation normalizedSv = keyFields.getSv(); + if (normalizedSv == null) { + normalizedSv = new StructuralVariation(); + } + // CI positions may change during the normalization. Update them. + normalizedSv.setCiStartLeft(sv.getCiStartLeft()); + normalizedSv.setCiStartRight(sv.getCiStartRight()); + + // Structural variants that affect a single point (INSERTIONS or Breakends) should not have CIEND. + // At this point, we're removing the CIEND from the normalized variant. + // Do not remove the value from the INFO field (if any). + // The END is the same as the start (which, in base-1 means that "end == start -1" , so "end < start") + if (keyFields.getEnd() < keyFields.getStart()) { + normalizedSv.setCiEndLeft(null); + normalizedSv.setCiEndRight(null); + } else { + normalizedSv.setCiEndLeft(sv.getCiEndLeft()); + normalizedSv.setCiEndRight(sv.getCiEndRight()); + } + normalizedSv.setLeftSvInsSeq(sv.getLeftSvInsSeq()); + normalizedSv.setRightSvInsSeq(sv.getRightSvInsSeq()); + + if (keyFields.getSv() == null) { + if (normalizedSv.getCiStartLeft() != null || normalizedSv.getCiStartRight() != null + || normalizedSv.getCiEndLeft() != null || normalizedSv.getCiEndRight() != null + || normalizedSv.getLeftSvInsSeq() != null || normalizedSv.getRightSvInsSeq() != null) { + keyFields.setSv(normalizedSv); + } + } + } + } + private boolean isNonRef(String alternate) { return alternate.equals(Allele.NO_CALL_STRING) || alternate.equals(VariantBuilder.NON_REF_ALT) @@ -780,12 +809,17 @@ private VariantKeyFields normalizeSymbolic( } + @Deprecated // Test purposes only public List normalize(String chromosome, int position, String reference, String alternate) { - return normalize(chromosome, position, reference, Collections.singletonList(alternate)); + return normalize(chromosome, position, reference, Collections.singletonList(alternate), null); } - public List normalize(String chromosome, int position, String reference, List alternates) - { + @Deprecated // Test purposes only + public List normalize(String chromosome, int position, String reference, List alternates) { + return normalize(chromosome, position, reference, alternates, null); + } + + protected List normalize(String chromosome, int position, String reference, List alternates, StructuralVariation sv) { List list = new ArrayList<>(alternates.size()); int numAllelesIdx = 0; // This index is necessary for getting the samples where the mutated allele is present @@ -829,6 +863,8 @@ public List normalize(String chromosome, int position, String } } + normalizeSvField(sv, keyFields); + if (keyFields != null) { // To deal with cases such as A>GT diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java index 4253d940..a1faf486 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/VariantNormalizerTest.java @@ -684,6 +684,32 @@ public void testINSsNormalizationWithCIEND() throws Exception { }); } + @Test + public void testNormalizeNonSymbolicInsertion() throws Exception { + Variant variant = newVariantBuilder(100, null, "C", Collections.singletonList("CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"), "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-50,11") + .addSample("HG00096", "0|0") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, null, null, null, null, null, null, null), normalizedVariant.getSv()); + }); + } + + @Test + public void testNormalizeNonSymbolicDeletion() throws Exception { + Variant variant = newVariantBuilder(100, null, "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "C", "2") + .addFileData("CIPOS", "-14,50") + .addFileData("CIEND", "-1,1") + .addSample("HG00096", "0|1") + .build(); + + normalizeOne(variant, normalizedVariant -> { + assertEquals(new StructuralVariation(86, 150, 179, 181, null, null, null, null, null), normalizedVariant.getSv()); + }); + } + @Test public void testDUPTANDEMNormalization() throws Exception { Variant variant = newVariantBuilder(100, 200, "C", Collections.singletonList(""), "2") diff --git a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java index 46ab5800..07533ab5 100644 --- a/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java +++ b/biodata-tools/src/test/java/org/opencb/biodata/tools/variant/merge/VariantMergerTest.java @@ -498,7 +498,8 @@ public void testMergeIndelCase1() throws NonStandardCompliantSampleField { Variant v1 = VariantTestUtils.generateVariantWithFormat("1:328:CTT:C", VCFConstants.GENOTYPE_KEY + "," + VCFConstants.GENOTYPE_FILTER_KEY, "S1", "1/2","PASS"); - v1.getStudies().get(0).getSecondaryAlternates().add(new AlternateCoordinate(null,null,331,"CTT", "CTTTC", VariantType.INDEL)); + + v1.getStudies().get(0).getSecondaryAlternates().add(new AlternateCoordinate(null, null, 330, "CTT", "CTTTC", VariantType.INDEL)); Variant v2 = VariantTestUtils.generateVariantWithFormat("1:331:T:TCT", VCFConstants.GENOTYPE_KEY + "," + VCFConstants.GENOTYPE_FILTER_KEY, From da73175ec437c88a62a572c4960c572272acf306 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 6 Aug 2024 17:35:52 +0200 Subject: [PATCH 20/27] Prepare Port Patch Cellbase 2.12.2 -> 3.2.1 #TASK-6647 --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 3a98861e..bb85a556 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 2.12.2 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index fe80d521..ce1f093f 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 79a1954e..b10170a1 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index 45d41e63..9ccec982 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index a051029f..bc1643f8 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.2 + 3.2.1-SNAPSHOT pom Biodata From 009756479fc77d9dfcad87422877622e9b5aa365 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 2 Sep 2024 16:10:28 +0200 Subject: [PATCH 21/27] Prepare release 2.12.3 --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 3c1b2425..748ae79f 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 2.12.3-SNAPSHOT + 2.12.3 ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index 24182d67..93a56dcc 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3-SNAPSHOT + 2.12.3 ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index 1eb260d6..a0862b06 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3-SNAPSHOT + 2.12.3 ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index fdb21cbf..7d496042 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3-SNAPSHOT + 2.12.3 ../pom.xml diff --git a/pom.xml b/pom.xml index 6166063d..425f4543 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3-SNAPSHOT + 2.12.3 pom Biodata @@ -38,7 +38,7 @@ - 4.12.1-SNAPSHOT + 4.12.0 2.11.4 4.4 1.7.7 From ded97e0c2065368fa4974954e70abc0778472e09 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 3 Sep 2024 16:16:33 +0200 Subject: [PATCH 22/27] preparing Port Patch 1.10.7 -> 2.2.1 --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index 748ae79f..bb85a556 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 2.12.3 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index 93a56dcc..ce1f093f 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index a0862b06..b10170a1 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index 7d496042..fb5dbfa4 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3 + 3.2.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 425f4543..9848510f 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 2.12.3 + 3.2.1-SNAPSHOT pom Biodata From cc133b9eeaad60eb035cb381959e636c02a46bfd Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Wed, 4 Sep 2024 18:31:05 +0200 Subject: [PATCH 23/27] cicd: Upload reference to develop branch in pull-request-approve to test-xetabase-workflow #TASK-6807 --- .github/workflows/pull-request-approved.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index a0d481fa..23709720 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -27,7 +27,7 @@ jobs: test: name: "Run all tests before merging" needs: calculate-xetabase-branch - uses: opencb/java-common-libs/.github/workflows/test-xetabase-workflow.yml@TASK-6399 + uses: opencb/java-common-libs/.github/workflows/test-xetabase-workflow.yml@develop with: branch: ${{ needs.calculate-xetabase-branch.outputs.xetabase_branch }} task: ${{ github.event.pull_request.head.ref }} From b1d29ec0434f23b587d151b60c149257af1d4a58 Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Mon, 9 Sep 2024 16:37:16 +0200 Subject: [PATCH 24/27] cicd: Fix xetabase branch calculation #TASK-6807 --- .github/workflows/pull-request-approved.yml | 7 ++++++- .github/workflows/scripts/get-xetabase-branch.sh | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index 23709720..e4535a19 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -20,9 +20,14 @@ jobs: name: "Get current branch for Xetabase from target branch" run: | chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh - xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }}) + echo "github.event.pull_request.base.ref: ${{ github.event.pull_request.base.ref }}" + echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}" + echo "secrets.ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }}" + xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.head.ref }}) echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT + env: + ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }} test: name: "Run all tests before merging" diff --git a/.github/workflows/scripts/get-xetabase-branch.sh b/.github/workflows/scripts/get-xetabase-branch.sh index e971f990..a1eb7e52 100644 --- a/.github/workflows/scripts/get-xetabase-branch.sh +++ b/.github/workflows/scripts/get-xetabase-branch.sh @@ -7,9 +7,9 @@ get_xetabase_branch() { # If the branch begins with 'TASK' and exists in the opencga-enterprise repository, I return it if [[ $input_branch == TASK* ]]; then - if [ "$(git ls-remote https://github.com/zetta-genomics/opencga-enterprise.git "$input_branch" )" ] ; then - echo "$GIT_BRANCH"; - exit 0; + if [ "$(git ls-remote "https://$ZETTA_REPO_ACCESS_TOKEN@github.com/zetta-genomics/opencga-enterprise.git" "$input_branch" )" ] ; then + echo $input_branch; + return 0; fi fi From 80791e4ba74ce3265c70c24b1a5bf174503e205b Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 10 Sep 2024 09:10:16 +0200 Subject: [PATCH 25/27] cicd: Fix xetabase branch calculation #TASK-6807 --- .github/workflows/pull-request-approved.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index e4535a19..c2fe27a0 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -16,6 +16,8 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: '10' + ## This is important to avoid the error in the next step: "fatal: repository 'https://github.com/zetta-genomics/opencga-enterprise.git/' not found" + persist-credentials: false - id: get_xetabase_branch name: "Get current branch for Xetabase from target branch" run: | From 0a09df23c5d585621a78157aa415dfcec48691bf Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Tue, 10 Sep 2024 09:27:07 +0200 Subject: [PATCH 26/27] cicd: Fix xetabase branch calculation #TASK-6807 --- .github/workflows/pull-request-approved.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pull-request-approved.yml b/.github/workflows/pull-request-approved.yml index c2fe27a0..d339f65b 100644 --- a/.github/workflows/pull-request-approved.yml +++ b/.github/workflows/pull-request-approved.yml @@ -24,7 +24,6 @@ jobs: chmod +x ./.github/workflows/scripts/get-xetabase-branch.sh echo "github.event.pull_request.base.ref: ${{ github.event.pull_request.base.ref }}" echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}" - echo "secrets.ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }}" xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.head.ref }}) echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY} echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT From 014f1ca23ae6386f7e3f5f6baf18eec90f68b31e Mon Sep 17 00:00:00 2001 From: JuanfeSanahuja Date: Fri, 13 Sep 2024 11:56:18 +0200 Subject: [PATCH 27/27] Prepare release 3.2.1 --- biodata-external/pom.xml | 2 +- biodata-formats/pom.xml | 2 +- biodata-models/pom.xml | 2 +- biodata-tools/pom.xml | 2 +- pom.xml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/biodata-external/pom.xml b/biodata-external/pom.xml index bb85a556..dafe64c2 100644 --- a/biodata-external/pom.xml +++ b/biodata-external/pom.xml @@ -6,7 +6,7 @@ biodata org.opencb.biodata - 3.2.1-SNAPSHOT + 3.2.1 ../pom.xml diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index 238588ca..e071844f 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1-SNAPSHOT + 3.2.1 ../pom.xml diff --git a/biodata-models/pom.xml b/biodata-models/pom.xml index f9cf2628..f2311443 100644 --- a/biodata-models/pom.xml +++ b/biodata-models/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1-SNAPSHOT + 3.2.1 ../pom.xml diff --git a/biodata-tools/pom.xml b/biodata-tools/pom.xml index c7c32361..a9a269a7 100644 --- a/biodata-tools/pom.xml +++ b/biodata-tools/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1-SNAPSHOT + 3.2.1 ../pom.xml diff --git a/pom.xml b/pom.xml index 1a6c71bc..cd18672d 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.biodata biodata - 3.2.1-SNAPSHOT + 3.2.1 pom Biodata @@ -38,7 +38,7 @@ - 5.2.1-SNAPSHOT + 5.2.1 2.14.3 4.4