diff --git a/docs/.gitbook/assets/image (1).png b/docs/.gitbook/assets/image (6) (5) (1).png similarity index 100% rename from docs/.gitbook/assets/image (1).png rename to docs/.gitbook/assets/image (6) (5) (1).png diff --git a/docs/.gitbook/assets/image (3).png b/docs/.gitbook/assets/image (6) (5) (2).png similarity index 100% rename from docs/.gitbook/assets/image (3).png rename to docs/.gitbook/assets/image (6) (5) (2).png diff --git a/docs/.gitbook/assets/image (4).png b/docs/.gitbook/assets/image (6) (5) (3).png similarity index 100% rename from docs/.gitbook/assets/image (4).png rename to docs/.gitbook/assets/image (6) (5) (3).png diff --git a/docs/.gitbook/assets/image (5).png b/docs/.gitbook/assets/image (6) (5) (4).png similarity index 100% rename from docs/.gitbook/assets/image (5).png rename to docs/.gitbook/assets/image (6) (5) (4).png diff --git a/docs/.gitbook/assets/image (6).png b/docs/.gitbook/assets/image (6) (5) (5).png similarity index 100% rename from docs/.gitbook/assets/image (6).png rename to docs/.gitbook/assets/image (6) (5) (5).png diff --git a/docs/.gitbook/assets/image.png b/docs/.gitbook/assets/image (6) (5).png similarity index 100% rename from docs/.gitbook/assets/image.png rename to docs/.gitbook/assets/image (6) (5).png diff --git a/docs/.gitbook/assets/image (7).png b/docs/.gitbook/assets/image (8) (1).png similarity index 100% rename from docs/.gitbook/assets/image (7).png rename to docs/.gitbook/assets/image (8) (1).png diff --git a/opencga-analysis/pom.xml b/opencga-analysis/pom.xml index 42714e60244..a9b5c9779eb 100644 --- a/opencga-analysis/pom.xml +++ b/opencga-analysis/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ActionableVariantManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ActionableVariantManager.java index c4e1d3226ac..ff499b24323 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ActionableVariantManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/clinical/ActionableVariantManager.java @@ -16,12 +16,13 @@ package org.opencb.opencga.analysis.clinical; -import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.VariantBuilder; import org.opencb.commons.utils.FileUtils; import org.opencb.commons.utils.URLUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.BufferedReader; import java.io.File; @@ -35,12 +36,13 @@ public class ActionableVariantManager { // Folder where actionable variant files are located, multiple assemblies are supported, i.e.: one variant actionable file per assembly // File name format: actionableVariants_xxx.txt[.gz] where xxx = assembly in lower case - private final String ACTIONABLE_URL = "http://resources.opencb.org/opencb/opencga/analysis/commons/"; + private final static String ACTIONABLE_URL = "http://resources.opencb.org/opencb/opencga/analysis/commons/"; + private final static Logger logger = LoggerFactory.getLogger(ActionableVariantManager.class); // We keep a Map for each assembly with a Map of variant IDs with the phenotype list private static Map>> actionableVariants = null; - private Path openCgaHome; + private final Path openCgaHome; public ActionableVariantManager(Path openCgaHome) { this.openCgaHome = openCgaHome; @@ -49,7 +51,7 @@ public ActionableVariantManager(Path openCgaHome) { public Map> getActionableVariants(String assembly) throws IOException { // Lazy loading if (actionableVariants == null) { - actionableVariants = loadActionableVariants(); + actionableVariants = loadActionableVariants(openCgaHome); } if (actionableVariants.containsKey(assembly)) { @@ -58,8 +60,13 @@ public Map> getActionableVariants(String assembly) throws I return null; } + public static void init(Path openCgaHome) throws IOException { + if (actionableVariants == null) { + actionableVariants = loadActionableVariants(openCgaHome); + } + } - private Map>> loadActionableVariants() throws IOException { + private static Map>> loadActionableVariants(Path openCgaHome) throws IOException { // Load actionable variants for each assembly, if present // First, read all actionableVariants filenames, actionableVariants_xxx.txt[.gz] where xxx = assembly in lower case Map>> actionableVariantsByAssembly = new HashMap<>(); @@ -67,18 +74,20 @@ private Map>> loadActionableVariants() throws I String[] assemblies = new String[]{"grch37", "grch38"}; for (String assembly : assemblies) { File actionableFile; + boolean temporalFile = false; try { String filename = "actionableVariants_" + assembly + ".txt.gz"; Path path = openCgaHome.resolve("analysis/commons/" + filename); if (path.toFile().exists()) { - System.out.println("loadActionableVariants from path: " + path); + logger.info("loadActionableVariants from path: " + path); actionableFile = path.toFile(); } else { // Donwload 'actionable variant' file - System.out.println("loadActionableVariants from URL: " + (ACTIONABLE_URL + filename) + ", (path does not exist: " + logger.info("loadActionableVariants from URL: " + (ACTIONABLE_URL + filename) + ", (path does not exist: " + path + ")"); actionableFile = URLUtils.download(new URL(ACTIONABLE_URL + filename), Paths.get("/tmp")); + temporalFile = true; } } catch (IOException e) { continue; @@ -86,10 +95,12 @@ private Map>> loadActionableVariants() throws I if (actionableFile != null) { actionableVariantsByAssembly.put(assembly, loadActionableVariants(actionableFile)); - } - // Delete - actionableFile.delete(); + if (temporalFile) { + // Delete + actionableFile.delete(); + } + } } return actionableVariantsByAssembly; @@ -101,14 +112,17 @@ private Map>> loadActionableVariants() throws I * @return Map of variant IDs with a alist of phenotypes * @throws IOException If file is not found */ - private Map> loadActionableVariants(File file) throws IOException { + private static Map> loadActionableVariants(File file) throws IOException { -// System.out.println("ActionableVariantManager: path = " + file.toString()); +// logger.info("ActionableVariantManager: path = " + file.toString()); Map> actionableVariants = new HashMap<>(); - if (file != null && file.exists()) { - BufferedReader bufferedReader = FileUtils.newBufferedReader(file.toPath()); + if (file == null || !file.exists()) { + return actionableVariants; + } + + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(file.toPath())) { List lines = bufferedReader.lines().collect(Collectors.toList()); for (String line : lines) { if (line.startsWith("#")) { @@ -134,16 +148,16 @@ private Map> loadActionableVariants(File file) throws IOExc actionableVariants.put(variant.toString(), phenotypes); } catch (NumberFormatException e) { // Skip this variant - System.err.println("Skip actionable variant: " + line + "\nCause: " + e.getMessage()); + logger.error("Skip actionable variant: " + line + "\nCause: " + e.getMessage()); } } else { // Skip this variant - System.err.println("Skip actionable variant, invalid format: " + line); + logger.error("Skip actionable variant, invalid format: " + line); } } } -// System.out.println("ActionableVariantManager: size = " + actionableVariants.size()); +// logger.info("ActionableVariantManager: size = " + actionableVariants.size()); return actionableVariants; } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/file/FileLinkTask.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/file/FileLinkTask.java index 1b0af2f16fd..532963fc2b9 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/file/FileLinkTask.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/file/FileLinkTask.java @@ -42,7 +42,7 @@ protected void run() throws Exception { addGeneratedFile(result.first()); for (File fileResult : result.getResults()) { if (fileResult.getInternal().getStatus().getName().equals(FileStatus.MISSING_SAMPLES)) { - Map params = new PostLinkToolParams(Collections.singletonList(fileResult.getId())) + Map params = new PostLinkToolParams(Collections.singletonList(fileResult.getId()), null) .toParams(new ObjectMap(ParamConstants.STUDY_PARAM, study)); Job postLinkJob = catalogManager.getJobManager() .submit(getStudy(), PostLinkSampleAssociation.ID, Enums.Priority.MEDIUM, diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/file/PostLinkSampleAssociation.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/file/PostLinkSampleAssociation.java index 55a0e542706..a3744cae0ae 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/file/PostLinkSampleAssociation.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/file/PostLinkSampleAssociation.java @@ -12,6 +12,7 @@ import org.opencb.opencga.catalog.managers.SampleManager; import org.opencb.opencga.catalog.utils.Constants; import org.opencb.opencga.catalog.utils.ParamUtils; +import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.file.*; import org.opencb.opencga.core.models.sample.Sample; @@ -32,6 +33,15 @@ public class PostLinkSampleAssociation extends OpenCgaToolScopeStudy { @ToolParams protected final PostLinkToolParams postLinkParams = new PostLinkToolParams(); + @Override + protected void check() throws Exception { + super.check(); + // Add default batch size + if (postLinkParams.getBatchSize() == null || postLinkParams.getBatchSize() <= 0) { + postLinkParams.setBatchSize(1000); + } + } + @Override protected void run() throws Exception { // Obtain an iterator to get all the files that were link and not associated to any of its samples @@ -45,10 +55,11 @@ protected void run() throws Exception { options.put(QueryOptions.COUNT, true); List files = null; - if (CollectionUtils.isNotEmpty(postLinkParams.getFiles())) { - files = new LinkedList<>(postLinkParams.getFiles()); - } else { + if (CollectionUtils.isEmpty(postLinkParams.getFiles()) + || postLinkParams.getFiles().size() == 1 && postLinkParams.getFiles().get(0).equals(ParamConstants.ALL)) { logger.info("Processing all files with internal status = '" + FileStatus.MISSING_SAMPLES + "'"); + } else { + files = new LinkedList<>(postLinkParams.getFiles()); } int numPendingFiles = -1; @@ -88,16 +99,36 @@ protected void run() throws Exception { if (CollectionUtils.isNotEmpty(file.getInternal().getMissingSamples().getNonExisting())) { logger.info("Create {} missing samples", file.getInternal().getMissingSamples().getNonExisting().size()); for (String sampleId : file.getInternal().getMissingSamples().getNonExisting()) { - Query sampleQuery = new Query(SampleDBAdaptor.QueryParams.ID.key(), sampleId); - OpenCGAResult sampleResult = catalogManager.getSampleManager().search(study, sampleQuery, - SampleManager.INCLUDE_SAMPLE_IDS, token); - - if (sampleResult.getNumResults() != 1) { - // Sample still doesn't exist, so we create it - sampleResult = catalogManager.getSampleManager().create(study, new Sample().setId(sampleId), - QueryOptions.empty(), token); - if (sampleResult.getNumResults() != 1) { - throw new CatalogException("Could not create sample '" + sampleId + "'"); + if (!sampleExists(sampleId)) { + try { + // Sample still doesn't exist, so we create it + OpenCGAResult sampleResult = catalogManager.getSampleManager().create(study, new Sample().setId(sampleId), + QueryOptions.empty(), token); + if (sampleResult.getNumResults() != 1) { + throw new CatalogException("Could not create sample '" + sampleId + "'"); + } + } catch (CatalogException e) { + try { + if (sampleExists(sampleId)) { + // If sample was successfully created, but still got an exception. + // Ignore exception + + // Log INFO without stack trace + logger.info("Caught exception creating sample \"" + sampleId + "\"," + + " but sample was actually created. Ignoring " + e.toString()); + + // Log DEBUG with full stack trace + logger.debug("Ignored exception", e); + } else { + // Sample could not be created. + // Throw exception + throw e; + } + } catch (Exception e1) { + // Something went wrong. Throw original exception, and add this new as suppressed + e.addSuppressed(e1); + throw e; + } } } @@ -111,7 +142,7 @@ protected void run() throws Exception { } // Create sample batches - int batchSize = 1000; + int batchSize = postLinkParams.getBatchSize(); List> sampleListList = new ArrayList<>((sampleList.size() / batchSize) + 1); // Create batches List currentList = null; @@ -160,4 +191,12 @@ protected void run() throws Exception { } } } + + private boolean sampleExists(String sampleId) throws CatalogException { + Query sampleQuery = new Query(SampleDBAdaptor.QueryParams.ID.key(), sampleId); + OpenCGAResult sampleResult = catalogManager.getSampleManager().search(study, sampleQuery, + SampleManager.INCLUDE_SAMPLE_IDS, token); + + return sampleResult.getNumResults() == 1; + } } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java index dcf346fb234..dc5dec29ff8 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java @@ -218,9 +218,21 @@ public static VariantQueryException wrongReleaseException(VariantQueryParam para * @throws CatalogException if there is any catalog error */ public Query parseQuery(Query query, String token) throws CatalogException { + return parseQuery(query, null, token); + } + + /** + * Transforms a high level Query to a query fully understandable by storage. + * @param query High level query. Will be modified by the method. + * @param queryOptions Query options. Won't be modified + * @param token User's session id + * @return Modified input query (same instance) + * @throws CatalogException if there is any catalog error + */ + public Query parseQuery(Query query, QueryOptions queryOptions, String token) throws CatalogException { if (query == null) { // Nothing to do! - return null; + return new Query(); } if (isValidParam(query, SAVED_FILTER)) { @@ -257,8 +269,9 @@ public Query parseQuery(Query query, String token) throws CatalogException { cohortFilterValidator.processFilter(query, VariantQueryParam.MISSING_GENOTYPES, release, token, defaultStudyStr); if (release != null) { - // If no list of included files is specified: - if (VariantQueryProjectionParser.isIncludeFilesDefined(query, Collections.singleton(VariantField.STUDIES_FILES))) { + // If include all files: + if (VariantQueryProjectionParser.getIncludeFileStatus(query, VariantField.all()) + .equals(VariantQueryProjectionParser.IncludeStatus.ALL)) { List includeFiles = new ArrayList<>(); QueryOptions fileOptions = new QueryOptions(INCLUDE, FileDBAdaptor.QueryParams.UID.key()); Query fileQuery = new Query(FileDBAdaptor.QueryParams.RELEASE.key(), "<=" + release) @@ -272,8 +285,9 @@ public Query parseQuery(Query query, String token) throws CatalogException { } query.append(VariantQueryParam.INCLUDE_FILE.key(), includeFiles); } - // If no list of included samples is specified: - if (!VariantQueryProjectionParser.isIncludeSamplesDefined(query, Collections.singleton(VariantField.STUDIES_SAMPLES))) { + // If include all samples: + if (VariantQueryProjectionParser.getIncludeFileStatus(query, VariantField.all()) + .equals(VariantQueryProjectionParser.IncludeStatus.ALL)) { List includeSamples = new ArrayList<>(); Query sampleQuery = new Query(SampleDBAdaptor.QueryParams.RELEASE.key(), "<=" + release); QueryOptions sampleOptions = new QueryOptions(INCLUDE, SampleDBAdaptor.QueryParams.UID.key()); diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java index 6c80531c1d8..e5184eb82e1 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantStorageManager.java @@ -168,11 +168,11 @@ public void exportData(String outputFile, VariantOutputFormat outputFormat, Stri public void exportData(String outputFile, VariantOutputFormat outputFormat, String variantsFile, Query query, QueryOptions queryOptions, String token) throws CatalogException, IOException, StorageEngineException { - catalogUtils.parseQuery(query, token); - checkSamplesPermissions(query, queryOptions, token); - - secureOperation(VariantExportTool.ID, catalogUtils.getAnyStudy(query, token), queryOptions, token, engine -> { - new VariantExportOperationManager(this, engine).export(outputFile, outputFormat, variantsFile, query, queryOptions, token); + Query finalQuery = catalogUtils.parseQuery(query, queryOptions, token); + checkSamplesPermissions(finalQuery, queryOptions, token); + String anyStudy = catalogUtils.getAnyStudy(finalQuery, token); + secureOperation(VariantExportTool.ID, anyStudy, queryOptions, token, engine -> { + new VariantExportOperationManager(this, engine).export(outputFile, outputFormat, variantsFile, finalQuery, queryOptions, token); return null; }); } @@ -568,7 +568,7 @@ public VariantDBIterator iterator(Query query, QueryOptions queryOptions, String DataStore dataStore = getDataStore(study, token); VariantStorageEngine storageEngine = getVariantStorageEngine(dataStore); - catalogUtils.parseQuery(query, token); + query = catalogUtils.parseQuery(query, queryOptions, token); checkSamplesPermissions(query, queryOptions, storageEngine.getMetadataManager(), token); return storageEngine.iterator(query, queryOptions); } @@ -790,7 +790,7 @@ public SampleMetadata getSampleMetadata(String study, String sample, String toke protected VariantStorageEngine getVariantStorageEngine(Query query, String token) throws CatalogException, StorageEngineException { String study = catalogUtils.getAnyStudy(query, token); - catalogUtils.parseQuery(query, token); + query = catalogUtils.parseQuery(query, token); DataStore dataStore = getDataStore(study, token); return getVariantStorageEngine(dataStore); } @@ -1026,7 +1026,7 @@ private R secure(Query query, QueryOptions queryOptions, String token, Enums String study = catalogUtils.getAnyStudy(query, token); StopWatch stopWatch = StopWatch.createStarted(); - catalogUtils.parseQuery(query, token); + query = catalogUtils.parseQuery(query, queryOptions, token); auditAttributes.append("catalogParseQueryTimeMillis", stopWatch.getTime(TimeUnit.MILLISECONDS)); DataStore dataStore = getDataStore(study, token); VariantStorageEngine variantStorageEngine = getVariantStorageEngine(dataStore); @@ -1095,13 +1095,16 @@ Map> checkSamplesPermissions(Query query, QueryOptions quer String userId = catalogManager.getUserManager().getUserId(token); Set returnedFields = VariantField.getIncludeFields(queryOptions); if (auditAction == Enums.Action.FACET) { - if (!VariantQueryProjectionParser.isIncludeSamplesDefined(query, VariantField.getIncludeFields(null))) { + if (VariantQueryProjectionParser.isIncludeNoSamples(query, VariantField.all())) { // General facet query. Do not check samples. returnedFields = Collections.emptySet(); } } - if (!returnedFields.contains(VariantField.STUDIES_SAMPLES) && !returnedFields.contains(VariantField.STUDIES_FILES)) { + VariantQueryProjectionParser.IncludeStatus includeSample = VariantQueryProjectionParser.getIncludeSampleStatus(query, returnedFields); + VariantQueryProjectionParser.IncludeStatus includeFile = VariantQueryProjectionParser.getIncludeFileStatus(query, returnedFields); + if (includeSample == VariantQueryProjectionParser.IncludeStatus.NONE + && includeFile == VariantQueryProjectionParser.IncludeStatus.NONE) { if (isValidParam(query, STUDY)) { ParsedQuery studies = VariantQueryUtils.splitValue(query, STUDY); studies.getValues().replaceAll(VariantQueryUtils::removeNegation); @@ -1114,11 +1117,9 @@ Map> checkSamplesPermissions(Query query, QueryOptions quer query.put(STUDY.key(), validStudies); } } - return Collections.emptyMap(); - } - - if (VariantQueryProjectionParser.isIncludeSamplesDefined(query, returnedFields)) { - Map> samplesToReturn = VariantQueryUtils.getSamplesMetadata(query, queryOptions, mm); + // samplesMap = Collections.emptyMap(); + } else if (includeSample == VariantQueryProjectionParser.IncludeStatus.SOME) { + Map> samplesToReturn = VariantQueryProjectionParser.getIncludeSampleNames(query, queryOptions, mm); checkStudyPermissions(samplesToReturn.keySet(), userId, token); for (Map.Entry> entry : samplesToReturn.entrySet()) { @@ -1139,8 +1140,8 @@ Map> checkSamplesPermissions(Query query, QueryOptions quer samplesMap.put(studyId, Collections.emptyList()); } } - } else { - logger.debug("Missing include samples! Obtaining samples to include from catalog."); + } else if (includeSample == VariantQueryProjectionParser.IncludeStatus.ALL) { + logger.debug("Include all samples. Obtaining samples to include from catalog."); List includeStudies = VariantQueryProjectionParser.getIncludeStudies(query, queryOptions, mm) .stream() .map(mm::getStudyName) diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManager.java index 3d70332ad55..1726026b297 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/operations/VariantFileIndexerOperationManager.java @@ -81,8 +81,9 @@ public class VariantFileIndexerOperationManager extends OperationManager { public static final String TRANSFORM = "transform"; public static final String LOAD = "load"; - // FIXME : Needed? + @Deprecated // Deprecated with no replacement. public static final String TRANSFORMED_FILES = "transformedFiles"; + public static final String SKIP_INDEXED_FILES = "skipIndexedFiles"; private final Logger logger; @@ -93,6 +94,7 @@ public class VariantFileIndexerOperationManager extends OperationManager { private boolean transform; private boolean load; private boolean resume; + private boolean skipIndexedFiles; private boolean keepIntermediateFiles; private Type step; private URI outDirUri; @@ -144,6 +146,7 @@ private void check(String study, ObjectMap params, String token) throws Exceptio load = params.getBoolean(LOAD, false); } resume = params.getBoolean(VariantStorageOptions.RESUME.key()); + skipIndexedFiles = params.getBoolean(SKIP_INDEXED_FILES); // Obtain the type of analysis (transform, load or index) step = getType(load, transform); @@ -568,7 +571,7 @@ private String updateDefaultCohortStatus(String study, String status, String ses * @param resume If resume, get also TRANSFORMING and INDEXING files. * @return List of non transformed files */ - private List filterTransformFiles(List fileList, boolean resume) { + private List filterTransformFiles(List fileList, boolean resume) throws StorageEngineException { if (fileList == null || fileList.isEmpty()) { return new ArrayList<>(); } @@ -588,21 +591,29 @@ private List filterTransformFiles(List fileList, boolean resume) { break; case FileIndex.IndexStatus.INDEXING: case FileIndex.IndexStatus.TRANSFORMING: - if (!resume) { - logger.warn("File already being transformed. " - + "We can only transform VCF files not transformed, the status is {}. " - + "Do '" + VariantStorageOptions.RESUME.key() + "' to continue.", - indexStatus); - } else { + if (resume) { filteredFiles.add(file); + } else { + String message = "File already being transformed. " + + "We can only transform VCF files not transformed, the status is " + indexStatus + ". " + + "Do '" + VariantStorageOptions.RESUME.key() + "' to continue."; + if (skipIndexedFiles) { + logger.warn(message); + } else { + throw new StorageEngineException(message); + } } break; case FileIndex.IndexStatus.TRANSFORMED: case FileIndex.IndexStatus.LOADING: case FileIndex.IndexStatus.READY: default: - logger.warn("We can only transform VCF files not transformed, the status is {}", - indexStatus); + String msg = "We can only " + step + " VCF files not transformed, the status is " + indexStatus; + if (skipIndexedFiles) { + logger.warn(msg); + } else { + throw new StorageEngineException(msg); + } break; } } else { diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantFileIndexJobLauncherTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantFileIndexJobLauncherTool.java index 35672881a9d..3aa539551ec 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantFileIndexJobLauncherTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantFileIndexJobLauncherTool.java @@ -50,7 +50,7 @@ protected void check() throws Exception { @Override protected void run() throws Exception { Query filesQuery = new Query() - .append(FORMAT.key(), File.Format.VCF) + .append(FORMAT.key(), Arrays.asList(File.Format.VCF, File.Format.GVCF)) .append(INTERNAL_INDEX_STATUS_NAME.key(), "!" + FileIndex.IndexStatus.READY); filesQuery.putIfNotEmpty(NAME.key(), toolParams.getName()); filesQuery.putIfNotEmpty(DIRECTORY.key(), toolParams.getDirectory()); diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantIndexOperationTool.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantIndexOperationTool.java index 4b87955dd55..b08a7520d51 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantIndexOperationTool.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/operations/VariantIndexOperationTool.java @@ -20,10 +20,12 @@ import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.catalog.db.api.FileDBAdaptor; +import org.opencb.opencga.core.exceptions.ToolException; import org.opencb.opencga.core.models.common.Enums; import org.opencb.opencga.core.models.variant.VariantIndexParams; import org.opencb.opencga.core.response.OpenCGAResult; import org.opencb.opencga.core.tools.annotations.Tool; +import org.opencb.opencga.core.tools.annotations.ToolParams; import org.opencb.opencga.storage.core.StoragePipelineResult; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.io.VariantReaderUtils; @@ -36,8 +38,7 @@ import java.util.List; import java.util.Set; -import static org.opencb.opencga.analysis.variant.manager.operations.VariantFileIndexerOperationManager.LOAD; -import static org.opencb.opencga.analysis.variant.manager.operations.VariantFileIndexerOperationManager.TRANSFORM; +import static org.opencb.opencga.analysis.variant.manager.operations.VariantFileIndexerOperationManager.*; @Tool(id = VariantIndexOperationTool.ID, description = VariantIndexOperationTool.DESCRIPTION, type = Tool.Type.OPERATION, resource = Enums.Resource.VARIANT) @@ -45,7 +46,9 @@ public class VariantIndexOperationTool extends OperationTool { public static final String ID = "variant-index"; public static final String DESCRIPTION = "Index variant files into the variant storage"; - private VariantIndexParams indexParams = new VariantIndexParams(); + @ToolParams + protected VariantIndexParams indexParams = new VariantIndexParams(); + private String study; public void setFile(String file) { @@ -64,7 +67,6 @@ public void setLoad(boolean load) { protected void check() throws Exception { super.check(); - indexParams.updateParams(params); study = getStudyFqn(); params.put(LOAD, indexParams.isLoad()); @@ -104,6 +106,7 @@ protected void check() throws Exception { params.put(VariantStorageOptions.POST_LOAD_CHECK.key(), indexParams.getPostLoadCheck()); params.put(VariantStorageOptions.INDEX_SEARCH.key(), indexParams.isIndexSearch()); params.put(VariantStorageOptions.DEDUPLICATION_POLICY.key(), indexParams.getDeduplicationPolicy()); + params.put(SKIP_INDEXED_FILES, indexParams.isSkipIndexedFiles()); } @Override @@ -123,10 +126,14 @@ protected void run() throws Exception { List results = variantStorageManager.index(study, indexParams.getFile(), getOutDir(keepIntermediateFiles).toString(), params, token); addAttribute("indexedFiles", Collections.size(results)); + addAttribute("StoragePipelineResult", results); if (Collections.isEmpty(results)) { - addWarning("Nothing to do!"); + if (indexParams.isSkipIndexedFiles()) { + addWarning("Nothing to do!"); + } else { + throw new ToolException("Nothing to do!"); + } } - addAttribute("StoragePipelineResult", results); for (StoragePipelineResult result : results) { inputFiles.add(result.getInput()); } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/samples/SampleEligibilityAnalysis.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/samples/SampleEligibilityAnalysis.java index d8f65fd1079..659aac02aae 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/samples/SampleEligibilityAnalysis.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/samples/SampleEligibilityAnalysis.java @@ -365,30 +365,27 @@ private List resolve(TreeQuery treeQuery) allSamplesFuture = executorService .submit(() -> new ArrayList<>(getVariantStorageManager().getIndexedSamples(studyFqn, getToken()))); - Query baseQuery = new Query(); - baseQuery.put(VariantQueryParam.STUDY.key(), studyFqn); - - return resolveNode(treeQuery.getRoot(), baseQuery, null); + return resolveNode(treeQuery.getRoot(), null); } - private List resolveNode(TreeQuery.Node node, Query baseQuery, List includeSamples) - throws CatalogException, StorageEngineException, IOException, ExecutionException, InterruptedException { + private List resolveNode(TreeQuery.Node node, List includeSamples) + throws CatalogException, ExecutionException, InterruptedException { switch (node.getType()) { case QUERY: - return resolveQuery(((TreeQuery.QueryNode) node), baseQuery, includeSamples); + return resolveQuery(((TreeQuery.QueryNode) node), includeSamples); case COMPLEMENT: - return resolveComplementQuery(((TreeQuery.ComplementNode) node), baseQuery, includeSamples); + return resolveComplementQuery(((TreeQuery.ComplementNode) node), includeSamples); case INTERSECTION: - return resolveIntersectNode(((TreeQuery.IntersectionNode) node), baseQuery, includeSamples); + return resolveIntersectNode(((TreeQuery.IntersectionNode) node), includeSamples); case UNION: - return resolveUnionNode(((TreeQuery.UnionNode) node), baseQuery, includeSamples); + return resolveUnionNode(((TreeQuery.UnionNode) node), includeSamples); default: throw new IllegalArgumentException("Unknown node type " + node.getType()); } } - private List resolveUnionNode(TreeQuery.UnionNode node, Query baseQuery, List includeSamples) - throws CatalogException, StorageEngineException, IOException, ExecutionException, InterruptedException { + private List resolveUnionNode(TreeQuery.UnionNode node, List includeSamples) + throws CatalogException, ExecutionException, InterruptedException { if (includeSamples == null) { includeSamples = getAllSamplesIfDone(); @@ -408,7 +405,7 @@ private List resolveUnionNode(TreeQuery.UnionNode node, Query baseQuery, if (includeSamples != null && includeSamples.isEmpty()) { logger.info("Skip node '{}'. All samples found", subNode); } else { - List thisNodeResult = resolveNode(subNode, baseQuery, includeSamples); + List thisNodeResult = resolveNode(subNode, includeSamples); if (includeSamples != null) { includeSamples.removeAll(thisNodeResult); } @@ -419,8 +416,8 @@ private List resolveUnionNode(TreeQuery.UnionNode node, Query baseQuery, return new ArrayList<>(result); } - private List resolveIntersectNode(TreeQuery.IntersectionNode node, Query baseQuery, List includeSamples) - throws CatalogException, StorageEngineException, IOException, ExecutionException, InterruptedException { + private List resolveIntersectNode(TreeQuery.IntersectionNode node, List includeSamples) + throws CatalogException, ExecutionException, InterruptedException { logger.info("Execute intersect-node with {} children at for {} samples", node.getNodes().size(), includeSamples == null ? "?" : includeSamples.size()); @@ -430,18 +427,18 @@ private List resolveIntersectNode(TreeQuery.IntersectionNode node, Query if (includeSamples != null && includeSamples.isEmpty()) { logger.info("Skip node '{}'", subNode); } else { - includeSamples = resolveNode(subNode, baseQuery, includeSamples); + includeSamples = resolveNode(subNode, includeSamples); } } return includeSamples; } - private List resolveComplementQuery(TreeQuery.ComplementNode node, Query baseQuery, List includeSamples) - throws CatalogException, IOException, StorageEngineException, ExecutionException, InterruptedException { + private List resolveComplementQuery(TreeQuery.ComplementNode node, List includeSamples) + throws CatalogException, ExecutionException, InterruptedException { logger.info("Execute complement-node for {} samples", includeSamples == null ? "?" : includeSamples.size()); - List subSamples = resolveNode(node.getNodes().get(0), baseQuery, includeSamples); + List subSamples = resolveNode(node.getNodes().get(0), includeSamples); if (includeSamples == null) { // Force get all samples includeSamples = getAllSamplesForce(); @@ -453,15 +450,15 @@ private List resolveComplementQuery(TreeQuery.ComplementNode node, Query return includeSamples; } - private List resolveQuery(TreeQuery.QueryNode node, Query baseQuery, List includeSamples) - throws CatalogException, StorageEngineException, IOException, ExecutionException, InterruptedException { + private List resolveQuery(TreeQuery.QueryNode node, List includeSamples) + throws CatalogException, ExecutionException, InterruptedException { if (includeSamples == null) { logger.info("Execute leaf-node '{}'", node); } else { logger.info("Execute leaf-node '{}' for {} samples", node, includeSamples.size()); } - Query variantsQuery = node.getQuery(); + Query variantsQuery = new Query(node.getQuery()); Query sampleQuery = new Query(); Query individualQuery = new Query(); for (String key : new HashSet<>(variantsQuery.keySet())) { @@ -475,7 +472,7 @@ private List resolveQuery(TreeQuery.QueryNode node, Query baseQuery, Lis } } - Set samples = resolveVariantQuery(node, baseQuery, includeSamples); + Set samples = resolveVariantQuery(node, variantsQuery, includeSamples); samples = resolveSampleCatalogQuery(sampleQuery, samples); samples = resolveIndividualCatalogQuery(individualQuery, samples); @@ -483,8 +480,15 @@ private List resolveQuery(TreeQuery.QueryNode node, Query baseQuery, Lis return new ArrayList<>(samples); } - private Set resolveVariantQuery(TreeQuery.QueryNode node, Query baseQuery, List includeSamples) - throws CatalogException, StorageEngineException, IOException, ExecutionException, InterruptedException { + private Set resolveVariantQuery(TreeQuery.QueryNode node, Query variantsQuery, List includeSamples) + throws ExecutionException, InterruptedException { + if (variantsQuery.isEmpty()) { + if (includeSamples == null) { + // Force get all samples + includeSamples = getAllSamplesForce(); + } + return new HashSet<>(includeSamples); + } // if (params.getBoolean("direct")) { // return resolveQueryDirect(node, baseQuery, includeSamples); // } else { @@ -492,12 +496,12 @@ private Set resolveVariantQuery(TreeQuery.QueryNode node, Query baseQuer // } try { - return resolveVariantQuerySamplesData(node, baseQuery, new AtomicReference<>(includeSamples)); + return resolveVariantQuerySamplesData(node, variantsQuery, new AtomicReference<>(includeSamples)); } catch (Exception e) { try { logger.warn("Error resolving variant query node: {}", e.getMessage()); logger.warn("Retry one time"); - return resolveVariantQuerySamplesData(node, baseQuery, new AtomicReference<>(includeSamples)); + return resolveVariantQuerySamplesData(node, variantsQuery, new AtomicReference<>(includeSamples)); } catch (Exception e2) { e.addSuppressed(e2); throw e; @@ -539,7 +543,7 @@ private Set resolveIndividualCatalogQuery(Query individualQuery, Set resolveIndividualCatalogQuery(Query individualQuery, Set resolveVariantQuerySamplesData(TreeQuery.QueryNode node, Query baseQuery, + private Set resolveVariantQuerySamplesData(TreeQuery.QueryNode node, Query query, AtomicReference> includeSamplesInputR) throws ExecutionException, InterruptedException { - Query query = new Query(baseQuery); - query.putAll(node.getQuery()); final String genotypes; + query = new Query(query); + query.put(VariantQueryParam.STUDY.key(), studyFqn); if (VariantQueryUtils.isValidParam(query, VariantQueryParam.GENOTYPE)) { String genotypesValue = query.getString(VariantQueryParam.GENOTYPE.key()); query.remove(VariantQueryParam.GENOTYPE.key()); diff --git a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantCatalogQueryUtilsTest.java b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantCatalogQueryUtilsTest.java index d5393eff848..9e16ed6f9b2 100644 --- a/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantCatalogQueryUtilsTest.java +++ b/opencga-analysis/src/test/java/org/opencb/opencga/analysis/variant/VariantCatalogQueryUtilsTest.java @@ -385,7 +385,7 @@ public void queryByFamilyWithoutStudy() throws CatalogException { @Test public void queryByFamilyNotFound() throws CatalogException { - CatalogException e = new CatalogException("Family asdf not found"); + CatalogException e = new CatalogException("Missing families: asdf not found"); thrown.expectMessage(e.getMessage()); thrown.expect(e.getClass()); queryUtils.parseQuery(new Query(STUDY.key(), "s1").append(FAMILY.key(), "asdf").append(FAMILY_DISORDER.key(), "asdf"), sessionId); diff --git a/opencga-app/app/misc/clients/python_client_generator.py b/opencga-app/app/misc/clients/python_client_generator.py index f7db16ecf8a..36ebb595f05 100644 --- a/opencga-app/app/misc/clients/python_client_generator.py +++ b/opencga-app/app/misc/clients/python_client_generator.py @@ -28,6 +28,11 @@ def __init__(self, server_url, output_dir): def to_snake_case(text): return re.sub(r'(? org.opencb.opencga opencga - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/AdminCliOptionsParser.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/AdminCliOptionsParser.java index cac6664e196..f7a2d513c1f 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/AdminCliOptionsParser.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/AdminCliOptionsParser.java @@ -121,6 +121,7 @@ public AdminCliOptionsParser() { migrationSubCommands.addCommand("v1.4.0", this.migrationCommandOptions.getMigrateV140CommandOptions()); migrationSubCommands.addCommand("v2.0.0", this.migrationCommandOptions.getMigrateV200CommandOptions()); migrationSubCommands.addCommand("v2.0.1", this.migrationCommandOptions.getMigrateV201CommandOptions()); + migrationSubCommands.addCommand("v2.0.3", this.migrationCommandOptions.getMigrateV203CommandOptions()); } @Override diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java index d5a9b2fde02..5ad9d948e2a 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/executors/MigrationCommandExecutor.java @@ -1,6 +1,7 @@ package org.opencb.opencga.app.cli.admin.executors; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.builder.ToStringBuilder; import org.bson.Document; import org.opencb.biodata.models.clinical.interpretation.Software; import org.opencb.commons.datastore.core.ObjectMap; @@ -19,7 +20,9 @@ import org.opencb.opencga.catalog.db.api.FileDBAdaptor; import org.opencb.opencga.catalog.db.api.StudyDBAdaptor; import org.opencb.opencga.catalog.db.mongodb.MongoDBAdaptorFactory; +import org.opencb.opencga.catalog.db.mongodb.MongoDBUtils; import org.opencb.opencga.catalog.exceptions.CatalogAuthenticationException; +import org.opencb.opencga.catalog.exceptions.CatalogDBException; import org.opencb.opencga.catalog.exceptions.CatalogException; import org.opencb.opencga.catalog.managers.CatalogManager; import org.opencb.opencga.catalog.utils.UuidUtils; @@ -55,10 +58,7 @@ public class MigrationCommandExecutor extends AdminCommandExecutor { private final MigrationCommandOptions migrationCommandOptions; - - private int version; - private int release; - private int lastUpdate; + private OpencgaVersion opencgaVersion; public MigrationCommandExecutor(MigrationCommandOptions migrationCommandOptions) { super(migrationCommandOptions.getCommonOptions()); @@ -84,6 +84,9 @@ public void execute() throws Exception { case "v2.0.1": v2_0_1(); break; + case "v2.0.3": + v2_0_3(); + break; default: logger.error("Subcommand '{}' not valid", subCommandString); break; @@ -455,36 +458,99 @@ private void v2_0_1() throws Exception { } } + private void v2_0_3() throws CatalogException { + MigrationCommandOptions.MigrateV2_0_3CommandOptions options = migrationCommandOptions.getMigrateV203CommandOptions(); + setCatalogDatabaseCredentials(options, options.commonOptions); + + MongoDBAdaptorFactory factory = new MongoDBAdaptorFactory(configuration); + MongoDBCollection metaCollection = factory.getMongoDBCollectionMap().get(MongoDBAdaptorFactory.METADATA_COLLECTION); + + OpencgaVersion opencgaVersion = null; + try (CatalogManager catalogManager = new CatalogManager(configuration)) { + // Check admin password + String adminToken = catalogManager.getUserManager().loginAsAdmin(options.commonOptions.adminPassword).getToken(); + adminToken = catalogManager.getUserManager().getAdminNonExpiringToken(adminToken); + + if (!needsMigration(metaCollection, 20003, 1)) { + logger.info("DB already migrated to v2.0.3. Nothing to migrate"); + return; + } + opencgaVersion = new OpencgaVersion(20003, 1, this.opencgaVersion.getLastJavaUpdate(), this.opencgaVersion.getLastJsUpdate()); + + logger.info("Starting Catalog migration for 2.0.3"); + if (needsUpdate(1)) { + // Add automatically roles to all the family members + QueryOptions familyUpdateOptions = new QueryOptions(ParamConstants.FAMILY_UPDATE_ROLES_PARAM, true); + for (Project project : catalogManager.getProjectManager().get(new Query(), new QueryOptions(), adminToken).getResults()) { + if (project.getStudies() != null) { + for (Study study : project.getStudies()) { + logger.info("Updating family roles from study {}", study.getFqn()); + catalogManager.getFamilyManager().update(study.getFqn(), new Query(), null, familyUpdateOptions, adminToken); + } + } + } + + opencgaVersion.incrementLastJavaUpdate(); + } + } catch (CatalogException e) { + logger.error("Error migration to v2.0.3: {}", e.getMessage(), e); + } finally { + if (opencgaVersion != null) { + updateOpencgaVersion(metaCollection, opencgaVersion); + } + } + } private boolean needsMigration(MongoDBCollection metaCollection, int version, int release) { fetchUpdateVersionVariables(metaCollection); - return (this.version < version || (this.version == version && this.release <= release)); + logger.info("Current version: {}", this.opencgaVersion.getVersion()); + logger.info("Current release: {}", this.opencgaVersion.getRelease()); + logger.info("Expected version: {}", version); + logger.info("Expected release: {}", release); + boolean needsMigration = this.opencgaVersion.getVersion() < version + || (this.opencgaVersion.getVersion() == version && this.opencgaVersion.getRelease() <= release); + if (needsMigration && this.opencgaVersion.getVersion() < version) { + // Reset counters + this.opencgaVersion.setLastJavaUpdate(0); + this.opencgaVersion.setLastJsUpdate(0); + } + return needsMigration; } private void fetchUpdateVersionVariables(MongoDBCollection metaCollection) { // Obtain the latest changes made to the DB Document metaDocument = metaCollection.find(new Document(), QueryOptions.empty()).first(); Object fullVersion = metaDocument.get("_fullVersion"); + int version = 20000; + int release = 4; + int lastUpdate = 0; + int lastJsUpdate = 0; if (fullVersion != null) { - this.version = ((Document) fullVersion).getInteger("version"); - this.release = ((Document) fullVersion).getInteger("release"); - this.lastUpdate = ((Document) fullVersion).getInteger("lastJavaUpdate"); - } else { - this.version = 20000; - this.release = 4; - this.lastUpdate = 0; + version = ((Number) ((Document) fullVersion).get("version")).intValue(); + release = ((Number) ((Document) fullVersion).get("release")).intValue(); + lastUpdate = ((Number) ((Document) fullVersion).get("lastJavaUpdate")).intValue(); + lastJsUpdate = ((Number) ((Document) fullVersion).get("lastJsUpdate")).intValue(); } + + this.opencgaVersion = new OpencgaVersion(version, release, lastUpdate, lastJsUpdate); } private boolean needsUpdate(int update) { - return this.lastUpdate < update; + return this.opencgaVersion.getLastJavaUpdate() < update; } + @Deprecated private void updateLastUpdate(MongoDBCollection metaCollection, int update) { metaCollection.update(new Document(), new Document("$set", new Document("_fullVersion.lastJavaUpdate", update)), QueryOptions.empty()); } + private void updateOpencgaVersion(MongoDBCollection metaCollection, OpencgaVersion opencgaVersion) throws CatalogDBException { + logger.info("Updating migration metadata with: {}", opencgaVersion.toString()); + Document versionDoc = MongoDBUtils.getMongoDBDocument(opencgaVersion, "OpencgaVersion"); + metaCollection.update(new Document(), new Document("$set", new Document("_fullVersion", versionDoc)), QueryOptions.empty()); + } + private void runMigration(CatalogManager catalogManager, String scriptFolder, String scriptFileName) throws IOException, InterruptedException, CatalogException { String authentication = ""; @@ -536,4 +602,67 @@ private void runMigration(CatalogManager catalogManager, String scriptFolder, St } } + public static class OpencgaVersion { + private int version; + private int release; + private int lastJavaUpdate; + private int lastJsUpdate; + + public OpencgaVersion() { + } + + public OpencgaVersion(int version, int release, int lastJavaUpdate, int lastJsUpdate) { + this.version = version; + this.release = release; + this.lastJavaUpdate = lastJavaUpdate; + this.lastJsUpdate = lastJsUpdate; + } + + public OpencgaVersion(OpencgaVersion opencgaVersion) { + this(opencgaVersion.getVersion(), opencgaVersion.getRelease(), opencgaVersion.getLastJavaUpdate(), + opencgaVersion.getLastJsUpdate()); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("OpencgaVersion{"); + sb.append("version=").append(version); + sb.append(", release=").append(release); + sb.append(", lastJavaUpdate=").append(lastJavaUpdate); + sb.append(", lastJsUpdate=").append(lastJsUpdate); + sb.append('}'); + return sb.toString(); + } + + public int getVersion() { + return version; + } + + public int getRelease() { + return release; + } + + public int getLastJavaUpdate() { + return lastJavaUpdate; + } + + public OpencgaVersion setLastJavaUpdate(int lastJavaUpdate) { + this.lastJavaUpdate = lastJavaUpdate; + return this; + } + + public void incrementLastJavaUpdate() { + this.lastJavaUpdate++; + } + + public int getLastJsUpdate() { + return lastJsUpdate; + } + + public OpencgaVersion setLastJsUpdate(int lastJsUpdate) { + this.lastJsUpdate = lastJsUpdate; + return this; + } + } + } diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/options/MigrationCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/options/MigrationCommandOptions.java index d5fd54234e5..597f0c08d4c 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/options/MigrationCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/admin/options/MigrationCommandOptions.java @@ -21,6 +21,7 @@ public class MigrationCommandOptions extends GeneralCliOptions { private final MigrateV1_4_0CommandOptions migrateV140CommandOptions; private final MigrateV2_0_0CommandOptions migrateV200CommandOptions; private final MigrateV2_0_1CommandOptions migrateV201CommandOptions; + private final MigrateV2_0_3CommandOptions migrateV203CommandOptions; private final AdminCliOptionsParser.AdminCommonCommandOptions commonOptions; public MigrationCommandOptions(JCommander jCommander, AdminCliOptionsParser.AdminCommonCommandOptions commonOptions) { @@ -30,6 +31,7 @@ public MigrationCommandOptions(JCommander jCommander, AdminCliOptionsParser.Admi this.migrateV140CommandOptions = new MigrateV1_4_0CommandOptions(); this.migrateV200CommandOptions = new MigrateV2_0_0CommandOptions(); this.migrateV201CommandOptions = new MigrateV2_0_1CommandOptions(); + this.migrateV203CommandOptions = new MigrateV2_0_3CommandOptions(); } @Parameters(commandNames = {"v1.3.0"}, commandDescription = "Migrate OpenCGA from version 1.2.x to 1.3.0") @@ -85,6 +87,14 @@ public class MigrateV2_0_1CommandOptions extends AdminCliOptionsParser.CatalogDa } + @Parameters(commandNames = {"v2.0.3"}, commandDescription = "Migrate OpenCGA from version 2.0.1 to 2.0.3") + public class MigrateV2_0_3CommandOptions extends AdminCliOptionsParser.CatalogDatabaseCommandOptions { + + @ParametersDelegate + public AdminCliOptionsParser.AdminCommonCommandOptions commonOptions = MigrationCommandOptions.this.commonOptions; + + } + public enum MigrateRC { ALL, RC1, @@ -117,6 +127,10 @@ public MigrateV2_0_1CommandOptions getMigrateV201CommandOptions() { return migrateV201CommandOptions; } + public MigrateV2_0_3CommandOptions getMigrateV203CommandOptions() { + return migrateV203CommandOptions; + } + public AdminCliOptionsParser.AdminCommonCommandOptions getCommonOptions() { return commonOptions; } diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/FileCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/FileCommandExecutor.java index 58f944318be..fe28b75aaf6 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/FileCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/FileCommandExecutor.java @@ -90,7 +90,7 @@ private void postlink() throws ToolException { // Prepare analysis parameters and config ObjectMap params = new PostLinkToolParams( - options.files) + options.files, options.batchSize) .toObjectMap(options.commonOptions.params) .append(ParamConstants.STUDY_PARAM, options.studyId); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java index e205b9ef056..9e5d0e781c9 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/executors/VariantInternalCommandExecutor.java @@ -376,7 +376,9 @@ private void index() throws ToolException { cliOptions.genericVariantIndexOptions.aggregationMappingFile, cliOptions.genericVariantIndexOptions.annotate, cliOptions.genericVariantIndexOptions.annotator, - cliOptions.genericVariantIndexOptions.overwriteAnnotations, cliOptions.genericVariantIndexOptions.indexSearch) + cliOptions.genericVariantIndexOptions.overwriteAnnotations, + cliOptions.genericVariantIndexOptions.indexSearch, + cliOptions.skipIndexedFiles) .toObjectMap(cliOptions.commonOptions.params) .append(ParamConstants.STUDY_PARAM, cliOptions.study) .append(VariantStorageOptions.STDIN.key(), cliOptions.stdin) diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/FileCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/FileCommandOptions.java index 300912b7648..8fa80e486a3 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/FileCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/FileCommandOptions.java @@ -100,6 +100,9 @@ public class PostlinkCommandOptions { @Parameter(names = {"--files"}, description = "List of files to associate samples.", arity = 1) public List files; + @Parameter(names = {"--batch-size"}, description = "Samples update batch size") + public Integer batchSize; + @Parameter(names = {"-o", "--" + OUTDIR_PARAM_NAME}, description = "Directory where output files will be saved", required = true, arity = 1) public String outDir; diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java index c5553598b5c..0b5ec3e2b78 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/internal/options/VariantCommandOptions.java @@ -226,6 +226,9 @@ public class VariantIndexCommandOptions extends GeneralCliOptions.StudyOption { @Parameter(names = {"--stdout"}, description = "Write the transformed variants file to the standard output") public boolean stdout; + + @Parameter(names = {"--skip-indexed-files"}, description = "Do not fail if any of the input files was already indexed.") + public boolean skipIndexedFiles; } @Parameters(commandNames = {SECONDARY_INDEX_COMMAND}, commandDescription = "Creates a secondary index using a search engine") diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java index e6d1b9baecd..4af522ddce2 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/OpencgaCliOptionsParser.java @@ -162,6 +162,7 @@ public OpencgaCliOptionsParser() { fileSubCommands.addCommand("upload", fileCommandOptions.uploadCommandOptions); fileSubCommands.addCommand("link", fileCommandOptions.linkCommandOptions); fileSubCommands.addCommand("link-run", fileCommandOptions.linkRunCommandOptions); + fileSubCommands.addCommand("post-link-run", fileCommandOptions.postLinkRunCommandOptions); fileSubCommands.addCommand("unlink", fileCommandOptions.unlinkCommandOptions); // fileSubCommands.addCommand("relink", fileCommandOptions.relinkCommandOptions); fileSubCommands.addCommand("delete", fileCommandOptions.deleteCommandOptions); diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/analysis/VariantCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/analysis/VariantCommandExecutor.java index a9cc6fa6a7f..b0b8bf6275d 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/analysis/VariantCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/analysis/VariantCommandExecutor.java @@ -507,7 +507,8 @@ private RestResponse index() throws ClientException { variantIndex.genericVariantIndexOptions.annotate, variantIndex.genericVariantIndexOptions.annotator, variantIndex.genericVariantIndexOptions.overwriteAnnotations, - variantIndex.genericVariantIndexOptions.indexSearch), + variantIndex.genericVariantIndexOptions.indexSearch, + variantIndex.skipIndexedFiles), getParams(variantIndex.study)); } diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/catalog/FileCommandExecutor.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/catalog/FileCommandExecutor.java index f80ac347f1c..c45125c873a 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/catalog/FileCommandExecutor.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/executors/catalog/FileCommandExecutor.java @@ -120,6 +120,9 @@ public void execute() throws Exception { case "link-run": queryResponse = linkRun(); break; + case "post-link-run": + queryResponse = postLinkRun(); + break; case "unlink": queryResponse = unlink(); break; @@ -397,6 +400,17 @@ private RestResponse linkRun() throws ClientException { return openCGAClient.getFileClient().runLink(data, params); } + private RestResponse postLinkRun() throws ClientException { + FileCommandOptions.PostLinkRunCommandOptions commandOptions = filesCommandOptions.postLinkRunCommandOptions; + + ObjectMap params = getCommonParams(commandOptions.study, filesCommandOptions.commonCommandOptions.params); + addJobParams(commandOptions.jobOptions, params); + + PostLinkToolParams data = new PostLinkToolParams(commandOptions.files, commandOptions.batchSize); + + return openCGAClient.getFileClient().runPostlink(data, params); + } + // private RestResponse relink() throws CatalogException, IOException { // logger.debug("Change file location. Provided file must be either STAGED or an external file. [DEPRECATED]"); // diff --git a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/FileCommandOptions.java b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/FileCommandOptions.java index 11d4403a36f..52ef7f531cc 100644 --- a/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/FileCommandOptions.java +++ b/opencga-app/src/main/java/org/opencb/opencga/app/cli/main/options/FileCommandOptions.java @@ -53,6 +53,7 @@ public class FileCommandOptions { public DeleteCommandOptions deleteCommandOptions; public LinkCommandOptions linkCommandOptions; public LinkRunCommandOptions linkRunCommandOptions; + public PostLinkRunCommandOptions postLinkRunCommandOptions; // public RelinkCommandOptions relinkCommandOptions; public UnlinkCommandOptions unlinkCommandOptions; public RefreshCommandOptions refreshCommandOptions; @@ -96,6 +97,7 @@ public FileCommandOptions(CommonCommandOptions commonCommandOptions, DataModelOp this.unlinkCommandOptions = new UnlinkCommandOptions(); this.linkCommandOptions = new LinkCommandOptions(); this.linkRunCommandOptions = new LinkRunCommandOptions(); + this.postLinkRunCommandOptions = new PostLinkRunCommandOptions(); this.uploadCommandOptions = new UploadCommandOptions(); this.statsCommandOptions = new StatsCommandOptions(); this.fetchCommandOptions = new FetchCommandOptions(); @@ -429,6 +431,22 @@ public class LinkRunCommandOptions extends StudyOption { public boolean parents; } + @Parameters(commandNames = {"post-link-run"}, commandDescription = "Post link operation. Associate non-registered samples for files with high volumes of samples.") + public class PostLinkRunCommandOptions extends StudyOption { + + @ParametersDelegate + public CommonCommandOptions commonOptions = commonCommandOptions; + + @ParametersDelegate + public GeneralCliOptions.JobOptions jobOptions = new GeneralCliOptions.JobOptions(); + + @Parameter(names = {"--files"}, description = "Files that need to be processed. Use \"" + ParamConstants.ALL + "\" to process all files from the study", required = true, variableArity = true) + public List files; + + @Parameter(names = {"--batch-size"}, description = "Samples update batch size") + public Integer batchSize; + } + @Parameters(commandNames = {"upload"}, commandDescription = "Upload a physical local file to catalog.") public class UploadCommandOptions extends StudyOption { diff --git a/opencga-catalog/pom.xml b/opencga-catalog/pom.xml index c1bc43bb7ee..e3415599a7d 100644 --- a/opencga-catalog/pom.xml +++ b/opencga-catalog/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MetaMongoDBAdaptor.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MetaMongoDBAdaptor.java index 8dc580e70bd..44554ef219c 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MetaMongoDBAdaptor.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MetaMongoDBAdaptor.java @@ -173,10 +173,10 @@ public void initializeMetaCollection(Configuration configuration) throws Catalog Document adminDocument = getMongoDBDocument(configuration.getAdmin(), "Admin"); metadataObject.put("admin", adminDocument); metadataObject.put("_fullVersion", new Document() - .append("version", 20001) + .append("version", 20003) .append("release", 1) - .append("lastJsUpdate", 4) - .append("lastJavaUpdate", 0) + .append("lastJsUpdate", 0) + .append("lastJavaUpdate", 1) ); metaCollection.insert(metadataObject, null); diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MongoDBUtils.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MongoDBUtils.java index 39f4dee0dab..db00c765217 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MongoDBUtils.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/db/mongodb/MongoDBUtils.java @@ -53,7 +53,7 @@ /** * Created by imedina on 21/11/14. */ -class MongoDBUtils { +public class MongoDBUtils { // Special queryOptions keys public static final Set DATASTORE_OPTIONS = Arrays.asList("include", "exclude", "sort", "limit", "skip").stream() @@ -158,7 +158,7 @@ static DBObject getDbObject(Object object, String objectName) throws CatalogDBEx return dbObject; } - static Document getMongoDBDocument(Object object, String objectName) throws CatalogDBException { + public static Document getMongoDBDocument(Object object, String objectName) throws CatalogDBException { Document document; String jsonString = null; try { diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FamilyManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FamilyManager.java index 3a09b0cdbc2..33f75b3be59 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FamilyManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/FamilyManager.java @@ -1392,7 +1392,7 @@ private void validateFamily(Family family) throws CatalogException { Map membersMap = new HashMap<>(); // individualName|individualId: Individual Map> parentsMap = new HashMap<>(); // motherName||F---fatherName||M: List - Set noParentsSet = new HashSet<>(); // Set with individuals without parents + Set noParentsSet = new HashSet<>(); // Set with individuals without parents // 1. Fill in the objects initialised above for (Individual individual : family.getMembers()) { @@ -1426,7 +1426,7 @@ private void validateFamily(Family family) throws CatalogException { } } if (parentsKey == null) { - noParentsSet.add(individual); + noParentsSet.add(individual.getId()); } else { if (!parentsMap.containsKey(parentsKey)) { parentsMap.put(parentsKey, new ArrayList<>()); @@ -1457,7 +1457,7 @@ private void validateFamily(Family family) throws CatalogException { membersMap.get(name).setSex(sex); // We attempt to remove the individual from the noParentsSet - noParentsSet.remove(membersMap.get(name)); + noParentsSet.remove(membersMap.get(name).getId()); } } } @@ -1466,8 +1466,7 @@ private void validateFamily(Family family) throws CatalogException { if (noParentsSet.size() > 0) { // throw new CatalogException("Some members that are not related to any other have been found: " // + noParentsSet.stream().map(Individual::getName).collect(Collectors.joining(", "))); - logger.warn("Some members that are not related to any other have been found: {}", - noParentsSet.stream().map(Individual::getId).collect(Collectors.joining(", "))); + logger.warn("Some members that are not related to any other have been found: {}", StringUtils.join(noParentsSet, ", ")); } } @@ -1577,10 +1576,9 @@ private void calculateRoles(Study study, Family family, String user) Set individualIds = family.getMembers().stream().map(Individual::getId).collect(Collectors.toSet()); - QueryOptions options = new QueryOptions(QueryOptions.INCLUDE, IndividualDBAdaptor.QueryParams.ID.key()); Map> roles = new HashMap<>(); for (Individual member : family.getMembers()) { - List individualList = catalogManager.getIndividualManager().calculateRelationship(study, member, 2, options, user); + List individualList = catalogManager.getIndividualManager().calculateRelationship(study, member, 2, user); Map memberRelation = new HashMap<>(); for (Individual individual : individualList) { if (individualIds.contains(individual.getId())) { diff --git a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/IndividualManager.java b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/IndividualManager.java index 3f1933f4ff6..afbc39db090 100644 --- a/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/IndividualManager.java +++ b/opencga-catalog/src/main/java/org/opencb/opencga/catalog/managers/IndividualManager.java @@ -518,7 +518,7 @@ public OpenCGAResult relatives(String studyId, String individualId, individualList, individualList.size()); } - individualList.addAll(calculateRelationship(study, proband, degree, queryOptions, userId)); + individualList.addAll(calculateRelationship(study, proband, degree, userId)); auditManager.audit(userId, Enums.Action.RELATIVES, Enums.Resource.INDIVIDUAL, individualId, individualUuid, study.getId(), study.getUuid(), auditParams, new AuditRecord.Status(AuditRecord.Status.Result.SUCCESS)); @@ -580,8 +580,10 @@ void addDegreeRelatives(Map> relat } } - List calculateRelationship(Study study, Individual proband, int maxDegree, QueryOptions options, String userId) + List calculateRelationship(Study study, Individual proband, int maxDegree, String userId) throws CatalogDBException, CatalogParameterException, CatalogAuthorizationException { + QueryOptions options = fixOptionsForRelatives(new QueryOptions(QueryOptions.INCLUDE, IndividualDBAdaptor.QueryParams.ID.key())); + List individualList = new LinkedList<>(); individualList.add(proband); @@ -649,11 +651,9 @@ List calculateRelationship(Study study, Individual proband, int maxD // Update set of already obtained individuals Set skipIndividuals = individualList.stream().map(Individual::getId).collect(Collectors.toSet()); for (Individual child : children) { - // TODO: Change relations !! - relationMap.put(Family.FamiliarRelationship.SON, Family.FamiliarRelationship.BROTHER); - relationMap.put(Family.FamiliarRelationship.DAUGHTER, Family.FamiliarRelationship.SISTER); - relationMap.put(Family.FamiliarRelationship.CHILD_OF_UNKNOWN_SEX, - Family.FamiliarRelationship.FULL_SIBLING); + relationMap.put(Family.FamiliarRelationship.SON, Family.FamiliarRelationship.GRANDSON); + relationMap.put(Family.FamiliarRelationship.DAUGHTER, Family.FamiliarRelationship.GRANDDAUGHTER); + relationMap.put(Family.FamiliarRelationship.CHILD_OF_UNKNOWN_SEX, Family.FamiliarRelationship.GRANDCHILD); relativeMap = lookForChildren(study, child, skipIndividuals, options, userId); addDegreeRelatives(relativeMap, relationMap, 2, individualList); diff --git a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/FamilyManagerTest.java b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/FamilyManagerTest.java index 03f54fb82a1..66adbf0d286 100644 --- a/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/FamilyManagerTest.java +++ b/opencga-catalog/src/test/java/org/opencb/opencga/catalog/managers/FamilyManagerTest.java @@ -141,6 +141,126 @@ public void createFamily() throws CatalogException { assertTrue("Father id not associated to any children", fatherIdUpdated); } + @Test + public void createComplexFamily() throws CatalogException { + Individual paternalGrandfather = new Individual().setId("p_grandfather"); + Individual paternalGrandmother = new Individual().setId("p_grandmother"); + Individual maternalGrandfather = new Individual().setId("m_grandfather"); + Individual maternalGrandmother = new Individual().setId("m_grandmother"); + Individual father = new Individual().setId("father").setFather(paternalGrandfather).setMother(paternalGrandmother); + Individual mother = new Individual().setId("mother").setMother(maternalGrandmother).setFather(maternalGrandfather); + Individual proband = new Individual().setId("proband").setFather(father).setMother(mother); + Individual brother = new Individual().setId("brother").setFather(father).setMother(mother).setSex(IndividualProperty.Sex.MALE); + Individual sister = new Individual().setId("sister").setFather(father).setMother(mother).setSex(IndividualProperty.Sex.FEMALE); + Individual sibling = new Individual().setId("sibling").setFather(father).setMother(mother); + + catalogManager.getFamilyManager().create(STUDY, new Family().setId("family").setMembers( + Arrays.asList(paternalGrandfather, paternalGrandmother, maternalGrandfather, maternalGrandmother, mother, father, proband, + brother, sister, sibling)), QueryOptions.empty(), sessionIdUser); + OpenCGAResult family = catalogManager.getFamilyManager().get(STUDY, "family", QueryOptions.empty(), sessionIdUser); + Map> roles = family.first().getRoles(); + assertEquals(10, family.first().getMembers().size()); + + Map pGrandfather = roles.get("p_grandfather"); + assertEquals(5, pGrandfather.size()); + assertEquals(Family.FamiliarRelationship.SON, pGrandfather.get("father")); + assertEquals(Family.FamiliarRelationship.GRANDCHILD, pGrandfather.get("proband")); + assertEquals(Family.FamiliarRelationship.GRANDCHILD, pGrandfather.get("sibling")); + assertEquals(Family.FamiliarRelationship.GRANDSON, pGrandfather.get("brother")); + assertEquals(Family.FamiliarRelationship.GRANDDAUGHTER, pGrandfather.get("sister")); + + Map pGrandmother = roles.get("p_grandmother"); + assertEquals(5, pGrandmother.size()); + assertEquals(Family.FamiliarRelationship.SON, pGrandmother.get("father")); + assertEquals(Family.FamiliarRelationship.GRANDCHILD, pGrandmother.get("proband")); + assertEquals(Family.FamiliarRelationship.GRANDCHILD, pGrandmother.get("sibling")); + assertEquals(Family.FamiliarRelationship.GRANDSON, pGrandmother.get("brother")); + assertEquals(Family.FamiliarRelationship.GRANDDAUGHTER, pGrandmother.get("sister")); + + Map mGrandfather = roles.get("m_grandfather"); + assertEquals(5, mGrandfather.size()); + assertEquals(Family.FamiliarRelationship.DAUGHTER, mGrandfather.get("mother")); + assertEquals(Family.FamiliarRelationship.GRANDCHILD, mGrandfather.get("proband")); + assertEquals(Family.FamiliarRelationship.GRANDCHILD, mGrandfather.get("sibling")); + assertEquals(Family.FamiliarRelationship.GRANDSON, mGrandfather.get("brother")); + assertEquals(Family.FamiliarRelationship.GRANDDAUGHTER, mGrandfather.get("sister")); + + Map mGrandmother = roles.get("m_grandmother"); + assertEquals(5, mGrandmother.size()); + assertEquals(Family.FamiliarRelationship.DAUGHTER, mGrandmother.get("mother")); + assertEquals(Family.FamiliarRelationship.GRANDCHILD, mGrandmother.get("proband")); + assertEquals(Family.FamiliarRelationship.GRANDCHILD, mGrandmother.get("sibling")); + assertEquals(Family.FamiliarRelationship.GRANDSON, mGrandmother.get("brother")); + assertEquals(Family.FamiliarRelationship.GRANDDAUGHTER, mGrandmother.get("sister")); + + Map motherMap = roles.get("mother"); + assertEquals(6, motherMap.size()); + assertEquals(Family.FamiliarRelationship.MOTHER, motherMap.get("m_grandmother")); + assertEquals(Family.FamiliarRelationship.FATHER, motherMap.get("m_grandfather")); + assertEquals(Family.FamiliarRelationship.CHILD_OF_UNKNOWN_SEX, motherMap.get("proband")); + assertEquals(Family.FamiliarRelationship.CHILD_OF_UNKNOWN_SEX, motherMap.get("sibling")); + assertEquals(Family.FamiliarRelationship.SON, motherMap.get("brother")); + assertEquals(Family.FamiliarRelationship.DAUGHTER, motherMap.get("sister")); + + Map fatherMap = roles.get("father"); + assertEquals(6, fatherMap.size()); + assertEquals(Family.FamiliarRelationship.MOTHER, fatherMap.get("p_grandmother")); + assertEquals(Family.FamiliarRelationship.FATHER, fatherMap.get("p_grandfather")); + assertEquals(Family.FamiliarRelationship.CHILD_OF_UNKNOWN_SEX, fatherMap.get("proband")); + assertEquals(Family.FamiliarRelationship.CHILD_OF_UNKNOWN_SEX, fatherMap.get("sibling")); + assertEquals(Family.FamiliarRelationship.SON, fatherMap.get("brother")); + assertEquals(Family.FamiliarRelationship.DAUGHTER, fatherMap.get("sister")); + + Map probandMap = roles.get("proband"); + assertEquals(9, probandMap.size()); + assertEquals(Family.FamiliarRelationship.MATERNAL_GRANDMOTHER, probandMap.get("m_grandmother")); + assertEquals(Family.FamiliarRelationship.MATERNAL_GRANDFATHER, probandMap.get("m_grandfather")); + assertEquals(Family.FamiliarRelationship.PATERNAL_GRANDMOTHER, probandMap.get("p_grandmother")); + assertEquals(Family.FamiliarRelationship.PATERNAL_GRANDFATHER, probandMap.get("p_grandfather")); + assertEquals(Family.FamiliarRelationship.MOTHER, probandMap.get("mother")); + assertEquals(Family.FamiliarRelationship.FATHER, probandMap.get("father")); + assertEquals(Family.FamiliarRelationship.FULL_SIBLING, probandMap.get("sibling")); + assertEquals(Family.FamiliarRelationship.BROTHER, probandMap.get("brother")); + assertEquals(Family.FamiliarRelationship.SISTER, probandMap.get("sister")); + + Map siblingMap = roles.get("sibling"); + assertEquals(9, siblingMap.size()); + assertEquals(Family.FamiliarRelationship.MATERNAL_GRANDMOTHER, siblingMap.get("m_grandmother")); + assertEquals(Family.FamiliarRelationship.MATERNAL_GRANDFATHER, siblingMap.get("m_grandfather")); + assertEquals(Family.FamiliarRelationship.PATERNAL_GRANDMOTHER, siblingMap.get("p_grandmother")); + assertEquals(Family.FamiliarRelationship.PATERNAL_GRANDFATHER, siblingMap.get("p_grandfather")); + assertEquals(Family.FamiliarRelationship.MOTHER, siblingMap.get("mother")); + assertEquals(Family.FamiliarRelationship.FATHER, siblingMap.get("father")); + assertEquals(Family.FamiliarRelationship.FULL_SIBLING, siblingMap.get("proband")); + assertEquals(Family.FamiliarRelationship.BROTHER, siblingMap.get("brother")); + assertEquals(Family.FamiliarRelationship.SISTER, siblingMap.get("sister")); + + Map brotherMap = roles.get("brother"); + assertEquals(9, brotherMap.size()); + assertEquals(Family.FamiliarRelationship.MATERNAL_GRANDMOTHER, brotherMap.get("m_grandmother")); + assertEquals(Family.FamiliarRelationship.MATERNAL_GRANDFATHER, brotherMap.get("m_grandfather")); + assertEquals(Family.FamiliarRelationship.PATERNAL_GRANDMOTHER, brotherMap.get("p_grandmother")); + assertEquals(Family.FamiliarRelationship.PATERNAL_GRANDFATHER, brotherMap.get("p_grandfather")); + assertEquals(Family.FamiliarRelationship.MOTHER, brotherMap.get("mother")); + assertEquals(Family.FamiliarRelationship.FATHER, brotherMap.get("father")); + assertEquals(Family.FamiliarRelationship.FULL_SIBLING, brotherMap.get("sibling")); + assertEquals(Family.FamiliarRelationship.FULL_SIBLING, brotherMap.get("proband")); + assertEquals(Family.FamiliarRelationship.SISTER, brotherMap.get("sister")); + + Map sisterMap = roles.get("sister"); + assertEquals(9, sisterMap.size()); + assertEquals(Family.FamiliarRelationship.MATERNAL_GRANDMOTHER, sisterMap.get("m_grandmother")); + assertEquals(Family.FamiliarRelationship.MATERNAL_GRANDFATHER, sisterMap.get("m_grandfather")); + assertEquals(Family.FamiliarRelationship.PATERNAL_GRANDMOTHER, sisterMap.get("p_grandmother")); + assertEquals(Family.FamiliarRelationship.PATERNAL_GRANDFATHER, sisterMap.get("p_grandfather")); + assertEquals(Family.FamiliarRelationship.MOTHER, sisterMap.get("mother")); + assertEquals(Family.FamiliarRelationship.FATHER, sisterMap.get("father")); + assertEquals(Family.FamiliarRelationship.FULL_SIBLING, sisterMap.get("sibling")); + assertEquals(Family.FamiliarRelationship.BROTHER, sisterMap.get("brother")); + assertEquals(Family.FamiliarRelationship.FULL_SIBLING, sisterMap.get("proband")); + + } + @Test public void searchFamily() throws CatalogException { createDummyFamily("Martinez-Martinez", true); diff --git a/opencga-client/pom.xml b/opencga-client/pom.xml index 1827031bf89..720ec23cf91 100644 --- a/opencga-client/pom.xml +++ b/opencga-client/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/admin_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/admin_client.py index 21b72ffb03b..af61c0fb492 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/admin_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/admin_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:50 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Admin(_ParentRestClient): """ This class contains methods for the 'Admin' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/admin """ @@ -46,6 +46,10 @@ def index_stats_catalog(self, **options): """ Sync Catalog into the Solr. PATH: /{apiVersion}/admin/catalog/indexStats + + :param str collection: Collection to be indexed (file, sample, + individual, family, cohort and/or job). If not provided, all of + them will be indexed. """ return self._post(category='admin', resource='indexStats', subcategory='catalog', **options) diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/alignment_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/alignment_client.py index f4768900226..2066d6f9004 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/alignment_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/alignment_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Alignment(_ParentRestClient): """ This class contains methods for the 'Analysis - Alignment' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/analysis/alignment """ diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/clinical_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/clinical_client.py index e4938041a61..d5eec543ddc 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/clinical_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/clinical_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:50 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Clinical(_ParentRestClient): """ This class contains methods for the 'Analysis - Clinical' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/analysis/clinical """ @@ -316,7 +316,9 @@ def query_variant(self, **options): chromosome name or regions in the format chr:start-end, e.g.: 2,3:100000-200000. :param str type: List of types, accepted values are SNV, MNV, INDEL, - SV, CNV, INSERTION, DELETION, e.g. SNV,INDEL. + SV, COPY_NUMBER, COPY_NUMBER_LOSS, COPY_NUMBER_GAIN, INSERTION, + DELETION, DUPLICATION, TANDEM_DUPLICATION, BREAKEND, e.g. + SNV,INDEL. :param str study: Filter variants from the given studies, these can be either the numeric ID or the alias with the format user@project:study. @@ -343,7 +345,7 @@ def query_variant(self, **options): versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: - HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT and MISS e.g. + HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/cohort_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/cohort_client.py index 3e395ea3f3c..942e0be1f81 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/cohort_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/cohort_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Cohort(_ParentRestClient): """ This class contains methods for the 'Cohorts' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/cohorts """ @@ -85,7 +85,7 @@ def load_annotation_sets(self, variable_set_id, path, data=None, **options): this has not yet been registered into OpenCGA. """ - options['variable_set_id'] = variable_set_id + options['variableSetId'] = variable_set_id options['path'] = path return self._post(category='cohorts', resource='load', subcategory='annotationSets', data=data, **options) @@ -131,6 +131,7 @@ def distinct(self, field, **options): user john has both WRITE and WRITE_ANNOTATIONS permissions. Only study owners or administrators can query by this field. . :param str samples: Sample list. + :param str num_samples: Number of samples. :param str release: Release value. """ @@ -148,8 +149,8 @@ def generate(self, data=None, **options): :param str id: Comma separated list sample IDs or UUIDs up to a maximum of 100. :param bool somatic: Somatic sample. - :param str individual_id: Individual ID. - :param str file_ids: Comma separated list of file IDs. + :param str individual_id: Individual ID or UUID. + :param str file_ids: Comma separated list of file IDs, paths or UUIDs. :param str creation_date: Creation date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805. :param str modification_date: Modification date. Format: @@ -206,6 +207,7 @@ def search(self, **options): user john has both WRITE and WRITE_ANNOTATIONS permissions. Only study owners or administrators can query by this field. . :param str samples: Sample list. + :param str num_samples: Number of samples. :param str release: Release value. """ diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/disease_panel_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/disease_panel_client.py index b48fb9148d4..2958e401d76 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/disease_panel_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/disease_panel_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class DiseasePanel(_ParentRestClient): """ This class contains methods for the 'Disease Panels' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/panels """ diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/family_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/family_client.py index 86d63472b9f..1739b2a4056 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/family_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/family_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Family(_ParentRestClient): """ This class contains methods for the 'Families' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/families """ @@ -34,6 +34,8 @@ def update_acl(self, members, action, data=None, **options): (REQUIRED) :param str study: Study [[user@]project:]study where study and project can be either the ID or UUID. + :param str propagate: Propagate family permissions to related + individuals and samples. """ options['action'] = action @@ -87,7 +89,7 @@ def load_annotation_sets(self, variable_set_id, path, data=None, **options): this has not yet been registered into OpenCGA. """ - options['variable_set_id'] = variable_set_id + options['variableSetId'] = variable_set_id options['path'] = path return self._post(category='families', resource='load', subcategory='annotationSets', data=data, **options) diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/file_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/file_client.py index 3cd24acb96b..b5851b57cfb 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/file_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/file_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class File(_ParentRestClient): """ This class contains methods for the 'Files' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/files """ @@ -93,7 +93,7 @@ def load_annotation_sets(self, variable_set_id, path, data=None, **options): this has not yet been registered into OpenCGA. """ - options['variable_set_id'] = variable_set_id + options['variableSetId'] = variable_set_id options['path'] = path return self._post(category='files', resource='load', subcategory='annotationSets', data=data, **options) diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/ga4gh_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/ga4gh_client.py index af833890f22..f1f15721d0e 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/ga4gh_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/ga4gh_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:50 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class GA4GH(_ParentRestClient): """ This class contains methods for the 'GA4GH' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/ga4gh """ diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/individual_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/individual_client.py index 3219e25b5b9..d4ec948b582 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/individual_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/individual_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Individual(_ParentRestClient): """ This class contains methods for the 'Individuals' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/individuals """ @@ -98,7 +98,7 @@ def load_annotation_sets(self, variable_set_id, path, data=None, **options): this has not yet been registered into OpenCGA. """ - options['variable_set_id'] = variable_set_id + options['variableSetId'] = variable_set_id options['path'] = path return self._post(category='individuals', resource='load', subcategory='annotationSets', data=data, **options) diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/job_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/job_client.py index 7bff7547e27..e97d1b79b66 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/job_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/job_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Job(_ParentRestClient): """ This class contains methods for the 'Jobs' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/jobs """ diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/meta_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/meta_client.py index 825fda129c1..d94c537f41f 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/meta_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/meta_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:50 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Meta(_ParentRestClient): """ This class contains methods for the 'Meta' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/meta """ diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/project_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/project_client.py index 805878985b4..587f54bda4f 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/project_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/project_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Project(_ParentRestClient): """ This class contains methods for the 'Projects' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/projects """ diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/sample_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/sample_client.py index 3825c4f046e..c62bbe458d8 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/sample_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/sample_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Sample(_ParentRestClient): """ This class contains methods for the 'Samples' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/samples """ @@ -36,8 +36,6 @@ def update_acl(self, members, action, data=None, **options): (REQUIRED) :param str study: Study [[user@]project:]study where study and project can be either the ID or UUID. - :param bool propagate: Propagate sample permissions to related - individuals. """ options['action'] = action @@ -92,7 +90,7 @@ def load_annotation_sets(self, variable_set_id, path, data=None, **options): this has not yet been registered into OpenCGA. """ - options['variable_set_id'] = variable_set_id + options['variableSetId'] = variable_set_id options['path'] = path return self._post(category='samples', resource='load', subcategory='annotationSets', data=data, **options) @@ -121,8 +119,8 @@ def distinct(self, field, **options): :param str id: Comma separated list sample IDs or UUIDs up to a maximum of 100. :param bool somatic: Somatic sample. - :param str individual_id: Individual ID. - :param str file_ids: Comma separated list of file IDs. + :param str individual_id: Individual ID or UUID. + :param str file_ids: Comma separated list of file IDs, paths or UUIDs. :param str creation_date: Creation date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805. :param str modification_date: Modification date. Format: @@ -182,8 +180,8 @@ def search(self, **options): :param str id: Comma separated list sample IDs or UUIDs up to a maximum of 100. :param bool somatic: Somatic sample. - :param str individual_id: Individual ID. - :param str file_ids: Comma separated list of file IDs. + :param str individual_id: Individual ID or UUID. + :param str file_ids: Comma separated list of file IDs, paths or UUIDs. :param str creation_date: Creation date. Format: yyyyMMddHHmmss. Examples: >2018, 2017-2018, <201805. :param str modification_date: Modification date. Format: diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/study_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/study_client.py index 572df25b58a..0cf75d5ef0f 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/study_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/study_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Study(_ParentRestClient): """ This class contains methods for the 'Studies' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/studies """ diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/user_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/user_client.py index 0e50d0b254e..59156d9f3d9 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/user_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/user_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class User(_ParentRestClient): """ This class contains methods for the 'Users' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/users """ diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py index 753c2a0f3b1..a9cca3398b1 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/variant_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:49 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class Variant(_ParentRestClient): """ This class contains methods for the 'Analysis - Variant' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/analysis/variant """ @@ -31,7 +31,9 @@ def aggregation_stats(self, **options): chromosome name or regions in the format chr:start-end, e.g.: 2,3:100000-200000. :param str type: List of types, accepted values are SNV, MNV, INDEL, - SV, CNV, INSERTION, DELETION, e.g. SNV,INDEL. + SV, COPY_NUMBER, COPY_NUMBER_LOSS, COPY_NUMBER_GAIN, INSERTION, + DELETION, DUPLICATION, TANDEM_DUPLICATION, BREAKEND, e.g. + SNV,INDEL. :param str project: Project [user@]project where project can be either the ID or the alias. :param str study: Filter variants from the given studies, these can be @@ -97,7 +99,7 @@ def aggregation_stats(self, **options): {key}[<|>|<=|>=]{number} or {key}[~=|=]{text}. :param str trait: List of traits, based on ClinVar, HPO, COSMIC, i.e.: IDs, histologies, descriptions,... - :param str fields: List of facet fields separated by semicolons, e.g.: + :param str field: List of facet fields separated by semicolons, e.g.: studies;type. For nested faceted fields use >>, e.g.: chromosome>>type;percentile(gerp). """ @@ -233,7 +235,7 @@ def genotypes_family(self, mode_of_inheritance, **options): :param str disorder: Disorder id. """ - options['mode_of_inheritance'] = mode_of_inheritance + options['modeOfInheritance'] = mode_of_inheritance return self._get(category='analysis', resource='genotypes', subcategory='variant/family', **options) def run_family_qc(self, data=None, **options): @@ -452,7 +454,7 @@ def metadata(self, **options): versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: - HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT and MISS e.g. + HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, @@ -462,9 +464,12 @@ def metadata(self, **options): :param str include_study: List of studies to include in the result. Accepts 'all' and 'none'. :param str include_file: List of files to be returned. Accepts 'all' - and 'none'. + and 'none'. If undefined, automatically includes files used for + filtering. If none, no file is included. :param str include_sample: List of samples to be included in the - result. Accepts 'all' and 'none'. + result. Accepts 'all' and 'none'. If undefined, automatically + includes samples used for filtering. If none, no sample is + included. :param str include: Fields included in the response, whole JSON path must be provided. :param str exclude: Fields excluded in the response, whole JSON path @@ -486,6 +491,11 @@ def query_mutational_signature(self, **options): :param str ct: List of SO consequence types, e.g. missense_variant,stop_lost or SO:0001583,SO:0001578. :param str biotype: List of biotypes, e.g. protein_coding. + :param str file_data: Filter by file data (i.e. FILTER, QUAL and INFO + columns from VCF file). [{file}:]{key}{op}{value}[,;]* . If no file + is specified, will use all files from 'file' filter. e.g. AN>200 or + file_1.vcf:AN>200;file_2.vcf:AN<10 . Many fields can be combined. + e.g. file_1.vcf:AN>200;DB=true;file_2.vcf:AN<10,FILTER=PASS,LowDP. :param str filter: Specify the FILTER for any of the files. If 'file' filter is provided, will match the file and the filter. e.g.: PASS,LowGQX. @@ -567,7 +577,9 @@ def query(self, **options): chromosome name or regions in the format chr:start-end, e.g.: 2,3:100000-200000. :param str type: List of types, accepted values are SNV, MNV, INDEL, - SV, CNV, INSERTION, DELETION, e.g. SNV,INDEL. + SV, COPY_NUMBER, COPY_NUMBER_LOSS, COPY_NUMBER_GAIN, INSERTION, + DELETION, DUPLICATION, TANDEM_DUPLICATION, BREAKEND, e.g. + SNV,INDEL. :param str reference: Reference allele. :param str alternate: Main alternate allele. :param str project: Project [user@]project where project can be either @@ -598,7 +610,7 @@ def query(self, **options): versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: - HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT and MISS e.g. + HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, @@ -612,7 +624,7 @@ def query(self, **options): versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: - HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT and MISS e.g. + HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT. This will automatically set 'includeSample' parameter when not provided. :param str sample_data: Filter by any SampleData field from samples. @@ -664,9 +676,12 @@ def query(self, **options): :param str include_study: List of studies to include in the result. Accepts 'all' and 'none'. :param str include_file: List of files to be returned. Accepts 'all' - and 'none'. + and 'none'. If undefined, automatically includes files used for + filtering. If none, no file is included. :param str include_sample: List of samples to be included in the - result. Accepts 'all' and 'none'. + result. Accepts 'all' and 'none'. If undefined, automatically + includes samples used for filtering. If none, no sample is + included. :param str include_sample_data: List of Sample Data keys (i.e. FORMAT column from VCF file) from Sample Data to include in the output. e.g: DP,AD. Accepts 'all' and 'none'. @@ -766,7 +781,9 @@ def aggregation_stats_sample(self, **options): chromosome name or regions in the format chr:start-end, e.g.: 2,3:100000-200000. :param str type: List of types, accepted values are SNV, MNV, INDEL, - SV, CNV, INSERTION, DELETION, e.g. SNV,INDEL. + SV, COPY_NUMBER, COPY_NUMBER_LOSS, COPY_NUMBER_GAIN, INSERTION, + DELETION, DUPLICATION, TANDEM_DUPLICATION, BREAKEND, e.g. + SNV,INDEL. :param str project: Project [user@]project where project can be either the ID or the alias. :param str study: Filter variants from the given studies, these can be @@ -788,7 +805,7 @@ def aggregation_stats_sample(self, **options): versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: - HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT and MISS e.g. + HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . 3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted.Accepted segregation modes: [ autosomalDominant, autosomalRecessive, @@ -802,7 +819,7 @@ def aggregation_stats_sample(self, **options): versa. When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... Genotype aliases accepted: - HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT and MISS e.g. + HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT. This will automatically set 'includeSample' parameter when not provided. :param str sample_annotation: Selects some samples using metadata @@ -826,7 +843,7 @@ def aggregation_stats_sample(self, **options): {study}:{population}[<|>|<=|>=]{number}. e.g. 1kG_phase3:ALL<0.01. :param str clinical_significance: Clinical significance: benign, likely_benign, likely_pathogenic, pathogenic. - :param str fields: List of facet fields separated by semicolons, e.g.: + :param str field: List of facet fields separated by semicolons, e.g.: studies;type. For nested faceted fields use >>, e.g.: chromosome>>type . Accepted values: chromosome, type, genotype, consequenceType, biotype, clinicalSignificance, dp, qual, filter. @@ -916,7 +933,9 @@ def query_sample_stats(self, sample, **options): chromosome name or regions in the format chr:start-end, e.g.: 2,3:100000-200000. :param str type: List of types, accepted values are SNV, MNV, INDEL, - SV, CNV, INSERTION, DELETION, e.g. SNV,INDEL. + SV, COPY_NUMBER, COPY_NUMBER_LOSS, COPY_NUMBER_GAIN, INSERTION, + DELETION, DUPLICATION, TANDEM_DUPLICATION, BREAKEND, e.g. + SNV,INDEL. :param str study: Study [[user@]project:]study where study and project can be either the ID or UUID. :param str file: Filter variants from the files specified. This will @@ -946,7 +965,10 @@ def run_sample_stats(self, data=None, **options): Compute sample variant stats for the selected list of samples. PATH: /{apiVersion}/analysis/variant/sample/stats/run - :param dict data: Sample variant stats params. (REQUIRED) + :param dict data: Sample variant stats params. Use index=true and + indexId='' to store the result in catalog sample QC. indexId=ALL + requires an empty query. Use sample=all to compute sample stats of + all samples in the variant storage. (REQUIRED) :param str study: Study [[user@]project:]study where study and project can be either the ID or UUID. :param str job_id: Job ID. It must be a unique string within the diff --git a/opencga-client/src/main/python/pyopencga/rest_clients/variant_operation_client.py b/opencga-client/src/main/python/pyopencga/rest_clients/variant_operation_client.py index 4ca48f1de54..7b11a3a3ccd 100644 --- a/opencga-client/src/main/python/pyopencga/rest_clients/variant_operation_client.py +++ b/opencga-client/src/main/python/pyopencga/rest_clients/variant_operation_client.py @@ -2,7 +2,7 @@ WARNING: AUTOGENERATED CODE This code was generated by a tool. - Autogenerated on: 2020-11-19 12:03:50 + Autogenerated on: 2021-05-21 14:06:25 Manual changes to this file may cause unexpected behavior in your application. Manual changes to this file will be overwritten if the code is regenerated. @@ -14,7 +14,7 @@ class VariantOperation(_ParentRestClient): """ This class contains methods for the 'Operations - Variant Storage' webservices - Client version: 2.0.0 + Client version: 2.0.3 PATH: /{apiVersion}/operation """ diff --git a/opencga-core/pom.xml b/opencga-core/pom.xml index 32fb7336b2d..dce90d1ff74 100644 --- a/opencga-core/pom.xml +++ b/opencga-core/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/family/Family.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/family/Family.java index 169363154e7..a00254605a5 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/family/Family.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/family/Family.java @@ -84,6 +84,9 @@ public enum FamiliarRelationship { PATERNAL_UNCLE("", "uncle"), NEPHEW("", "nephew"), NIECE("", "niece"), + GRANDSON("", "grandson"), + GRANDCHILD("", "grandchild"), + GRANDDAUGHTER("", "granddaughter"), GRANDFATHER("", "grandfather"), GRANDMOTHER("", "grandmother"), MATERNAL_GRANDMOTHER("", "grandmother"), diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/file/PostLinkToolParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/file/PostLinkToolParams.java index 113ed0453c6..c8fda2c46ac 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/file/PostLinkToolParams.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/file/PostLinkToolParams.java @@ -9,20 +9,14 @@ public class PostLinkToolParams extends ToolParams { public static final String DESCRIPTION = "File postlink params"; private List files; + private Integer batchSize; public PostLinkToolParams() { } - public PostLinkToolParams(List files) { + public PostLinkToolParams(List files, Integer batchSize) { this.files = files; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("PostLinkToolParams{"); - sb.append("files=").append(files); - sb.append('}'); - return sb.toString(); + this.batchSize = batchSize; } public List getFiles() { @@ -33,4 +27,13 @@ public PostLinkToolParams setFiles(List files) { this.files = files; return this; } + + public Integer getBatchSize() { + return batchSize; + } + + public PostLinkToolParams setBatchSize(Integer batchSize) { + this.batchSize = batchSize; + return this; + } } diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantIndexParams.java b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantIndexParams.java index 0afc8848d97..a1d30031e46 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantIndexParams.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/models/variant/VariantIndexParams.java @@ -41,7 +41,7 @@ public VariantIndexParams(String file, boolean excludeGenotypes, String includeSampleData, String merge, String deduplicationPolicy, boolean calculateStats, Aggregation aggregated, String aggregationMappingFile, boolean annotate, - String annotator, boolean overwriteAnnotations, boolean indexSearch) { + String annotator, boolean overwriteAnnotations, boolean indexSearch, boolean skipIndexedFiles) { this.file = file; this.resume = resume; this.outdir = outdir; @@ -69,6 +69,7 @@ public VariantIndexParams(String file, this.annotator = annotator; this.overwriteAnnotations = overwriteAnnotations; this.indexSearch = indexSearch; + this.skipIndexedFiles = skipIndexedFiles; } private String file; @@ -106,6 +107,8 @@ public VariantIndexParams(String file, private boolean indexSearch; + private boolean skipIndexedFiles; + public String getFile() { return file; } @@ -349,4 +352,12 @@ public VariantIndexParams setIndexSearch(boolean indexSearch) { return this; } + public boolean isSkipIndexedFiles() { + return skipIndexedFiles; + } + + public VariantIndexParams setSkipIndexedFiles(boolean skipIndexedFiles) { + this.skipIndexedFiles = skipIndexedFiles; + return this; + } } diff --git a/opencga-master/pom.xml b/opencga-master/pom.xml index 4fdc079ecd1..0091a5863af 100644 --- a/opencga-master/pom.xml +++ b/opencga-master/pom.xml @@ -22,7 +22,7 @@ opencga org.opencb.opencga - 2.0.2 + 2.0.3 .. diff --git a/opencga-server/pom.xml b/opencga-server/pom.xml index 9935accb971..19c3f7e8f18 100644 --- a/opencga-server/pom.xml +++ b/opencga-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java index aaa54698e2b..0b7db424837 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java @@ -260,6 +260,7 @@ static void init(String opencgaHomeStr) { logger.error(errorMessage); throw new IllegalStateException(errorMessage); } +// ActionableVariantManager.init(opencgaHome); logger.info("| OpenCGA REST successfully started!"); logger.info("| - Version " + GitRepositoryState.get().getBuildVersion()); diff --git a/opencga-storage/opencga-storage-app/pom.xml b/opencga-storage/opencga-storage-app/pom.xml index 39a4b5d9e20..d76d0f0557f 100644 --- a/opencga-storage/opencga-storage-app/pom.xml +++ b/opencga-storage/opencga-storage-app/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/opencga-storage-benchmark/pom.xml b/opencga-storage/opencga-storage-benchmark/pom.xml index cbb73581d15..46579b16d64 100644 --- a/opencga-storage/opencga-storage-benchmark/pom.xml +++ b/opencga-storage/opencga-storage-benchmark/pom.xml @@ -21,7 +21,7 @@ opencga-storage org.opencb.opencga - 2.0.2 + 2.0.3 4.0.0 diff --git a/opencga-storage/opencga-storage-core/pom.xml b/opencga-storage/opencga-storage-core/pom.xml index 638cfa8c7e7..29b7d034f06 100644 --- a/opencga-storage/opencga-storage-core/pom.xml +++ b/opencga-storage/opencga-storage-core/pom.xml @@ -25,7 +25,7 @@ org.opencb.opencga opencga-storage - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index f016ddf6f51..758cbc65a89 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -1160,15 +1160,19 @@ protected List initVariantAggregationExecutors() { * @return VariantQueryExecutor to use */ public VariantAggregationExecutor getVariantAggregationExecutor(Query query, QueryOptions options) { + List messages = new LinkedList<>(); for (VariantAggregationExecutor executor : getVariantAggregationExecutors()) { - if (executor.canUseThisExecutor(query, options)) { + if (executor.canUseThisExecutor(query, options, messages)) { return executor; } } String facet = options == null ? null : options.getString(QueryOptions.FACET); // This should rarely happen logger.warn("Unable to run aggregation facet '" + facet + "' with query " + VariantQueryUtils.printQuery(query)); - throw new VariantQueryException("No VariantAggregationExecutor found to run the query!").setQuery(query); + for (String message : messages) { + logger.warn(message); + } + throw new VariantQueryException("No VariantAggregationExecutor found to run the query. " + messages).setQuery(query); } @Override diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClass.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClass.java index 848035e9741..4d9249456e3 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClass.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClass.java @@ -9,6 +9,7 @@ import java.util.*; import java.util.function.Predicate; +import java.util.regex.Pattern; import java.util.stream.Collectors; /** @@ -34,22 +35,23 @@ public enum GenotypeClass implements Predicate { /** * Homozygous alternate. *

- * 1, 1/1, 1|1, 2/2, 3/3, 1/1/1, ... + * 1, 1/1, 1|1, 1/1/1, ... */ HOM_ALT(str -> { + if (str.equals("1/1") || str.equals("1|1")) { + return true; + } Genotype gt = parseGenotype(str); if (gt == null) { // Skip invalid genotypes return false; } int[] alleles = gt.getAllelesIdx(); - int firstAllele = alleles[0]; - if (firstAllele <= 0) { - // Discard if first allele is reference or missing - return false; + if (alleles.length == 2) { + return alleles[0] == 1 && alleles[1] == 1; } - for (int i = 1; i < alleles.length; i++) { - if (alleles[i] != firstAllele) { + for (int allele : alleles) { + if (allele != 1) { return false; } } @@ -59,7 +61,7 @@ public enum GenotypeClass implements Predicate { /** * Heterozygous. *

- * 0/1, 1/2, 0/2, 2/4, 0|1, 1|0, 0/0/1, ... + * 0/1, 1/2, 0|1, 1|0, ./1, 0/1/2, ... */ HET(str -> { if (str.equals("0/1")) { @@ -71,14 +73,17 @@ public enum GenotypeClass implements Predicate { return false; } int[] alleles = gt.getAllelesIdx(); - int firstAllele = alleles[0]; - if (firstAllele < 0 || gt.isHaploid()) { - // Discard if first allele is missing, or if haploid + if (alleles.length == 2) { + return alleles[0] != alleles[1] && (alleles[0] == 1 || alleles[1] == 1); + } + if (gt.isHaploid()) { + // Discard if haploid return false; } + int firstAllele = alleles[0]; for (int i = 1; i < alleles.length; i++) { int allele = alleles[i]; - if (allele == firstAllele || allele < 0) { + if (allele == firstAllele) { return false; } } @@ -88,7 +93,7 @@ public enum GenotypeClass implements Predicate { /** * Heterozygous Reference. *

- * 0/1, 0/2, 0/3, 0|1, ... + * 0/1, 0|1, 1|0, ... */ HET_REF(str -> { if (str.equals("0/1")) { @@ -99,24 +104,32 @@ public enum GenotypeClass implements Predicate { // Skip invalid genotypes return false; } + int[] alleles = gt.getAllelesIdx(); + if (alleles.length == 2) { + return alleles[0] == 0 && alleles[1] == 1 || alleles[0] == 1 && alleles[1] == 0; + } if (gt.isHaploid()) { // Discard if haploid return false; } boolean hasReference = false; boolean hasAlternate = false; + boolean hasMissing = false; + boolean hasOtherAlternate = false; - for (int allele : gt.getAllelesIdx()) { + for (int allele : alleles) { hasReference |= allele == 0; - hasAlternate |= allele > 0; // Discard ref and missing + hasAlternate |= allele == 1; // Discard ref and missing + hasMissing |= allele < 0; + hasOtherAlternate |= allele > 1; } - return hasReference && hasAlternate; + return hasReference && hasAlternate && !hasMissing && !hasOtherAlternate; }), /** * Heterozygous Alternate. *

- * 1/2, 1/3, 2/4, 2|1, ... + * 1/2, 1/3, 1/4, 2|1, ... */ HET_ALT(str -> { Genotype gt = parseGenotype(str); @@ -125,18 +138,58 @@ public enum GenotypeClass implements Predicate { return false; } int[] alleles = gt.getAllelesIdx(); - int firstAllele = alleles[0]; - if (firstAllele <= 0 || gt.isHaploid()) { - // Discard if first allele is reference or missing, or if haploid + if (alleles.length == 2) { + return alleles[0] == 1 && alleles[1] > 1 || alleles[0] > 1 && alleles[1] == 1; + } + if (gt.isHaploid()) { + // Discard if haploid return false; } - for (int i = 1; i < alleles.length; i++) { - int allele = alleles[i]; - if (allele == firstAllele || allele <= 0) { - return false; - } + boolean hasReference = false; + boolean hasAlternate = false; + boolean hasMissing = false; + boolean hasOtherAlternate = false; + + for (int allele : alleles) { + hasReference |= allele == 0; + hasAlternate |= allele == 1; + hasMissing |= allele < 0; + hasOtherAlternate |= allele > 1; } - return true; + return hasAlternate && hasOtherAlternate && !hasReference && !hasMissing; + }), + + /** + * Heterozygous Missing. + *

+ * 1/., ./1, ... + */ + HET_MISS(str -> { + Genotype gt = parseGenotype(str); + if (gt == null) { + // Skip invalid genotypes + return false; + } + int[] alleles = gt.getAllelesIdx(); + if (alleles.length == 2) { + return alleles[0] == 1 && alleles[1] < 0 || alleles[0] < 0 && alleles[1] == 1; + } + if (gt.isHaploid()) { + // Discard if haploid + return false; + } + boolean hasReference = false; + boolean hasAlternate = false; + boolean hasMissing = false; + boolean hasOtherAlternate = false; + + for (int allele : alleles) { + hasReference |= allele == 0; + hasAlternate |= allele == 1; + hasMissing |= allele < 0; + hasOtherAlternate |= allele > 1; + } + return hasAlternate && hasMissing && !hasReference && !hasOtherAlternate; }), /** @@ -158,6 +211,27 @@ public enum GenotypeClass implements Predicate { return true; }), + /** + * Genotypes containing reference and secondary alternates only. + *

+ * 0/2, 2/3, ./2, 2/2, ... + */ + SEC_ALT(str -> { + Genotype gt = parseGenotype(str); + if (gt == null) { + // Skip invalid genotypes + return false; + } + boolean hasSecondaryAlternate = false; + for (int allele : gt.getAllelesIdx()) { + if (allele == 1) { + return false; + } else if (allele > 1) { + hasSecondaryAlternate = true; + } + } + return hasSecondaryAlternate; + }), /** * Contains the main alternate. @@ -307,7 +381,7 @@ public static List getPhasedGenotypes(Genotype genotype, List lo } return phasedGts; } else { - return Collections.emptyList(); + return new ArrayList<>(); } } @@ -328,6 +402,49 @@ public static GenotypeClass from(String gt) { return genotypeClass; } + public static List expandMultiAllelicGenotype(String genotypeStr, List loadedGenotypes) { + List genotypes = new ArrayList<>(5); + if (from(genotypeStr) != null) { + // Discard GenotypeClass + return genotypes; + } + if (genotypeStr.equals(NA_GT_VALUE)) { + // Discard special genotypes + return genotypes; + } + Genotype genotype; + try { + genotype = new Genotype(genotypeStr); + } catch (RuntimeException e) { + throw new VariantQueryException("Malformed genotype '" + genotypeStr + "'", e); + } + int[] allelesIdx = genotype.getAllelesIdx(); + boolean hasSecAlt = false; + for (int i = 0; i < allelesIdx.length; i++) { + if (allelesIdx[i] > 1) { + allelesIdx[i] = 2; + hasSecAlt = true; + } + } + if (hasSecAlt) { + List phasedGenotypes = getPhasedGenotypes(genotype); + phasedGenotypes.add(genotype.toString()); + for (String phasedGenotype : phasedGenotypes) { + String regex = phasedGenotype + .replace(".", "\\.") + .replace("|", "\\|") + .replace("2", "([2-9]|[0-9][0-9])"); // Replace allele "2" with "any number >= 2") + Pattern pattern = Pattern.compile(regex); + for (String loadedGenotype : loadedGenotypes) { + if (pattern.matcher(loadedGenotype).matches()) { + genotypes.add(loadedGenotype); + } + } + } + } + return genotypes; + } + private static Genotype parseGenotype(String gt) { if (VariantQueryUtils.isNegated(gt)) { throw new IllegalStateException("Unable to parse negated genotype " + gt); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptor.java index 994ae596c06..c2ee42c9b00 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptor.java @@ -180,7 +180,7 @@ default DataResult count(Query query) { * @return Map key: StudyId, value: list of sampleIds */ default Map> getReturnedSamples(Query query, QueryOptions options) { - return VariantQueryProjectionParser.getIncludeSamples(query, options, getMetadataManager()); + return VariantQueryProjectionParser.getIncludeSampleIds(query, options, getMetadataManager()); } DataResult updateStats(List variantStatsWrappers, String studyName, long timestamp, QueryOptions queryOptions); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantField.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantField.java index 08bcc978c21..f0d2e65011e 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantField.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantField.java @@ -78,6 +78,8 @@ public enum VariantField { ANNOTATION_DRUGS(ANNOTATION, "annotation.drugs"), ANNOTATION_ADDITIONAL_ATTRIBUTES(ANNOTATION, "annotation.additionalAttributes"); + private static final Set ALL_FIELDS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(values()))); + /** * Known additional attributes defined by OpenCGA. * @@ -191,41 +193,71 @@ public static Set getIncludeFields(QueryOptions options) { List includeList = options.getAsStringList(QueryOptions.INCLUDE); if (includeList != null && !includeList.isEmpty()) { - includeFields = new HashSet<>(); - for (String include : includeList) { - VariantField field = get(include); - if (field == null) { - throw VariantQueryException.unknownVariantField(QueryOptions.INCLUDE, include); -// continue; - } - if (field.getParent() != null) { - includeFields.add(field.getParent()); - } - includeFields.add(field); - includeFields.addAll(field.getChildren()); - } - + includeFields = parseInclude(includeList); } else { List excludeList = options.getAsStringList(QueryOptions.EXCLUDE); - includeFields = new HashSet<>(Arrays.asList(values())); if (excludeList != null && !excludeList.isEmpty()) { - for (String exclude : excludeList) { - VariantField field = get(exclude); - if (field == null) { - throw VariantQueryException.unknownVariantField(QueryOptions.EXCLUDE, exclude); -// continue; - } - includeFields.remove(field); - includeFields.removeAll(field.getChildren()); + includeFields = parseExclude(excludeList); + } else { + includeFields = new HashSet<>(Arrays.asList(values())); + if (options.getBoolean(SUMMARY, false)) { + includeFields.removeAll(SUMMARY_EXCLUDED_FIELDS); } - } else if (options.getBoolean(SUMMARY, false)) { - includeFields.removeAll(SUMMARY_EXCLUDED_FIELDS); } } return includeFields; } + public static Set parseInclude(String... includeList) { + return parseInclude(Arrays.asList(includeList)); + } + + public static Set parseInclude(List includeList) { + Set includeFields = new HashSet<>(); + if (includeList == null) { + return includeFields; + } + for (String include : includeList) { + VariantField field = get(include); + if (field == null) { + throw VariantQueryException.unknownVariantField(QueryOptions.INCLUDE, include); +// continue; + } + if (field.getParent() != null) { + includeFields.add(field.getParent()); + } + includeFields.add(field); + includeFields.addAll(field.getChildren()); + } + return includeFields; + } + + public static Set parseExclude(String... includeList) { + return parseExclude(Arrays.asList(includeList)); + } + + public static Set parseExclude(List excludeList) { + Set includeFields = new HashSet<>(Arrays.asList(values())); + if (excludeList == null) { + return includeFields; + } + for (String exclude : excludeList) { + VariantField field = get(exclude); + if (field == null) { + throw VariantQueryException.unknownVariantField(QueryOptions.EXCLUDE, exclude); +// continue; + } + includeFields.remove(field); + includeFields.removeAll(field.getChildren()); + } + return includeFields; + } + + public static Set all() { + return ALL_FIELDS; + } + /** * Remove intermediate nodes some child is missing, or all children from a node if all are present. * diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryParam.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryParam.java index 77b088f5ced..d52be41ca53 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryParam.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/adaptors/VariantQueryParam.java @@ -85,7 +85,7 @@ public final class VariantQueryParam implements QueryParam { + "Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. " + "When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position" + " e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... " - + "Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT and MISS " + + "Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS " + " e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT . " + "3) Sample with segregation mode: {sample}:{segregation}. Only one sample accepted." + "Accepted segregation modes: " @@ -101,7 +101,7 @@ public final class VariantQueryParam implements QueryParam { + "Unphased genotypes (e.g. 0/1, 1/1) will also include phased genotypes (e.g. 0|1, 1|0, 1|1), but not vice versa. " + "When filtering by multi-allelic genotypes, any secondary allele will match, regardless of its position" + " e.g. 1/2 will match with genotypes 1/2, 1/3, 1/4, .... " - + "Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT and MISS " + + "Genotype aliases accepted: HOM_REF, HOM_ALT, HET, HET_REF, HET_ALT, HET_MISS and MISS " + " e.g. HG0097:HOM_REF;HG0098:HET_REF,HOM_ALT. " + "This will automatically set 'includeSample' parameter when not provided"; public static final VariantQueryParam GENOTYPE = new VariantQueryParam("genotype", TEXT_ARRAY, GENOTYPE_DESCR); @@ -115,7 +115,7 @@ public final class VariantQueryParam implements QueryParam { public static final String INCLUDE_SAMPLE_DESCR = "List of samples to be included in the result. " - + ACCEPTS_ALL_NONE; + + ACCEPTS_ALL_NONE + " If undefined, automatically includes samples used for filtering. If none, no sample is included."; public static final VariantQueryParam INCLUDE_SAMPLE = new VariantQueryParam("includeSample", TEXT_ARRAY, INCLUDE_SAMPLE_DESCR); public static final String INCLUDE_SAMPLE_ID_DESCR @@ -167,7 +167,7 @@ public final class VariantQueryParam implements QueryParam { public static final String INCLUDE_FILE_DESCR = "List of files to be returned. " - + ACCEPTS_ALL_NONE; + + ACCEPTS_ALL_NONE + " If undefined, automatically includes files used for filtering. If none, no file is included."; public static final VariantQueryParam INCLUDE_FILE = new VariantQueryParam("includeFile", TEXT_ARRAY, INCLUDE_FILE_DESCR); public static final String COHORT_DESCR diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java index d8b32f90ee0..dcf597a5766 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryParser.java @@ -3,7 +3,6 @@ import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.EnumUtils; import org.apache.commons.lang3.StringUtils; -import org.opencb.biodata.models.variant.Genotype; import org.opencb.biodata.models.variant.StudyEntry; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.ClinicalSignificance; @@ -26,7 +25,6 @@ import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjectionParser; import java.util.*; -import java.util.regex.Pattern; import java.util.stream.Collectors; import static org.opencb.opencga.storage.core.variant.VariantStorageOptions.EXCLUDE_GENOTYPES; @@ -657,39 +655,8 @@ public static List preProcessGenotypesFilter(List genotypesInput if (negated) { genotypeStr = removeNegation(genotypeStr); } - if (GenotypeClass.from(genotypeStr) != null) { - // Discard GenotypeClass - continue; - } - if (genotypeStr.equals(GenotypeClass.NA_GT_VALUE)) { - // Discard special genotypes - continue; - } - Genotype genotype; - try { - genotype = new Genotype(genotypeStr); - } catch (RuntimeException e) { - throw new VariantQueryException("Malformed genotype '" + genotypeStr + "'", e); - } - int[] allelesIdx = genotype.getAllelesIdx(); - boolean multiallelic = false; - for (int i = 0; i < allelesIdx.length; i++) { - if (allelesIdx[i] > 1) { - allelesIdx[i] = 2; - multiallelic = true; - } - } - if (multiallelic) { - String regex = genotype.toString() - .replace(".", "\\.") - .replace("|", "\\|") - .replace("2", "([2-9]|[0-9][0-9])"); // Replace allele "2" with "any number >= 2") - Pattern pattern = Pattern.compile(regex); - for (String loadedGenotype : loadedGenotypes) { - if (pattern.matcher(loadedGenotype).matches()) { - genotypes.add((negated ? NOT : "") + loadedGenotype); - } - } + for (String multiAllelicGenotype : GenotypeClass.expandMultiAllelicGenotype(genotypeStr, loadedGenotypes)) { + genotypes.add((negated ? NOT : "") + multiAllelicGenotype); } } genotypes = GenotypeClass.filter(genotypes, loadedGenotypes); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java index d9c4296fe56..180c835e38b 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/VariantQueryUtils.java @@ -370,16 +370,24 @@ public static boolean isNoneOrEmpty(List value) { return value != null && (value.isEmpty() || value.size() == 1 && isNone(value.get(0))); } - private static boolean isNone(String value) { - return value.equals(NONE); + public static boolean isNone(Query q, QueryParam queryParam) { + return isNone(q.getString(queryParam.key())); + } + + public static boolean isNone(String value) { + return NONE.equals(value); } public static boolean isAllOrNull(List value) { return value == null || value.size() == 1 && isAll(value.get(0)); } + public static boolean isAll(Query q, QueryParam queryParam) { + return isAll(q.getString(queryParam.key())); + } + public static boolean isAll(String s) { - return s.equals(ALL); + return ALL.equals(s); } /** @@ -499,29 +507,6 @@ public static boolean isOutputMultiStudy(Query query, QueryOptions options, Coll } } - public static Map> getSamplesMetadata(Query query, QueryOptions options, - VariantStorageMetadataManager metadataManager) { - if (VariantField.getIncludeFields(options).contains(VariantField.STUDIES)) { - Map> includeSamples = VariantQueryProjectionParser.getIncludeSamples(query, options, metadataManager); - Map> sampleMetadata = new HashMap<>(includeSamples.size()); - - for (Map.Entry> entry : includeSamples.entrySet()) { - Integer studyId = entry.getKey(); - List sampleIds = entry.getValue(); - String studyName = metadataManager.getStudyName(studyId); - ArrayList sampleNames = new ArrayList<>(sampleIds.size()); - for (Integer sampleId : sampleIds) { - sampleNames.add(metadataManager.getSampleName(studyId, sampleId)); - } - sampleMetadata.put(studyName, sampleNames); - } - - return sampleMetadata; - } else { - return Collections.emptyMap(); - } - } - public static VariantQueryResult addSamplesMetadataIfRequested(DataResult result, Query query, QueryOptions options, VariantStorageMetadataManager variantStorageMetadataManager) { return addSamplesMetadataIfRequested(new VariantQueryResult<>(result, null), query, options, variantStorageMetadataManager); @@ -532,7 +517,8 @@ public static VariantQueryResult addSamplesMetadataIfRequested(VariantQue if (query.getBoolean(SAMPLE_METADATA.key(), false)) { int numTotalSamples = query.getInt(NUM_TOTAL_SAMPLES.key(), -1); int numSamples = query.getInt(NUM_SAMPLES.key(), -1); - Map> samplesMetadata = getSamplesMetadata(query, options, variantStorageMetadataManager); + Map> samplesMetadata = VariantQueryProjectionParser + .getIncludeSampleNames(query, options, variantStorageMetadataManager); if (numTotalSamples < 0 && numSamples < 0) { numTotalSamples = samplesMetadata.values().stream().mapToInt(List::size).sum(); VariantQueryProjectionParser.skipAndLimitSamples(query, samplesMetadata); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/ChromDensityVariantAggregationExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/ChromDensityVariantAggregationExecutor.java index 4d91fac7139..b7a03ceac34 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/ChromDensityVariantAggregationExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/ChromDensityVariantAggregationExecutor.java @@ -41,7 +41,7 @@ public ChromDensityVariantAggregationExecutor(VariantIterable iterable, VariantS } @Override - protected boolean canUseThisExecutor(Query query, QueryOptions options, String facet) throws Exception { + protected boolean canUseThisExecutor(Query query, QueryOptions options, String facet, List reason) throws Exception { return isPureChromDensityFacet(facet); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantAggregationExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantAggregationExecutor.java index b3ee87e3c92..08a8e5f1ece 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantAggregationExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/VariantAggregationExecutor.java @@ -9,6 +9,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.LinkedList; +import java.util.List; import java.util.regex.Pattern; import static org.opencb.opencga.storage.core.variant.search.solr.SolrQueryParser.CHROM_DENSITY; @@ -26,6 +28,10 @@ public abstract class VariantAggregationExecutor { private Logger logger = LoggerFactory.getLogger(VariantAggregationExecutor.class); public final boolean canUseThisExecutor(Query query, QueryOptions options) { + return canUseThisExecutor(query, options, new LinkedList<>()); + } + + public final boolean canUseThisExecutor(Query query, QueryOptions options, List reason) { if (query == null) { query = new Query(); } @@ -36,7 +42,7 @@ public final boolean canUseThisExecutor(Query query, QueryOptions options) { String facet = options.getString(QueryOptions.FACET); try { - return canUseThisExecutor(query, options, facet); + return canUseThisExecutor(query, options, facet, reason); } catch (Exception e) { throw VariantQueryException.internalException(e); } @@ -68,7 +74,7 @@ public final VariantQueryResult aggregation(Query query, QueryOption } } - protected abstract boolean canUseThisExecutor(Query query, QueryOptions options, String facet) throws Exception; + protected abstract boolean canUseThisExecutor(Query query, QueryOptions options, String facet, List reason) throws Exception; protected abstract VariantQueryResult aggregation(Query query, QueryOptions options, String facet) throws Exception; diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java index a02f38aa90c..465996736e2 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParser.java @@ -20,12 +20,27 @@ import java.util.stream.Collectors; import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*; -import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.IS; +import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.*; public class VariantQueryProjectionParser { private final VariantStorageMetadataManager metadataManager; + public enum IncludeStatus { + /** + * Return all elements. + */ + ALL, + /** + * Return none elements. Default value if undefined. + */ + NONE, + /** + * Return a subset of elements. + */ + SOME + } + public VariantQueryProjectionParser(VariantStorageMetadataManager metadataManager) { this.metadataManager = metadataManager; } @@ -52,7 +67,7 @@ public VariantQueryProjection parseVariantQueryProjection(Query query, QueryOpti studies.get(studyId).setStudyMetadata(sm); } - Map> sampleIdsMap = getIncludeSamples(query, options, includeStudies, metadataManager); + Map> sampleIdsMap = getIncludeSampleIds(query, options, includeStudies, metadataManager); for (VariantQueryProjection.StudyVariantQueryProjection study : studies.values()) { study.setSamples(sampleIdsMap.get(study.getId())); } @@ -180,11 +195,10 @@ private static List getIncludeStudies(Query query, QueryOptions options studyIds = metadataManager.getStudyIds(); if (studyIds.size() > 1) { Map> map = null; - if (isIncludeSamplesDefined(query, fields)) { - map = getIncludeSamples(query, options, studyIds, metadataManager); - } else if (isIncludeFilesDefined(query, fields)) { - map = getIncludeFiles(query, studyIds, fields, - metadataManager, null); + if (isIncludeSomeSamples(query, fields)) { + map = getIncludeSampleIds(query, options, studyIds, metadataManager); + } else if (getIncludeFileStatus(query, fields) == IncludeStatus.SOME) { + map = getIncludeFiles(query, studyIds, fields, metadataManager, Collections.emptyMap()); } if (map != null) { List studyIdsFromSubFields = new ArrayList<>(); @@ -231,15 +245,28 @@ public static List getIncludeStudiesList(Query query, Set return studies; } - public static boolean isIncludeFilesDefined(Query query, Set fields) { - if (getIncludeFilesList(query, fields) != null) { - return true; + + /** + * Get include file status. + * + * @param query Input variant query + * @param fields Variant fields to return + * @return If the result should include any file + */ + public static IncludeStatus getIncludeFileStatus(Query query, Set fields) { + IncludeStatus includeFilePartialStatus = getIncludeFilePartialStatus(query, fields); + if (includeFilePartialStatus != null) { + return includeFilePartialStatus; + } + + // Undefined include file status. Check sample partial status + IncludeStatus includeSamplePartialStatus = getIncludeSamplePartialStatus(query, fields); + if (includeSamplePartialStatus != null) { + return includeSamplePartialStatus; } - return VariantQueryUtils.isValidParam(query, SAMPLE, true) - || VariantQueryUtils.isValidParam(query, VariantQueryUtils.SAMPLE_MENDELIAN_ERROR, false) - || VariantQueryUtils.isValidParam(query, VariantQueryUtils.SAMPLE_DE_NOVO, false) - || VariantQueryUtils.isValidParam(query, INCLUDE_SAMPLE, false) - || VariantQueryUtils.isValidParam(query, GENOTYPE, false); + + // Default NONE + return IncludeStatus.NONE; } /** @@ -266,7 +293,7 @@ private static Map> getIncludeFiles(Query query, Collecti } List includeSamplesList = includeSamples == null ? getIncludeSamplesList(query) : null; List includeFilesList = getIncludeFilesList(query, fields); - boolean returnAllFiles = VariantQueryUtils.ALL.equals(query.getString(INCLUDE_FILE.key())); + IncludeStatus includeFileStatus = getIncludeFileStatus(query, fields); Map> files = new HashMap<>(studyIds.size()); for (Integer studyId : studyIds) { @@ -275,38 +302,45 @@ private static Map> getIncludeFiles(Query query, Collecti continue; } - List fileIds; - if (includeFilesList != null) { - fileIds = new ArrayList<>(); - for (String file : includeFilesList) { - Integer fileId = metadataManager.getFileId(studyId, file); - if (fileId != null) { - if (metadataManager.isFileIndexed(studyId, fileId)) { - fileIds.add(fileId); + final List fileIds; + switch (includeFileStatus) { + case NONE: + fileIds = new ArrayList<>(); + break; + case ALL: + fileIds = new ArrayList<>(metadataManager.getIndexedFiles(studyId)); + break; + case SOME: + if (includeFilesList != null) { + fileIds = new ArrayList<>(); + for (String file : includeFilesList) { + Integer fileId = metadataManager.getFileId(studyId, file); + if (fileId != null) { + if (metadataManager.isFileIndexed(studyId, fileId)) { + fileIds.add(fileId); + } + } } - } - } - } else if (returnAllFiles) { - fileIds = new ArrayList<>(metadataManager.getIndexedFiles(studyId)); - } else if (includeSamples != null) { - List sampleIds = includeSamples.get(studyId); - Set fileSet = metadataManager.getFileIdsFromSampleIds(studyId, sampleIds, true); - fileIds = new ArrayList<>(fileSet); - } else if (includeSamplesList != null && !includeSamplesList.isEmpty()) { - List sampleIds = new ArrayList<>(); - for (String sample : includeSamplesList) { - Integer sampleId = metadataManager.getSampleId(studyId, sample); - if (sampleId == null) { + } else if (includeSamples != null) { + List sampleIds = includeSamples.get(studyId); + Set fileSet = metadataManager.getFileIdsFromSampleIds(studyId, sampleIds, true); + fileIds = new ArrayList<>(fileSet); + } else { + List sampleIds = new ArrayList<>(); + for (String sample : includeSamplesList) { + Integer sampleId = metadataManager.getSampleId(studyId, sample); + if (sampleId == null) { // throw VariantQueryException.sampleNotFound(sample, sm.getName()); - break; + break; + } + sampleIds.add(sampleId); + } + Set fileSet = metadataManager.getFileIdsFromSampleIds(studyId, sampleIds, true); + fileIds = new ArrayList<>(fileSet); } - sampleIds.add(sampleId); - } - Set fileSet = metadataManager.getFileIdsFromSampleIds(studyId, sampleIds, true); - fileIds = new ArrayList<>(fileSet); - } else { - // Return all files - fileIds = new ArrayList<>(metadataManager.getIndexedFiles(studyId)); + break; + default: + throw new IllegalArgumentException("Unknown IncludeStats='" + includeFileStatus + "'"); } files.put(studyId, fileIds); } @@ -374,32 +408,116 @@ public static List getIncludeFilesList(Query query) { } } + + + /** + * Get include sample status. + * + * @param query Input variant query + * @param fields Variant fields to return + * @return Include status + */ + public static IncludeStatus getIncludeSampleStatus(Query query, Set fields) { + IncludeStatus includeSamplePartialStatus = getIncludeSamplePartialStatus(query, fields); + if (includeSamplePartialStatus != null) { + return includeSamplePartialStatus; + } + + // Undefined include sample status + IncludeStatus includeFilePartialStatus = getIncludeFilePartialStatus(query, fields); + if (includeFilePartialStatus != null) { + return includeFilePartialStatus; + } + + // Default NONE + return IncludeStatus.NONE; + } + + /** + * Include any set of samples, (some or all). + * + * @param query Input variant query + * @param fields Variant fields to return + * @return If the result should include any sample + */ + public static boolean isIncludeAnySample(Query query, Set fields) { + IncludeStatus includeSampleStatus = getIncludeSampleStatus(query, fields); + return includeSampleStatus == IncludeStatus.SOME || includeSampleStatus.equals(IncludeStatus.ALL); + } + + /** + * Include any set of samples, from one to all files. + * + * @param query Input variant query + * @param fields Variant fields to return + * @return If the result should include any sample + */ + public static boolean isIncludeAllSamples(Query query, Set fields) { + return getIncludeSampleStatus(query, fields).equals(IncludeStatus.ALL); + } + + /** + * Include any but not all samples. + * @param query Input variant query + * @param fields Variant fields to return + * @return If the result should include any sample + */ + public static boolean isIncludeSomeSamples(Query query, Set fields) { + return getIncludeSampleStatus(query, fields).equals(IncludeStatus.SOME); + } + + /** + * Do not include any sample. + * + * @param query Input variant query + * @param fields Variant fields to return + * @return If the result should NOT include any sample + */ + public static boolean isIncludeNoSamples(Query query, Set fields) { + return getIncludeSampleStatus(query, fields).equals(IncludeStatus.NONE); + } + public static boolean isIncludeSamplesDefined(Query query, Set fields) { - if (getIncludeSamplesList(query, fields) != null) { - return true; + return getIncludeSamplePartialStatus(query, fields) != null || getIncludeFilePartialStatus(query, fields) != null; + } + + public static Map> getIncludeSampleNames(Query query, QueryOptions options, + VariantStorageMetadataManager metadataManager) { + if (VariantField.getIncludeFields(options).contains(VariantField.STUDIES)) { + Map> includeSamples = getIncludeSampleIds(query, options, metadataManager); + Map> sampleMetadata = new HashMap<>(includeSamples.size()); + + for (Map.Entry> entry : includeSamples.entrySet()) { + Integer studyId = entry.getKey(); + List sampleIds = entry.getValue(); + String studyName = metadataManager.getStudyName(studyId); + ArrayList sampleNames = new ArrayList<>(sampleIds.size()); + for (Integer sampleId : sampleIds) { + sampleNames.add(metadataManager.getSampleName(studyId, sampleId)); + } + sampleMetadata.put(studyName, sampleNames); + } + + return sampleMetadata; + } else { + return Collections.emptyMap(); } - return VariantQueryUtils.isValidParam(query, FILE, true) || VariantQueryUtils.isValidParam(query, INCLUDE_FILE, true); } - public static Map> getIncludeSamples(Query query, QueryOptions options, - VariantStorageMetadataManager variantStorageMetadataManager) { + public static Map> getIncludeSampleIds(Query query, QueryOptions options, + VariantStorageMetadataManager variantStorageMetadataManager) { List includeStudies = getIncludeStudies(query, options, variantStorageMetadataManager); - return getIncludeSamples(query, options, includeStudies, variantStorageMetadataManager); + return getIncludeSampleIds(query, options, includeStudies, variantStorageMetadataManager); } - public static Map> getIncludeSamples( + private static Map> getIncludeSampleIds( Query query, QueryOptions options, Collection studyIds, VariantStorageMetadataManager metadataManager) { List includeFilesList = getIncludeFilesList(query); List includeSamplesList = getIncludeSamplesList(query, options); - boolean includeAllSamples = query.getString(VariantQueryParam.INCLUDE_SAMPLE.key()).equals(VariantQueryUtils.ALL); - boolean includeNoneSamples = query.getString(VariantQueryParam.INCLUDE_SAMPLE.key()).equals(VariantQueryUtils.NONE); - if (!includeNoneSamples) { - if (includeSamplesList == null && CollectionUtils.isEmpty(includeFilesList)) { - includeAllSamples = true; - } - } + Set includeFields = VariantField.getIncludeFields(options); + IncludeStatus includeSampleStatus = getIncludeSampleStatus(query, includeFields); Map> samples = new LinkedHashMap<>(studyIds.size()); for (Integer studyId : studyIds) { @@ -409,9 +527,9 @@ public static Map> getIncludeSamples( } List sampleIds; - if (includeNoneSamples) { + if (includeSampleStatus.equals(IncludeStatus.NONE)) { sampleIds = Collections.emptyList(); - } else if (includeAllSamples) { + } else if (includeSampleStatus.equals(IncludeStatus.ALL)) { sampleIds = metadataManager.getIndexedSamples(sm.getId()); } else if (includeSamplesList == null && CollectionUtils.isNotEmpty(includeFilesList)) { // Include from files @@ -462,10 +580,24 @@ public static Map> getIncludeSamples( return samples; } + /** + * Plain unvalidated list of samples to include in the response. + * + * @param query Variant query + * @param options Variant query options + * @return List of samples to include. Null if undefined + */ public static List getIncludeSamplesList(Query query, QueryOptions options) { return getIncludeSamplesList(query, VariantField.getIncludeFields(options)); } + /** + * Plain unvalidated list of samples to include in the response. + * + * @param query Variant query + * @param fields Fields + * @return List of samples to include. Null if undefined or all + */ public static List getIncludeSamplesList(Query query, Set fields) { List samples; if (!fields.contains(VariantField.STUDIES_SAMPLES)) { @@ -490,9 +622,9 @@ public static List getIncludeSamplesList(Query query) { List samples; if (VariantQueryUtils.isValidParam(query, INCLUDE_SAMPLE)) { String samplesString = query.getString(VariantQueryParam.INCLUDE_SAMPLE.key()); - if (samplesString.equals(VariantQueryUtils.ALL)) { - samples = null; // Undefined. All by default - } else if (samplesString.equals(VariantQueryUtils.NONE)) { + if (isAll(samplesString)) { + samples = null; // Undefined or all + } else if (isNone(samplesString)) { samples = Collections.emptyList(); } else { samples = query.getAsStringList(VariantQueryParam.INCLUDE_SAMPLE.key()); @@ -554,4 +686,62 @@ public static List getIncludeSamplesList(Query query) { } return samples; } + + + /** + * Get the include status for SAMPLE. Don't check any file fields. + * @param query Variant input query. + * @param fields Fields to include + * @return Partial sample status. Null if undefined. + */ + private static IncludeStatus getIncludeSamplePartialStatus(Query query, Set fields) { + if (!fields.contains(VariantField.STUDIES_SAMPLES)) { + return IncludeStatus.NONE; + } + if (isAll(query, INCLUDE_SAMPLE)) { + // Include all samples. Explicit ALL + return IncludeStatus.ALL; + } + List includeSamplesList = getIncludeSamplesList(query, fields); + if (includeSamplesList != null) { + // Defined list of files to include + if (includeSamplesList.isEmpty()) { + // Include no samples. Explicit NONE + return IncludeStatus.NONE; + } else { + // Include some samples + return IncludeStatus.SOME; + } + } + return null; + } + + /** + * Get the include status for FILE. Don't check any sample fields. + * @param query Variant input query. + * @param fields Fields to include + * @return Partial file status. Null if undefined. + */ + private static IncludeStatus getIncludeFilePartialStatus(Query query, Set fields) { + if (!fields.contains(VariantField.STUDIES_FILES)) { + return IncludeStatus.NONE; + } + if (isAll(query, INCLUDE_FILE)) { + // Include all files + return IncludeStatus.ALL; + } + List includeFilesList = getIncludeFilesList(query, fields); + if (includeFilesList != null) { + // Defined list of files to include + if (includeFilesList.isEmpty()) { + // Include no files + return IncludeStatus.NONE; + } else { + // Include some files + return IncludeStatus.SOME; + } + } + return null; + } + } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantAggregationExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantAggregationExecutor.java index e679548c8fd..cd466046999 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantAggregationExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/search/SearchIndexVariantAggregationExecutor.java @@ -10,6 +10,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.List; + public class SearchIndexVariantAggregationExecutor extends VariantAggregationExecutor { private final VariantSearchManager searchManager; @@ -23,7 +25,7 @@ public SearchIndexVariantAggregationExecutor(VariantSearchManager searchManager, } @Override - protected boolean canUseThisExecutor(Query query, QueryOptions options, String facet) throws Exception { + protected boolean canUseThisExecutor(Query query, QueryOptions options, String facet, List reason) throws Exception { return VariantSearchUtils.isQueryCovered(query); } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/SampleVariantStatsAggregationQuery.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/SampleVariantStatsAggregationQuery.java index 2b2f77f12f0..07653f10aa3 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/SampleVariantStatsAggregationQuery.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/SampleVariantStatsAggregationQuery.java @@ -20,6 +20,8 @@ import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.Collections; import java.util.HashMap; @@ -36,6 +38,8 @@ public class SampleVariantStatsAggregationQuery { .build()); private final VariantStorageEngine engine; + private Logger logger = LoggerFactory.getLogger(SampleVariantStatsAggregationQuery.class); + public SampleVariantStatsAggregationQuery(VariantStorageEngine engine) { this.engine = engine; } @@ -58,6 +62,12 @@ public DataResult sampleStatsQuery(String studyStr, String s query.put(STUDY.key(), studyStr); query.remove(SAMPLE.key()); + + // Test if there is any valid VariantAggregationExecutor + // If no, fast fail + engine.getVariantAggregationExecutor(new Query(query) + .append(SAMPLE.key(), sample), new QueryOptions(QueryOptions.FACET, "chromosome")); + Future> submit = THREAD_POOL.submit(() -> { DataResult result = engine.facet( new Query(query) @@ -69,12 +79,17 @@ public DataResult sampleStatsQuery(String studyStr, String s Future> submitME = THREAD_POOL.submit(() -> { SampleMetadata sampleMetadata = engine.getMetadataManager().getSampleMetadata(studyId, sampleId); if (sampleMetadata.getMendelianErrorStatus().equals(TaskMetadata.Status.READY)) { - DataResult result = engine.facet( - new Query(query) - .append(VariantQueryUtils.SAMPLE_MENDELIAN_ERROR.key(), sample), - new QueryOptions(QueryOptions.FACET, - "chromosome>>mendelianError")); - return result; + try { + return engine.facet( + new Query(query) + .append(VariantQueryUtils.SAMPLE_MENDELIAN_ERROR.key(), sample), + new QueryOptions(QueryOptions.FACET, + "chromosome>>mendelianError")); + } catch (Exception e) { + logger.warn("Could not get mendelian error stats: " + e.toString()); + logger.debug("Could not get mendelian error stats", e); + return null; + } } else { return null; } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManager.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManager.java index f985e22ed9c..def6e26da9b 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManager.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/stats/VariantStatisticsManager.java @@ -16,6 +16,7 @@ package org.opencb.opencga.storage.core.variant.stats; +import org.apache.solr.common.StringUtils; import org.opencb.biodata.models.variant.metadata.Aggregation; import org.opencb.biodata.tools.variant.stats.AggregationUtils; import org.opencb.commons.datastore.core.ObjectMap; @@ -23,9 +24,9 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; -import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; +import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; @@ -33,7 +34,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; import java.util.*; import java.util.stream.Collectors; @@ -281,8 +284,18 @@ protected static String getUnknownGenotype(ObjectMap options) { return options.getString(STATS_DEFAULT_GENOTYPE.key(), STATS_DEFAULT_GENOTYPE.defaultValue()); } - public static Properties getAggregationMappingProperties(QueryOptions options) { - return options.get(VariantStorageOptions.STATS_AGGREGATION_MAPPING_FILE.key(), Properties.class, null); + public static Properties getAggregationMappingProperties(QueryOptions options) throws IOException { + Properties properties = options.get(VariantStorageOptions.STATS_AGGREGATION_MAPPING_FILE.key(), Properties.class, null); + if (properties == null) { + String path = options.getString(VariantStorageOptions.STATS_AGGREGATION_MAPPING_FILE.key()); + if (!StringUtils.isEmpty(path)) { + properties = new Properties(); + try (InputStream is = new FileInputStream(path)) { + properties.load(is); + } + } + } + return properties; } protected static Aggregation getAggregation(StudyMetadata studyMetadata, ObjectMap options) { diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClassTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClassTest.java index 036a149b02d..49114652e83 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClassTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/GenotypeClassTest.java @@ -14,27 +14,28 @@ */ public class GenotypeClassTest { - private final List loadedGenotypes = Arrays.asList( - "0/0", "0", - "0/1", "1/1", - "0|1", "1|0", "1|1", - "./.", ".|.", ".", - "1/2", "1|2", "./0", ".|0", "0|.", "0/.", "./1", "1/."); - @Test public void testGenotypes() throws Exception { - List gts = Arrays.asList("0/0", "0", "0/1", "1/1", "./.", ".", "1/2", "0/2", "2/2", "2/3", "./0", "0/.", "./1", "1/."); + List gts = Arrays.asList("0/0", "0", "0/1", "1/1", "./.", ".", "1/2", "0/2", "2/2", "2/3", "./0", "0/.", "./1", "1/.", "2/."); assertEquals(Arrays.asList("0/0", "0"), GenotypeClass.HOM_REF.filter(gts)); - assertEquals(Arrays.asList("1/1", "2/2"), GenotypeClass.HOM_ALT.filter(gts)); - assertEquals(Arrays.asList("0/1", "1/2", "0/2", "2/3"), GenotypeClass.HET.filter(gts)); - assertEquals(Arrays.asList("0/1", "0/2"), GenotypeClass.HET_REF.filter(gts)); - assertEquals(Arrays.asList("1/2", "2/3"), GenotypeClass.HET_ALT.filter(gts)); + assertEquals(Arrays.asList("1/1"), GenotypeClass.HOM_ALT.filter(gts)); + assertEquals(Arrays.asList("0/1", "1/2", "./1", "1/."), GenotypeClass.HET.filter(gts)); + assertEquals(Arrays.asList("0/1"), GenotypeClass.HET_REF.filter(gts)); + assertEquals(Arrays.asList("1/2"), GenotypeClass.HET_ALT.filter(gts)); + assertEquals(Arrays.asList("./1", "1/."), GenotypeClass.HET_MISS.filter(gts)); assertEquals(Arrays.asList("./.", "."), GenotypeClass.MISS.filter(gts)); + assertEquals(Arrays.asList("0/2", "2/2", "2/3", "2/."), GenotypeClass.SEC_ALT.filter(gts)); } @Test public void testPhasedGenotypes() throws Exception { + List loadedGenotypes = Arrays.asList( + "0/0", "0", + "0/1", "1/1", + "0|1", "1|0", "1|1", + "./.", ".|.", ".", + "1/2", "1|2", "./0", ".|0", "0|.", "0/.", "./1", "1/."); assertEquals(Arrays.asList("0/1", "0|1", "1|0"), GenotypeClass.filter(Arrays.asList("0/1"), loadedGenotypes)); assertEquals(Arrays.asList("!0/1", "!0|1", "!1|0"), GenotypeClass.filter(Arrays.asList("!0/1"), loadedGenotypes)); assertEquals(Arrays.asList("0/1", "0|1", "1|0"), GenotypeClass.filter(Arrays.asList("1/0"), loadedGenotypes)); @@ -44,4 +45,16 @@ public void testPhasedGenotypes() throws Exception { assertEquals(Arrays.asList("0"), GenotypeClass.filter(Arrays.asList("0"), loadedGenotypes)); assertEquals(Arrays.asList("1"), GenotypeClass.filter(Arrays.asList("1"), loadedGenotypes)); } + + + @Test + public void testMultiAllelicGenotypes() throws Exception { + List gts = Arrays.asList("0/0", "0", "0/1", "1/1", "./.", ".", "1/2", "1/3", "1|3", "2|1", "0/2", "2/2", "3/3", "2/3", + "2/4", "./0", "0/.", "./1", "1/.", "2/."); + assertEquals(Arrays.asList("1|3", "2|1", "1/2", "1/3"), GenotypeClass.expandMultiAllelicGenotype("1/4", gts)); + + assertEquals(Arrays.asList("2/2", "3/3", "2/3", "2/4"), GenotypeClass.expandMultiAllelicGenotype("2/2", gts)); +// assertEquals(Arrays.asList("2/2", "3/3"), GenotypeClass.expandMultiAllelicGenotype("2/2", gts)); +// assertEquals(Arrays.asList("2/3", "2/4"), GenotypeClass.expandMultiAllelicGenotype("2/3", gts)); + } } diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java index a585091ccff..5a95b96db9e 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorMultiFileTest.java @@ -122,7 +122,7 @@ protected ObjectMap getOptions() { @Test public void testIncludeStudies() throws Exception { query = new Query() - .append(VariantQueryParam.INCLUDE_STUDY.key(), study1); + .append(VariantQueryParam.INCLUDE_STUDY.key(), study1).append(INCLUDE_SAMPLE.key(), ALL); this.queryResult = query(query, options); assertEquals(dbAdaptor.count().first().intValue(), this.queryResult.getNumResults()); assertThat(this.queryResult, everyResult(allOf(withStudy(study2, nullValue()), withStudy("S_3", nullValue()), withStudy("S_4", nullValue())))); @@ -166,7 +166,7 @@ public void testIncludeStudiesNone() throws Exception { @Test public void testIncludeSampleIdFileIdx() throws Exception { - for (Variant variant : query(new Query(INCLUDE_SAMPLE_ID.key(), true), new QueryOptions(QueryOptions.LIMIT, 1)).getResults()) { + for (Variant variant : query(new Query(INCLUDE_SAMPLE_ID.key(), true).append(INCLUDE_SAMPLE.key(), ALL), new QueryOptions(QueryOptions.LIMIT, 1)).getResults()) { for (StudyEntry study : variant.getStudies()) { assertEquals(new HashSet<>(Arrays.asList("GT", "GQX", "AD", "DP", "GQ", "MQ", "PL", "VF")), new HashSet<>(study.getSampleDataKeys())); @@ -202,6 +202,7 @@ public void testIncludeSampleIdFileIdx() throws Exception { @Test public void testIncludeSampleIdFileIdxExcludeFiles() throws Exception { for (Variant variant : query(new Query(INCLUDE_SAMPLE_ID.key(), true) + .append(INCLUDE_SAMPLE.key(), ALL) .append(INCLUDE_FILE.key(), NONE), new QueryOptions(QueryOptions.LIMIT, 1)).getResults()) { for (StudyEntry study : variant.getStudies()) { @@ -219,9 +220,12 @@ public void testIncludeSampleIdFileIdxExcludeFiles() throws Exception { } } + @Test public void testExcludeSamples() throws Exception { - for (Variant variant : query(new Query(), new QueryOptions(QueryOptions.EXCLUDE, VariantField.STUDIES_SAMPLES).append(QueryOptions.LIMIT, 10)).getResults()) { + // JACOBO CHECK THIS + for (Variant variant : query(new Query(INCLUDE_FILE.key(), ALL), new QueryOptions(QueryOptions.EXCLUDE, VariantField.STUDIES_SAMPLES) + .append(QueryOptions.LIMIT, 10)).getResults()) { for (StudyEntry study : variant.getStudies()) { assertEquals(0, study.getSamples().size()); assertNotEquals(0, study.getFiles().size()); @@ -232,7 +236,7 @@ public void testExcludeSamples() throws Exception { @Test public void testIncludeSamplesNone() throws Exception { for (Variant variant : query(new Query() - .append(INCLUDE_SAMPLE.key(), NONE), new QueryOptions(QueryOptions.LIMIT, 10)).getResults()) { + .append(INCLUDE_SAMPLE.key(), NONE).append(INCLUDE_FILE.key(), ALL), new QueryOptions(QueryOptions.LIMIT, 10)).getResults()) { for (StudyEntry study : variant.getStudies()) { assertEquals(0, study.getSamples().size()); assertNotEquals(0, study.getFiles().size()); @@ -264,14 +268,15 @@ public void testIncludeFiles() throws Exception { @Test public void testGetByStudies() throws Exception { query = new Query() - .append(VariantQueryParam.STUDY.key(), study1); + .append(VariantQueryParam.STUDY.key(), study1) + .append(INCLUDE_SAMPLE.key(), ALL); queryResult = query(query, options); VariantQueryResult allVariants = dbAdaptor.get(new Query() .append(VariantQueryParam.INCLUDE_STUDY.key(), study1), options); assertThat(queryResult, everyResult(allVariants, withStudy(study1))); - allVariants = dbAdaptor.get(new Query(), options); + allVariants = dbAdaptor.get(new Query().append(INCLUDE_SAMPLE.key(), ALL), options); query = new Query().append(VariantQueryParam.STUDY.key(), study1 + AND + study2); queryResult = query(query, options); assertThat(queryResult, everyResult(allVariants, allOf(withStudy(study1), withStudy(study2)))); @@ -364,17 +369,17 @@ public void testGetByGenotype() throws Exception { @Test public void testSampleLimitSkip() throws Exception { - VariantQueryResult result = query(new Query(SAMPLE_METADATA.key(), true), options); + VariantQueryResult result = query(new Query(SAMPLE_METADATA.key(), true).append(VariantQueryParam.INCLUDE_SAMPLE.key(), ALL), options); System.out.println("samples(ALL) = " + result.getSamples()); for (int i : new int[]{1, 3, 6, 8, 10}) { - result = query(new Query(VariantQueryParam.SAMPLE_SKIP.key(), i).append(SAMPLE_METADATA.key(), true), options); + result = query(new Query(VariantQueryParam.SAMPLE_SKIP.key(), i).append(VariantQueryParam.INCLUDE_SAMPLE.key(), ALL).append(SAMPLE_METADATA.key(), true), options); // System.out.println("samples(SKIP=" + i + ") = " + result.getSamples()); assertEquals(Math.max(0, 8 - i), result.getSamples().values().stream().mapToInt(List::size).sum()); assertEquals(Math.max(0, 8 - i), result.getNumSamples().intValue()); assertEquals(8, result.getNumTotalSamples().intValue()); - result = query(new Query(VariantQueryParam.SAMPLE_LIMIT.key(), i).append(SAMPLE_METADATA.key(), true), options); + result = query(new Query(VariantQueryParam.SAMPLE_LIMIT.key(), i).append(VariantQueryParam.INCLUDE_SAMPLE.key(), ALL).append(SAMPLE_METADATA.key(), true), options); // System.out.println("samples(LIMIT=" + i + ") = " + result.getSamples()); assertEquals(Math.min(8, i), result.getSamples().values().stream().mapToInt(List::size).sum()); assertEquals(Math.min(8, i), result.getNumSamples().intValue()); @@ -469,10 +474,7 @@ public void testGetByFileNamesAndNegated() { public void testGetByFileNamesMultiStudiesAnd() { query = new Query() .append(VariantQueryParam.STUDY.key(), study1 + "," + study2) - .append(VariantQueryParam.FILE.key(), - file12877 - + AND + - file12882); + .append(VariantQueryParam.FILE.key(), file12877 + AND + file12882); queryResult = query(query, options); VariantQueryResult allVariants = dbAdaptor.get(new Query() .append(VariantQueryParam.INCLUDE_STUDY.key(), study1 + "," + study2) @@ -683,10 +685,7 @@ public void testGetAllVariants_infoFail() { public void testGetByFileNamesMultiStudiesOr() { query = new Query() .append(VariantQueryParam.STUDY.key(), study1 + "," + study2) - .append(VariantQueryParam.FILE.key(), - file12877 - + VariantQueryUtils.OR + - file12882); + .append(VariantQueryParam.FILE.key(), file12877 + VariantQueryUtils.OR + file12882); queryResult = query(query, options); VariantQueryResult allVariants = dbAdaptor.get(new Query() .append(VariantQueryParam.INCLUDE_STUDY.key(), study1 + "," + study2) diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java index cee6ae05019..0859c4b34b1 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/adaptors/VariantDBAdaptorTest.java @@ -19,7 +19,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.HashMultiset; import com.google.common.collect.Multiset; -import htsjdk.variant.variantcontext.VariantContext; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.hamcrest.CoreMatchers; @@ -60,7 +59,6 @@ import java.util.stream.Collectors; import static org.hamcrest.CoreMatchers.*; -import static org.hamcrest.CoreMatchers.hasItem; import static org.junit.Assert.*; import static org.opencb.opencga.storage.core.variant.adaptors.VariantMatchers.*; import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*; @@ -91,6 +89,7 @@ public abstract class VariantDBAdaptorTest extends VariantStorageBaseTest { protected QueryOptions options; protected DataResult queryResult; protected DataResult allVariants; + protected DataResult allVariantsSummary; private static Logger logger = LoggerFactory.getLogger(VariantDBAdaptorTest.class); private String homAlt; private String homRef; @@ -98,11 +97,6 @@ public abstract class VariantDBAdaptorTest extends VariantStorageBaseTest { private String het1; private String het2; protected int fileId = 1; - protected static int na19600; - protected static int na19660; - protected static int na19661; - protected static int na19685; - protected List sampleNames = Arrays.asList("NA19600", "NA19660", "NA19661", "NA19685"); protected Set cohorts = new HashSet<>(Arrays.asList("ALL", "cohort1", "cohort2")); @@ -143,11 +137,6 @@ public void before() throws Exception { fileIndexed = true; Integer indexedFileId = metadataManager.getIndexedFiles(studyMetadata.getId()).iterator().next(); - na19600 = metadataManager.getSampleId(studyMetadata.getId(), "NA19600"); - na19660 = metadataManager.getSampleId(studyMetadata.getId(), "NA19660"); - na19661 = metadataManager.getSampleId(studyMetadata.getId(), "NA19661"); - na19685 = metadataManager.getSampleId(studyMetadata.getId(), "NA19685"); - //Calculate stats if (getOtherParams().getBoolean(VariantStorageOptions.STATS_CALCULATE.key(), true)) { QueryOptions options = new QueryOptions(VariantStorageOptions.STUDY.key(), STUDY_NAME) @@ -176,15 +165,16 @@ public void before() throws Exception { } if (params.getBoolean(VariantStorageOptions.ANNOTATE.key())) { for (int i = 0; i < 30 ; i++) { - allVariants = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.SORT, true)); + allVariantsSummary = dbAdaptor.get(new Query(), + new QueryOptions(QueryOptions.SORT, true)); Long annotated = dbAdaptor.count(new Query(ANNOTATION_EXISTS.key(), true)).first(); Long all = dbAdaptor.count(new Query()).first(); System.out.println("count annotated = " + annotated); System.out.println("count = " + all); - System.out.println("get = " + allVariants.getNumResults()); + System.out.println("get = " + allVariantsSummary.getNumResults()); - List nonAnnotatedVariants = allVariants.getResults() + List nonAnnotatedVariants = allVariantsSummary.getResults() .stream() .filter(variant -> variant.getAnnotation() == null) .collect(Collectors.toList()); @@ -199,7 +189,10 @@ public void before() throws Exception { assertEquals(dbAdaptor.count(new Query(ANNOTATION_EXISTS.key(), true)).getNumMatches(), dbAdaptor.count().getNumMatches()); } } - allVariants = dbAdaptor.get(new Query(), new QueryOptions(QueryOptions.SORT, true)); + allVariants = dbAdaptor.get(new Query(INCLUDE_SAMPLE.key(), ALL).append(INCLUDE_FILE.key(), ALL), + new QueryOptions(QueryOptions.SORT, true)); + allVariantsSummary = dbAdaptor.get(new Query(), + new QueryOptions(QueryOptions.SORT, true)); options = new QueryOptions(); homAlt = getHomAltGT(); @@ -226,6 +219,9 @@ public VariantDBIterator iterator(Query query, QueryOptions options) { } protected Query preProcessQuery(Query query, QueryOptions options) { +// if (!VariantQueryProjectionParser.isIncludeSamplesDefined(query, VariantField.getIncludeFields(options))) { +// query.put(INCLUDE_SAMPLE.key(), ALL); +// } return variantStorageEngine.preProcessQuery(query, options); } @@ -334,23 +330,23 @@ public void testGetVariantsByType() { System.out.println("!SNV = " + not_snv.size()); not_snv.forEach(variant -> assertFalse(EnumSet.of(VariantType.SNV, VariantType.SNP).contains(variant.getType()))); - Set snv_snp = new HashSet<>(query(new Query(VariantQueryParam.TYPE.key(), VariantType.SNV + "," + VariantContext.Type.SNP), new QueryOptions()).getResults()); - System.out.println("SNV_SNP = " + snv_snp.size()); - assertEquals(snv_snp, snv); +// Set snv_snp = new HashSet<>(query(new Query(VariantQueryParam.TYPE.key(), VariantType.SNV + "," + VariantContext.Type.SNP), new QueryOptions()).getResults()); +// System.out.println("SNV_SNP = " + snv_snp.size()); +// assertEquals(snv_snp, snv); - Set snp = new HashSet<>(query(new Query(VariantQueryParam.TYPE.key(), VariantType.SNP), new QueryOptions()).getResults()); - snp.forEach(variant -> assertEquals(VariantType.SNP, variant.getType())); - snp.forEach(variant -> assertThat(snv, hasItem(variant))); - System.out.println("SNP = " + snp.size()); +// Set snp = new HashSet<>(query(new Query(VariantQueryParam.TYPE.key(), VariantType.SNP), new QueryOptions()).getResults()); +// snp.forEach(variant -> assertEquals(VariantType.SNP, variant.getType())); +// snp.forEach(variant -> assertThat(snv, hasItem(variant))); +// System.out.println("SNP = " + snp.size()); Set indels = new HashSet<>(query(new Query(VariantQueryParam.TYPE.key(), VariantType.INDEL), new QueryOptions()).getResults()); indels.forEach(variant -> assertEquals(VariantType.INDEL, variant.getType())); System.out.println("INDEL = " + indels.size()); - Set indels_snp = new HashSet<>(query(new Query(VariantQueryParam.TYPE.key(), VariantType.INDEL + "," + VariantType.SNP), new QueryOptions()).getResults()); - indels_snp.forEach(variant -> assertThat(EnumSet.of(VariantType.INDEL, VariantType.SNP), hasItem(variant.getType()))); - indels_snp.forEach(variant -> assertTrue(indels.contains(variant) || snp.contains(variant))); - System.out.println("INDEL_SNP = " + indels_snp.size()); +// Set indels_snp = new HashSet<>(query(new Query(VariantQueryParam.TYPE.key(), VariantType.INDEL + "," + VariantType.SNP), new QueryOptions()).getResults()); +// indels_snp.forEach(variant -> assertThat(EnumSet.of(VariantType.INDEL, VariantType.SNP), hasItem(variant.getType()))); +// indels_snp.forEach(variant -> assertTrue(indels.contains(variant) || snp.contains(variant))); +// System.out.println("INDEL_SNP = " + indels_snp.size()); Set indels_snv = new HashSet<>(query(new Query(VariantQueryParam.TYPE.key(), VariantType.INDEL + "," + VariantType.SNV), new QueryOptions()).getResults()); indels_snv.forEach(variant -> assertThat(EnumSet.of(VariantType.INDEL, VariantType.SNP, VariantType.SNV), hasItem(variant.getType()))); @@ -364,7 +360,7 @@ public void testGetAllVariants_populationFrequencyRef() { query = new Query() .append(ANNOT_POPULATION_REFERENCE_FREQUENCY.key(), GENOMES_PHASE_3 + ":AFR<=0.05001"); queryResult = query(query, options); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasPopRefFreq(GENOMES_PHASE_3, "AFR", lte(0.05001))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasPopRefFreq(GENOMES_PHASE_3, "AFR", lte(0.05001))))); } @Test @@ -374,21 +370,21 @@ public void testGetAllVariants_populationFrequency() { query = new Query() .append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), ESP_6500 + ":AA>0.05001"); queryResult = query(query, options); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasPopAltFreq(ESP_6500, "AA", gt(0.05001))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasPopAltFreq(ESP_6500, "AA", gt(0.05001))))); // filterPopulation(map -> (map.containsKey(ESP_6500 + ":AA") && map.get(ESP_6500 + ":AA").getAltAlleleFreq() > 0.05001), filter); query = new Query() .append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), GENOMES_PHASE_3 + ":AFR<=0.05001"); queryResult = query(query, options); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasPopAltFreq(GENOMES_PHASE_3, "AFR", lte(0.05001))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasPopAltFreq(GENOMES_PHASE_3, "AFR", lte(0.05001))))); // filterPopulation(map -> (!map.containsKey(GENOMES_PHASE_3 + ":AFR") || map.get(GENOMES_PHASE_3 + ":AFR").getAltAlleleFreq() <= 0.05001), filter); query = new Query() .append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), ESP_6500 + ":AA>0.05001;" + GENOMES_PHASE_3 + ":AFR<=0.05001"); queryResult = query(query, options); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(allOf( + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(allOf( hasPopAltFreq(ESP_6500, "AA", gt(0.05001)), hasPopAltFreq(GENOMES_PHASE_3, "AFR", lte(0.05001)))))); @@ -399,7 +395,7 @@ public void testGetAllVariants_populationFrequency() { .append(ANNOT_POPULATION_ALTERNATE_FREQUENCY.key(), ESP_6500 + ":AA>0.05001," + GENOMES_PHASE_3 + ":AFR<=0.05001"); queryResult = query(query, options); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(anyOf( + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(anyOf( hasPopAltFreq(ESP_6500, "AA", gt(0.05001)), hasPopAltFreq(GENOMES_PHASE_3, "AFR", lte(0.05001)))))); @@ -417,21 +413,21 @@ public void testGetAllVariants_population_maf() { queryResult = query(query, options); // filterPopulation(map -> (Math.min(map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getRefAlleleFreq(), // map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getAltAlleleFreq()) <= 0.0501)); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasPopMaf(GENOMES_PHASE_3, "AFR", lte(0.05001))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasPopMaf(GENOMES_PHASE_3, "AFR", lte(0.05001))))); query = new Query(baseQuery).append(ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY.key(), ESP_6500 + ":AA>0.0501"); queryResult = query(query, options); // filterPopulation(map -> (map.containsKey(ESP_6500 + ":AA") && Math.min(map.get(ESP_6500 + ":AA").getRefAlleleFreq(), // map.get(ESP_6500 + ":AA").getAltAlleleFreq()) > 0.0501)); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasPopMaf(ESP_6500, "AA", gt(0.05001))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasPopMaf(ESP_6500, "AA", gt(0.05001))))); query = new Query(baseQuery).append(ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY.key(), GENOMES_PHASE_3 + ":ALL<=0.0501"); queryResult = query(query, options); // filterPopulation(map -> (Math.min(map.getOrDefault(GENOMES_PHASE_3 + ":ALL", defaultPopulation).getRefAlleleFreq(), // map.getOrDefault(GENOMES_PHASE_3 + ":ALL", defaultPopulation).getAltAlleleFreq()) <= 0.0501)); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasPopMaf(GENOMES_PHASE_3, "ALL", lt(0.05001))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasPopMaf(GENOMES_PHASE_3, "ALL", lt(0.05001))))); query = new Query(baseQuery).append(ANNOT_POPULATION_MINOR_ALLELE_FREQUENCY.key(), ESP_6500 + ":AA>0.0501" + AND + GENOMES_PHASE_3 + ":AFR<=0.0501"); @@ -440,7 +436,7 @@ public void testGetAllVariants_population_maf() { // map.get(ESP_6500 + ":AA").getAltAlleleFreq()) > 0.0501 // && Math.min(map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getRefAlleleFreq(), // map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getAltAlleleFreq()) <= 0.0501)); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(allOf( + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(allOf( hasPopMaf(ESP_6500, "AA", gt(0.0501)), hasPopMaf(GENOMES_PHASE_3, "AFR", lte(0.0501)))))); @@ -451,7 +447,7 @@ public void testGetAllVariants_population_maf() { // map.get(ESP_6500 + ":AA").getAltAlleleFreq()) > 0.0501 // || Math.min(map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getRefAlleleFreq(), // map.getOrDefault(GENOMES_PHASE_3 + ":AFR", defaultPopulation).getAltAlleleFreq()) <= 0.0501)); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(anyOf( + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(anyOf( hasPopMaf(ESP_6500, "AA", gt(0.0501)), hasPopMaf(GENOMES_PHASE_3, "AFR", lte(0.0501)))))); @@ -467,7 +463,7 @@ public long filterPopulation(DataResult queryResult, Predicate assertNotNull("In " + variant, variant.getAnnotation()); // assertNotNull("In " + variant, variant.getAnnotation().getPopulationFrequencies()); }); - Set expectedVariants = allVariants.getResults() + Set expectedVariants = allVariantsSummary.getResults() .stream() .filter(filterVariants.and(variant -> variant.getAnnotation() != null)) .filter(variant -> { @@ -557,7 +553,7 @@ public void testGetAllVariants_variantId() { public void testGetAllVariants_xref() { Query query = new Query(ANNOT_XREF.key(), "3:108634973:C:A,rs2032582,HP:0001250,VAR_048225,Q9BY64,ENSG00000250026,TMPRSS11B,COSM1421316"); queryResult = query(query, null); - assertThat(queryResult, everyResult(allVariants, anyOf( + assertThat(queryResult, everyResult(allVariantsSummary, anyOf( hasAnnotation(at("3:108634973:C:A")), with("id", Variant::getId, is("rs2032582")), hasAnnotation(with("GeneTraitAssociation", VariantAnnotation::getGeneTraitAssociation, @@ -591,12 +587,12 @@ public void testGetAllVariants_rs(String key) { Variant variant = queryResult.first(); assertEquals(1, queryResult.getNumResults()); assertEquals(variant.getStart(), Integer.valueOf(1650807)); - assertThat(variant.getIds(), hasItem("rs1137005")); + assertThat(variant.getNames(), hasItem("rs1137005")); query = new Query(key, "rs1137005,rs150535390"); queryResult = query(query, this.options); assertEquals(2, queryResult.getNumResults()); - queryResult.getResults().forEach(v -> assertThat(v.getIds(), anyOf(hasItem("rs1137005"), hasItem("rs150535390")))); + queryResult.getResults().forEach(v -> assertThat(v.getNames(), anyOf(hasItem("rs1137005"), hasItem("rs150535390")))); } @Test @@ -605,36 +601,36 @@ public void testGetAllVariants_ct() { query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), "SO:0001566"); queryResult = query(query, null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(hasItem("SO:0001566"))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasSO(hasItem("SO:0001566"))))); assertThat(queryResult, numResults(gt(0))); // assertEquals(911, queryResult.getNumResults()); query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), "1566"); queryResult = query(query, null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(hasItem("SO:0001566"))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasSO(hasItem("SO:0001566"))))); assertThat(queryResult, numResults(gt(0))); // assertEquals(911, queryResult.getNumResults()); query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), "SO:0001566,SO:0001583"); queryResult = query(query, options); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(anyOf(hasItem("SO:0001566"), hasItem("SO:0001583")))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasSO(anyOf(hasItem("SO:0001566"), hasItem("SO:0001583")))))); assertThat(queryResult, numResults(gt(0))); // assertEquals(947, queryResult.getNumResults()); query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), ConsequenceTypeMappings.accessionToTerm.get(1566) + ",SO:0001583"); queryResult = query(query, options); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(anyOf(hasItem("SO:0001566"), hasItem("SO:0001583")))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasSO(anyOf(hasItem("SO:0001566"), hasItem("SO:0001583")))))); assertThat(queryResult, numResults(gt(0))); query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), "1566,SO:0001583"); queryResult = query(query, options); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(anyOf(hasItem("SO:0001566"), hasItem("SO:0001583")))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasSO(anyOf(hasItem("SO:0001566"), hasItem("SO:0001583")))))); assertThat(queryResult, numResults(gt(0))); // assertEquals(947, queryResult.getNumResults()); query = new Query(ANNOT_CONSEQUENCE_TYPE.key(), "SO:0001566;SO:0001583"); queryResult = query(query, options); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSO(allOf(hasItem("SO:0001566"), hasItem("SO:0001583")))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasSO(allOf(hasItem("SO:0001566"), hasItem("SO:0001583")))))); assertThat(queryResult, numResults(gt(0))); // assertEquals(396, queryResult.getNumResults()); @@ -768,7 +764,7 @@ public void testCombineGeneSoVariants() { at("7:100807230:G:T"))); assertThat(query(new Query(ANNOT_XREF.key(), "rs1171830").append(ANNOT_CONSEQUENCE_TYPE.key(), "SO:0001566"), null), - everyResult(allVariants, allOf( + everyResult(allVariantsSummary, allOf( with("id", Variant::getId, is("rs1171830")), hasAnnotation(hasSO(hasItem(is("SO:0001566"))))))); } @@ -789,7 +785,7 @@ private void queryGeneCT(String gene, String so, Query query, Matcher variants, biotypeMatcher = is(biotype); } - assertThat(queryResult, everyResult(allVariants, hasAnnotation( + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation( anyOf( allOf( hasAnyGeneOf(genes), @@ -994,7 +990,7 @@ public void testGetAllVariants_geneTrait() { query = new Query(ANNOT_GENE_TRAIT_ID.key(), String.join(OR, ids)); queryResult = query(query, null); System.out.println("queryResult.getNumResults() = " + queryResult.getNumResults()); - assertThat(queryResult, everyResult(allVariants, + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(with("GeneTraitAssociation", VariantAnnotation::getGeneTraitAssociation, hasItem(with("GeneTraitId", GeneTraitAssociation::getId, is(anyOf(ids.stream().map(CoreMatchers::is).collect(Collectors.toList()))))))))); @@ -1002,7 +998,7 @@ public void testGetAllVariants_geneTrait() { query = new Query(ANNOT_GENE_TRAIT_ID.key(), String.join(OR, ids) + OR + String.join(OR, hpos)); queryResult = query(query, null); System.out.println("queryResult.getNumResults() = " + queryResult.getNumResults()); - assertThat(queryResult, everyResult(allVariants, + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(with("GeneTraitAssociation", VariantAnnotation::getGeneTraitAssociation, anyOf( hasItem(with("GeneTraitId", GeneTraitAssociation::getId, is(anyOf(ids.stream().map(CoreMatchers::is).collect(Collectors.toList()))))), hasItem(with("HPO", GeneTraitAssociation::getHpo, is(anyOf(hpos.stream().map(CoreMatchers::is).collect(Collectors.toList()))))) @@ -1012,7 +1008,7 @@ public void testGetAllVariants_geneTrait() { query = new Query(ANNOT_GENE_TRAIT_ID.key(), String.join(AND, ids)); queryResult = query(query, null); System.out.println("queryResult.getNumResults() = " + queryResult.getNumResults()); - assertThat(queryResult, everyResult(allVariants, + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(with("GeneTraitAssociation", VariantAnnotation::getGeneTraitAssociation, allOf(ids.stream().map(id -> hasItem(with("GeneTraitId", GeneTraitAssociation::getId, is(id)))).collect(Collectors.toList())))))); @@ -1020,7 +1016,7 @@ public void testGetAllVariants_geneTrait() { query = new Query(ANNOT_GENE_TRAIT_ID.key(), String.join(AND, ids)); queryResult = query(query, null); System.out.println("queryResult.getNumResults() = " + queryResult.getNumResults()); - assertThat(queryResult, everyResult(allVariants, + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(with("GeneTraitAssociation", VariantAnnotation::getGeneTraitAssociation, allOf( hasItem(with("GeneTraitId", GeneTraitAssociation::getId, is("umls:C0007131"))), @@ -1222,26 +1218,26 @@ public void testGetAllVariants_polyphenSift() { System.out.println("q = " + q + " -> " + m); queryResult = query(new Query(ANNOT_SIFT.key(), q), null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnySift(m)))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasAnySift(m)))); queryResult = query(new Query(ANNOT_POLYPHEN.key(), q), null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnyPolyphen(m)))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasAnyPolyphen(m)))); queryResult = query(new Query(ANNOT_PROTEIN_SUBSTITUTION.key(), "polyphen" + q), null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnyPolyphen(m)))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasAnyPolyphen(m)))); queryResult = query(new Query(ANNOT_PROTEIN_SUBSTITUTION.key(), "sift" + q), null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnySift(m)))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasAnySift(m)))); // Duplicate operator q = q.charAt(0) + q; System.out.println("q = " + q); queryResult = query(new Query(ANNOT_PROTEIN_SUBSTITUTION.key(), "polyphen" + q), null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasPolyphen(anyOf(hasItem(m), isEmpty()))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasPolyphen(anyOf(hasItem(m), isEmpty()))))); queryResult = query(new Query(ANNOT_PROTEIN_SUBSTITUTION.key(), "sift" + q), null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasSift(anyOf(hasItem(m), isEmpty()))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasSift(anyOf(hasItem(m), isEmpty()))))); } // for (Map.Entry entry : polyphen.entrySet()) { @@ -1263,16 +1259,16 @@ public void testGetAllVariants_polyphenSiftMalformed() { public void testGetAlVariants_polyphenSiftDescription() { for (String p : Arrays.asList("benign", "possibly damaging", "probably damaging", "unknown")) { queryResult = query(new Query(ANNOT_POLYPHEN.key(), p), null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnyPolyphenDesc(equalTo(p))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasAnyPolyphenDesc(equalTo(p))))); queryResult = query(new Query(ANNOT_PROTEIN_SUBSTITUTION.key(), "polyphen=" + p), null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnyPolyphenDesc(equalTo(p))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasAnyPolyphenDesc(equalTo(p))))); } for (String s : Arrays.asList("deleterious", "tolerated")) { queryResult = query(new Query(ANNOT_SIFT.key(), s), null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnySiftDesc(equalTo(s))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasAnySiftDesc(equalTo(s))))); queryResult = query(new Query(ANNOT_PROTEIN_SUBSTITUTION.key(), "sift=" + s), null); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(hasAnySiftDesc(equalTo(s))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(hasAnySiftDesc(equalTo(s))))); } } @@ -1339,7 +1335,7 @@ public void testGetAllVariants_functionalScore_wrongValue() { public void testGetAllVariants_conservationScore() { //ANNOT_CONSERVATION - long phastCons = countConservationScore("phastCons", allVariants, s -> s > 0.5); + long phastCons = countConservationScore("phastCons", allVariantsSummary, s -> s > 0.5); assertTrue(phastCons > 0); checkConservationScore(new Query(ANNOT_CONSERVATION.key(), "phylop>0.5"), s -> s > 0.5, "phylop"); @@ -1406,7 +1402,7 @@ public void checkScore(Query query, Predicate doublePredicate, String so public void checkScore(Query query, Predicate> scorePredicate, Function> mapper) { DataResult result = query(query, null); - Collection expected = filterByScore(allVariants, scorePredicate, mapper); + Collection expected = filterByScore(allVariantsSummary, scorePredicate, mapper); Collection filteredResult = filterByScore(result, scorePredicate, mapper); TreeSet actual = new TreeSet<>(Comparator.comparing(Variant::getChromosome).thenComparing(Variant::getStart).thenComparing(Variant::toString)); actual.addAll(result.getResults()); @@ -1513,7 +1509,7 @@ public void testGetAllVariants_region() { options.put(QueryOptions.SORT, true); query = new Query(REGION.key(), "1:14000000-160000000"); queryResult = query(query, options); - assertThat(queryResult, everyResult(allVariants, overlaps(new Region("1:14000000-160000000")))); + assertThat(queryResult, everyResult(allVariantsSummary, overlaps(new Region("1:14000000-160000000")))); int lastStart = 0; for (Variant variant : queryResult.getResults()) { @@ -1550,7 +1546,7 @@ public void testGetAllVariants_region() { query = new Query(REGION.key(), "chr2"); queryResult = query(query, options); - assertThat(queryResult, everyResult(allVariants, overlaps(new Region("2")))); + assertThat(queryResult, everyResult(allVariantsSummary, overlaps(new Region("2")))); } public void checkRegion(Region region) { @@ -1559,7 +1555,7 @@ public void checkRegion(Region region) { public void checkRegion(Region queryRegion, Region overlappingRegion) { queryResult = query(new Query(REGION.key(), queryRegion), null); - assertThat(queryResult, everyResult(allVariants, overlaps(overlappingRegion))); + assertThat(queryResult, everyResult(allVariantsSummary, overlaps(overlappingRegion))); } @Test @@ -1567,7 +1563,7 @@ public void testGetAllVariants_genes() { Query query = new Query(GENE.key(), "FLG-AS1"); DataResult result = query(query, new QueryOptions()); - assertThat(result, everyResult(allVariants, hasAnnotation(hasGenes(Collections.singletonList("FLG-AS1"))))); + assertThat(result, everyResult(allVariantsSummary, hasAnnotation(hasGenes(Collections.singletonList("FLG-AS1"))))); for (Variant variant : result.getResults()) { System.out.println("variant = " + variant); @@ -1586,11 +1582,11 @@ public void testGetAllVariants_studies() { Query query = new Query(STUDY.key(), studyMetadata.getName()); long numResults = count(query); - assertEquals(allVariants.getNumResults(), numResults); + assertEquals(allVariantsSummary.getNumResults(), numResults); query = new Query(STUDY.key(), studyMetadata.getId()); numResults = count(query); - assertEquals(allVariants.getNumResults(), numResults); + assertEquals(allVariantsSummary.getNumResults(), numResults); } @@ -1712,9 +1708,9 @@ public void checkSamplesData(String returnedSamples) { query.put(INCLUDE_SAMPLE.key(), returnedSamples); VariantQueryResult queryResult = query(query, options); List samplesName; - if (returnedSamples == null || returnedSamples.equals(VariantQueryUtils.ALL)) { + if (VariantQueryUtils.ALL.equals(returnedSamples)) { samplesName = this.sampleNames; - } else if (returnedSamples.equals(VariantQueryUtils.NONE)) { + } else if (returnedSamples == null || returnedSamples.equals(VariantQueryUtils.NONE)) { samplesName = Collections.emptyList(); } else { samplesName = query.getAsStringList(VariantQueryParam.INCLUDE_SAMPLE.key()); @@ -1763,7 +1759,7 @@ public void checkSamplesData(String returnedSamples) { @Test public void testIterator() { int numVariants = 0; - Query query = new Query(); + Query query = new Query(INCLUDE_SAMPLE.key(), ALL); for (VariantDBIterator iterator = iterator(query, new QueryOptions()); iterator.hasNext(); ) { Variant variant = iterator.next(); numVariants++; @@ -1778,7 +1774,7 @@ public void testIterator() { @Test public void testGetAllVariants_genotypes() { - Query query = new Query(GENOTYPE.key(), na19600 + IS + homAlt); + Query query = new Query(GENOTYPE.key(), "NA19600" + IS + homAlt); queryResult = query(query, new QueryOptions()); assertEquals(282, queryResult.getNumResults()); queryResult.getResults().forEach(v -> v.getStudiesMap().forEach((s, vse) -> assertEquals(homAlt, vse.getSampleData("NA19600", "GT") @@ -1810,17 +1806,17 @@ public void testGetAllVariants_genotypes() { ))); //get for each genotype. Should return all variants - query = new Query(GENOTYPE.key(), na19600 + IS + homRef + OR + het + OR + homAlt + OR + "./."); + query = new Query(GENOTYPE.key(), "NA19600" + IS + homRef + OR + het + OR + homAlt + OR + "./."); long numResults = count(query); assertEquals(NUM_VARIANTS, numResults); //get for each genotype. Should return all variants - query = new Query(GENOTYPE.key(), na19600 + IS + GenotypeClass.HOM_REF + OR + GenotypeClass.HET + OR + GenotypeClass.HOM_ALT + OR + GenotypeClass.MISS); + query = new Query(GENOTYPE.key(), "NA19600" + IS + GenotypeClass.HOM_REF + OR + GenotypeClass.HET + OR + GenotypeClass.HOM_ALT + OR + GenotypeClass.MISS); numResults = count(query); assertEquals(NUM_VARIANTS, numResults); //Get all missing genotypes for sample na19600 - query = new Query(GENOTYPE.key(), na19600 + IS + "./."); + query = new Query(GENOTYPE.key(), "NA19600" + IS + "./."); queryResult = query(query, new QueryOptions()); assertEquals(9, queryResult.getNumResults()); queryResult.getResults().forEach(v -> v.getStudiesMap().forEach((s, vse) -> { @@ -1828,7 +1824,7 @@ public void testGetAllVariants_genotypes() { })); //Get all variants with 1|1 for na19600 and 0|0 or 1|0 for na19685 - query = new Query(GENOTYPE.key(), na19600 + IS + homAlt + AND + na19685 + IS + homRef + OR + het); + query = new Query(GENOTYPE.key(), "NA19600" + IS + homAlt + AND + "NA19685" + IS + homRef + OR + het); queryResult = query(query, new QueryOptions()); assertEquals(40, queryResult.getNumResults()); Set refHet = new HashSet<>(); @@ -1846,23 +1842,23 @@ public void testGetAllVariants_negatedGenotypes() { DataResult allVariants = query(new Query(INCLUDE_SAMPLE.key(), "NA19600"), new QueryOptions()); //Get all variants with not 1|1 for na19600 - query = new Query(GENOTYPE.key(), na19600 + IS + NOT + homAlt); + query = new Query(GENOTYPE.key(), "NA19600" + IS + NOT + homAlt); queryResult = query(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withSampleData("NA19600", "GT", not(is(homAlt)))))); //Get all variants with not 0/0 for na19600 - query = new Query(GENOTYPE.key(), na19600 + IS + NOT + homRef); + query = new Query(GENOTYPE.key(), "NA19600" + IS + NOT + homRef); queryResult = query(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withSampleData("NA19600", "GT", not(is(homRef)))))); //Get all variants with not 0/0 or 0|1 for na19600 - query = new Query(GENOTYPE.key(), na19600 + IS + NOT + homRef + OR + NOT + het1); + query = new Query(GENOTYPE.key(), "NA19600" + IS + NOT + homRef + OR + NOT + het1); queryResult = query(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withSampleData("NA19600", "GT", allOf(not(is(homRef)), not(is(het1))))))); allVariants = query(new Query(INCLUDE_SAMPLE.key(), "NA19600,NA19685"), new QueryOptions()); //Get all variants with 1|1 for na19600 and 0|0 or 1|0 for na19685 - query = new Query(GENOTYPE.key(), na19600 + IS + homAlt + AND + na19685 + IS + NOT + homRef + OR + NOT + het2); + query = new Query(GENOTYPE.key(), "NA19600" + IS + homAlt + AND + "NA19685" + IS + NOT + homRef + OR + NOT + het2); queryResult = query(query, new QueryOptions()); assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, allOf( withSampleData("NA19600", "GT", is(homAlt)), @@ -1874,7 +1870,7 @@ public void testGetAllVariants_negatedGenotypes() { public void testGetAllVariants_negatedGenotypesMixed() { Query query; - query = new Query(GENOTYPE.key(), na19600 + IS + NOT + homRef + OR + het1) + query = new Query(GENOTYPE.key(), "NA19600" + IS + NOT + homRef + OR + het1) .append(INCLUDE_SAMPLE.key(), ALL); thrown.expect(VariantQueryException.class); queryResult = query(query, new QueryOptions()); @@ -1937,13 +1933,13 @@ public void testGetAllVariants_clinicalSignificance() { Query query = new Query(ANNOT_CLINICAL_SIGNIFICANCE.key(), clinicalSignificance); queryResult = query(query, new QueryOptions()); System.out.println(clinicalSignificance + " --> " + queryResult.getNumResults()); - assertThat(queryResult, everyResult(allVariants, hasAnnotation(withClinicalSignificance(hasItem(clinicalSignificance))))); + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation(withClinicalSignificance(hasItem(clinicalSignificance))))); if (clinicalSignificance != ClinicalSignificance.pathogenic) { query = new Query(ANNOT_CLINICAL_SIGNIFICANCE.key(), clinicalSignificance + OR + ClinicalSignificance.pathogenic); queryResult = query(query, new QueryOptions()); System.out.println(query.toJson() + " --> " + queryResult.getNumResults()); - assertThat(queryResult, everyResult(allVariants, + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation( withClinicalSignificance( anyOf( @@ -1957,7 +1953,7 @@ public void testGetAllVariants_clinicalSignificance() { query = new Query(ANNOT_CLINICAL_SIGNIFICANCE.key(), clinicalSignificance + AND + ClinicalSignificance.pathogenic); queryResult = query(query, new QueryOptions()); System.out.println(query.toJson() + " --> " + queryResult.getNumResults()); - assertThat(queryResult, everyResult(allVariants, + assertThat(queryResult, everyResult(allVariantsSummary, hasAnnotation( withClinicalSignificance( allOf( @@ -2041,7 +2037,7 @@ public void testGetAllVariants_Freqs() throws Exception { queryResult = query(new Query(STATS_ALT.key(), STUDY_NAME + ":" + StudyEntry.DEFAULT_COHORT + "<0.3"), null); - assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withStats(StudyEntry.DEFAULT_COHORT, with("af", + assertThat(queryResult, everyResult(allVariantsSummary, withStudy(STUDY_NAME, withStats(StudyEntry.DEFAULT_COHORT, with("af", VariantStats::getAltAlleleFreq, lt(0.3)))))); numResults += queryResult.getNumResults(); @@ -2060,9 +2056,9 @@ public void testGetAllVariants_maf() throws Exception { // System.out.println("queryResult.getNumTotalResults() = " + numResults); queryResult = query(new Query(STATS_MAF.key(), STUDY_NAME + ":" + StudyEntry.DEFAULT_COHORT + ">0.2"), null); - assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withStats(StudyEntry.DEFAULT_COHORT, withMaf(gt(0.2)))))); + assertThat(queryResult, everyResult(allVariantsSummary, withStudy(STUDY_NAME, withStats(StudyEntry.DEFAULT_COHORT, withMaf(gt(0.2)))))); - int expectedCount = (int) VariantMatchers.count(allVariants.getResults(), withStudy(STUDY_NAME, withStats("cohort1", withMaf(gt(0.2))))); + int expectedCount = (int) VariantMatchers.count(allVariantsSummary.getResults(), withStudy(STUDY_NAME, withStats("cohort1", withMaf(gt(0.2))))); numResults = count(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort1>0.2")); assertEquals(expectedCount, numResults); numResults = count(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort1>0.2")); @@ -2071,24 +2067,24 @@ public void testGetAllVariants_maf() throws Exception { assertEquals(expectedCount, queryResult.getNumResults()); queryResult = query(new Query(STUDY.key(), STUDY_NAME).append(STATS_MAF.key(), "cohort1>0.2"), null); assertEquals(expectedCount, queryResult.getNumResults()); - assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withStats("cohort1", withMaf(gt(0.2)))))); + assertThat(queryResult, everyResult(allVariantsSummary, withStudy(STUDY_NAME, withStats("cohort1", withMaf(gt(0.2)))))); queryResult = query(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort2>0.2"), null); - assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withStats("cohort2", withMaf(gt(0.2)))))); + assertThat(queryResult, everyResult(allVariantsSummary, withStudy(STUDY_NAME, withStats("cohort2", withMaf(gt(0.2)))))); queryResult = query(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort2>0.2," + STUDY_NAME + ":cohort2<=0.2"), null); - assertThat(queryResult, numResults(is(allVariants.getNumResults()))); + assertThat(queryResult, numResults(is(allVariantsSummary.getNumResults()))); queryResult = query(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort2>0.2;" + STUDY_NAME + ":cohort2<=0.2"), null); assertThat(queryResult, numResults(is(0))); queryResult = query(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort2>0.2;" + STUDY_NAME + ":cohort1<0.2"), null); - assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, allOf( + assertThat(queryResult, everyResult(allVariantsSummary, withStudy(STUDY_NAME, allOf( withStats("cohort2", withMaf(gt(0.2))), withStats("cohort1", withMaf(lt(0.2))))))); queryResult = query(new Query(STATS_MAF.key(), STUDY_NAME + ":cohort2>0.2," + STUDY_NAME + ":cohort1<0.2"), null); - assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, anyOf( + assertThat(queryResult, everyResult(allVariantsSummary, withStudy(STUDY_NAME, anyOf( withStats("cohort2", withMaf(gt(0.2))), withStats("cohort1", withMaf(lt(0.2))))))); } @@ -2105,11 +2101,11 @@ public void testGetAllVariants_maf_cohortNotFound() throws Exception { public void testGetAllVariants_mgf() throws Exception { queryResult = query(new Query(STATS_MGF.key(), STUDY_NAME + ":ALL>0.2"), null); System.out.println(queryResult.getNumResults()); - assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withStats("ALL", withMgf(gt(0.2)))))); + assertThat(queryResult, everyResult(allVariantsSummary, withStudy(STUDY_NAME, withStats("ALL", withMgf(gt(0.2)))))); queryResult = query(new Query(STATS_MGF.key(), STUDY_NAME + ":ALL<0.2"), null); System.out.println(queryResult.getNumResults()); - assertThat(queryResult, everyResult(allVariants, withStudy(STUDY_NAME, withStats("ALL", withMgf(lt(0.2)))))); + assertThat(queryResult, everyResult(allVariantsSummary, withStudy(STUDY_NAME, withStats("ALL", withMgf(lt(0.2)))))); } @Test @@ -2156,7 +2152,21 @@ public void testGetAllVariants_missingAllele() throws Exception { @Test public void testIncludeAll() { - for (Variant variant : allVariants.getResults()) { + for (Variant variant : query(new Query(INCLUDE_SAMPLE.key(), ALL).append(INCLUDE_FILE.key(), ALL), new QueryOptions()).getResults()) { + assertThat(variant.getStudies(), not(is(Collections.emptyList()))); + assertThat(variant.getStudies().get(0).getStats(), not(is(Collections.emptyList()))); + assertThat(variant.getStudies().get(0).getFiles(), not(is(Collections.emptyList()))); + assertThat(variant.getStudies().get(0).getSamples(), not(is(Collections.emptyList()))); + assertNotNull(variant.getAnnotation()); + } + for (Variant variant : query(new Query(INCLUDE_SAMPLE.key(), ALL), new QueryOptions()).getResults()) { + assertThat(variant.getStudies(), not(is(Collections.emptyList()))); + assertThat(variant.getStudies().get(0).getStats(), not(is(Collections.emptyList()))); + assertThat(variant.getStudies().get(0).getFiles(), not(is(Collections.emptyList()))); + assertThat(variant.getStudies().get(0).getSamples(), not(is(Collections.emptyList()))); + assertNotNull(variant.getAnnotation()); + } + for (Variant variant : query(new Query(INCLUDE_FILE.key(), ALL), new QueryOptions()).getResults()) { assertThat(variant.getStudies(), not(is(Collections.emptyList()))); assertThat(variant.getStudies().get(0).getStats(), not(is(Collections.emptyList()))); assertThat(variant.getStudies().get(0).getFiles(), not(is(Collections.emptyList()))); @@ -2165,6 +2175,17 @@ public void testIncludeAll() { } } + @Test + public void testEmptyQuery() { + for (Variant variant : query(new Query(), new QueryOptions()).getResults()) { + assertThat(variant.getStudies(), not(is(Collections.emptyList()))); + assertThat(variant.getStudies().get(0).getStats(), not(is(Collections.emptyList()))); + assertThat(variant.getStudies().get(0).getFiles(), is(Collections.emptyList())); + assertThat(variant.getStudies().get(0).getSamples(), is(Collections.emptyList())); + assertNotNull(variant.getAnnotation()); + } + } + @Test public void testExcludeChromosome() { @@ -2219,7 +2240,7 @@ public void testExcludeFiles() { @Test public void testReturnNoneFiles() { - queryResult = query(new Query(INCLUDE_FILE.key(), VariantQueryUtils.NONE), new QueryOptions()); + queryResult = query(new Query(INCLUDE_FILE.key(), VariantQueryUtils.NONE).append(INCLUDE_SAMPLE.key(), ALL), new QueryOptions()); assertEquals(allVariants.getResults().size(), queryResult.getResults().size()); for (Variant variant : queryResult.getResults()) { assertThat(variant.getStudies().get(0).getFiles(), is(Collections.emptyList())); @@ -2295,7 +2316,7 @@ public void testExcludeAnnotationParts() { @Test public void testInclude() { - queryResult = query(new Query(), new QueryOptions(QueryOptions.INCLUDE, "studies")); + queryResult = query(new Query(INCLUDE_SAMPLE.key(), ALL), new QueryOptions(QueryOptions.INCLUDE, "studies")); assertEquals(allVariants.getResults().size(), queryResult.getResults().size()); for (Variant variant : queryResult.getResults()) { assertThat(variant.getStudies(), not(is(Collections.emptyList()))); @@ -2306,7 +2327,7 @@ public void testInclude() { } queryResult = query(new Query(), new QueryOptions(QueryOptions.INCLUDE, "annotation")); - assertEquals(allVariants.getResults().size(), queryResult.getResults().size()); + assertEquals(allVariantsSummary.getResults().size(), queryResult.getResults().size()); for (Variant variant : queryResult.getResults()) { assertThat(variant.getStudies(), is(Collections.emptyList())); assertNotNull(variant.getAnnotation()); @@ -2316,29 +2337,29 @@ public void testInclude() { @Test public void testIncludeFormat() { - Variant variant = query(new Query(INCLUDE_SAMPLE_DATA.key(), "GT"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); + Variant variant = query(new Query(INCLUDE_SAMPLE.key(), ALL).append(INCLUDE_SAMPLE_DATA.key(), "GT"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); System.out.println("variant.toJson() = " + variant.toJson()); assertEquals("GT", variant.getStudies().get(0).getSampleDataKeysAsString()); - variant = query(new Query(INCLUDE_SAMPLE_DATA.key(), "GL"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); + variant = query(new Query(INCLUDE_SAMPLE.key(), ALL).append(INCLUDE_SAMPLE_DATA.key(), "GL"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); System.out.println("variant.toJson() = " + variant.toJson()); assertEquals("GL", variant.getStudies().get(0).getSampleDataKeysAsString()); - variant = query(new Query(INCLUDE_SAMPLE_DATA.key(), "GT,GL,DS"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); + variant = query(new Query(INCLUDE_SAMPLE.key(), ALL).append(INCLUDE_SAMPLE_DATA.key(), "GT,GL,DS"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); System.out.println("variant.toJson() = " + variant.toJson()); assertEquals("GT:GL:DS", variant.getStudies().get(0).getSampleDataKeysAsString()); - variant = query(new Query(INCLUDE_SAMPLE_DATA.key(), "GT,XX,GL"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); + variant = query(new Query(INCLUDE_SAMPLE.key(), ALL).append(INCLUDE_SAMPLE_DATA.key(), "GT,XX,GL"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); System.out.println("variant.toJson() = " + variant.toJson()); assertEquals("GT:XX:GL", variant.getStudies().get(0).getSampleDataKeysAsString()); - variant = query(new Query(INCLUDE_SAMPLE_DATA.key(), "GT,SAMPLE_ID"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); + variant = query(new Query(INCLUDE_SAMPLE.key(), ALL).append(INCLUDE_SAMPLE_DATA.key(), "GT,SAMPLE_ID"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); assertEquals("GT:SAMPLE_ID", variant.getStudies().get(0).getSampleDataKeysAsString()); - variant = query(new Query(INCLUDE_SAMPLE_DATA.key(), "all"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); + variant = query(new Query(INCLUDE_SAMPLE.key(), ALL).append(INCLUDE_SAMPLE_DATA.key(), "all"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); assertEquals("GT:DS:GL", variant.getStudies().get(0).getSampleDataKeysAsString()); - variant = query(new Query(INCLUDE_SAMPLE_DATA.key(), "none"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); + variant = query(new Query(INCLUDE_SAMPLE.key(), ALL).append(INCLUDE_SAMPLE_DATA.key(), "none"), new QueryOptions(QueryOptions.LIMIT, 1)).first(); assertEquals("", variant.getStudies().get(0).getSampleDataKeysAsString()); } diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParserTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParserTest.java index f474c4e51ae..88a2a020002 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParserTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/projection/VariantQueryProjectionParserTest.java @@ -2,20 +2,61 @@ import org.junit.Test; import org.opencb.commons.datastore.core.Query; -import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; +import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import java.util.Arrays; +import java.util.Set; import static org.junit.Assert.*; +import static org.opencb.opencga.core.api.ParamConstants.ALL; +import static org.opencb.opencga.core.api.ParamConstants.NONE; import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*; -import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.SAMPLE; +import static org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjectionParser.*; public class VariantQueryProjectionParserTest { @Test public void queryBySampleGenotype() throws Exception { Query query = new Query(STUDY.key(), "s1").append(SAMPLE.key(), "sample1:0/1,0|1,1|0;sample2:0/1,0|1,1|0;sample3:1/1,1|1"); - assertEquals(Arrays.asList("sample1", "sample2", "sample3"), VariantQueryProjectionParser.getIncludeSamplesList(query)); + assertEquals(Arrays.asList("sample1", "sample2", "sample3"), getIncludeSamplesList(query)); + } + + @Test + public void getIncludeStatus() { + checkIncludeStatus(new Query(), IncludeStatus.NONE); + checkIncludeStatus(new Query(INCLUDE_FILE.key(), NONE), IncludeStatus.NONE); + checkIncludeStatus(new Query(INCLUDE_FILE.key(), ALL), IncludeStatus.ALL); + checkIncludeStatus(new Query(INCLUDE_FILE.key(), "myFile.vcf"), IncludeStatus.SOME); + + checkIncludeStatus(new Query(FILE.key(), "myFile.vcf"), IncludeStatus.SOME); + checkIncludeStatus(new Query(FILE_DATA.key(), "myFile.vcfL:FILTER=PASS"), IncludeStatus.SOME); + checkIncludeStatus(new Query(FILE.key(), "myFile.vcf").append(INCLUDE_FILE.key(), ALL), IncludeStatus.ALL); + checkIncludeStatus(new Query(FILE.key(), "myFile.vcf").append(INCLUDE_FILE.key(), NONE), IncludeStatus.NONE); + + checkIncludeStatus(new Query(INCLUDE_SAMPLE.key(), NONE), IncludeStatus.NONE); + checkIncludeStatus(new Query(INCLUDE_SAMPLE.key(), ALL), IncludeStatus.ALL); + checkIncludeStatus(new Query(INCLUDE_SAMPLE.key(), "mySample"), IncludeStatus.SOME); + + checkIncludeStatus(new Query(SAMPLE.key(), "mySample"), IncludeStatus.SOME); + checkIncludeStatus(new Query(SAMPLE_DATA.key(), "mySample:DP>5"), IncludeStatus.SOME); + + checkIncludeStatus(new Query(SAMPLE.key(), "mySample").append(INCLUDE_FILE.key(), NONE), IncludeStatus.SOME, IncludeStatus.NONE); + checkIncludeStatus(new Query(SAMPLE.key(), "mySample").append(INCLUDE_FILE.key(), NONE), IncludeStatus.SOME, IncludeStatus.NONE, VariantField.parseInclude("samples")); + checkIncludeStatus(new Query(SAMPLE.key(), "mySample").append(INCLUDE_FILE.key(), NONE), IncludeStatus.NONE, IncludeStatus.NONE, VariantField.parseInclude("annotation")); + checkIncludeStatus(new Query(SAMPLE.key(), "mySample"), IncludeStatus.NONE, IncludeStatus.NONE, VariantField.parseInclude("annotation")); + } + + private void checkIncludeStatus(Query query, IncludeStatus includeStatus) { + checkIncludeStatus(query, includeStatus, includeStatus); + } + + private void checkIncludeStatus(Query query, IncludeStatus sampleStatus, IncludeStatus fileStatus) { + checkIncludeStatus(query, sampleStatus, fileStatus, VariantField.all()); + } + + private void checkIncludeStatus(Query query, IncludeStatus sampleStatus, IncludeStatus fileStatus, Set all) { + assertEquals(sampleStatus, getIncludeSampleStatus(query, all)); + assertEquals(fileStatus, getIncludeFileStatus(query, all)); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml index 0167014f7dd..f16e244e7fe 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/pom.xml @@ -23,7 +23,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java index 700d6006864..557f6675403 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageEngine.java @@ -26,7 +26,10 @@ import org.apache.hadoop.hbase.util.Bytes; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.VariantType; -import org.opencb.commons.datastore.core.*; +import org.opencb.commons.datastore.core.DataResult; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.commons.datastore.core.Query; +import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.opencga.core.common.UriUtils; import org.opencb.opencga.core.config.DatabaseCredentials; import org.opencb.opencga.storage.core.StoragePipelineResult; @@ -38,7 +41,6 @@ import org.opencb.opencga.storage.core.io.managers.IOConnectorProvider; import org.opencb.opencga.storage.core.metadata.VariantMetadataFactory; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; -import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; import org.opencb.opencga.storage.core.metadata.models.StudyMetadata; import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; @@ -85,7 +87,7 @@ import org.opencb.opencga.storage.hadoop.variant.index.SampleIndexMendelianErrorQueryExecutor; import org.opencb.opencga.storage.hadoop.variant.index.SampleIndexVariantAggregationExecutor; import org.opencb.opencga.storage.hadoop.variant.index.SampleIndexVariantQueryExecutor; -import org.opencb.opencga.storage.hadoop.variant.index.family.FamilyIndexDriver; +import org.opencb.opencga.storage.hadoop.variant.index.family.FamilyIndexLoader; import org.opencb.opencga.storage.hadoop.variant.index.sample.*; import org.opencb.opencga.storage.hadoop.variant.io.HadoopVariantExporter; import org.opencb.opencga.storage.hadoop.variant.score.HadoopVariantScoreLoader; @@ -336,78 +338,8 @@ public void sampleIndexAnnotate(String study, List samples, ObjectMap op @Override public DataResult> familyIndex(String study, List> trios, ObjectMap options) throws StorageEngineException { options = getMergedOptions(options); - trios = new LinkedList<>(trios); - DataResult> dr = new DataResult<>(); - dr.setResults(trios); - dr.setEvents(new LinkedList<>()); - - boolean overwrite = options.getBoolean(FamilyIndexDriver.OVERWRITE); - if (trios.isEmpty()) { - throw new StorageEngineException("Undefined family trios"); - } - int studyId = getMetadataManager().getStudyId(study); - Iterator> iterator = trios.iterator(); - while (iterator.hasNext()) { - List trioIds = new ArrayList<>(3); - List trio = iterator.next(); - for (String sample : trio) { - Integer sampleId; - if (sample.equals("-")) { - sampleId = -1; - } else { - sampleId = getMetadataManager().getSampleId(studyId, sample); - if (sampleId == null) { - throw new IllegalArgumentException("Sample '" + sample + "' not found."); - } - } - trioIds.add(sampleId); - } - if (trioIds.size() != 3) { - throw new IllegalArgumentException("Found trio with " + trioIds.size() + " members, instead of 3: " + trioIds); - } - SampleMetadata sampleMetadata = getMetadataManager().getSampleMetadata(studyId, trioIds.get(2)); - if (!overwrite && sampleMetadata.getMendelianErrorStatus().equals(TaskMetadata.Status.READY)) { - String msg = "Skip sample " + sampleMetadata.getName() + ". Already precomputed!"; - logger.info(msg); - dr.getEvents().add(new Event(Event.Type.INFO, msg)); - iterator.remove(); - } else { - Integer fatherId = trioIds.get(0); - boolean fatherDefined = fatherId != -1; - Integer motherId = trioIds.get(1); - boolean motherDefined = motherId != -1; - if (fatherDefined && !fatherId.equals(sampleMetadata.getFather()) - || motherDefined && !motherId.equals(sampleMetadata.getMother())) { - getMetadataManager().updateSampleMetadata(studyId, sampleMetadata.getId(), s -> { - if (fatherDefined) { - sampleMetadata.setFather(fatherId); - } - if (motherDefined) { - sampleMetadata.setMother(motherId); - } - return sampleMetadata; - }); - } - } - } - if (trios.isEmpty()) { - logger.info("Nothing to do!"); - return dr; - } - if (trios.size() < 500) { - options.put(FamilyIndexDriver.TRIOS, trios.stream().map(trio -> String.join(",", trio)).collect(Collectors.joining(";"))); - } else { - CohortMetadata cohortMetadata = getMetadataManager().registerTemporaryCohort(study, "pendingFamilyIndexSamples", - trios.stream().map(t -> t.get(2)).collect(Collectors.toList())); - - options.put(FamilyIndexDriver.TRIOS_COHORT, cohortMetadata.getName()); - options.put(FamilyIndexDriver.TRIOS_COHORT_DELETE, true); - } - - getMRExecutor().run(FamilyIndexDriver.class, FamilyIndexDriver.buildArgs(getArchiveTableName(studyId), getVariantTableName(), - studyId, null, options), options, - "Precompute mendelian errors for " + (trios.size() == 1 ? "trio " + trios.get(0) : trios.size() + " trios")); - return dr; + return new FamilyIndexLoader(getSampleIndexDBAdaptor(), getDBAdaptor(), getMRExecutor()) + .load(study, trios, options); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java index 88b0a3ef9b7..3bb92616694 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/HadoopVariantStorageOptions.java @@ -88,6 +88,7 @@ public enum HadoopVariantStorageOptions implements ConfigurationOption { SAMPLE_INDEX_TABLE_PRESPLIT_SIZE("storage.hadoop.sampleIndex.table.preSplit.samplesPerSplit", 15), SAMPLE_INDEX_BUILD_MAX_SAMPLES_PER_MR("storage.hadoop.sampleIndex.build.maxSamplesPerMR", 5000), SAMPLE_INDEX_ANNOTATION_MAX_SAMPLES_PER_MR("storage.hadoop.sampleIndex.annotation.maxSamplesPerMR", 5000), + SAMPLE_INDEX_FAMILY_MAX_TRIOS_PER_MR("storage.hadoop.sampleIndex.family.maxTriosPerMR", 1000), ///////////////////////// // Annotation index table configuration diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java index 2cdf7ee875f..66f50aac247 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/MRExecutor.java @@ -116,5 +116,11 @@ protected List getEnv() { return env; } + protected static void redactSecureString(String[] args, String key) { + int passwordIdx = Arrays.binarySearch(args, key); + if (passwordIdx > 0 && args.length > passwordIdx) { + args[passwordIdx + 1] = "_redacted_"; + } + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java index af25edd2fdd..3b483dd842b 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SshMRExecutor.java @@ -81,10 +81,8 @@ private Path copyOutputFiles(String[] args, List env) throws StorageEngi } protected String buildCommand(String executable, String... args) { - int passwordIdx = Arrays.binarySearch(args, MR_EXECUTOR_SSH_PASSWORD.key()); - if (passwordIdx > 0 && args.length > passwordIdx) { - args[passwordIdx + 1] = "_redacted_"; - } + redactSecureString(args, MR_EXECUTOR_SSH_PASSWORD.key()); + redactSecureString(args, "token"); String argsString = Commandline.toString(args); String remoteOpencgaHome = getOptions().getString(MR_EXECUTOR_SSH_REMOTE_OPENCGA_HOME.key()); String commandLine = getBinPath(HADOOP_SSH_BIN); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SystemMRExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SystemMRExecutor.java index 18597b23ddb..bb226be8d44 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SystemMRExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/executors/SystemMRExecutor.java @@ -19,6 +19,8 @@ import org.apache.tools.ant.types.Commandline; import org.opencb.commons.exec.Command; +import static org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions.MR_EXECUTOR_SSH_PASSWORD; + /** * Created on 18/01/16 . * @@ -28,7 +30,7 @@ public class SystemMRExecutor extends MRExecutor { @Override public int run(String executable, String[] args) { - return run(executable + " " + Commandline.toString(args)); + return run(buildCommandLine(executable, args)); } public int run(String commandLine) { @@ -36,4 +38,10 @@ public int run(String commandLine) { command.run(); return command.getExitValue(); } + + private String buildCommandLine(String executable, String[] args) { + redactSecureString(args, MR_EXECUTOR_SSH_PASSWORD.key()); + redactSecureString(args, "token"); + return executable + " " + Commandline.toString(args); + } } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java index 51d4059c8ca..4a0cb73a85a 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/SampleIndexVariantAggregationExecutor.java @@ -10,12 +10,13 @@ import org.opencb.commons.datastore.core.FacetField; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; -import org.opencb.commons.datastore.solr.FacetQueryParser; import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; import org.opencb.opencga.storage.core.utils.iterators.CloseableIterator; import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; +import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam; +import org.opencb.opencga.storage.core.variant.query.VariantQueryUtils; import org.opencb.opencga.storage.core.variant.query.executors.VariantAggregationExecutor; import org.opencb.opencga.storage.core.variant.query.executors.accumulators.*; import org.opencb.opencga.storage.hadoop.variant.index.annotation.AnnotationIndexConverter; @@ -24,6 +25,7 @@ import java.util.*; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.REGION; @@ -48,6 +50,7 @@ public class SampleIndexVariantAggregationExecutor extends VariantAggregationExe "length", "titv" )); + public static final Pattern CATEGORICAL_PATTERN = Pattern.compile("^([a-zA-Z][a-zA-Z0-9_.]+)(\\[[a-zA-Z0-9\\-,:*]+])?(:\\*|:\\d+)?$"); public SampleIndexVariantAggregationExecutor(VariantStorageMetadataManager metadataManager, SampleIndexDBAdaptor sampleIndexDBAdaptor) { @@ -56,8 +59,23 @@ public SampleIndexVariantAggregationExecutor(VariantStorageMetadataManager metad } @Override - protected boolean canUseThisExecutor(Query query, QueryOptions options, String facet) throws Exception { + protected boolean canUseThisExecutor(Query query, QueryOptions options, String facet, List reason) throws Exception { if (SampleIndexQueryParser.validSampleIndexQuery(query)) { + // Check if the query is fully covered + Query filteredQuery = new Query(query); + sampleIndexDBAdaptor.getSampleIndexQueryParser().parse(filteredQuery); + Set params = VariantQueryUtils.validParams(filteredQuery, true); + params.remove(VariantQueryParam.STUDY); + + if (!params.isEmpty()) { + // Query filters not covered + for (VariantQueryParam param : params) { + reason.add("Can't use " + getClass().getSimpleName() + " filtering by \"" + + param.key() + " : " + filteredQuery.getString(param.key()) + "\""); + } + return false; + } + for (String fieldFacedMulti : facet.split(FACET_SEPARATOR)) { for (String fieldFaced : fieldFacedMulti.split(NESTED_FACET_SEPARATOR)) { String key = fieldFaced.split("\\[")[0]; @@ -125,7 +143,7 @@ private FacetFieldAccumulator createAccumulator(Query q // Reverse traverse for (int i = split.length - 1; i >= 0; i--) { String facetField = split[i]; - Matcher matcher = FacetQueryParser.CATEGORICAL_PATTERN.matcher(facetField); + Matcher matcher = CATEGORICAL_PATTERN.matcher(facetField); if (!matcher.find()) { throw new VariantQueryException("Malformed aggregation stats query: " + facetField); } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexDriver.java index fc245cdde2e..7e459506fec 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexDriver.java @@ -50,8 +50,11 @@ public class FamilyIndexDriver extends AbstractVariantsTableDriver { public static final String OUTPUT = "output"; private static final String TRIOS_LIST = "FamilyIndexDriver.trios_list"; + // Samples where at least one parent is not in its file + private static final String SAMPLES_WITH_UNKNOWN_PARENT_GENOTYPES = "FamilyIndexDriver.samples_with_unknown_parent_genotypes"; private static final int MISSING_SAMPLE = -1; private List sampleIds; + private List samplesWithUnknownParentGenotypes; private boolean partial; private String region; private String sampleIndexTableName; @@ -87,6 +90,7 @@ protected void parseAndValidateParameters() throws IOException { String triosStr = getParam(TRIOS); sampleIds = new LinkedList<>(); + samplesWithUnknownParentGenotypes = new LinkedList<>(); if (StringUtils.isNotEmpty(triosStr)) { String[] trios = triosStr.split(";"); List trioList = new ArrayList<>(3); @@ -129,12 +133,12 @@ protected void parseAndValidateParameters() throws IOException { for (Integer sample : cohortMetadata.getSamples()) { SampleMetadata sampleMetadata = getMetadataManager().getSampleMetadata(getStudyId(), sample); if (sampleMetadata.getFather() == null) { - sampleIds.add(-1); + sampleIds.add(MISSING_SAMPLE); } else { sampleIds.add(sampleMetadata.getFather()); } if (sampleMetadata.getMother() == null) { - sampleIds.add(-1); + sampleIds.add(MISSING_SAMPLE); } else { sampleIds.add(sampleMetadata.getMother()); } @@ -151,6 +155,41 @@ protected void parseAndValidateParameters() throws IOException { if (sampleIds.size() % 3 != 0) { throw new IllegalArgumentException("Wrong number of samples in trios!"); } + sampleIds = new ArrayList<>(sampleIds); + for (int i = 0; i < sampleIds.size(); i += 3) { + Integer father = sampleIds.get(i); + Integer mother = sampleIds.get(i + 1); + Integer child = sampleIds.get(i + 2); + + boolean parentsInSeparatedFile = false; + SampleMetadata childMetadata = getMetadataManager().getSampleMetadata(getStudyId(), child); + List childFiles = childMetadata.getFiles(); + SampleMetadata fatherMetadata = null; + if (father != MISSING_SAMPLE) { + fatherMetadata = getMetadataManager().getSampleMetadata(getStudyId(), father); + List fatherFiles = fatherMetadata.getFiles(); + if (fatherFiles.size() != childFiles.size() || !fatherFiles.containsAll(childFiles)) { + parentsInSeparatedFile = true; + } + } + SampleMetadata motherMetadata = null; + if (mother != MISSING_SAMPLE) { + motherMetadata = getMetadataManager().getSampleMetadata(getStudyId(), mother); + List motherFiles = motherMetadata.getFiles(); + if (motherFiles.size() != childFiles.size() || !motherFiles.containsAll(childFiles)) { + parentsInSeparatedFile = true; + } + } + if (parentsInSeparatedFile) { + samplesWithUnknownParentGenotypes.add(child); + if (samplesWithUnknownParentGenotypes.size() < 20) { + LOGGER.info(" - Trio from multiple files: sample: " + childMetadata.getName() + + ", father: " + (fatherMetadata == null ? "none" : fatherMetadata.getName()) + + ", mother: " + (motherMetadata == null ? "none" : motherMetadata.getName())); + } + } + } + LOGGER.info("Found {} trios where some parent was loaded from a different file", samplesWithUnknownParentGenotypes.size()); region = getParam(VariantQueryParam.REGION.key(), ""); @@ -169,7 +208,10 @@ protected Job setupJob(Job job, String archiveTable, String variantTable) throws LOGGER.info("Calculate Mendelian Errors for " + (sampleIds.size() / 3) + " trios"); - job.getConfiguration().set(TRIOS_LIST, sampleIds.stream().map(Objects::toString).collect(Collectors.joining(","))); + job.getConfiguration().set(TRIOS_LIST, + sampleIds.stream().map(Objects::toString).collect(Collectors.joining(","))); + job.getConfiguration().set(SAMPLES_WITH_UNKNOWN_PARENT_GENOTYPES, + samplesWithUnknownParentGenotypes.stream().map(Objects::toString).collect(Collectors.joining(","))); for (Integer sampleId : sampleIds) { if (sampleId != MISSING_SAMPLE) { @@ -229,29 +271,32 @@ public static class FamilyIndexMapper extends VariantTableSampleIndexOrderMapper private Map> genotypeCount = new HashMap<>(); private Map familyIndexBuilder = new HashMap<>(); private List> trios; - private byte[] family; + private List triosWithUnknownGenotypes; @Override protected void setup(Context context) throws IOException, InterruptedException { new GenomeHelper(context.getConfiguration()); - family = GenomeHelper.COLUMN_FAMILY_BYTES; int[] sampleIds = context.getConfiguration().getInts(TRIOS_LIST); + Set samplesWithUnknownParentGenotypes = + Arrays.stream(context.getConfiguration().getInts(SAMPLES_WITH_UNKNOWN_PARENT_GENOTYPES)) + .boxed() + .collect(Collectors.toSet()); trios = new ArrayList<>(sampleIds.length / 3); + triosWithUnknownGenotypes = new ArrayList<>(sampleIds.length / 3); for (int i = 0; i < sampleIds.length; i += 3) { + int father = sampleIds[i]; + int mother = sampleIds[i + 1]; + int child = sampleIds[i + 2]; + triosWithUnknownGenotypes.add(samplesWithUnknownParentGenotypes.contains(child)); trios.add(Arrays.asList( - sampleIds[i], - sampleIds[i + 1], - sampleIds[i + 2])); - } - - for (List trio : trios) { - Integer child = trio.get(2); + father, + mother, + child)); familyIndexBuilder.put(child, new FamilyIndexPutBuilder(child)); genotypeCount.put(child, new HashMap<>()); } - } @Override @@ -270,7 +315,8 @@ protected void map(ImmutableBytesWritable key, Result value, Context context) th } }).walk(); - for (List trio : trios) { + for (int i = 0, triosSize = trios.size(); i < triosSize; i++) { + List trio = trios.get(i); Integer father = trio.get(0); Integer mother = trio.get(1); Integer child = trio.get(2); @@ -280,23 +326,44 @@ protected void map(ImmutableBytesWritable key, Result value, Context context) th Set motherDiscrepancies = discrepanciesGtMap.get(mother); Set childDiscrepancies = discrepanciesGtMap.get(child); + String defaultGenotype = triosWithUnknownGenotypes.get(i) ? null : "0/0"; if (fatherDiscrepancies == null && motherDiscrepancies == null && childDiscrepancies == null) { - String fatherGtStr = gtMap.get(father); - String motherGtStr = gtMap.get(mother); - String childGtStr = gtMap.get(child); + String fatherGtStr; + if (father == MISSING_SAMPLE) { + fatherGtStr = null; + } else { + fatherGtStr = gtMap.get(father); + if (fatherGtStr == null) { + context.getCounter(COUNTER_GROUP_NAME, "missing_father_gt").increment(1); + fatherGtStr = defaultGenotype; + } + } + String motherGtStr; + if (mother == MISSING_SAMPLE) { + motherGtStr = null; + } else { + motherGtStr = gtMap.get(mother); + if (motherGtStr == null) { + context.getCounter(COUNTER_GROUP_NAME, "missing_mother_gt").increment(1); + motherGtStr = defaultGenotype; + } + } + String childGtStr = gtMap.getOrDefault(child, "0/0"); builder.addParents(childGtStr, fatherGtStr, motherGtStr); int idx = genotypeCount.get(child).merge(childGtStr, 1, Integer::sum) - 1; computeMendelianError(variant, father, mother, fatherGtStr, motherGtStr, childGtStr, context, builder, idx); } else { if (fatherDiscrepancies == null) { - fatherDiscrepancies = Collections.singleton(gtMap.get(father)); + fatherDiscrepancies = Collections.singleton(father == MISSING_SAMPLE ? null + : gtMap.getOrDefault(father, defaultGenotype)); } if (motherDiscrepancies == null) { - motherDiscrepancies = Collections.singleton(gtMap.get(mother)); + motherDiscrepancies = Collections.singleton(mother == MISSING_SAMPLE ? null + : gtMap.getOrDefault(mother, defaultGenotype)); } if (childDiscrepancies == null) { - childDiscrepancies = Collections.singleton(gtMap.get(child)); + childDiscrepancies = Collections.singleton(gtMap.getOrDefault(child, "0/0")); } // System.out.println("variant = " + variant); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexLoader.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexLoader.java new file mode 100644 index 00000000000..3835a0ddfa8 --- /dev/null +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/family/FamilyIndexLoader.java @@ -0,0 +1,147 @@ +package org.opencb.opencga.storage.hadoop.variant.index.family; + +import org.opencb.commons.datastore.core.DataResult; +import org.opencb.commons.datastore.core.Event; +import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.storage.core.exceptions.StorageEngineException; +import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; +import org.opencb.opencga.storage.core.metadata.models.CohortMetadata; +import org.opencb.opencga.storage.core.metadata.models.SampleMetadata; +import org.opencb.opencga.storage.core.metadata.models.TaskMetadata; +import org.opencb.opencga.storage.hadoop.variant.HadoopVariantStorageOptions; +import org.opencb.opencga.storage.hadoop.variant.adaptors.VariantHadoopDBAdaptor; +import org.opencb.opencga.storage.hadoop.variant.executors.MRExecutor; +import org.opencb.opencga.storage.hadoop.variant.index.sample.SampleIndexDBAdaptor; +import org.opencb.opencga.storage.hadoop.variant.utils.HBaseVariantTableNameGenerator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.stream.Collectors; + +public class FamilyIndexLoader { + + private final VariantStorageMetadataManager metadataManager; + private final SampleIndexDBAdaptor sampleIndexDBAdaptor; + private final MRExecutor mrExecutor; + private final Logger logger = LoggerFactory.getLogger(FamilyIndexLoader.class); + private final HBaseVariantTableNameGenerator tableNameGenerator; + + public FamilyIndexLoader(SampleIndexDBAdaptor sampleIndexDBAdaptor, VariantHadoopDBAdaptor dbAdaptor, MRExecutor mrExecutor) { + this.sampleIndexDBAdaptor = sampleIndexDBAdaptor; + this.metadataManager = dbAdaptor.getMetadataManager(); + this.tableNameGenerator = dbAdaptor.getTableNameGenerator(); + this.mrExecutor = mrExecutor; + } + + public DataResult> load(String study, List> trios, ObjectMap options) throws StorageEngineException { + trios = new LinkedList<>(trios); + DataResult> dr = new DataResult<>(); + dr.setResults(trios); + dr.setEvents(new LinkedList<>()); + + boolean overwrite = options.getBoolean(FamilyIndexDriver.OVERWRITE); + if (trios.isEmpty()) { + throw new StorageEngineException("Undefined family trios"); + } + int studyId = metadataManager.getStudyId(study); + Iterator> iterator = trios.iterator(); + while (iterator.hasNext()) { + List trioIds = new ArrayList<>(3); + List trio = iterator.next(); + for (String sample : trio) { + Integer sampleId; + if (sample.equals("-")) { + sampleId = -1; + } else { + sampleId = metadataManager.getSampleId(studyId, sample); + if (sampleId == null) { + throw new IllegalArgumentException("Sample '" + sample + "' not found."); + } + } + trioIds.add(sampleId); + } + if (trioIds.size() != 3) { + throw new IllegalArgumentException("Found trio with " + trioIds.size() + " members, instead of 3: " + trioIds); + } + SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(studyId, trioIds.get(2)); + if (!overwrite && sampleMetadata.getMendelianErrorStatus().equals(TaskMetadata.Status.READY)) { + String msg = "Skip sample " + sampleMetadata.getName() + ". Already precomputed!"; + logger.info(msg); + dr.getEvents().add(new Event(Event.Type.INFO, msg)); + iterator.remove(); + } else { + Integer fatherId = trioIds.get(0); + boolean fatherDefined = fatherId != -1; + Integer motherId = trioIds.get(1); + boolean motherDefined = motherId != -1; + if (fatherDefined && !fatherId.equals(sampleMetadata.getFather()) + || motherDefined && !motherId.equals(sampleMetadata.getMother())) { + metadataManager.updateSampleMetadata(studyId, sampleMetadata.getId(), s -> { + if (fatherDefined) { + sampleMetadata.setFather(fatherId); + } + if (motherDefined) { + sampleMetadata.setMother(motherId); + } + return sampleMetadata; + }); + } + } + } + if (trios.isEmpty()) { + logger.info("Nothing to do!"); + return dr; + } + + int batchSize = options.getInt(HadoopVariantStorageOptions.SAMPLE_INDEX_FAMILY_MAX_TRIOS_PER_MR.key(), + HadoopVariantStorageOptions.SAMPLE_INDEX_FAMILY_MAX_TRIOS_PER_MR.defaultValue()); + List>> batches = splitLists(trios, batchSize); + if (batches.size() == 1) { + run(study, trios, options, studyId); + } else { + logger.warn("Unable to run family index in one single MapReduce operation."); + logger.info("Split in {} jobs of {} samples each.", batches, batches.get(0).size()); + for (int i = 0; i < batches.size(); i++) { + List> batch = batches.get(i); + logger.info("Running MapReduce {}/{} over {} trios", i + 1, batches, batch.size()); + run(study, batch, options, studyId); + } + } + return dr; + } + + private void run(String study, List> trios, ObjectMap options, int studyId) throws StorageEngineException { + if (trios.size() < 500) { + options.put(FamilyIndexDriver.TRIOS, trios.stream().map(trio -> String.join(",", trio)).collect(Collectors.joining(";"))); + } else { + CohortMetadata cohortMetadata = metadataManager.registerTemporaryCohort(study, "pendingFamilyIndexSamples", + trios.stream().map(t -> t.get(2)).collect(Collectors.toList())); + + options.put(FamilyIndexDriver.TRIOS_COHORT, cohortMetadata.getName()); + options.put(FamilyIndexDriver.TRIOS_COHORT_DELETE, true); + } + options.put(FamilyIndexDriver.OUTPUT, tableNameGenerator.getSampleIndexTableName(studyId)); + + mrExecutor.run(FamilyIndexDriver.class, FamilyIndexDriver.buildArgs( + tableNameGenerator.getArchiveTableName(studyId), + tableNameGenerator.getVariantTableName(), + studyId, null, options), + "Precompute mendelian errors for " + (trios.size() == 1 ? "trio " + trios.get(0) : trios.size() + " trios")); + } + + private static List> splitLists(List list, int maxBatchSize) { + int batchSize = maxBatchSize; + int batches = (int) Math.round(Math.ceil(list.size() / ((float) batchSize))); + batchSize = (int) Math.round(Math.ceil(list.size() / ((float) batches))); + List> parts = new ArrayList<>(batches); + for (int i = 0; i < batches; i++) { + parts.add(list.subList(i * batchSize, Math.min((i + 1) * batchSize, list.size()))); + } + return parts; + } + +} diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java index 56fcc8858cc..8062860e3b8 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexDBAdaptor.java @@ -600,6 +600,19 @@ private Scan parse(SingleSampleIndexQuery query, List regions, boolean o // logger.info("Filters = " + scan.getFilter()); // logger.info("Batch = " + scan.getBatch()); logger.info("Caching = " + scan.getCaching()); + printQuery(query); + +// try { +// System.out.println("scan = " + scan.toJSON() + " " + rowKeyToString(scan.getStartRow()) + " -> + " +// + rowKeyToString(scan.getStopRow())); +// } catch (IOException e) { +// throw VariantQueryException.internalException(e); +// } + + return scan; + } + + protected static void printQuery(SingleSampleIndexQuery query) { logger.info("AnnotationIndex = " + IndexUtils.maskToString(query.getAnnotationIndexMask(), query.getAnnotationIndex())); if (query.getAnnotationIndexQuery().getBiotypeMask() != EMPTY_MASK) { logger.info("BiotypeIndex = " + IndexUtils.byteToString(query.getAnnotationIndexQuery().getBiotypeMask())); @@ -640,15 +653,6 @@ private Scan parse(SingleSampleIndexQuery query, List regions, boolean o if (query.hasMotherFilter()) { logger.info("MotherFilter = " + IndexUtils.parentFilterToString(query.getMotherFilter())); } - -// try { -// System.out.println("scan = " + scan.toJSON() + " " + rowKeyToString(scan.getStartRow()) + " -> + " -// + rowKeyToString(scan.getStopRow())); -// } catch (IOException e) { -// throw VariantQueryException.internalException(e); -// } - - return scan; } private int toSampleId(int studyId, String sample) { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java index 2615c50121e..8d055659fe9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParser.java @@ -171,6 +171,7 @@ public SampleIndexQuery parse(Query query) { // Get samples with non negated genotypes Map> map = new HashMap<>(); + Map allSamples = new HashMap<>(); queryOperation = parseGenotypeFilter(query.getString(GENOTYPE.key()), map); // Extract parents from each sample @@ -181,6 +182,7 @@ public SampleIndexQuery parse(Query query) { Integer sampleId = metadataManager.getSampleId(studyId, sample); SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(studyId, sampleId); + allSamples.put(sampleMetadata.getName(), sampleMetadata); List gts = GenotypeClass.filter(entry.getValue(), allGenotypes); if (gts.stream().allMatch(SampleIndexSchema::validGenotype)) { @@ -234,6 +236,7 @@ public SampleIndexQuery parse(Query query) { // Discard samples with negated genotypes negatedGenotypesSamples.add(sampleName); partialIndex = true; + logger.info("Set partialGtIndex to true. Prev value: {}", partialGtIndex); partialGtIndex = true; } else { samplesMap.put(sampleName, entry.getValue()); @@ -247,28 +250,47 @@ public SampleIndexQuery parse(Query query) { Integer fatherId = metadataManager.getSampleId(studyId, father); boolean includeDiscrepancies = VariantStorageEngine.SplitData.MULTI .equals(metadataManager.getLoadSplitData(studyId, fatherId)); - boolean[] filter = buildParentGtFilter(gtMap.get(father), includeDiscrepancies); + List fatherFiles = allSamples.get(father).getFiles(); + List sampleFiles = allSamples.get(sampleName).getFiles(); + boolean parentInSeparatedFile = + fatherFiles.size() != sampleFiles.size() || !fatherFiles.containsAll(sampleFiles); + boolean[] filter = buildParentGtFilter(gtMap.get(father), includeDiscrepancies, parentInSeparatedFile); if (!isFullyCoveredParentFilter(filter)) { + logger.debug("FATHER - Set partialGtIndex to true. Prev value: {}", partialGtIndex); partialGtIndex = true; } +// logger.info("Father={}, includeDiscrepancies={}, fatherFiles={}, sampleFiles={}, " +// + "parentInSeparatedFile={}, fullyCoveredFilter={}", +// father, includeDiscrepancies, fatherFiles, sampleFiles, +// parentInSeparatedFile, isFullyCoveredParentFilter(filter)); fatherFilterMap.put(sampleName, filter); } if (mother != null) { Integer motherId = metadataManager.getSampleId(studyId, mother); boolean includeDiscrepancies = VariantStorageEngine.SplitData.MULTI .equals(metadataManager.getLoadSplitData(studyId, motherId)); - boolean[] filter = buildParentGtFilter(gtMap.get(mother), includeDiscrepancies); + List motherFiles = allSamples.get(mother).getFiles(); + List sampleFiles = allSamples.get(sampleName).getFiles(); + boolean parentInSeparatedFile = + motherFiles.size() != sampleFiles.size() || !motherFiles.containsAll(sampleFiles); + boolean[] filter = buildParentGtFilter(gtMap.get(mother), includeDiscrepancies, parentInSeparatedFile); if (!isFullyCoveredParentFilter(filter)) { + logger.debug("MOTHER - Set partialGtIndex to true. Prev value: {}", partialGtIndex); partialGtIndex = true; } +// logger.info("Mother={}, includeDiscrepancies={}, motherFiles={}, sampleFiles={}, " +// + "parentInSeparatedFile={}, fullyCoveredFilter={}", +// mother, includeDiscrepancies, motherFiles, sampleFiles, +// parentInSeparatedFile, isFullyCoveredParentFilter(filter)); motherFilterMap.put(sampleName, filter); } } } - // If not all genotypes are valid, query is not covered - if (!negatedSamples.isEmpty()) { - partialGtIndex = true; - } + } + // If not all genotypes are valid, query is not covered + if (!negatedSamples.isEmpty()) { + logger.debug("NEG_SAMPLES - Set partialGtIndex to true. Prev value: {}", partialGtIndex); + partialGtIndex = true; } for (String negatedSample : negatedSamples) { @@ -522,7 +544,7 @@ protected Set findChildren(Map> gtMap, QueryOperati return childrenSet; } - protected static boolean[] buildParentGtFilter(List parentGts, boolean includeDiscrepancies) { + protected static boolean[] buildParentGtFilter(List parentGts, boolean includeDiscrepancies, boolean parentInSeparatedFile) { boolean[] filter = new boolean[GenotypeCodec.NUM_CODES]; // all false by default for (String gt : parentGts) { filter[GenotypeCodec.encode(gt)] = true; @@ -531,8 +553,11 @@ protected static boolean[] buildParentGtFilter(List parentGts, boolean i filter[GenotypeCodec.DISCREPANCY_SIMPLE] = true; filter[GenotypeCodec.DISCREPANCY_ANY] = true; } - if (filter[GenotypeCodec.MISSING_HOM] || filter[GenotypeCodec.HOM_REF_UNPHASED] || filter[GenotypeCodec.HOM_REF_PHASED]) { - filter[GenotypeCodec.UNKNOWN] = true; + if (parentInSeparatedFile) { + // If parents were in separated files, missing and hom_ref might be registered as "unknown" + if (filter[GenotypeCodec.MISSING_HOM] || filter[GenotypeCodec.HOM_REF_UNPHASED] || filter[GenotypeCodec.HOM_REF_PHASED]) { + filter[GenotypeCodec.UNKNOWN] = true; + } } return filter; } diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/HadoopMRVariantStatisticsManager.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/HadoopMRVariantStatisticsManager.java index e02994f2aae..e7d88a901d6 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/HadoopMRVariantStatisticsManager.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/HadoopMRVariantStatisticsManager.java @@ -70,7 +70,7 @@ public void calculateStatistics(String study, List cohorts, QueryOptions dbAdaptor.getTableNameGenerator().getArchiveTableName(sm.getId()), dbAdaptor.getTableNameGenerator().getVariantTableName(), sm.getId(), Collections.emptyList(), options); - mrExecutor.run(VariantStatsDriver.class, args, options, "Calculate stats of cohorts " + cohorts); + mrExecutor.run(VariantStatsDriver.class, args, "Calculate stats of cohorts " + cohorts); } catch (Exception e) { error = true; throw e; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/VariantStatsDriver.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/VariantStatsDriver.java index e964a848fc1..3600988a448 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/VariantStatsDriver.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/main/java/org/opencb/opencga/storage/hadoop/variant/stats/VariantStatsDriver.java @@ -44,7 +44,7 @@ * * @author Jacobo Coll <jacobo167@gmail.com> */ -public class VariantStatsDriver extends AbstractVariantsTableDriver { +public class VariantStatsDriver extends AbstractVariantsTableDriver { private static final String STATS_OPERATION_NAME = "stats"; public static final String STATS_PARTIAL_RESULTS = "stats.partial-results"; public static final String OUTPUT = "output"; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java index 348226d8a14..f50047fcade 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/VariantHbaseTestUtils.java @@ -38,6 +38,7 @@ import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.io.DataWriter; import org.opencb.commons.utils.CompressionUtils; +import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.common.TimeUtils; import org.opencb.opencga.storage.core.StoragePipelineResult; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; @@ -416,7 +417,9 @@ private static void printAnnotationIndexTable(VariantHadoopDBAdaptor dbAdaptor, private static void printVcf(StudyMetadata studyMetadata, VariantHadoopDBAdaptor dbAdaptor, Path outDir) throws IOException { try (OutputStream os = new FileOutputStream(outDir.resolve("variant." + studyMetadata.getName() + ".vcf").toFile())) { - Query query = new Query(VariantQueryParam.STUDY.key(), studyMetadata.getName()).append(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "."); + Query query = new Query(VariantQueryParam.STUDY.key(), studyMetadata.getName()) + .append(VariantQueryParam.INCLUDE_SAMPLE.key(), ParamConstants.ALL) + .append(VariantQueryParam.UNKNOWN_GENOTYPE.key(), "."); QueryOptions queryOptions = new QueryOptions(); DataWriter writer = new VariantWriterFactory(dbAdaptor).newDataWriter(VariantWriterFactory.VariantOutputFormat.VCF, os, query, queryOptions); writer.open(); diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorMultiFileTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorMultiFileTest.java index 5e1f15a4e63..48a0c6f7234 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorMultiFileTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/adaptors/HadoopVariantDBAdaptorMultiFileTest.java @@ -9,6 +9,7 @@ import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.opencga.core.api.ParamConstants; import org.opencb.opencga.core.response.VariantQueryResult; import org.opencb.opencga.storage.core.variant.VariantStorageEngine; import org.opencb.opencga.storage.core.variant.VariantStorageOptions; diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java index 693f4ede3ec..9912108f5ed 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexQueryParserTest.java @@ -474,13 +474,26 @@ public void parseFamilyQuery() { indexQuery = parse(query); assertEquals(Collections.singleton("fam1_child"), indexQuery.getSamplesMap().keySet()); assertEquals(1, indexQuery.getFatherFilterMap().size()); + assertFalse(isValidParam(query, GENOTYPE)); + + query = new Query(GENOTYPE.key(), "fam2_child:0/1;fam2_father:0/1;fam2_mother:0/1,0/0"); + indexQuery = parse(query); + assertEquals(Collections.singleton("fam2_child"), indexQuery.getSamplesMap().keySet()); + assertEquals(1, indexQuery.getFatherFilterMap().size()); + assertEquals(1, indexQuery.getMotherFilterMap().size()); + assertEquals(true, indexQuery.getMotherFilter("fam2_child")[GenotypeCodec.HOM_REF_UNPHASED]); + assertEquals(true, indexQuery.getMotherFilter("fam2_child")[GenotypeCodec.HET_REF_UNPHASED]); + // Family2 members are from different files. Can't exclude genotype filter + assertTrue(isValidParam(query, GENOTYPE)); query = new Query(GENOTYPE.key(), "fam1_child:0/1;fam1_father:0/1;fam1_mother:0/1,0/0"); indexQuery = parse(query); assertEquals(Collections.singleton("fam1_child"), indexQuery.getSamplesMap().keySet()); assertEquals(1, indexQuery.getFatherFilterMap().size()); + assertEquals(1, indexQuery.getMotherFilterMap().size()); assertEquals(true, indexQuery.getMotherFilter("fam1_child")[GenotypeCodec.HOM_REF_UNPHASED]); assertEquals(true, indexQuery.getMotherFilter("fam1_child")[GenotypeCodec.HET_REF_UNPHASED]); + assertFalse(isValidParam(query, GENOTYPE)); // Can not use family query with OR operator query = new Query(SAMPLE.key(), "fam1_child,fam1_father,fam1_mother"); @@ -505,14 +518,16 @@ public void parseFamilyQuery_filter() { indexQuery = parse(query); assertEquals(Collections.singleton("fam1_child"), indexQuery.getSamplesMap().keySet()); assertEquals(1, indexQuery.getFatherFilterMap().size()); - assertFalse(query.containsKey(FILTER.key())); + assertFalse(isValidParam(query, FILTER)); + assertFalse(isValidParam(query, GENOTYPE)); // Samples from family2 are not in the same file, so we can not remove the FILTER parameter query = new Query(SAMPLE.key(), "fam2_child;fam2_father;fam2_mother").append(FILTER.key(), "PASS"); indexQuery = parse(query); assertEquals(Collections.singleton("fam2_child"), indexQuery.getSamplesMap().keySet()); assertEquals(1, indexQuery.getFatherFilterMap().size()); - assertTrue(query.containsKey(FILTER.key())); + assertTrue(isValidParam(query, FILTER)); + assertFalse(isValidParam(query, GENOTYPE)); } @Test diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java index cebaf78729b..864ae3b054e 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-core/src/test/java/org/opencb/opencga/storage/hadoop/variant/index/sample/SampleIndexTest.java @@ -84,6 +84,9 @@ public class SampleIndexTest extends VariantStorageBaseTest implements HadoopVar Arrays.asList("NA19660", "NA19661", "NA19685"), Arrays.asList("NA19660", "NA19661", "NA19600") ); + private static List> triosPlatinum = Arrays.asList( + Arrays.asList("NA12877", "-", "NA12878") + ); @Before public void before() throws Exception { @@ -128,6 +131,7 @@ public void load() throws Exception { runETL(engine, getPlatinumFile(1), outputUri, params, true, true, true); this.variantStorageEngine.annotate(new Query(), new QueryOptions(DefaultVariantAnnotationManager.OUT_DIR, outputUri)); + engine.familyIndex(STUDY_NAME_3, triosPlatinum, new ObjectMap()); VariantHbaseTestUtils.printVariants(dbAdaptor, newOutputUri()); } @@ -208,6 +212,14 @@ public void regenerateSampleIndex() throws Exception { dbAdaptor.getVariantTable(), studyId, Collections.emptySet(), options), options); + } else if (study.equals(STUDY_NAME_3)) { + options.put(FamilyIndexDriver.TRIOS, triosPlatinum.stream().map(trio -> String.join(",", trio)).collect(Collectors.joining(";"))); + options.put(FamilyIndexDriver.OVERWRITE, true); + new TestMRExecutor().run(FamilyIndexDriver.class, FamilyIndexDriver.buildArgs( + dbAdaptor.getArchiveTableName(studyId), + dbAdaptor.getVariantTable(), + studyId, + Collections.emptySet(), options), options); } Connection c = dbAdaptor.getHBaseManager().getConnection(); @@ -276,6 +288,9 @@ public void testQueryFileIndex() throws Exception { new Query() .append(STUDY.key(), STUDY_NAME_2) .append(GENOTYPE.key(), "NA19600:0/1;NA19661:0/0")); + + testQueryIndex(new Query(QUAL.key(), ">=10").append(FILTER.key(), "PASS"), new Query(STUDY.key(), STUDY_NAME_3) + .append(GENOTYPE.key(), "NA12878:0/1;NA12877:0/0,0|0")); } @Test @@ -423,9 +438,15 @@ public SampleIndexQuery testQueryIndex(Query testQuery, Query query) throws Exce System.out.println("biotypeMask = " + IndexUtils.byteToString(indexQuery.getAnnotationIndexQuery().getBiotypeMask())); System.out.println("ctMask = " + IndexUtils.shortToString(indexQuery.getAnnotationIndexQuery().getConsequenceTypeMask())); System.out.println("clinicalMask = " + IndexUtils.byteToString(indexQuery.getAnnotationIndexQuery().getClinicalMask())); -// for (String sample : indexQuery.getSamplesMap().keySet()) { + for (String sample : indexQuery.getSamplesMap().keySet()) { + if (indexQuery.forSample(sample).hasFatherFilter()) { + System.out.println("FatherFilter = " + IndexUtils.parentFilterToString(indexQuery.forSample(sample).getFatherFilter())); + } + if (indexQuery.forSample(sample).hasMotherFilter()) { + System.out.println("MotherFilter = " + IndexUtils.parentFilterToString(indexQuery.forSample(sample).getMotherFilter())); + } // System.out.println("fileIndex("+sample+") = " + IndexUtils.maskToString(indexQuery.getFileIndexMask(sample), indexQuery.getFileIndex(sample))); -// } + } System.out.println("Query SampleIndex = " + onlyIndex); System.out.println("Query DBAdaptor = " + onlyDBAdaptor); System.out.println("Query SampleIndex+DBAdaptor = " + indexAndDBAdaptor); @@ -590,6 +611,16 @@ public void testSampleVariantStats() throws Exception { } } + @Test + public void testSampleVariantStatsFail() throws Exception { + for (String study : studies) { + for (String sample : sampleNames.get(study)) { + DataResult result = variantStorageEngine.sampleStatsQuery(study, sample, new Query(ANNOT_CONSEQUENCE_TYPE.key(), "synonymous_variant")); + System.out.println(JacksonUtils.getDefaultObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(result.first())); + } + } + } + @Test public void testApproximateCount() { diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-cdh5.13/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-cdh5.13/pom.xml index 90057de2756..4a88d09f50c 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-cdh5.13/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-cdh5.13/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr5.31/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr5.31/pom.xml index 4608acfa490..c9466e543f6 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr5.31/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr5.31/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr5.8/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr5.8/pom.xml index bbe8d356420..4ef03133af3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr5.8/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr5.8/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml index 6bc6ea229f6..c62f9c262d3 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-emr6.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.5/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.5/pom.xml index b59cafddddd..6b820491ce9 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.5/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.5/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml index 8c58176e4e5..5ba96f2c0ab 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp2.6/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml index 21c8735cd5a..ddccc1fd640 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/opencga-storage-hadoop-deps-hdp3.1/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage-hadoop-deps - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml index 1cfc425c1c2..fd0465364a6 100644 --- a/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/opencga-storage-hadoop-deps/pom.xml @@ -50,7 +50,7 @@ org.opencb.opencga opencga-storage-hadoop - 2.0.2 + 2.0.3 ../pom.xml @@ -66,7 +66,7 @@ opencga-storage-hadoop-deps - 2.0.2 + 2.0.3 pom diff --git a/opencga-storage/opencga-storage-hadoop/pom.xml b/opencga-storage/opencga-storage-hadoop/pom.xml index 613500a1e38..f4c333fae3b 100644 --- a/opencga-storage/opencga-storage-hadoop/pom.xml +++ b/opencga-storage/opencga-storage-hadoop/pom.xml @@ -30,12 +30,12 @@ org.opencb.opencga opencga-storage - 2.0.2 + 2.0.3 ../pom.xml opencga-storage-hadoop - 2.0.2 + 2.0.3 pom diff --git a/opencga-storage/opencga-storage-mongodb/pom.xml b/opencga-storage/opencga-storage-mongodb/pom.xml index 6313dae1803..c9331b2b3fe 100644 --- a/opencga-storage/opencga-storage-mongodb/pom.xml +++ b/opencga-storage/opencga-storage-mongodb/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/opencga-storage-server/pom.xml b/opencga-storage/opencga-storage-server/pom.xml index adaabdebae3..c11ca4510c3 100644 --- a/opencga-storage/opencga-storage-server/pom.xml +++ b/opencga-storage/opencga-storage-server/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga-storage - 2.0.2 + 2.0.3 ../pom.xml diff --git a/opencga-storage/pom.xml b/opencga-storage/pom.xml index a7ee809f731..3a4823a7f3d 100644 --- a/opencga-storage/pom.xml +++ b/opencga-storage/pom.xml @@ -22,13 +22,13 @@ org.opencb.opencga opencga - 2.0.2 + 2.0.3 ../pom.xml opencga-storage - 2.0.2 + 2.0.3 pom diff --git a/opencga-test/pom.xml b/opencga-test/pom.xml index 70a2a655b03..a485e2fe45e 100644 --- a/opencga-test/pom.xml +++ b/opencga-test/pom.xml @@ -24,7 +24,7 @@ org.opencb.opencga opencga-test - 2.0.2 + 2.0.3 pom diff --git a/pom.xml b/pom.xml index 7d790107efa..e0748894aba 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ org.opencb.opencga opencga - 2.0.2 + 2.0.3 pom OpenCGA @@ -41,8 +41,8 @@ - 2.0.2 - 2.0.2 + 2.0.3 + 2.0.3 2.1.0-beta 4.8.0 2.0.5