Skip to content

Commit

Permalink
Merge branch 'release-2.0.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
imedina committed May 27, 2021
2 parents caf04bf + ea9c8d0 commit 0058497
Show file tree
Hide file tree
Showing 113 changed files with 1,821 additions and 712 deletions.
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
File renamed without changes
2 changes: 1 addition & 1 deletion opencga-analysis/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<parent>
<groupId>org.opencb.opencga</groupId>
<artifactId>opencga</artifactId>
<version>2.0.2</version>
<version>2.0.3</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@

package org.opencb.opencga.analysis.clinical;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantBuilder;
import org.opencb.commons.utils.FileUtils;
import org.opencb.commons.utils.URLUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
Expand All @@ -35,12 +36,13 @@
public class ActionableVariantManager {
// Folder where actionable variant files are located, multiple assemblies are supported, i.e.: one variant actionable file per assembly
// File name format: actionableVariants_xxx.txt[.gz] where xxx = assembly in lower case
private final String ACTIONABLE_URL = "http://resources.opencb.org/opencb/opencga/analysis/commons/";
private final static String ACTIONABLE_URL = "http://resources.opencb.org/opencb/opencga/analysis/commons/";
private final static Logger logger = LoggerFactory.getLogger(ActionableVariantManager.class);

// We keep a Map for each assembly with a Map of variant IDs with the phenotype list
private static Map<String, Map<String, List<String>>> actionableVariants = null;

private Path openCgaHome;
private final Path openCgaHome;

public ActionableVariantManager(Path openCgaHome) {
this.openCgaHome = openCgaHome;
Expand All @@ -49,7 +51,7 @@ public ActionableVariantManager(Path openCgaHome) {
public Map<String, List<String>> getActionableVariants(String assembly) throws IOException {
// Lazy loading
if (actionableVariants == null) {
actionableVariants = loadActionableVariants();
actionableVariants = loadActionableVariants(openCgaHome);
}

if (actionableVariants.containsKey(assembly)) {
Expand All @@ -58,38 +60,47 @@ public Map<String, List<String>> getActionableVariants(String assembly) throws I
return null;
}

/**
 * Eagerly populates the shared actionable-variants cache.
 * Does nothing when the cache has already been loaded.
 *
 * @param openCgaHome OpenCGA installation folder passed on to the loader
 * @throws IOException if the loader fails
 */
public static void init(Path openCgaHome) throws IOException {
    if (actionableVariants != null) {
        // Cache already populated, nothing to load
        return;
    }
    actionableVariants = loadActionableVariants(openCgaHome);
}

private Map<String, Map<String, List<String>>> loadActionableVariants() throws IOException {
private static Map<String, Map<String, List<String>>> loadActionableVariants(Path openCgaHome) throws IOException {
// Load actionable variants for each assembly, if present
// First, read all actionableVariants filenames, actionableVariants_xxx.txt[.gz] where xxx = assembly in lower case
Map<String, Map<String, List<String>>> actionableVariantsByAssembly = new HashMap<>();

String[] assemblies = new String[]{"grch37", "grch38"};
for (String assembly : assemblies) {
File actionableFile;
boolean temporalFile = false;
try {
String filename = "actionableVariants_" + assembly + ".txt.gz";

Path path = openCgaHome.resolve("analysis/commons/" + filename);
if (path.toFile().exists()) {
System.out.println("loadActionableVariants from path: " + path);
logger.info("loadActionableVariants from path: " + path);
actionableFile = path.toFile();
} else {
// Download 'actionable variant' file
System.out.println("loadActionableVariants from URL: " + (ACTIONABLE_URL + filename) + ", (path does not exist: "
logger.info("loadActionableVariants from URL: " + (ACTIONABLE_URL + filename) + ", (path does not exist: "
+ path + ")");
actionableFile = URLUtils.download(new URL(ACTIONABLE_URL + filename), Paths.get("/tmp"));
temporalFile = true;
}
} catch (IOException e) {
continue;
}

if (actionableFile != null) {
actionableVariantsByAssembly.put(assembly, loadActionableVariants(actionableFile));
}

// Delete
actionableFile.delete();
if (temporalFile) {
// Delete
actionableFile.delete();
}
}
}

return actionableVariantsByAssembly;
Expand All @@ -101,14 +112,17 @@ private Map<String, Map<String, List<String>>> loadActionableVariants() throws I
* @return Map of variant IDs with a list of phenotypes
* @throws IOException If file is not found
*/
private Map<String, List<String>> loadActionableVariants(File file) throws IOException {
private static Map<String, List<String>> loadActionableVariants(File file) throws IOException {

// System.out.println("ActionableVariantManager: path = " + file.toString());
// logger.info("ActionableVariantManager: path = " + file.toString());

Map<String, List<String>> actionableVariants = new HashMap<>();

if (file != null && file.exists()) {
BufferedReader bufferedReader = FileUtils.newBufferedReader(file.toPath());
if (file == null || !file.exists()) {
return actionableVariants;
}

try (BufferedReader bufferedReader = FileUtils.newBufferedReader(file.toPath())) {
List<String> lines = bufferedReader.lines().collect(Collectors.toList());
for (String line : lines) {
if (line.startsWith("#")) {
Expand All @@ -134,16 +148,16 @@ private Map<String, List<String>> loadActionableVariants(File file) throws IOExc
actionableVariants.put(variant.toString(), phenotypes);
} catch (NumberFormatException e) {
// Skip this variant
System.err.println("Skip actionable variant: " + line + "\nCause: " + e.getMessage());
logger.error("Skip actionable variant: " + line + "\nCause: " + e.getMessage());
}
} else {
// Skip this variant
System.err.println("Skip actionable variant, invalid format: " + line);
logger.error("Skip actionable variant, invalid format: " + line);
}
}
}

// System.out.println("ActionableVariantManager: size = " + actionableVariants.size());
// logger.info("ActionableVariantManager: size = " + actionableVariants.size());

return actionableVariants;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ protected void run() throws Exception {
addGeneratedFile(result.first());
for (File fileResult : result.getResults()) {
if (fileResult.getInternal().getStatus().getName().equals(FileStatus.MISSING_SAMPLES)) {
Map<String, Object> params = new PostLinkToolParams(Collections.singletonList(fileResult.getId()))
Map<String, Object> params = new PostLinkToolParams(Collections.singletonList(fileResult.getId()), null)
.toParams(new ObjectMap(ParamConstants.STUDY_PARAM, study));
Job postLinkJob = catalogManager.getJobManager()
.submit(getStudy(), PostLinkSampleAssociation.ID, Enums.Priority.MEDIUM,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.opencb.opencga.catalog.managers.SampleManager;
import org.opencb.opencga.catalog.utils.Constants;
import org.opencb.opencga.catalog.utils.ParamUtils;
import org.opencb.opencga.core.api.ParamConstants;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.file.*;
import org.opencb.opencga.core.models.sample.Sample;
Expand All @@ -32,6 +33,15 @@ public class PostLinkSampleAssociation extends OpenCgaToolScopeStudy {
@ToolParams
protected final PostLinkToolParams postLinkParams = new PostLinkToolParams();

/**
 * Runs the parent checks and then makes sure a usable batch size is set:
 * a missing or non-positive value is replaced by 1000.
 *
 * @throws Exception if the parent check fails
 */
@Override
protected void check() throws Exception {
    super.check();

    boolean hasValidBatchSize = postLinkParams.getBatchSize() != null && postLinkParams.getBatchSize() > 0;
    if (!hasValidBatchSize) {
        // Fall back to the default batch size
        postLinkParams.setBatchSize(1000);
    }
}

@Override
protected void run() throws Exception {
// Obtain an iterator to get all the files that were linked and not associated with any of their samples
Expand All @@ -45,10 +55,11 @@ protected void run() throws Exception {
options.put(QueryOptions.COUNT, true);

List<String> files = null;
if (CollectionUtils.isNotEmpty(postLinkParams.getFiles())) {
files = new LinkedList<>(postLinkParams.getFiles());
} else {
if (CollectionUtils.isEmpty(postLinkParams.getFiles())
|| postLinkParams.getFiles().size() == 1 && postLinkParams.getFiles().get(0).equals(ParamConstants.ALL)) {
logger.info("Processing all files with internal status = '" + FileStatus.MISSING_SAMPLES + "'");
} else {
files = new LinkedList<>(postLinkParams.getFiles());
}

int numPendingFiles = -1;
Expand Down Expand Up @@ -88,16 +99,36 @@ protected void run() throws Exception {
if (CollectionUtils.isNotEmpty(file.getInternal().getMissingSamples().getNonExisting())) {
logger.info("Create {} missing samples", file.getInternal().getMissingSamples().getNonExisting().size());
for (String sampleId : file.getInternal().getMissingSamples().getNonExisting()) {
Query sampleQuery = new Query(SampleDBAdaptor.QueryParams.ID.key(), sampleId);
OpenCGAResult<Sample> sampleResult = catalogManager.getSampleManager().search(study, sampleQuery,
SampleManager.INCLUDE_SAMPLE_IDS, token);

if (sampleResult.getNumResults() != 1) {
// Sample still doesn't exist, so we create it
sampleResult = catalogManager.getSampleManager().create(study, new Sample().setId(sampleId),
QueryOptions.empty(), token);
if (sampleResult.getNumResults() != 1) {
throw new CatalogException("Could not create sample '" + sampleId + "'");
if (!sampleExists(sampleId)) {
try {
// Sample still doesn't exist, so we create it
OpenCGAResult<Sample> sampleResult = catalogManager.getSampleManager().create(study, new Sample().setId(sampleId),
QueryOptions.empty(), token);
if (sampleResult.getNumResults() != 1) {
throw new CatalogException("Could not create sample '" + sampleId + "'");
}
} catch (CatalogException e) {
try {
if (sampleExists(sampleId)) {
// If sample was successfully created, but still got an exception.
// Ignore exception

// Log INFO without stack trace
logger.info("Caught exception creating sample \"" + sampleId + "\","
+ " but sample was actually created. Ignoring " + e.toString());

// Log DEBUG with full stack trace
logger.debug("Ignored exception", e);
} else {
// Sample could not be created.
// Throw exception
throw e;
}
} catch (Exception e1) {
// Something went wrong. Throw original exception, and add this new as suppressed
e.addSuppressed(e1);
throw e;
}
}
}

Expand All @@ -111,7 +142,7 @@ protected void run() throws Exception {
}

// Create sample batches
int batchSize = 1000;
int batchSize = postLinkParams.getBatchSize();
List<List<String>> sampleListList = new ArrayList<>((sampleList.size() / batchSize) + 1);
// Create batches
List<String> currentList = null;
Expand Down Expand Up @@ -160,4 +191,12 @@ protected void run() throws Exception {
}
}
}

/**
 * Tells whether a sample with the given ID can be found in the current study.
 *
 * @param sampleId ID of the sample to look for
 * @return true only when the search returns exactly one result
 * @throws CatalogException if the sample search fails
 */
private boolean sampleExists(String sampleId) throws CatalogException {
    Query idQuery = new Query(SampleDBAdaptor.QueryParams.ID.key(), sampleId);
    return catalogManager.getSampleManager()
            .search(study, idQuery, SampleManager.INCLUDE_SAMPLE_IDS, token)
            .getNumResults() == 1;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -218,9 +218,21 @@ public static VariantQueryException wrongReleaseException(VariantQueryParam para
* @throws CatalogException if there is any catalog error
*/
public Query parseQuery(Query query, String token) throws CatalogException {
    // Convenience overload: forward to the three-argument variant without query options
    final QueryOptions noQueryOptions = null;
    return parseQuery(query, noQueryOptions, token);
}

/**
* Transforms a high level Query to a query fully understandable by storage.
* @param query High level query. Will be modified by the method.
* @param queryOptions Query options. Won't be modified
* @param token User's session id
* @return Modified input query (same instance)
* @throws CatalogException if there is any catalog error
*/
public Query parseQuery(Query query, QueryOptions queryOptions, String token) throws CatalogException {
if (query == null) {
// Nothing to do!
return null;
return new Query();
}

if (isValidParam(query, SAVED_FILTER)) {
Expand Down Expand Up @@ -257,8 +269,9 @@ public Query parseQuery(Query query, String token) throws CatalogException {
cohortFilterValidator.processFilter(query, VariantQueryParam.MISSING_GENOTYPES, release, token, defaultStudyStr);

if (release != null) {
// If no list of included files is specified:
if (VariantQueryProjectionParser.isIncludeFilesDefined(query, Collections.singleton(VariantField.STUDIES_FILES))) {
// If include all files:
if (VariantQueryProjectionParser.getIncludeFileStatus(query, VariantField.all())
.equals(VariantQueryProjectionParser.IncludeStatus.ALL)) {
List<String> includeFiles = new ArrayList<>();
QueryOptions fileOptions = new QueryOptions(INCLUDE, FileDBAdaptor.QueryParams.UID.key());
Query fileQuery = new Query(FileDBAdaptor.QueryParams.RELEASE.key(), "<=" + release)
Expand All @@ -272,8 +285,9 @@ public Query parseQuery(Query query, String token) throws CatalogException {
}
query.append(VariantQueryParam.INCLUDE_FILE.key(), includeFiles);
}
// If no list of included samples is specified:
if (!VariantQueryProjectionParser.isIncludeSamplesDefined(query, Collections.singleton(VariantField.STUDIES_SAMPLES))) {
// If include all samples:
if (VariantQueryProjectionParser.getIncludeFileStatus(query, VariantField.all())
.equals(VariantQueryProjectionParser.IncludeStatus.ALL)) {
List<String> includeSamples = new ArrayList<>();
Query sampleQuery = new Query(SampleDBAdaptor.QueryParams.RELEASE.key(), "<=" + release);
QueryOptions sampleOptions = new QueryOptions(INCLUDE, SampleDBAdaptor.QueryParams.UID.key());
Expand Down
Loading

0 comments on commit 0058497

Please sign in to comment.