diff --git a/intact-orthology-import/importOrtholgy.sh b/intact-orthology-import/importOrtholgy.sh
new file mode 100755
index 000000000..cc5f3c644
--- /dev/null
+++ b/intact-orthology-import/importOrtholgy.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# Batch script (see the #SBATCH directives) that runs the orthology import job through Maven.
+# Usage: importOrtholgy.sh <maven-profile>
+
+#SBATCH --time=02-00:00:00 # walltime
+#SBATCH --ntasks=1 # number of tasks
+#SBATCH --cpus-per-task=5 # number of CPUs Per Task i.e if your code is multi-threaded
+#SBATCH -p research # partition(s)
+#SBATCH --mem=32G # memory per node
+#SBATCH -J "ORTHOLOG_IMPORT" # job name
+#SBATCH -o "/nfs/production/hhe/intact/data/panther/logs/ortholog-import-%j.out" # job output file
+#SBATCH --mail-type=ALL
+#SBATCH --mail-user=intact-dev@ebi.ac.uk # email address
+export JAVA_HOME=/hps/software/users/hhe/intact/third-party-softwares/latest_intact_jdk11
+
+# Exactly one argument is expected: the Maven profile activated next to import-orthology.
+if [ $# -ne 1 ]; then
+    echo "" >&2
+    echo "ERROR: wrong number of parameters ($#)." >&2
+    echo "usage: importOrtholgy.sh <maven-profile>" >&2
+    echo "" >&2
+    exit 1
+fi
+
+PROFILE=$1
+
+echo "Profile: $PROFILE"
+
+# Quoted so that a profile value containing whitespace or glob characters stays a single argument.
+mvn clean -U install -P "import-orthology,${PROFILE}" -Djob.name=orthologyImport -Dmaven.test.skip
\ No newline at end of file
diff --git a/intact-orthology-import/pom.xml b/intact-orthology-import/pom.xml
new file mode 100644
index 000000000..ff10e1961
--- /dev/null
+++ b/intact-orthology-import/pom.xml
@@ -0,0 +1,132 @@
+
+
+
+ 4.0.0
+
+
+ uk.ac.ebi.intact.dataexchange
+ intact-dataexchange-master
+ 4.2.0-SNAPSHOT
+
+
+ intact-orthology-import
+ jar
+ intact-orthology-import
+
+
+
+ ${db.protocol}:${db.subprotocol}${db.separator}${db.alias}
+ none
+
+
+
+
+ import-orthology
+
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+
+
+ install
+
+ exec
+
+
+
+
+ java
+
+ -Xmx12288m
+ -Xms2048m
+ -classpath
+
+ psidev.psi.mi.jami.batch.MIBatchJobManager
+ classpath*:/META-INF/orthology-import-spring.xml
+ ${job.name}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ psidev.psi.mi.jami
+ jami-core
+ ${psi.jami.version}
+
+
+
+ psidev.psi.mi.jami
+ jami-batch
+ ${psi.jami.version}
+
+
+
+ psidev.psi.mi.jami.bridges
+ jami-uniprot
+ ${psi.jami.version}
+
+
+ org.slf4j
+ jcl-over-slf4j
+
+
+
+
+
+ uk.ac.ebi.intact.jami
+ intact-jami
+ ${intact.jami.version}
+
+
+
+ org.springframework.retry
+ spring-retry
+ 1.3.4
+
+
+
+ org.springframework
+ spring-aspects
+ ${spring.version}
+
+
+
+ org.projectlombok
+ lombok
+ 1.18.30
+
+
+
+
+
+ org.apache.commons
+ commons-compress
+ 1.21
+ compile
+
+
+
+ commons-io
+ commons-io
+ 2.4
+
+
+ jakarta.persistence
+ jakarta.persistence-api
+ 2.2.3
+ compile
+
+
+
diff --git a/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsFileParser.java b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsFileParser.java
new file mode 100644
index 000000000..5a724d30e
--- /dev/null
+++ b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsFileParser.java
@@ -0,0 +1,115 @@
+package uk.ac.ebi.intact.ortholog;
+
+import lombok.extern.log4j.Log4j;
+import org.apache.commons.io.FileUtils;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Turns a Panther orthologs text file into one small file per UniProtKB accession.
+ * <p>
+ * Every input line is scanned for {@code UniProtKB=<accession>} tokens and {@code PTHR<digits>}
+ * identifiers; each accession found on a line is linked to each Panther identifier found on the same line.
+ */
+@Log4j
+public class OrthologsFileParser {
+
+    private static final Pattern UNIPROT_KB_REGEX = Pattern.compile("UniProtKB=([A-Z0-9]+)");
+    private static final Pattern PANTHER_REGEX = Pattern.compile("PTHR\\d+");
+
+    /**
+     * Parses the orthologs file and writes the accession/Panther pairs below {@code outputDirPath}.
+     * <p>
+     * The output directory is deleted and recreated first. One file named after the accession is written
+     * per protein, holding one {@code accession,pantherId} line per distinct Panther identifier.
+     *
+     * @param inputFilePath path of the uncompressed orthologs file
+     * @param outputDirPath directory receiving the per-protein files; its previous content is lost
+     * @throws IOException if the input cannot be read or the output cannot be (re)created or written
+     */
+    public static void parseFileAndSave(String inputFilePath, String outputDirPath) throws IOException {
+        log.info("Parsing file...");
+
+        Path outputDir = Path.of(outputDirPath);
+        // First, we empty the directory to start clean
+        if (Files.exists(outputDir)) {
+            FileUtils.deleteDirectory(outputDir.toFile());
+        }
+        // Unlike File.mkdirs(), this throws when the directory cannot be created
+        Files.createDirectories(outputDir);
+
+        Map<String, Set<String>> uniprotAndPTHR = readPairs(inputFilePath);
+        log.info("File parsed.");
+
+        log.info("Saving map to files...");
+
+        long proteinCount = 0;
+        long uniprotAndPantherCount = 0;
+        for (Map.Entry<String, Set<String>> entry : uniprotAndPTHR.entrySet()) {
+            writeProteinFile(outputDir, entry.getKey(), entry.getValue());
+            uniprotAndPantherCount += entry.getValue().size();
+            proteinCount++;
+            // Progress is counted in proteins: the pair total grows by arbitrary steps and could
+            // jump over every multiple of the interval
+            if (proteinCount % 25_000 == 0) {
+                log.info(proteinCount + " proteins saved");
+            }
+        }
+
+        log.info("All protein files saved.");
+        log.info("Number of Panther identifiers: " + uniprotAndPantherCount);
+    }
+
+    /** Reads the whole file into an accession -> Panther identifiers map; the sets remove duplicated pairs. */
+    private static Map<String, Set<String>> readPairs(String inputFilePath) throws IOException {
+        Map<String, Set<String>> uniprotAndPTHR = new HashMap<>();
+        long linesRead = 0;
+        // Explicit charset: the result must not depend on the platform default encoding
+        try (BufferedReader reader = new BufferedReader(new FileReader(inputFilePath, StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                linesRead++;
+                List<String> uniprotMatches = new ArrayList<>();
+
+                Matcher uniprotMatcher = UNIPROT_KB_REGEX.matcher(line);
+                while (uniprotMatcher.find()) {
+                    uniprotMatches.add(uniprotMatcher.group(1));
+                }
+                Matcher pantherMatcher = PANTHER_REGEX.matcher(line);
+                while (pantherMatcher.find()) {
+                    String pantherId = pantherMatcher.group();
+                    for (String uniprotMatch : uniprotMatches) {
+                        uniprotAndPTHR.computeIfAbsent(uniprotMatch, key -> new HashSet<>()).add(pantherId);
+                    }
+                }
+
+                if (linesRead % 250_000 == 0) {
+                    log.info(linesRead + " lines read, " + uniprotAndPTHR.size() + " proteins read");
+                }
+            }
+        }
+        log.info(linesRead + " lines read, " + uniprotAndPTHR.size() + " proteins read");
+        return uniprotAndPTHR;
+    }
+
+    /** Writes every pair of one protein with a single open/close of its file. */
+    private static void writeProteinFile(Path dirPath, String uniprotId, Set<String> pantherIds) throws IOException {
+        try (BufferedWriter bufferedWriter = Files.newBufferedWriter(dirPath.resolve(uniprotId), StandardCharsets.UTF_8)) {
+            for (String pantherId : pantherIds) {
+                bufferedWriter.write(uniprotId + "," + pantherId);
+                bufferedWriter.newLine();
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsFileReader.java b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsFileReader.java
new file mode 100644
index 000000000..67a91eb04
--- /dev/null
+++ b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsFileReader.java
@@ -0,0 +1,68 @@
+package uk.ac.ebi.intact.ortholog;
+
+import lombok.extern.log4j.Log4j;
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.io.IOUtils;
+import java.io.*;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Downloads a gzipped tar archive over HTTP and extracts its content to a local file.
+ */
+@Log4j
+public class OrthologsFileReader{
+
+    /**
+     * Streams the {@code .tar.gz} found at {@code url} and writes its file entries to {@code filePath}.
+     * <p>
+     * Every non-directory entry is written to the same path, each one overwriting the previous, so the
+     * file ends up holding the last such entry of the archive.
+     *
+     * @param url      HTTP(S) location of the archive
+     * @param filePath destination file, overwritten if it already exists
+     * @throws IOException if the server does not answer 200, the archive holds no file entry, or
+     *                     reading/writing fails
+     */
+    public static void decompressGzip(String url, String filePath) throws IOException {
+        URL gzipUrl = new URL(url);
+        HttpURLConnection connection = (HttpURLConnection) gzipUrl.openConnection();
+        try {
+            int responseCode = connection.getResponseCode();
+            if (responseCode != HttpURLConnection.HTTP_OK) {
+                // Fail instead of only logging: otherwise filePath is left stale or missing for whoever reads it next
+                throw new IOException("GZIP returned unexpected response: " + responseCode);
+            }
+            log.info("Connected to URL.");
+
+            boolean fileWritten = false;
+            try (InputStream in = connection.getInputStream();
+                 GZIPInputStream gis = new GZIPInputStream(in);
+                 TarArchiveInputStream tis = new TarArchiveInputStream(gis)) {
+                log.info("Decompressing...");
+                TarArchiveEntry entry;
+                while ((entry = tis.getNextTarEntry()) != null) {
+                    if (entry.isDirectory()) {
+                        // A directory entry has no content; copying it would only truncate the output file
+                        continue;
+                    }
+                    // the false makes it write over existing data
+                    try (FileOutputStream fos = new FileOutputStream(filePath, false)) {
+                        IOUtils.copy(tis, fos);
+                    }
+                    fileWritten = true;
+                }
+            }
+            if (!fileWritten) {
+                throw new IOException("No file entry found in archive: " + url);
+            }
+            log.info("File decompressed, data in " + filePath);
+        } finally {
+            // Release the connection on every path, including the error ones
+            connection.disconnect();
+            log.info("Disconnected from URL.");
+        }
+    }
+}
\ No newline at end of file
diff --git a/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsProteinAssociation.java b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsProteinAssociation.java
new file mode 100644
index 000000000..c62e82e88
--- /dev/null
+++ b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsProteinAssociation.java
@@ -0,0 +1,89 @@
+package uk.ac.ebi.intact.ortholog;
+
+import lombok.extern.log4j.Log4j;
+import uk.ac.ebi.intact.jami.dao.IntactDao;
+import uk.ac.ebi.intact.jami.model.extension.IntactProtein;
+import javax.annotation.Resource;
+import javax.persistence.Query;
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.*;
+
+/**
+ * Database and file lookups used to link IntAct proteins to their Panther identifiers.
+ */
+@Log4j
+public class OrthologsProteinAssociation {
+
+    @Resource(name="intactDao")
+    private final IntactDao intactDao;
+
+    public OrthologsProteinAssociation(IntactDao intactDao) {
+        this.intactDao = intactDao;
+    }
+
+    /**
+     * Lists the numeric part of every protein AC (the AC without its {@code EBI-} prefix), ascending.
+     *
+     * @return the numeric ACs in ascending order
+     */
+    @SuppressWarnings("unchecked") // native query: JPA only exposes a raw List
+    public List<Integer> getProteinAcs() {
+        // NOTE(review): assumes the JDBC driver maps CAST(... as integer) to java.lang.Integer - confirm
+        String sqlQuery = "select CAST(REPLACE(ac,'EBI-','') as integer) as numberAC from intact.ia_interactor p where category = 'protein' order by numberAC asc";
+        Query query = intactDao.getEntityManager().createNativeQuery(sqlQuery);
+        return query.getResultList();
+    }
+
+    /**
+     * Loads the proteins whose numeric AC lies in {@code [startAc, endAc]}, both bounds included.
+     *
+     * @param startAc lowest numeric AC to load
+     * @param endAc   highest numeric AC to load
+     * @return the matching proteins
+     */
+    @SuppressWarnings("unchecked") // untyped JPA query: only a raw List is exposed
+    public List<IntactProtein> fetchProteins(Integer startAc, Integer endAc) {
+        String sqlQuery = "select p FROM IntactProtein p where CAST(REPLACE(ac,'EBI-','') as integer) BETWEEN :startAc and :endAc";
+        Query query = intactDao.getEntityManager().createQuery(sqlQuery);
+        query.setParameter("startAc", startAc);
+        query.setParameter("endAc", endAc);
+        return query.getResultList();
+    }
+
+    /**
+     * Reads the Panther identifiers stored for one protein.
+     * <p>
+     * The file {@code dirPath/<uniprot accession>} is expected to hold {@code accession,pantherId} lines;
+     * lines with another shape or another accession are ignored.
+     *
+     * @param dirPath directory holding one file per UniProtKB accession
+     * @param protein protein to look up
+     * @return the Panther identifiers found, empty when the protein has no UniProtKB accession or no file
+     * @throws IOException if the file exists but cannot be read
+     */
+    public static Collection<String> associateOneProteinToPantherIds(String dirPath, IntactProtein protein) throws IOException {
+        List<String> pantherIds = new ArrayList<>();
+        String uniprotId = protein.getUniprotkb();
+        if (uniprotId == null) {
+            return pantherIds;
+        }
+        Path filePath = Path.of(dirPath).resolve(uniprotId);
+        // isRegularFile rather than exists: an empty accession resolves to the directory itself
+        if (!Files.isRegularFile(filePath)) {
+            return pantherIds;
+        }
+        try (BufferedReader reader = new BufferedReader(new FileReader(filePath.toFile(), StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                String[] parts = line.split(",");
+                if (parts.length == 2 && parts[0].equals(uniprotId)) {
+                    pantherIds.add(parts[1]);
+                }
+            }
+        }
+        return pantherIds;
+    }
+}
\ No newline at end of file
diff --git a/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsXrefWriter.java b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsXrefWriter.java
new file mode 100644
index 000000000..8cf924956
--- /dev/null
+++ b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/OrthologsXrefWriter.java
@@ -0,0 +1,74 @@
+package uk.ac.ebi.intact.ortholog;
+
+import lombok.RequiredArgsConstructor;
+import uk.ac.ebi.intact.jami.dao.IntactDao;
+import uk.ac.ebi.intact.jami.model.extension.IntactCvTerm;
+import uk.ac.ebi.intact.jami.model.extension.IntactProtein;
+import uk.ac.ebi.intact.jami.model.extension.InteractorXref;
+import uk.ac.ebi.intact.jami.utils.IntactUtils;
+import java.util.*;
+
+/**
+ * Adds Panther orthology cross-references (database MI:0702, qualifier MI:2426) to proteins.
+ * <p>
+ * {@code hashCode()} is intentionally not overridden: {@code equals()} is identity based, and a hash
+ * derived from the mutable CV term cache would change while the instance is in use.
+ */
+@RequiredArgsConstructor
+public class OrthologsXrefWriter {
+
+    private final static String PANTHER_DATABASE_MI = "MI:0702";
+    private final static String ORTHOLOGY_MI = "MI:2426";
+    // CV terms already fetched, keyed by "<objclass>_<MI id>".
+    // NOTE(review): plain HashMap - assumes an instance is not shared between threads; confirm
+    private final Map<String, IntactCvTerm> cvTermMap = new HashMap<>();
+    private final IntactDao intactDao;
+
+    /**
+     * Adds one orthology xref per Panther identifier to the protein, skipping xrefs it already has.
+     *
+     * @param protein    protein to annotate
+     * @param pantherIds Panther identifiers to attach
+     * @throws Exception if the database or qualifier CV term cannot be found
+     */
+    public void addOrthologyXrefs(IntactProtein protein, Collection<String> pantherIds) throws Exception{
+        for (String pantherId: pantherIds) {
+            addOrthologyXref(protein, pantherId);
+        }
+    }
+
+    /**
+     * Adds a single Panther orthology xref unless {@code protein.getXrefs()} already contains it.
+     *
+     * @param protein   protein to annotate
+     * @param pantherId Panther identifier to attach
+     * @throws Exception if the database or qualifier CV term cannot be found
+     */
+    public void addOrthologyXref(IntactProtein protein, String pantherId) throws Exception{
+        InteractorXref xref = newOrthologsXref(pantherId);
+        if (!protein.getXrefs().contains(xref)){
+            protein.getXrefs().add(xref);
+        }
+    }
+
+    private InteractorXref newOrthologsXref(String id) throws Exception{
+        IntactCvTerm database = findCvTerm(IntactUtils.DATABASE_OBJCLASS, PANTHER_DATABASE_MI);
+        IntactCvTerm qualifier = findCvTerm(IntactUtils.QUALIFIER_OBJCLASS, ORTHOLOGY_MI);
+        return new InteractorXref(database, id, qualifier);
+    }
+
+    /** Returns the CV term for the given class and MI id, querying the DAO only on first use. */
+    private IntactCvTerm findCvTerm(String clazz, String id) throws Exception {
+        String key = clazz + "_" + id;
+        IntactCvTerm cached = cvTermMap.get(key);
+        if (cached != null) {
+            return cached;
+        }
+        IntactCvTerm cvTerm = intactDao.getCvTermDao().getByMIIdentifier(id,clazz);
+        if (cvTerm == null) {
+            throw new Exception("CV Term not found with class '" + clazz + "' and id '" + id + "'");
+        }
+        cvTermMap.put(key, cvTerm);
+        return cvTerm;
+    }
+}
\ No newline at end of file
diff --git a/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/ProteinPartitioner.java b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/ProteinPartitioner.java
new file mode 100644
index 000000000..48a406e06
--- /dev/null
+++ b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/ProteinPartitioner.java
@@ -0,0 +1,62 @@
+package uk.ac.ebi.intact.ortholog;
+
+import lombok.extern.log4j.Log4j;
+import org.springframework.batch.core.partition.support.Partitioner;
+import org.springframework.batch.item.ExecutionContext;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Splits the ordered list of protein ACs into contiguous ranges, one execution context per range.
+ */
+@Log4j
+public class ProteinPartitioner implements Partitioner {
+
+    private final OrthologsProteinAssociation orthologsProteinAssociation;
+
+    public ProteinPartitioner(OrthologsProteinAssociation orthologsProteinAssociation) {
+        this.orthologsProteinAssociation = orthologsProteinAssociation;
+    }
+
+    /**
+     * Builds the partitions. Each context carries {@code startAc} and {@code endAc}, the first and last
+     * numeric AC of its slice of the list returned by {@code getProteinAcs()}.
+     *
+     * @param partSize requested number of partitions; values below 1 are treated as 1
+     * @return contexts keyed by {@code "Thread:-<n>"}; empty when there is no protein
+     */
+    @Override
+    public Map<String, ExecutionContext> partition(int partSize) {
+        List<Integer> proteinAcs = orthologsProteinAssociation.getProteinAcs();
+
+        log.info("Starting new partitions");
+        log.info("Number of partitions: " + partSize);
+        Map<String, ExecutionContext> partitionMap = new HashMap<>();
+
+        int totalCount = proteinAcs.size();
+        // Guard the divisor: a zero or negative grid size must not produce a zero/negative slice size
+        int targetSize = (int) Math.ceil((double) totalCount / Math.max(1, partSize));
+        int startingIndex = 0;
+        int number = 0;
+
+        log.info("Proteins per partitions: " + targetSize);
+
+        while (startingIndex < totalCount) {
+            // The last slice may be shorter than the others
+            int endingIndex = Math.min(startingIndex + targetSize, totalCount);
+
+            ExecutionContext ctxMap = new ExecutionContext();
+            ctxMap.putInt("startAc", proteinAcs.get(startingIndex));
+            ctxMap.putInt("endAc", proteinAcs.get(endingIndex - 1));
+            partitionMap.put("Thread:-" + number, ctxMap);
+
+            // Next slice starts where this one ended (endingIndex is exclusive)
+            startingIndex = endingIndex;
+            number++;
+        }
+        log.info("END: Created " + partitionMap.size() + " partitions");
+        return partitionMap;
+    }
+}
\ No newline at end of file
diff --git a/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/IntactProteinAndPantherProcessor.java b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/IntactProteinAndPantherProcessor.java
new file mode 100644
index 000000000..53bd2ed35
--- /dev/null
+++ b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/IntactProteinAndPantherProcessor.java
@@ -0,0 +1,48 @@
+package uk.ac.ebi.intact.ortholog.jobs;
+
+import lombok.RequiredArgsConstructor;
+import org.springframework.batch.item.ExecutionContext;
+import org.springframework.batch.item.ItemProcessor;
+import org.springframework.batch.item.ItemStream;
+import org.springframework.batch.item.ItemStreamException;
+import uk.ac.ebi.intact.jami.model.extension.IntactProtein;
+import uk.ac.ebi.intact.ortholog.OrthologsXrefWriter;
+import uk.ac.ebi.intact.ortholog.model.ProteinAndPantherGroup;
+
+/**
+ * Batch processor that attaches the Panther orthology xrefs of a group to its protein.
+ */
+@RequiredArgsConstructor
+public class IntactProteinAndPantherProcessor implements ItemProcessor<ProteinAndPantherGroup, IntactProtein>, ItemStream {
+
+    private final OrthologsXrefWriter orthologsXrefWriter;
+
+    /**
+     * Adds one orthology xref per Panther identifier of the group to the group's protein.
+     *
+     * @param proteinAndPantherGroup protein together with its Panther identifiers
+     * @return the same protein instance, with the xrefs added
+     * @throws Exception if the xref writer fails
+     */
+    @Override
+    public IntactProtein process(ProteinAndPantherGroup proteinAndPantherGroup) throws Exception {
+        IntactProtein protein = proteinAndPantherGroup.getProtein();
+        orthologsXrefWriter.addOrthologyXrefs(protein, proteinAndPantherGroup.getPantherIds());
+        return protein;
+    }
+
+    @Override
+    public void open(ExecutionContext executionContext) throws ItemStreamException {
+        // Nothing to open: the processor keeps no stream state
+    }
+
+    @Override
+    public void update(ExecutionContext executionContext) throws ItemStreamException {
+        // Nothing to save: the processor keeps no stream state
+    }
+
+    @Override
+    public void close() throws ItemStreamException {
+        // Nothing to release
+    }
+}
\ No newline at end of file
diff --git a/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/IntactProteinAndPantherReader.java b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/IntactProteinAndPantherReader.java
new file mode 100644
index 000000000..199f44be8
--- /dev/null
+++ b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/IntactProteinAndPantherReader.java
@@ -0,0 +1,72 @@
+package uk.ac.ebi.intact.ortholog.jobs;
+
+import lombok.RequiredArgsConstructor;
+import lombok.extern.log4j.Log4j;
+import org.springframework.batch.item.ExecutionContext;
+import org.springframework.batch.item.ItemReader;
+import org.springframework.batch.item.ItemStream;
+import org.springframework.batch.item.ItemStreamException;
+import uk.ac.ebi.intact.jami.model.extension.IntactProtein;
+import uk.ac.ebi.intact.ortholog.OrthologsProteinAssociation;
+import uk.ac.ebi.intact.ortholog.model.ProteinAndPantherGroup;
+import java.util.*;
+
+/**
+ * Batch reader that walks the proteins of one AC range and emits those having Panther identifiers.
+ * <p>
+ * NOTE(review): the iterator is per-instance state, so this presumably needs one instance per
+ * partition (e.g. a step-scoped bean) - confirm against the Spring configuration.
+ */
+@Log4j
+@RequiredArgsConstructor
+public class IntactProteinAndPantherReader implements ItemReader<ProteinAndPantherGroup>, ItemStream {
+
+    private final OrthologsProteinAssociation orthologsProteinAssociation;
+    private final String proteinPantherPairDirPath;
+    private Iterator<IntactProtein> proteinIterator;
+
+    /**
+     * Returns the next protein that has at least one Panther identifier, skipping the others.
+     *
+     * @return the next group, or {@code null} once all proteins of the range were consumed
+     * @throws IllegalStateException if the reader was not opened
+     * @throws Exception if the Panther file of a protein cannot be read
+     */
+    @Override
+    public ProteinAndPantherGroup read() throws Exception{
+        if (proteinIterator == null) {
+            throw new IllegalStateException("Reader must be opened before reading");
+        }
+        while (proteinIterator.hasNext()) {
+            IntactProtein protein = proteinIterator.next();
+            Collection<String> pantherIds = OrthologsProteinAssociation
+                    .associateOneProteinToPantherIds(proteinPantherPairDirPath, protein);
+            if (!pantherIds.isEmpty()) {
+                return new ProteinAndPantherGroup(protein, pantherIds);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Loads the proteins of the range given by the {@code startAc} and {@code endAc} context entries.
+     */
+    @Override
+    public void open(ExecutionContext executionContext) throws ItemStreamException {
+        int startAc = executionContext.getInt("startAc");
+        int endAc = executionContext.getInt("endAc");
+        List<IntactProtein> allProteins = orthologsProteinAssociation.fetchProteins(startAc, endAc);
+        log.info("Reading " + allProteins.size() + " proteins");
+        proteinIterator = allProteins.iterator();
+    }
+
+    @Override
+    public void update(ExecutionContext executionContext) throws ItemStreamException {
+        // No restart state is stored
+    }
+
+    @Override
+    public void close() throws ItemStreamException {
+        // Nothing to release
+    }
+}
\ No newline at end of file
diff --git a/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/OrthologsReaderTasklet.java b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/OrthologsReaderTasklet.java
new file mode 100644
index 000000000..eae46f442
--- /dev/null
+++ b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/OrthologsReaderTasklet.java
@@ -0,0 +1,45 @@
+package uk.ac.ebi.intact.ortholog.jobs;
+
+import lombok.RequiredArgsConstructor;
+import org.springframework.batch.core.StepContribution;
+import org.springframework.batch.core.scope.context.ChunkContext;
+import org.springframework.batch.core.step.tasklet.Tasklet;
+import org.springframework.batch.item.ItemStreamException;
+import org.springframework.batch.repeat.RepeatStatus;
+import uk.ac.ebi.intact.ortholog.OrthologsFileReader;
+import uk.ac.ebi.intact.ortholog.OrthologsFileParser;
+
+import java.io.IOException;
+
+/**
+ * Tasklet that downloads the Panther archive and then splits it into per-protein files.
+ */
+@RequiredArgsConstructor
+public class OrthologsReaderTasklet implements Tasklet {
+
+    private final String urlPanther;
+    private final String filePath;
+    private final String proteinPantherPairDirPath;
+
+    /**
+     * Runs the download followed by the parsing, once.
+     *
+     * @return always {@link RepeatStatus#FINISHED}
+     * @throws Exception if the download fails; parsing I/O failures surface as {@link ItemStreamException}
+     */
+    @Override
+    public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext ) throws Exception {
+        OrthologsFileReader.decompressGzip(urlPanther, filePath);
+        parseDownloadedFile();
+        return RepeatStatus.FINISHED;
+    }
+
+    /** Parses the extracted file, rethrowing any I/O failure as an {@link ItemStreamException}. */
+    private void parseDownloadedFile() {
+        try {
+            OrthologsFileParser.parseFileAndSave(filePath, proteinPantherPairDirPath);
+        } catch (IOException ioFailure) {
+            throw new ItemStreamException("Error parsing the file: " + filePath, ioFailure);
+        }
+    }
+}
diff --git a/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/ProteinCollectionWriter.java b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/ProteinCollectionWriter.java
new file mode 100644
index 000000000..9c9e9d69f
--- /dev/null
+++ b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/jobs/ProteinCollectionWriter.java
@@ -0,0 +1,45 @@
+package uk.ac.ebi.intact.ortholog.jobs;
+
+import lombok.RequiredArgsConstructor;
+import org.springframework.batch.item.ExecutionContext;
+import org.springframework.batch.item.ItemStream;
+import org.springframework.batch.item.ItemStreamException;
+import org.springframework.batch.item.ItemWriter;
+import uk.ac.ebi.intact.jami.model.extension.IntactProtein;
+import uk.ac.ebi.intact.jami.service.InteractorService;
+import java.util.List;
+
+/**
+ * Batch writer that persists each chunk of proteins through the {@link InteractorService}.
+ */
+@RequiredArgsConstructor
+public class ProteinCollectionWriter implements ItemWriter<IntactProtein>, ItemStream {
+
+    private final InteractorService interactorService;
+
+    @Override
+    public void open(ExecutionContext executionContext) throws ItemStreamException {
+        // Nothing to open: the writer keeps no stream state
+    }
+
+    @Override
+    public void update(ExecutionContext executionContext) throws ItemStreamException {
+        // Nothing to save: the writer keeps no stream state
+    }
+
+    @Override
+    public void close() throws ItemStreamException {
+        // Nothing to release
+    }
+
+    /**
+     * Saves or updates all proteins of the chunk in one service call.
+     *
+     * @param items proteins of the current chunk
+     * @throws Exception if the service call fails
+     */
+    @Override
+    public void write(List<? extends IntactProtein> items) throws Exception {
+        interactorService.saveOrUpdate(items);
+    }
+}
\ No newline at end of file
diff --git a/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/model/ProteinAndPantherGroup.java b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/model/ProteinAndPantherGroup.java
new file mode 100644
index 000000000..bb7b6053b
--- /dev/null
+++ b/intact-orthology-import/src/main/java/uk/ac/ebi/intact/ortholog/model/ProteinAndPantherGroup.java
@@ -0,0 +1,17 @@
+package uk.ac.ebi.intact.ortholog.model;
+
+import lombok.Value;
+import uk.ac.ebi.intact.jami.model.extension.IntactProtein;
+
+import java.util.Collection;
+
+/**
+ * Value object (Lombok {@code @Value}) pairing a protein with the Panther identifiers found for it.
+ */
+@Value
+public class ProteinAndPantherGroup {
+    /** Protein the identifiers belong to. */
+    IntactProtein protein;
+    /** Panther identifiers associated with the protein. */
+    Collection<String> pantherIds;
+}
\ No newline at end of file
diff --git a/intact-orthology-import/src/main/resources/META-INF/orthology-import-spring.xml b/intact-orthology-import/src/main/resources/META-INF/orthology-import-spring.xml
new file mode 100644
index 000000000..bc51cd0ae
--- /dev/null
+++ b/intact-orthology-import/src/main/resources/META-INF/orthology-import-spring.xml
@@ -0,0 +1,243 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ true
+ false
+ ${db.hbm2ddl}
+ intact
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/intact-orthology-import/src/main/resources/META-INF/orthology-import.properties b/intact-orthology-import/src/main/resources/META-INF/orthology-import.properties
new file mode 100644
index 000000000..771548c3f
--- /dev/null
+++ b/intact-orthology-import/src/main/resources/META-INF/orthology-import.properties
@@ -0,0 +1,5 @@
+jami.user.context.id=ORTHOLOG_IMPORTER
+uncompressedPantherFilePath=/nfs/production/hhe/intact/data/panther/orthologsData.txt
+urlPanther=http://data.pantherdb.org/ftp/ortholog/current_release/AllOrthologs.tar.gz
+proteinPantherPairDirPath=/nfs/production/hhe/intact/data/panther/data
+ac.prefix=EBI
\ No newline at end of file
diff --git a/intact-orthology-import/src/main/resources/log4j.properties b/intact-orthology-import/src/main/resources/log4j.properties
new file mode 100644
index 000000000..02e80decb
--- /dev/null
+++ b/intact-orthology-import/src/main/resources/log4j.properties
@@ -0,0 +1,43 @@
+log4j.rootCategory=INFO, A
+
+log4j.category.uk.ac.ebi.intact=INFO
+
+# Hibernate
+log4j.category.org.hibernate=ERROR
+
+log4j.category.org.hibernate.cfg=WARN
+
+log4j.category.org.hibernate.impl=ERROR
+log4j.category.org.hibernate.loader=WARN
+log4j.category.org.hibernate.util=ERROR
+log4j.category.org.hibernate.persister=ERROR
+log4j.category.org.hibernate.validator=ERROR
+
+log4j.category.org.hibernate.engine=INFO
+log4j.category.org.hibernate.engine.internal=WARN
+log4j.category.org.hibernate.event=INFO
+
+#Log all SQL DML statements as they are executed
+log4j.category.org.hibernate.SQL=INFO
+#Log all JDBC parameters
+log4j.category.org.hibernate.type=INFO
+#Log all SQL DDL statements as they are executed
+log4j.category.org.hibernate.tool.hbm2ddl=ERROR
+#Log the state of all entities (max 20 entities) associated with the session at flush time
+log4j.category.org.hibernate.pretty=ERROR
+#Log all second-level cache activity
+log4j.category.org.hibernate.cache=INFO
+#Log transaction related activity
+log4j.category.org.hibernate.transaction=INFO
+#Log all JDBC resource acquisition
+log4j.category.org.hibernate.jdbc=INFO
+#Log HQL and SQL ASTs during query parsing
+log4j.category.org.hibernate.hql.ast.AST=INFO
+#Log all JAAS authorization requests
+log4j.category.org.hibernate.secure=INFO
+
+# ***** A is set to be a ConsoleAppender.
+log4j.appender.A=org.apache.log4j.ConsoleAppender
+# ***** A uses PatternLayout.
+log4j.appender.A.layout=org.apache.log4j.PatternLayout
+log4j.appender.A.layout.ConversionPattern=%d [%t] %-5p (%C{1},%L) - %m%n
\ No newline at end of file
diff --git a/intact-orthology-import/src/test/java/uk/ac/ebi/intact/dataexchange/OrthologsManagerTest.java b/intact-orthology-import/src/test/java/uk/ac/ebi/intact/dataexchange/OrthologsManagerTest.java
new file mode 100644
index 000000000..808a60629
--- /dev/null
+++ b/intact-orthology-import/src/test/java/uk/ac/ebi/intact/dataexchange/OrthologsManagerTest.java
@@ -0,0 +1,34 @@
+package uk.ac.ebi.intact.dataexchange;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Placeholder JUnit 3 test case: it holds a single test that always passes.
+ */
+public class OrthologsManagerTest extends TestCase {
+
+    /**
+     * Creates the test case.
+     *
+     * @param testName name of the test case
+     */
+    public OrthologsManagerTest(String testName) {
+        super(testName);
+    }
+
+    /**
+     * @return a suite built from the test methods of this class
+     */
+    public static Test suite() {
+        return new TestSuite(OrthologsManagerTest.class);
+    }
+
+    /**
+     * Trivial assertion; no production code is exercised.
+     */
+    public void testApp() {
+        assertTrue(true);
+    }
+}
diff --git a/pom.xml b/pom.xml
index ef0ec8a0a..9f6dfbb39 100644
--- a/pom.xml
+++ b/pom.xml
@@ -67,6 +67,7 @@
complex-pdb-export
complex-tab-export
complex-uniprot-dr-export
+ intact-orthology-import
diff --git a/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/complex/ComplexInteractor.java b/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/complex/ComplexInteractor.java
index d2241f99b..18f9658dd 100644
--- a/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/complex/ComplexInteractor.java
+++ b/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/complex/ComplexInteractor.java
@@ -1,5 +1,7 @@
package uk.ac.ebi.intact.dataexchange.psimi.solr.complex;
+import java.util.List;
+
public class ComplexInteractor {
/*************************/
@@ -15,7 +17,8 @@ public ComplexInteractor(String identifier,
String description,
String stochiometry,
String interactorType,
- String organismName) {
+ String organismName,
+ List xrefs) {
this.identifier = identifier;
this.identifierLink = identifierLink;
this.name = name;
@@ -23,6 +26,7 @@ public ComplexInteractor(String identifier,
this.stochiometry = stochiometry;
this.interactorType = interactorType;
this.organismName = organismName;
+ this.xrefs = xrefs;
}
/*********************************/
@@ -85,6 +89,14 @@ public void setOrganismName(String organismName) {
this.organismName = organismName;
}
+ public List getXrefs() {
+ return xrefs;
+ }
+
+ public void setXrefs(List xrefs) {
+ this.xrefs = xrefs;
+ }
+
/********************************/
/* Private attributes */
/********************************/
@@ -96,4 +108,5 @@ public void setOrganismName(String organismName) {
private String stochiometry = null;
private String interactorType;
private String organismName;
+ private List xrefs;
}
diff --git a/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/complex/ComplexInteractorXref.java b/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/complex/ComplexInteractorXref.java
new file mode 100644
index 000000000..7df01def6
--- /dev/null
+++ b/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/complex/ComplexInteractorXref.java
@@ -0,0 +1,72 @@
+package uk.ac.ebi.intact.dataexchange.psimi.solr.complex;
+
+/**
+ * Data holder for one cross-reference of a complex interactor: identifier, identifier link,
+ * database and qualifier, each a nullable string.
+ */
+public class ComplexInteractorXref {
+
+    /********************************/
+    /*      Private attributes      */
+    /********************************/
+
+    private String identifier;
+    private String identifierLink;
+    private String database;
+    private String qualifier;
+
+    /*************************/
+    /*      Constructors     */
+    /*************************/
+
+    /** Creates an xref whose attributes are all {@code null}. */
+    public ComplexInteractorXref() {
+    }
+
+    /** Creates an xref with all four attributes set. */
+    public ComplexInteractorXref(String identifier,
+                                 String identifierLink,
+                                 String database,
+                                 String qualifier) {
+        this.identifier = identifier;
+        this.identifierLink = identifierLink;
+        this.database = database;
+        this.qualifier = qualifier;
+    }
+
+    /*********************************/
+    /*      Getters and Setters      */
+    /*********************************/
+
+    public String getIdentifier() {
+        return this.identifier;
+    }
+
+    public void setIdentifier(String identifier) {
+        this.identifier = identifier;
+    }
+
+    public String getIdentifierLink() {
+        return this.identifierLink;
+    }
+
+    public void setIdentifierLink(String identifierLink) {
+        this.identifierLink = identifierLink;
+    }
+
+    public String getDatabase() {
+        return this.database;
+    }
+
+    public void setDatabase(String database) {
+        this.database = database;
+    }
+
+    public String getQualifier() {
+        return this.qualifier;
+    }
+
+    public void setQualifier(String qualifier) {
+        this.qualifier = qualifier;
+    }
+}
diff --git a/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/enricher/ComplexSolrEnricher.java b/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/enricher/ComplexSolrEnricher.java
index 1a8418617..0cad4ecc0 100644
--- a/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/enricher/ComplexSolrEnricher.java
+++ b/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/enricher/ComplexSolrEnricher.java
@@ -12,6 +12,7 @@
import uk.ac.ebi.intact.bridges.ontologies.term.OntologyTerm;
import uk.ac.ebi.intact.dataexchange.psimi.solr.complex.ComplexFieldNames;
import uk.ac.ebi.intact.dataexchange.psimi.solr.complex.ComplexInteractor;
+import uk.ac.ebi.intact.dataexchange.psimi.solr.complex.ComplexInteractorXref;
import uk.ac.ebi.intact.dataexchange.psimi.solr.ontology.OntologySearcher;
import uk.ac.ebi.intact.dataexchange.psimi.solr.util.ComplexUtils;
import uk.ac.ebi.intact.model.*;
@@ -20,6 +21,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
+import java.util.stream.Collectors;
/**
* Complex Field Enricher is such as Ontoly Field Enricher
@@ -38,8 +40,12 @@ public class ComplexSolrEnricher extends AbstractOntologyEnricher{
private PsimiTabReader reader;
private final ObjectMapper mapper;
- private final static String EXP_EVIDENCE="exp-evidence";
- private final static String INTACT_SECONDARY="intact-secondary";
+ private static final String EXP_EVIDENCE="exp-evidence";
+ private static final String INTACT_SECONDARY="intact-secondary";
+
+ // Currently, we are only storing interactors xrefs from the following databases:
+ // - panther (MI:0702)
+ private static final Set INTERACTOR_XREF_DATABASE_MIS_TO_STORE = Set.of("MI:0702");
/*************************/
/* Constructor */
@@ -202,6 +208,16 @@ public void enrichSerialisedParticipant(Component participant, SolrInputDocument
Interactor interactor = participant.getInteractor();
String identifier = ComplexUtils.getParticipantIdentifier(participant);
+ List xrefs = interactor.getXrefs().stream()
+ .filter(xref -> xref.getCvDatabase() != null)
+ .filter(xref -> INTERACTOR_XREF_DATABASE_MIS_TO_STORE.contains(xref.getCvDatabase().getIdentifier()))
+ .map(xref -> new ComplexInteractorXref(
+ xref.getPrimaryId(),
+ ComplexUtils.getIdentifierLink(xref, xref.getPrimaryId()),
+ xref.getCvDatabase() != null ? xref.getCvDatabase().getShortLabel() : null,
+ xref.getCvXrefQualifier() != null ? xref.getCvXrefQualifier().getShortLabel() : null))
+ .collect(Collectors.toList());
+
ComplexInteractor complexInteractor = new ComplexInteractor(
identifier,
ComplexUtils.getParticipantIdentifierLink(participant, identifier),
@@ -209,7 +225,8 @@ public void enrichSerialisedParticipant(Component participant, SolrInputDocument
interactor.getFullName(),
ComplexUtils.getParticipantStoichiometry(participant),
interactor.getCvInteractorType().getFullName(),
- findInteractorOrganismName(interactor));
+ findInteractorOrganismName(interactor),
+ xrefs);
String serialisedInteractor = mapper.writeValueAsString(complexInteractor);
solrDocument.addField(ComplexFieldNames.SERIALISED_INTERACTION, serialisedInteractor);
}
diff --git a/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/util/ComplexUtils.java b/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/util/ComplexUtils.java
index aad8cd0ef..0f4bed800 100644
--- a/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/util/ComplexUtils.java
+++ b/psimi/psimitab/intact-solr/src/main/java/uk/ac/ebi/intact/dataexchange/psimi/solr/util/ComplexUtils.java
@@ -59,6 +59,10 @@ public static String getParticipantIdentifier(Component participant) {
public static String getParticipantIdentifierLink(Component participant, String identifier) {
InteractorXref xref = getParticipantIdentifierXref(participant);
+ return getIdentifierLink(xref, identifier);
+ }
+
+ public static String getIdentifierLink(InteractorXref xref, String identifier) {
if (xref != null && xref.getParent() != null) {
Annotation annot = AnnotatedObjectUtils.findAnnotationByTopicMiOrLabel(xref.getCvDatabase(), SEARCH_MI);
if (annot == null) {