Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add convenience methods for finding rows by name #641

Open
wants to merge 1 commit into
base: development
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import javax.annotation.Nullable;
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;

/**
* Base class for ExpressionDataMatrix implementations.
Expand Down Expand Up @@ -129,6 +130,14 @@ public BioAssayDimension getBestBioAssayDimension() {

}

@Override
public Collection<Integer> findRowsByName( String name ) {
return rowElementMap.entrySet().stream()
.filter( entry -> entry.getKey().getName().equals( name ) )
.map( entry -> entry.getValue() )
.collect( Collectors.toList() );
ppavlidis marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
public BioAssayDimension getBioAssayDimension( CompositeSequence designElement ) {

Expand Down Expand Up @@ -258,7 +267,7 @@ void addToRowMaps( Integer row, CompositeSequence designElement ) {
* For example, in the following diagram "-" indicates a biomaterial, while "*" indicates a bioassay. Each row of
* "*" indicates samples run on a different microarray design (a different bio assay material). In the examples we
* assume there is just a single biomaterial dimension.
*
*
* <pre>
* ---------------
* ***** -- only a few samples run on this platform
Expand All @@ -268,7 +277,7 @@ void addToRowMaps( Integer row, CompositeSequence designElement ) {
* <p>
* A simpler case:
* </p>
*
*
* <pre>
* ---------------
* ***************
Expand All @@ -278,23 +287,23 @@ void addToRowMaps( Integer row, CompositeSequence designElement ) {
* <p>
* A more typical and easy case (one microarray design used):
* </p>
*
*
* <pre>
* ----------------
* ****************
* </pre>
* <p>
* If every sample was run on two different array designs:
* </p>
*
*
* <pre>
* ----------------
* ****************
* ****************
* </pre>
* <p>
* Every sample was run on a different array design:
*
*
* <pre>
* -----------------------
* ******
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ public Object[] getColumn( BioAssay bioAssay ) {
throw new UnsupportedOperationException();
}

/**
 * Selecting rows by index is not supported by this matrix implementation.
 *
 * @param indices ignored
 * @throws UnsupportedOperationException always
 */
@Override
public Object[][] getRows( Collection<Integer> indices ) {
throw new UnsupportedOperationException();
}

@Override
public Object[] getColumn( Integer column ) {
throw new UnsupportedOperationException();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,18 @@ public ExpressionDataBooleanMatrix( Collection<? extends DesignElementDataVector
this.vectorsToMatrix( selectedVectors );
}

/**
 * Fetches the rows at the given indices, in the iteration order of the collection.
 *
 * @param indices row indices to fetch
 * @return one row per requested index, or null when indices is null or empty
 */
@Override
public Boolean[][] getRows( Collection<Integer> indices ) {
    boolean nothingRequested = ( indices == null ) || indices.isEmpty();
    if ( nothingRequested ) {
        return null;
    }

    Boolean[][] selected = new Boolean[indices.size()][];
    int next = 0;
    for ( Integer rowIndex : indices ) {
        selected[next++] = getRow( rowIndex );
    }
    return selected;
}

@Override
public int columns() {
return matrix.columns();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,19 @@ public Double[] getRow( Integer index ) {
return ArrayUtils.toObject( rawRow );
}


/**
 * Fetches the rows at the given indices, in the iteration order of the collection.
 *
 * @param indices row indices to fetch
 * @return one row per requested index, or null when indices is null or empty
 */
@Override
public Double[][] getRows( Collection<Integer> indices ) {
    boolean nothingRequested = ( indices == null ) || indices.isEmpty();
    if ( nothingRequested ) {
        return null;
    }

    Double[][] selected = new Double[indices.size()][];
    int next = 0;
    for ( Integer rowIndex : indices ) {
        selected[next++] = getRow( rowIndex );
    }
    return selected;
}

@Override
public Double[][] getRows( List<CompositeSequence> designElements ) {
if ( designElements == null ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,16 @@ public static ExpressionDataDoubleMatrix filterAndLog2Transform( ExpressionDataD
if ( dmatrix.rows() < r ) {
ExpressionDataDoubleMatrixUtil.log.info( ( r - dmatrix.rows() ) + " rows removed due to too many identical values" );
}
}

/*
* As noted in https://github.com/PavlidisLab/GemmaCuration/issues/218, this filter can still do things
* that are counter-intuitive because we are using quantile-normalization. In some cases very highly expressed genes
* will be assigned multiple identical values, though they weren't identical in the raw data.
*
* Fixing this "once and for all" might mean making the filter less stringent at very high expression levels (like the top 1% of genes)
* or changing the approach.
*/
}
return dmatrix;

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,19 @@ public Integer[][] get( List<CompositeSequence> designElements, List<BioAssay> b
throw new UnsupportedOperationException();
}


/**
 * Fetches the rows at the given indices, in the iteration order of the collection.
 *
 * @param indices row indices to fetch
 * @return one row per requested index, or null when indices is null or empty
 */
@Override
public Integer[][] getRows( Collection<Integer> indices ) {
    boolean nothingRequested = ( indices == null ) || indices.isEmpty();
    if ( nothingRequested ) {
        return null;
    }

    Integer[][] selected = new Integer[indices.size()][];
    int next = 0;
    for ( Integer rowIndex : indices ) {
        selected[next++] = getRow( rowIndex );
    }
    return selected;
}

@Override
public Integer[] getColumn( BioAssay bioAssay ) {
int index = this.columnAssayMap.get( bioAssay );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,23 @@ public interface ExpressionDataMatrix<T> {
*/
T[][] getRows( List<CompositeSequence> designElements );

/**
 * Access a submatrix made up of the rows at the given row indices.
 * <p>
 * Not every implementation supports this; an implementation may throw
 * {@link UnsupportedOperationException} instead.
 *
 * @param rowIndices indices of the rows to select
 * @return T[][] or null if rowIndices is null or empty.
 */
T[][] getRows( Collection<Integer> rowIndices);
arteymix marked this conversation as resolved.
Show resolved Hide resolved


/**
 * Convenience function to locate the row indices whose CompositeSequence.name is an exact match for the given
 * name.
 *
 * @param name The CompositeSequence name to look for
 * @return collection of matching row indices (usually will just be one value)
 */
Collection<Integer> findRowsByName(String name);


/**
* @return true if any values are null or NaN (for Doubles); all other values are considered non-missing.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,20 @@ public String[][] get( List<CompositeSequence> designElements, List<BioAssay> bi
throw new UnsupportedOperationException();
}


/**
 * Fetches the rows at the given indices, in the iteration order of the collection.
 *
 * @param indices row indices to fetch
 * @return one row per requested index, or null when indices is null or empty
 */
@Override
public String[][] getRows( Collection<Integer> indices ) {
    boolean nothingRequested = ( indices == null ) || indices.isEmpty();
    if ( nothingRequested ) {
        return null;
    }

    String[][] selected = new String[indices.size()][];
    int next = 0;
    for ( Integer rowIndex : indices ) {
        selected[next++] = getRow( rowIndex );
    }
    return selected;
}


@Override
public String[] getColumn( BioAssay bioAssay ) {
int index = this.columnAssayMap.get( bioAssay );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ public void testConstructExpressionDataDoubleMatrix() {
log.debug( aRow );
}

assertEquals(2, expressionDataDoubleMatrix.getRows( Arrays.asList( new Integer[]{1,2} )).length );

Double[][] dMatrix = expressionDataDoubleMatrix.getRawMatrix();
assertEquals( dMatrix.length, 200 );
assertEquals( dMatrix[0].length, 59 );
Expand Down