Skip to content

Commit

Permalink
add pept2filtered without filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
tibvdm committed Apr 22, 2024
1 parent 3e73746 commit 3e0b7b5
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 75 deletions.
61 changes: 8 additions & 53 deletions app/controllers/mpa/pept2filtered_controller.rb
Original file line number Diff line number Diff line change
@@ -1,64 +1,19 @@
# Controller behind the MPA pept2filtered action (app/controllers/mpa/pept2filtered_controller.rb).
# NOTE(review): this span is a rendered diff hunk (@@ -1,64 +1,19 @@) whose +/- markers and indentation
# were stripped, so lines of the previous and of the new implementation are interleaved. It is not
# runnable as shown; the comments below describe each visible statement on its own.
class Mpa::Pept2filteredController < Mpa::MpaController
# Reads the request parameters and assigns the instance variables used by the JSON template.
def pept2filtered
# Request parameters; each falls back to a default when the parameter is absent.
peptides = params[:peptides] || []
missed = params[:missed].nil? ? false : params[:missed]
equate_il = params[:equate_il].nil? ? true : params[:equate_il]
cutoff = params[:cutoff] || 1000
# missed = params[:missed] || false
# Requested taxon ids, converted to integers (empty list when :taxa is absent).
taxa_filter_ids = (params[:taxa] || []).map(&:to_i)

# Same equate_il lookup as the local above, stored in an instance variable.
@equate_il = params[:equate_il].nil? ? true : params[:equate_il]

# Accumulators: UniProt entries per sequence string, and the ids of all matched UniProt entries.
@seq_entries = {}
uniprot_ids = []

# Sequence strings among the requested peptides that have fewer than `cutoff` joined peptide rows.
peptides_under_cutoff = Sequence
.joins(:peptides)
.where(sequence: peptides)
.group('sequences.id')
.having('count(peptides.id) < ?', cutoff)
.pluck(:sequence)

# Handle the requested taxon ids in slices of 5000.
taxa_filter_ids.each_slice(5000) do |taxa_slice|
# For all given taxon id's, filter out those that are invalid according to Unipept's filter
# i.e. these taxa are typically rubbish (for example those that end in bacterium and are classified at the
# species rank).
taxa_slice = Taxon
.where(id: taxa_slice)
.where(valid_taxon: 1)
.pluck(:id)

# If none of the taxa in this slice are valid, skip this iteration of the loop and continue with the next one.
next if taxa_slice.empty?

# Sequences under the cutoff that have a peptide whose UniProt entry has a taxon_id in this slice;
# the peptides and their UniProt entries are loaded together with the sequences.
sequence_subset = Sequence
.joins(peptides: [:uniprot_entry])
.includes(peptides: [:uniprot_entry])
.where(sequence: peptides_under_cutoff)
.where(uniprot_entry: { taxon_id: taxa_slice })
.uniq
# @response holds an empty Hash until the search result further down is assigned to it.
@response = Hash.new

# Collect, per sequence string, the distinct UniProt entries whose taxon is among the requested ids.
sequence_subset.each do |seq_info|
@seq_entries[seq_info.sequence] = [] unless @seq_entries.key?(seq_info.sequence)

# The select checks against all requested ids (taxa_filter_ids), not only the validated slice.
@seq_entries[seq_info.sequence] += seq_info
.peptides
.map(&:uniprot_entry)
.select { |e| taxa_filter_ids.include? e.taxon_id }

@seq_entries[seq_info.sequence].uniq!
end

# Record the UniProt entry ids of every peptide of the sequences matched in this slice.
uniprot_ids += sequence_subset.map { |s| s.peptides.map(&:uniprot_entry_id) }.flatten.uniq
# Nothing to look up when no peptides were supplied.
if peptides.empty?
return
end

# Drop ids that were collected more than once.
uniprot_ids = uniprot_ids.uniq

# GO cross references of the collected UniProt entries.
@go_terms = GoCrossReference
.where(uniprot_entry_id: uniprot_ids)

# EC cross references of the collected UniProt entries.
@ec_numbers = EcCrossReference
.where(uniprot_entry_id: uniprot_ids)
# Requested taxon ids, converted to integers (empty list when :taxa is absent).
taxa_filter_ids = (params[:taxa] || []).map(&:to_i)

# InterPro cross references of the collected UniProt entries.
@ipr_entries = InterproCrossReference
.where(uniprot_entry_id: uniprot_ids)
# Request the suffix array search service
# NOTE(review): `search` is not defined in this view of the file; presumably it is inherited from
# Mpa::MpaController. Confirm its return shape; the result is de-duplicated with `uniq` here.
@response = search(peptides, equate_il).uniq
end
end
27 changes: 5 additions & 22 deletions app/views/mpa/pept2filtered/pept2filtered.json.jbuilder
Original file line number Diff line number Diff line change
@@ -1,25 +1,8 @@
# Jbuilder template that renders the "peptides" array of the pept2filtered JSON response.
# NOTE(review): this span is a rendered diff hunk (@@ -1,25 +1,8 @@) whose +/- markers and indentation
# were stripped, so two implementations of the same template are interleaved; not runnable as shown.
# Variant iterating @seq_entries (sequence string => UniProt entries): emits the sequence and the
# distinct taxon ids of its entries.
json.peptides @seq_entries.each do |sequence, uniprot_entries|
json.sequence sequence
json.taxa uniprot_entries.map(&:taxon_id).uniq
# Variant iterating @response: each item is read through the string keys "sequence", "taxa" and "fa".
json.peptides @response do |peptide|
json.sequence peptide["sequence"]
json.taxa peptide["taxa"]
json.fa do
# GO term codes of the cross references whose uniprot_entry_id belongs to this sequence's entries;
# empty codes and duplicates are dropped.
json.go_terms(@go_terms
.select { |go| uniprot_entries.map(&:id).include? go.uniprot_entry_id }
.map(&:go_term_code)
.reject(&:empty?)
.uniq)

# EC number codes, selected the same way, each prefixed with "EC:".
json.ec_numbers(@ec_numbers
.select { |ec| uniprot_entries.map(&:id).include? ec.uniprot_entry_id }
.map(&:ec_number_code)
.reject(&:empty?)
.uniq
.map { |ec| "EC:#{ec}" })

# InterPro entry codes, selected the same way; the first 'IPR' in each code is rewritten to 'IPR:'.
json.interpro_entries(@ipr_entries
.select { |ipr| uniprot_entries.map(&:id).include? ipr.uniprot_entry_id }
.map(&:interpro_entry_code)
.reject(&:empty?)
.uniq
.map { |ipr| ipr.sub('IPR', 'IPR:') })
# "counts" and "data" are copied unchanged from the peptide's "fa" entry.
json.counts peptide["fa"]["counts"]
json.data peptide["fa"]["data"]
end
end

0 comments on commit 3e0b7b5

Please sign in to comment.