Skip to content

Commit

Permalink
Merge pull request #591 from tira-io/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
mam10eks authored Dec 7, 2023
2 parents 1201e4f + aaf4ffd commit 48e4903
Show file tree
Hide file tree
Showing 9 changed files with 646 additions and 456 deletions.
4 changes: 2 additions & 2 deletions application/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
.PHONY: help setup run-develop build-docker clean

VERSION_APPLICATION=0.0.96
VERSION_GRPC=0.0.96
VERSION_APPLICATION=0.0.97
VERSION_GRPC=0.0.97

.DEFAULT: help
help:
Expand Down
146 changes: 128 additions & 18 deletions application/src/tira/endpoints/data_api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import logging
import json
import textwrap

from django.core.exceptions import BadRequest

from tira.forms import *
import tira.tira_model as model
from tira.checks import check_permissions, check_resources_exist, check_conditional_permissions
Expand Down Expand Up @@ -434,36 +438,142 @@ def add_registration(request, context, task_id, vm_id):
return JsonResponse({'status': 0, "message": f"Encountered an exception: {e}"}, status=HTTPStatus.INTERNAL_SERVER_ERROR)


def expand_links(component):
    """Attach derived hyperlinks to a component dictionary.

    The resulting link list starts with the component's own ``links`` (if
    any) and is extended by:

    * an ``ir_datasets`` link when ``ir_datasets_id`` is set. For an id that
      contains ``/`` (e.g. ``"disks45/nocr/trec-robust-2004"``) the part before
      the first slash selects the page (``disks45.html``) and the full id is
      used as URL fragment; an id without ``/`` links to the page itself.
    * a "Submission in TIREx" link when ``tirex_submission_id`` is set.

    The ``links`` key is only written when at least one link exists. The list
    stored there is a new list, so the list originally referenced by
    ``component['links']`` is never modified; ``component`` itself is updated
    in place.

    Args:
        component: Dictionary describing a single component.

    Returns:
        The same ``component`` dictionary that was passed in.
    """
    # ``or []`` also covers an explicit ``links: None`` entry, which would
    # otherwise raise a TypeError when copied.
    links = list(component.get('links') or [])

    ir_datasets_id = component.get('ir_datasets_id')
    if ir_datasets_id:
        base, separator, _ = ir_datasets_id.partition('/')
        # Only nested dataset ids get an anchor on the page of their base dataset.
        fragment = f'#{ir_datasets_id}' if separator else ''
        links.append({
            'display_name': 'ir_datasets',
            'href': f'https://ir-datasets.com/{base}.html{fragment}',
            'target': '_blank',
        })

    tirex_submission_id = component.get('tirex_submission_id')
    if tirex_submission_id:
        links.append({
            'display_name': 'Submission in TIREx',
            'href': f'/submissions/{tirex_submission_id}',
        })

    if links:
        component['links'] = links

    return component


def flatten_components(components):
    """Turn a mapping of components into a list of component dictionaries.

    Each ``identifier -> data`` entry becomes a new dictionary holding the
    identifier under ``'identifier'`` plus all keys of ``data``. Nested
    ``'components'`` mappings are converted recursively, and every entry is
    passed through ``expand_links`` before it is collected.

    Args:
        components: Mapping from component identifier to its properties.

    Returns:
        List of component dictionaries in the iteration order of ``components``.
    """
    result = []
    for identifier, data in components.items():
        # Copy first so the configured mapping is not touched; keys of
        # ``data`` (including 'tirex_submission_id') are carried over as-is.
        entry = {'identifier': identifier}
        entry.update(data)

        if 'components' in entry:
            entry['components'] = flatten_components(data['components'])

        result.append(expand_links(entry))

    return result


@add_context
def tirex_components(request, context):
    """Return the configured TIREx components as a JSON response.

    The component mapping from ``settings.TIREX_COMPONENTS`` is converted with
    ``flatten_components`` and stored in ``context['tirex_components']``.

    Args:
        request: The incoming request (not inspected here).
        context: Context dictionary; presumably supplied by ``add_context``.

    Returns:
        JsonResponse with ``status`` 0 and the populated ``context``.
    """
    # Only the flattened list is exposed; the raw settings mapping that was
    # assigned here before was overwritten immediately and has been removed.
    context['tirex_components'] = flatten_components(settings.TIREX_COMPONENTS)
    return JsonResponse({'status': 0, 'context': context})

def flatten_tirex_components_to_id(obj, t=None):
    """Build a mapping from TIREx submission id to its component definition.

    The (nested) component structure is walked recursively. Every dictionary
    that carries a ``'tirex_submission_id'`` is registered under that id. Its
    ``'type'`` is set to the key of the top-level category it was found in,
    because the kind of processor (query processor, document processor, ...)
    is only encoded by that top-level category.

    NOTE: the component dictionaries are annotated in place (``'type'`` is
    written into the passed structure) and the returned mapping references
    those same dictionaries. If the same id occurs more than once, the entry
    visited last wins.

    Args:
        obj: The component structure; anything that is not a dictionary
            yields an empty mapping.
        t: Type to assign to components found in ``obj``. When falsy, the
            keys of ``obj`` are used as the type of their respective subtrees.

    Returns:
        Dictionary mapping each submission id to its component dictionary.
    """
    ret = {}

    if not isinstance(obj, dict):
        return ret

    if 'tirex_submission_id' in obj:
        obj['type'] = t
        ret[obj['tirex_submission_id']] = obj

    for key, value in obj.items():
        # Below the top level the type is inherited; at the top level the
        # category key itself becomes the type.
        ret.update(flatten_tirex_components_to_id(value, t if t else key))

    return ret

# Lookup table from TIREx submission id to the component definition (with its
# top-level category stored under 'type'); built once when the module loads.
TIREX_ID_TO_COMPONENT = flatten_tirex_components_to_id(settings.TIREX_COMPONENTS)


def get_snippet_to_run_components(request):
    """Return an example code snippet for running a TIREx component.

    The component is selected through the ``component`` query parameter, which
    must be a key of ``TIREX_ID_TO_COMPONENT``. Depending on the component
    type a different snippet is produced; for every type except ``dataset``
    the snippet is preceded by a default dataset initialization.

    Args:
        request: Request whose ``GET`` parameters contain ``component``.

    Returns:
        JsonResponse with ``status`` 0 and ``context.snippet`` on success, or
        ``status`` 1 and a ``message`` when the component or its type is
        unknown.
    """
    component_key = request.GET.get('component')

    if component_key not in TIREX_ID_TO_COMPONENT:
        return JsonResponse({'status': 1, 'message': f'Component "{component_key}" not found.'})

    component = TIREX_ID_TO_COMPONENT[component_key]
    component_type = component['type']
    tirex_submission_id = component.get('tirex_submission_id')
    dataset_initialization = textwrap.dedent('''
        # You can replace Robust04 with other datasets
        dataset = pt.get_dataset("irds:disks45/nocr/trec-robust-2004")
        ''').strip()
    snippet = ''

    if component_type == 'dataset':
        # A dataset component brings its own dataset, so no default preamble.
        dataset_initialization = ''
        ir_datasets_id = component.get('ir_datasets_id')
        if ir_datasets_id:
            snippet = f'''
                dataset = pt.get_dataset('irds:{ir_datasets_id}')
                indexer = pt.IterDictIndexer('./index')
                indexref = indexer.index(dataset.get_corpus_iter())
                '''
        else:
            # Fall back to the requested id if no display name is configured.
            display_name = component.get('display_name', component_key)
            snippet = f'''
                def get_corpus_iter():
                    # Iterate over the {display_name} corpus
                    corpus = ...
                    for doc in corpus:
                        yield {{'docno': doc.docno, 'text': doc.content}}
                indexer = pt.IterDictIndexer('./index')
                indexref = indexer.index(get_corpus_iter())
                '''
    elif component_type == 'document_processing':
        if tirex_submission_id:
            snippet = f'''
                transformed_docs = tira.pt.transform_documents('{tirex_submission_id}', dataset)
                '''
    elif component_type == 'query_processing':
        if tirex_submission_id:
            snippet = f'''
                topics = dataset.get_topics(variant='title')
                transformed_queries = tira.pt.transform_queries('{tirex_submission_id}', topics)
                '''
    elif component_type in ('retrieval', 'reranking'):
        if tirex_submission_id:
            # NOTE(review): the emitted code refers to `dataset_id`, which the
            # preamble does not define (it defines `dataset`) - confirm intent.
            snippet = f'''
                run = tira.pt.from_retriever_submission('{tirex_submission_id}', dataset=dataset_id)
                '''
    else:
        # The error response has to be returned; otherwise an empty snippet
        # would be reported as success.
        return JsonResponse({'status': 1, 'message': f'Component type "{component_type}" does not exist...'})

    snippet = textwrap.dedent(snippet).strip()
    # Join only the non-empty parts so that no dangling newline is produced.
    code = '\n'.join(part for part in (dataset_initialization, snippet) if part)

    return JsonResponse({'status': 0, 'context': {'snippet': code}})

Expand Down
29 changes: 19 additions & 10 deletions application/src/tira/frontend-vuetify/src/IrComponents.vue
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,16 @@
<v-list-item v-for="link in vectorizedComponents[index][i-1].links">
<v-list-item-title><a :href="link.href" :target="link.target">{{ link.display_name }}</a></v-list-item-title>
</v-list-item>
<v-list-item v-if="vectorizedComponents[index][i-1].code">
<v-list-item v-if="vectorizedComponents[index][i-1].tirex_submission_id">
<v-dialog>
<template v-slot:activator="{ props }">
<v-list-item-title v-bind="props" class="show-code-button">Show code</v-list-item-title>
<v-list-item-title v-bind="props" class="show-code-button" @click="fetch_code(index, i-1)">Show code</v-list-item-title>
</template>
<template v-slot:default="{ isActive }">
<v-card class="bg-grey-darken-3">
<v-card-text content="code">
<code-snippet :title="'Example code snippet for ' + vectorizedComponents[index][i-1]?.display_name" :code="vectorizedComponents[index][i-1].code" expand_message=""/>
<loading v-if="!code" loading="true"/>
<code-snippet v-if="code" :title="'Example code snippet for ' + vectorizedComponents[index][i-1]?.display_name" :code="code" expand_message=""/>
</v-card-text>

<v-card-actions>
Expand Down Expand Up @@ -85,6 +86,7 @@ import {compareArrays, extractComponentTypesFromCurrentUrl, extractFocusTypesFro
import CodeSnippet from "@/components/CodeSnippet.vue";
interface Component {
identifier: string;
display_name: string;
components?: Component[];
links?: { display_name: string; href: string; target: string }[];
Expand All @@ -103,8 +105,9 @@ export default {
max_width: 1500,
loading: true,
tirex_components: [
{'display_name': 'loading', 'components': [{'display_name': 'loading'}], 'links': [{'display_name': '.', 'href': '.', 'target': '.'}]},
{'identifier': 'loading', 'display_name': 'loading', 'components': [{'identifier': 'loading', 'display_name': 'loading'}], 'links': [{'display_name': '.', 'href': '.', 'target': '.'}], 'tirex_submission_id': null},
],
code: '',
colors: {
'Dataset': 'green', 'Document Processing': 'yellow-lighten-1',
'Query Processing': 'yellow-darken-4', 'Retrieval': 'cyan-lighten-1',
Expand Down Expand Up @@ -148,6 +151,11 @@ export default {
return componentSet;
},
/**
 * Loads the example code snippet for the component at grid position
 * (index, i) and stores it in `this.code`.
 *
 * `this.code` is cleared first so the dialog shows the loading indicator
 * until the response has arrived.
 */
fetch_code(index: number, i: number) {
  this.code = ''
  const submissionId = this.vectorizedComponents[index][i].tirex_submission_id
  // Submission ids contain '/' (and may contain other reserved characters),
  // so they must be encoded before being placed into the query string.
  get('/api/tirex-snippet?component=' + encodeURIComponent(String(submissionId)))
    // An error response carries no `context`; avoid throwing inside the promise.
    .then((message) => {this.code = message?.context?.snippet ?? ''})
},
/** Returns the colour configured for the category `c`, or "grey" if there is none. */
colorOfComponent(c:string) : string {
  const configured = this.colors[c]
  return configured ?? "grey"
},
Expand All @@ -172,8 +180,8 @@ export default {
/** A component is collapsed unless its display name is listed in the expanded entries. */
is_collapsed(component:any) {
  const expandedEntries = this.computed_expanded_entries
  return expandedEntries.includes(component.display_name) === false
},
filtered_sub_components(component:any) : {display_name: string, subItems: number, pos: number, links: any[], focus_type: string|undefined|null, component_type: string|undefined|null}[] {
let ret: {display_name: string, subItems: number, pos: number, links: any[], focus_type: string|undefined|null, component_type: string|undefined|null}[] = []
filtered_sub_components(component:any) : {display_name: string, subItems: number, pos: number, links: any[], focus_type: string|undefined|null, component_type: string|undefined|null, tirex_submission_id: string|undefined|null}[] {
let ret: {display_name: string, subItems: number, pos: number, links: any[], focus_type: string|undefined|null, component_type: string|undefined|null, tirex_submission_id: string|undefined|null}[] = []
if (this.is_collapsed(component) || !component['components']) {
return ret
Expand All @@ -188,6 +196,7 @@ export default {
'links': c.hasOwnProperty('links') ? c['links'] : null,
'focus_type': c.hasOwnProperty('focus_type') ? c['focus_type'] : null,
'component_type': c.hasOwnProperty('component_type') ? c['component_type'] : null,
'tirex_submission_id': c['tirex_submission_id']
})
for (let sub_c of this.filtered_sub_components(c)) {
Expand All @@ -198,6 +207,7 @@ export default {
'links': sub_c['links'],
'focus_type': sub_c.hasOwnProperty('focus_type') ? sub_c['focus_type'] : null,
'component_type': sub_c.hasOwnProperty('component_type') ? sub_c['component_type'] : null,
'tirex_submission_id': sub_c['tirex_submission_id']
})
}
}
Expand Down Expand Up @@ -284,8 +294,7 @@ export default {
ret = ret.concat(terms[i]);
}
}
}
}
return ret
},
Expand All @@ -306,7 +315,7 @@ export default {
let c = this.tirex_components[i]
// we set row 0, aka the headers
ret[0][i] = {'display_name': c.display_name, 'links': c.links, 'collapsed': this.is_collapsed(c), 'subItems':this.countSubItems(c), 'hide': false}
ret[0][i] = {'display_name': c.display_name, 'links': c.links, 'collapsed': this.is_collapsed(c), 'subItems':this.countSubItems(c), 'hide': false, 'tirex_submission_id': ''}
// we loop through each categories subcomponents and enrich them with information needed for the grid display
for (let subcomponent of this.filtered_sub_components(c)) {
Expand All @@ -320,7 +329,7 @@ export default {
'links': subcomponent.links,
'collapsed': this.is_collapsed(subcomponent),
'hide': this.hide_component(subcomponent),
'code': "this will show example code for executing this components method"
'tirex_submission_id': subcomponent['tirex_submission_id'] || null
}
}
}
Expand Down
1 change: 1 addition & 0 deletions application/src/tira/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
path('api/registration/add_registration/<str:vm_id>/<str:task_id>', data_api.add_registration, name='add_registration'),
path('api/submissions-for-task/<str:task_id>/<str:user_id>/<str:submission_type>', data_api.submissions_for_task, name="submissions_for_task"),
path('api/tirex-components', data_api.tirex_components, name='tirex_components'),
path('api/tirex-snippet', data_api.get_snippet_to_run_components, name='get_snippet_to_run_components'),
path('api/snippets-for-tirex-components', data_api.get_snippet_to_run_components, name='get_snippet_to_run_components'),
path('api/re-ranking-datasets/<str:task_id>', data_api.reranking_datasets, name='reranking_datasets'),
path('api/submissions-of-user/<str:vm_id>', data_api.submissions_of_user, name='submissions_of_user'),
Expand Down
Loading

0 comments on commit 48e4903

Please sign in to comment.