Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove unnecessary returns #273

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/source/getting-started/complete-tour.rst
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,7 @@ the Ranker method.

.. code-block:: python

myranker.mentions_to_wikidata = myranker.load_resources()
myranker.load_resources()

.. note::

Expand Down Expand Up @@ -1056,7 +1056,7 @@ of the Linker method.

.. code-block:: python

mylinker.linking_resources = mylinker.load_resources()
mylinker.load_resources()

.. note::

Expand Down
4 changes: 2 additions & 2 deletions examples/train_use_deezy_model_1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@
"outputs": [],
"source": [
"# Load the resources:\n",
"myranker.mentions_to_wikidata = myranker.load_resources()"
"myranker.load_resources()"
]
},
{
Expand Down Expand Up @@ -177,7 +177,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.9.17"
},
"orig_nbformat": 4
},
Expand Down
4 changes: 2 additions & 2 deletions examples/train_use_deezy_model_2.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@
"outputs": [],
"source": [
"# Load the resources:\n",
"myranker.mentions_to_wikidata = myranker.load_resources()"
"myranker.load_resources()"
]
},
{
Expand Down Expand Up @@ -173,7 +173,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.9.17"
},
"orig_nbformat": 4
},
Expand Down
4 changes: 2 additions & 2 deletions examples/train_use_deezy_model_3.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
"outputs": [],
"source": [
"# Load the resources:\n",
"myranker.mentions_to_wikidata = myranker.load_resources()"
"myranker.load_resources()"
]
},
{
Expand Down Expand Up @@ -168,7 +168,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.9.17"
},
"orig_nbformat": 4
},
Expand Down
5 changes: 1 addition & 4 deletions experiments/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,7 @@ def prepare_data(self) -> dict:
# Obtain candidates per sentence:
for sentence_id in tqdm(dMentionsPred):
pred_mentions_sent = dMentionsPred[sentence_id]
(
wk_cands,
self.myranker.already_collected_cands,
) = self.myranker.find_candidates(pred_mentions_sent)
wk_cands = self.myranker.find_candidates(pred_mentions_sent)
dCandidates[sentence_id] = wk_cands

# -------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions experiments/toponym_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,14 @@
# -----------------------------------------
# Ranker loading resources and training a model:
# Load the resources:
myranker.mentions_to_wikidata = myranker.load_resources()
myranker.load_resources()
# Train a DeezyMatch model if needed:
myranker.train()

# -----------------------------------------
# Linker loading resources:
# Load linking resources:
mylinker.linking_resources = mylinker.load_resources()
mylinker.load_resources()

# -----------------------------------------
# Prepare experiment:
Expand Down
12 changes: 4 additions & 8 deletions geoparser/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def __init__(
# Ranker loading resources and training a model:

# Load the resources:
self.myranker.mentions_to_wikidata = self.myranker.load_resources()
self.myranker.load_resources()

# Train a DeezyMatch model if needed:
self.myranker.train()
Expand All @@ -127,7 +127,7 @@ def __init__(
# Linker loading resources:

# Load linking resources:
self.mylinker.linking_resources = self.mylinker.load_resources()
self.mylinker.load_resources()

# Train a linking model if needed (it requires myranker to generate
# potential candidates to the training set):
Expand Down Expand Up @@ -223,9 +223,7 @@ def run_sentence(
rmentions = [{"mention": y["mention"]} for y in mentions]

# Perform candidate ranking:
wk_cands, self.myranker.already_collected_cands = self.myranker.find_candidates(
rmentions
)
wk_cands = self.myranker.find_candidates(rmentions)

mentions_dataset = dict()
mentions_dataset["linking"] = []
Expand Down Expand Up @@ -685,9 +683,7 @@ def run_candidate_selection(self, document_dataset: List[dict]) -> dict:
mentions = [{"mention": m} for m in mentions]

# Perform candidate ranking:
wk_cands, self.myranker.already_collected_cands = self.myranker.find_candidates(
mentions
)
wk_cands = self.myranker.find_candidates(mentions)
return wk_cands

def run_disambiguation(
Expand Down
48 changes: 24 additions & 24 deletions geoparser/ranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,18 @@ class Ranker:
)

>>> # Load resources
>>> ranker.mentions_to_wikidata = ranker.load_resources()
>>> ranker.load_resources()

>>> # Train the ranker (if applicable)
>>> ranker.train()

>>> # Perform candidate selection
>>> queries = ['London', 'Paraguay']
>>> candidates, already_collected = ranker.run(queries)
>>> candidates = ranker.run(queries)

>>> # Find candidates for mentions
>>> mentions = [{'mention': 'London'}, {'mention': 'Paraguay'}]
>>> mention_candidates, mention_already_collected = ranker.find_candidates(mentions)
>>> mention_candidates = ranker.find_candidates(mentions)

>>> # Print the results
>>> print("Candidate Selection Results:")
Expand Down Expand Up @@ -136,7 +136,7 @@ def __init__(
"overwrite_training": False,
"do_test": False,
},
already_collected_cands: Optional[dict] = dict(),
already_collected_cands: Optional[dict] = None,
):
"""
Initialize a Ranker object.
Expand All @@ -147,7 +147,11 @@ def __init__(
self.wikidata_to_mentions = wikidata_to_mentions
self.strvar_parameters = strvar_parameters
self.deezy_parameters = deezy_parameters
self.already_collected_cands = already_collected_cands

if already_collected_cands:
self.already_collected_cands = already_collected_cands
else:
self.already_collected_cands = dict()

def __str__(self) -> str:
"""
Expand All @@ -173,18 +177,10 @@ def __str__(self) -> str:

return s

def load_resources(self) -> dict:
def load_resources(self):
"""
Load the ranker resources.

Returns:
dict:
The loaded mentions-to-wikidata dictionary, which maps a
mention (e.g. ``"London"``) to the Wikidata entities that are
referred to by this mention on Wikipedia (e.g. ``Q84``,
``Q2477346``). The data also includes, for each entity, their
normalized "relevance", i.e. number of in-links across Wikipedia.

Note:
This method loads the mentions-to-wikidata and
wikidata-to-mentions dictionaries from the resources directory,
Expand All @@ -195,6 +191,12 @@ def load_resources(self) -> dict:
It filters the dictionaries to remove noise and updates the class
attributes accordingly.

The loaded mentions-to-wikidata dictionary, which maps a mention
(e.g. ``"London"``) to the Wikidata entities that are
referred to by this mention on Wikipedia (e.g. ``Q84``,
``Q2477346``). The data also includes, for each entity, their
normalized "relevance", i.e. number of in-links across Wikipedia.

The method also initialises ``pandarallel`` if needed by the
candidate ranking method (if the ``method`` set in the initialiser
of the ``Ranker`` was set to "partialmatch" or "levenshtein").
Expand Down Expand Up @@ -254,8 +256,6 @@ def load_resources(self) -> dict:
pandarallel.initialize(nb_workers=10)
os.environ["TOKENIZERS_PARALLELISM"] = "true"

return self.mentions_to_wikidata

def train(self) -> None:
"""
Training a DeezyMatch model. The training will be skipped if the model
Expand Down Expand Up @@ -466,7 +466,7 @@ def partial_match(self, queries: List[str], damlev: bool) -> Tuple[dict, dict]:

self.already_collected_cands[query] = mention_df

return candidates, self.already_collected_cands
return candidates

def deezy_on_the_fly(self, queries: List[str]) -> dict:
"""
Expand All @@ -490,7 +490,7 @@ def deezy_on_the_fly(self, queries: List[str]) -> Tuple[dict, dict]:

Example:
>>> ranker = Ranker(...)
>>> ranker.mentions_to_wikidata = ranker.load_resources()
>>> ranker.load_resources()
>>> queries = ['London', 'Shefrield']
>>> candidates = ranker.deezy_on_the_fly(queries)
>>> print(candidates)
Expand All @@ -515,7 +515,7 @@ def deezy_on_the_fly(self, queries: List[str]) -> Tuple[dict, dict]:
dm_output = self.deezy_parameters["dm_output"]

# first we fill in the perfect matches and already collected queries
cands_dict, self.already_collected_cands = self.perfect_match(queries)
cands_dict = self.perfect_match(queries)

# the rest go through
remainers = [x for x, y in cands_dict.items() if len(y) == 0]
Expand Down Expand Up @@ -565,7 +565,7 @@ def deezy_on_the_fly(self, queries: List[str]) -> Tuple[dict, dict]:

self.already_collected_cands[row["query"]] = returned_cands

return cands_dict, self.already_collected_cands
return cands_dict

def run(self, queries: List[str]) -> dict:
"""
Expand All @@ -583,9 +583,9 @@ def run(self, queries: List[str]) -> Tuple[dict, dict]:

Example:
>>> myranker = Ranker(method="perfectmatch", ...)
>>> myranker.mentions_to_wikidata = myranker.load_resources()
>>> myranker.load_resources()
>>> queries = ['London', 'Barcelona', 'Bologna']
>>> candidates, already_collected = myranker.run(queries)
>>> candidates = myranker.run(queries)
>>> print(candidates)
{'London': {'London': 1.0}, 'Barcelona': {'Barcelona': 1.0}, 'Bologna': {'Bologna': 1.0}}
>>> print(already_collected)
Expand Down Expand Up @@ -674,7 +674,7 @@ def find_candidates(self, mentions: List[dict]) -> Tuple[dict, dict]:
queries = list(set([mention["mention"] for mention in mentions]))

# Pass the mentions to :py:meth:`geoparser.ranking.Ranker.run`
cands, self.already_collected_cands = self.run(queries)
cands = self.run(queries)

# Get Wikidata candidates
wk_cands = dict()
Expand Down Expand Up @@ -702,4 +702,4 @@ def find_candidates(self, mentions: List[dict]) -> Tuple[dict, dict]:
"Candidates": found_cands,
}

return wk_cands, self.already_collected_cands
return wk_cands
8 changes: 4 additions & 4 deletions tests/test_disambiguation.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,14 @@ def test_train():
# -----------------------------------------
# Ranker loading resources and training a model:
# Load the resources:
myranker.mentions_to_wikidata = myranker.load_resources()
myranker.load_resources()
# Train a DeezyMatch model if needed:
myranker.train()

# -----------------------------------------
# Linker loading resources:
# Load linking resources:
mylinker.linking_resources = mylinker.load_resources()
mylinker.load_resources()
# Train a linking model if needed (it requires myranker to generate potential
# candidates to the training set):
mylinker.rel_params["ed_model"] = mylinker.train_load_model(myranker)
Expand Down Expand Up @@ -236,14 +236,14 @@ def test_load_eval_model():
# -----------------------------------------
# Ranker loading resources and training a model:
# Load the resources:
myranker.mentions_to_wikidata = myranker.load_resources()
myranker.load_resources()
# Train a DeezyMatch model if needed:
myranker.train()

# -----------------------------------------
# Linker loading resources:
# Load linking resources:
mylinker.linking_resources = mylinker.load_resources()
mylinker.load_resources()
# Train a linking model if needed (it requires myranker to generate potential
# candidates to the training set):
mylinker.rel_params["ed_model"] = mylinker.train_load_model(myranker)
Expand Down
8 changes: 4 additions & 4 deletions tests/test_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ def test_load_data():
myner.train()
myner.pipe = myner.create_pipeline()

myranker.mentions_to_wikidata = myranker.load_resources()
myranker.load_resources()
myranker.train()

mylinker.linking_resources = mylinker.load_resources()
mylinker.load_resources()

# --------------------------------------
# Instantiate the experiment:
Expand Down Expand Up @@ -172,10 +172,10 @@ def test_apply():
myner.train()
myner.pipe = myner.create_pipeline()

myranker.mentions_to_wikidata = myranker.load_resources()
myranker.load_resources()
myranker.train()

mylinker.linking_resources = mylinker.load_resources()
mylinker.load_resources()

# --------------------------------------
# Instantiate the experiment:
Expand Down
Loading