Skip to content

Commit

Permalink
add searcher & tools modules with function names from findpapers #41
Browse files Browse the repository at this point in the history
  • Loading branch information
Kashyap Maheshwari committed Jun 9, 2023
1 parent 899ecb0 commit c42f9af
Show file tree
Hide file tree
Showing 18 changed files with 845 additions and 0 deletions.
41 changes: 41 additions & 0 deletions findpapers/searchers/acm_searcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import datetime
import logging
from typing import Optional
from urllib.parse import urlencode

from lxml import html

import findpapers.utils.common_utils as common_util
import findpapers.utils.query_utils as query_util
from findpapers.models.paper import Paper
from findpapers.models.publication import Publication
from findpapers.models.search import Search
from findpapers.utils.requests_utils import DefaultSession

DATABASE_LABEL = "ACM"
BASE_URL = "https://dl.acm.org"
MAX_ENTRIES_PER_PAGE = 100


def _get_search_url(search: Search, start_record: Optional[int] = 0) -> str:
    """Return the ACM DL search URL for *search*, paged from *start_record*.

    Stub: not implemented yet — presumably mirrors findpapers' ACM searcher.
    """
    pass


def _get_result(search: Search, start_record: Optional[int] = 0) -> dict: # pragma: no cover
    """Fetch one page of ACM search results as a dict. Stub: not implemented yet."""
    pass


def _get_paper_page(url: str) -> html.HtmlElement: # pragma: no cover
    """Download and parse a paper's ACM page into an lxml element. Stub: not implemented yet."""
    pass


def _get_paper_metadata(doi: str) -> dict: # pragma: no cover
    """Retrieve paper metadata from ACM by DOI. Stub: not implemented yet."""
    pass


def _get_paper(paper_page: html.HtmlElement, paper_doi: str, paper_url: str) -> Paper:
    """Build a Paper model from a parsed ACM page. Stub: not implemented yet."""
    pass


def run(search: Search, pbar=None) -> None:
    """Entry point: run *search* against ACM, updating *pbar* if given.

    Stub: not implemented yet.
    """
    pass
44 changes: 44 additions & 0 deletions findpapers/searchers/arxiv_searcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import datetime
import logging
import math
import re
import time
from typing import Optional

import requests
import xmltodict
from lxml import html

import findpapers.utils.common_utils as common_util
import findpapers.utils.query_utils as query_util
from findpapers.models.paper import Paper
from findpapers.models.publication import Publication
from findpapers.models.search import Search
from findpapers.utils.requests_utils import DefaultSession

from findpapers.data.subject_area_by_key import SUBJECT_AREA_BY_KEY

DATABASE_LABEL = "arXiv"
BASE_URL = "http://export.arxiv.org"
MAX_ENTRIES_PER_PAGE = 200


def _get_search_url(search: Search, start_record: Optional[int] = 0) -> str:
    """Return the arXiv export API query URL for *search*, paged from *start_record*.

    Stub: not implemented yet.
    """
    pass


# NOTE(review): the original pragma sat on its own line above the def, where
# coverage.py excludes only that blank comment line; moved onto the def line.
def _get_api_result(search: Search, start_record: Optional[int] = 0) -> dict: # pragma: no cover
    """Call the arXiv API and return the parsed response. Stub: not implemented yet."""
    pass


def _get_publication(paper_entry: dict) -> Publication:
    """Build a Publication model from an arXiv entry dict. Stub: not implemented yet."""
    pass


def _get_paper(paper_entry: dict, paper_publication_date: datetime.date, publication: Publication) -> Paper:
    """Build a Paper model from an arXiv entry dict. Stub: not implemented yet."""
    pass


def run(search: Search, pbar=None) -> None:
    """Entry point: run *search* against arXiv, updating *pbar* if given.

    Stub: not implemented yet.
    """
    pass
8 changes: 8 additions & 0 deletions findpapers/searchers/biorxiv_searcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import findpapers.searchers.rxiv_searcher as rxiv_searcher
from findpapers.models.search import Search

DATABASE_LABEL = "bioRxiv"


def run(search: Search, pbar=None) -> None:
    """Entry point: run *search* against bioRxiv.

    Stub: not implemented yet — presumably delegates to ``rxiv_searcher.run``
    with ``DATABASE_LABEL`` ("bioRxiv"); confirm once implemented.
    """
    pass
56 changes: 56 additions & 0 deletions findpapers/searchers/cross_ref_searcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import logging
from datetime import date

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

from findpapers.models.paper import Paper
from findpapers.models.publication import Publication
from findpapers.models.search import Search

CROSSREF_API = "https://api.crossref.org/works/"
DATABASE_LABEL = "CR"  # short for CrossRef (comment previously said "opencitations" — copy/paste slip)
SPLIT_AUTHOR = "; "


class DateConverter(object):
    """Convert a Crossref ``date-parts`` list into a ``datetime.date``.

    Crossref encodes dates as 1-3 integers: ``[year]``, ``[year, month]``
    or ``[year, month, day]``. Missing month/day components default to 1.
    The result is exposed as the ``date`` attribute.
    """

    def __init__(self, date_parts: list) -> None:
        """Build the converter and immediately compute ``self.date``.

        Args:
            date_parts: 1-3 integers in Crossref "date-parts" order
                (year, then optional month, then optional day).

        Raises:
            ValueError: if *date_parts* does not contain 1 to 3 items.
        """
        self.date_parts = date_parts
        # Dispatch table: number of parts -> converter method name.
        date_functions = {3: "_ymd_date", 2: "_ym_date", 1: "_y_date"}

        date_getter = date_functions.get(len(date_parts))
        if date_getter is None:
            # Previously this fell through to getattr(self, None), raising an
            # opaque "attribute name must be string" TypeError; fail fast with
            # a descriptive error instead.
            raise ValueError(
                f"date_parts must contain 1 to 3 items, got {len(date_parts)}"
            )
        converter = getattr(self, date_getter)
        converter()
        self.date = date(year=self.year, month=self.month, day=self.day)

    def _ymd_date(self) -> None:
        # Full [year, month, day] record.
        self.year = int(self.date_parts[0])
        self.month = int(self.date_parts[1])
        self.day = int(self.date_parts[2])

    def _ym_date(self) -> None:
        # [year, month] record; day defaults to the 1st.
        self.year = int(self.date_parts[0])
        self.month = int(self.date_parts[1])
        self.day = 1

    def _y_date(self) -> None:
        # [year] record; month and day default to 1.
        self.year = int(self.date_parts[0])
        self.month = 1
        self.day = 1


def _get_paper_entry(doi: str) -> dict:
    """Fetch a Crossref work record for *doi*. Stub: not implemented yet."""
    pass


def _get_publication(paper_entry: dict) -> Publication:
    """Build a Publication model from a Crossref record. Stub: not implemented yet."""
    pass


def _get_paper(paper_entry: dict, publication: Publication) -> Paper:
    """Build a Paper model from a Crossref record. Stub: not implemented yet."""
    pass


def _add_papers(search: Search, source: str) -> None:
    """Add papers from *source* to *search*. Stub: not implemented yet."""
    pass
39 changes: 39 additions & 0 deletions findpapers/searchers/ieee_searcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import datetime
import logging
import math
import re
from typing import Optional

import requests
from lxml import html

import findpapers.utils.common_utils as common_util
import findpapers.utils.query_utils as query_util
from findpapers.models.paper import Paper
from findpapers.models.publication import Publication
from findpapers.models.search import Search
from findpapers.utils.requests_utils import DefaultSession

DATABASE_LABEL = "IEEE"
BASE_URL = "http://ieeexploreapi.ieee.org"
MAX_ENTRIES_PER_PAGE = 200


def _get_search_url(search: Search, api_token: str, start_record: Optional[int] = 1) -> str:
    """Return the IEEE Xplore API URL for *search* using *api_token*.

    Stub: not implemented yet. Note paging here starts at 1, unlike the
    0-based searchers.
    """
    pass


def _get_api_result(search: Search, api_token: str, start_record: Optional[int] = 1) -> dict: # pragma: no cover
    """Call the IEEE Xplore API and return the parsed response. Stub: not implemented yet."""
    pass


def _get_publication(paper_entry: dict) -> Publication:
    """Build a Publication model from an IEEE entry dict. Stub: not implemented yet."""
    pass


def _get_paper(paper_entry: dict, publication: Publication) -> Paper:
    """Build a Paper model from an IEEE entry dict. Stub: not implemented yet."""
    pass


def run(search: Search, api_token: str, pbar=None) -> None:
    """Entry point: run *search* against IEEE Xplore. Stub: not implemented yet."""
    pass
8 changes: 8 additions & 0 deletions findpapers/searchers/medrxiv_searcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import findpapers.searchers.rxiv_searcher as rxiv_searcher
from findpapers.models.search import Search

DATABASE_LABEL = "medRxiv"


def run(search: Search, pbar=None) -> None:
    """Entry point: run *search* against medRxiv.

    Stub: not implemented yet — presumably delegates to ``rxiv_searcher.run``
    with ``DATABASE_LABEL`` ("medRxiv"); confirm once implemented.
    """
    pass
33 changes: 33 additions & 0 deletions findpapers/searchers/opencitations_searcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import logging
import requests

from datetime import date
from findpapers.models.paper import Paper
from findpapers.models.publication import Publication
from findpapers.models.search import Search

# from findpapers.tools.references_tool import References

OPENCITATIONS_API = "https://opencitations.net/index/api/v1/metadata/"
DATABASE_LABEL = "OC" # short for opencitations
SPLIT_AUTHOR = "; "


def _get_paper_entry(doi: str) -> dict:
    """Fetch an OpenCitations metadata record for *doi*. Stub: not implemented yet."""
    pass


def _get_publication(paper_entry: dict) -> Publication:
    """Build a Publication model from an OpenCitations record. Stub: not implemented yet."""
    pass


def _get_paper(paper_entry: dict, publication: Publication) -> Paper:
    """Build a Paper model from an OpenCitations record. Stub: not implemented yet."""
    pass


def _add_papers(search: Search, source: str) -> None:
    """Add papers from *source* to *search*. Stub: not implemented yet."""
    pass


def run(search: Search, references: bool = True, citations: bool = True) -> None:
    """Entry point: enrich *search* via OpenCitations.

    Stub: not implemented yet — flags presumably toggle collecting
    references and/or citations.
    """
    pass
44 changes: 44 additions & 0 deletions findpapers/searchers/pubmed_searcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import datetime
import logging
from typing import Optional

import xmltodict

import findpapers.utils.common_utils as common_util
import findpapers.utils.query_utils as query_util
from findpapers.models.paper import Paper
from findpapers.models.publication import Publication
from findpapers.models.search import Search
from findpapers.utils.requests_utils import DefaultSession

DATABASE_LABEL = "PubMed"
BASE_URL = "https://eutils.ncbi.nlm.nih.gov"
MAX_ENTRIES_PER_PAGE = 50


def _get_search_url(search: Search, start_record: Optional[int] = 0) -> str:
    """Return the PubMed E-utilities URL for *search*, paged from *start_record*.

    Stub: not implemented yet.
    """
    pass


def _get_api_result(search: Search, start_record: Optional[int] = 0) -> dict:
    """Call the PubMed API and return the parsed response. Stub: not implemented yet."""
    pass


def _get_paper_entry(pubmed_id: str) -> dict: # pragma: no cover
    """Fetch one PubMed record by *pubmed_id*. Stub: not implemented yet."""
    pass


def _get_publication(paper_entry: dict) -> Publication:
    """Build a Publication model from a PubMed record. Stub: not implemented yet."""
    pass


def _get_text_recursively(text_entry) -> str:
    """Flatten a (possibly nested) xmltodict text node into a string.

    Stub: not implemented yet.
    """
    pass


def _get_paper(paper_entry: dict, publication: Publication) -> Paper:
    """Build a Paper model from a PubMed record. Stub: not implemented yet."""
    pass


def run(search: Search, pbar=None) -> None:
    """Entry point: run *search* against PubMed. Stub: not implemented yet."""
    pass
47 changes: 47 additions & 0 deletions findpapers/searchers/rxiv_searcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import datetime
import logging
from typing import List

from lxml import html

import findpapers.utils.common_utils as common_util
import findpapers.utils.query_utils as query_util
from findpapers.models.paper import Paper
from findpapers.models.publication import Publication
from findpapers.models.search import Search
from findpapers.utils.requests_utils import DefaultSession

BASE_URL = "https://www.medrxiv.org"
API_BASE_URL = "https://api.biorxiv.org"


def _get_search_urls(search: Search, database: str) -> List[str]:
    """Return the search URLs for *search* on *database* (bioRxiv/medRxiv).

    Stub: not implemented yet.
    """
    pass


def _get_result(url: str) -> html.HtmlElement: # pragma: no cover
    """Download and parse a result page into an lxml element. Stub: not implemented yet."""
    pass


def _get_result_page_data(result_page: html.HtmlElement) -> dict:
    """Extract structured data from a parsed result page. Stub: not implemented yet."""
    pass


def _get_paper_metadata(doi: str, database: str) -> dict: # pragma: no cover
    """Fetch paper metadata from the Rxiv API by DOI. Stub: not implemented yet."""
    pass


def _get_data(url: str) -> List[dict]:
    """Collect all paper data reachable from *url*. Stub: not implemented yet."""
    pass


def _get_publication(paper_entry: dict, database: str) -> Publication:
    """Build a Publication model from an Rxiv entry dict. Stub: not implemented yet."""
    pass


def _get_paper(paper_metadata: dict, database: str) -> Paper:
    """Build a Paper model from Rxiv metadata. Stub: not implemented yet."""
    pass


def run(search: Search, database: str, pbar=None) -> None:
    """Entry point shared by the bioRxiv and medRxiv searchers. Stub: not implemented yet."""
    pass
49 changes: 49 additions & 0 deletions findpapers/searchers/scopus_searcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import datetime
import logging
import re
from typing import Optional

import requests
from lxml import html

import findpapers.utils.common_utils as common_util
import findpapers.utils.query_utils as query_util
from findpapers.models.paper import Paper
from findpapers.models.publication import Publication
from findpapers.models.search import Search
from findpapers.utils.requests_utils import DefaultSession

DATABASE_LABEL = "Scopus"
BASE_URL = "https://api.elsevier.com"


def _get_query(search: Search) -> str:
    """Translate *search* into a Scopus query string. Stub: not implemented yet."""
    pass


def _get_publication_entry(publication_issn: str, api_token: str) -> dict: # pragma: no cover
    """Fetch a Scopus serial-title record by ISSN. Stub: not implemented yet."""
    pass


def _get_publication(paper_entry: dict, api_token: str) -> Publication:
    """Build a Publication model from a Scopus entry dict. Stub: not implemented yet."""
    pass


def _get_paper_page(url: str) -> object: # pragma: no cover
    """Download and parse a paper's Scopus page. Stub: not implemented yet."""
    pass


def _get_paper(paper_entry: dict, publication: Publication) -> Paper:
    """Build a Paper model from a Scopus entry dict. Stub: not implemented yet."""
    pass


def _get_search_results(search: Search, api_token: str, url: Optional[str] = None) -> dict: # pragma: no cover
    """Fetch a page of Scopus results (or follow *url* directly). Stub: not implemented yet."""
    pass


def enrich_publication_data(search: Search, api_token: str) -> None:
    """Enrich publications already in *search* with Scopus serial data.

    Stub: not implemented yet.
    """
    pass


def run(search: Search, api_token: str, pbar=None, url: Optional[str] = None, papers_count: Optional[int] = 0) -> None:
    """Entry point: run *search* against Scopus, resuming from *url*/*papers_count*.

    Stub: not implemented yet.
    """
    pass
17 changes: 17 additions & 0 deletions findpapers/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import logging
import os
from typing import Optional

from findpapers.tools.bibtex_generator_tool import generate_bibtex
from findpapers.tools.downloader_tool import download
from findpapers.tools.rayyan_tool import RayyanExport
from findpapers.tools.refiner_tool import refine
from findpapers.tools.refman_tool import RisExport
from findpapers.tools.search_runner_tool import search

# Prefer the stdlib importlib.metadata (Python 3.8+); fall back to the
# importlib_metadata backport on older interpreters.
try:
    import importlib.metadata as importlib_metadata
except ModuleNotFoundError:
    import importlib_metadata

# NOTE(review): __name__ here is "findpapers.tools", but version() expects an
# installed *distribution* name — this likely raises PackageNotFoundError;
# confirm it shouldn't be version("findpapers") instead.
__version__ = importlib_metadata.version(__name__)
Loading

0 comments on commit c42f9af

Please sign in to comment.