
Commit

Fixes & Version update in pyproject.toml
SpanishCat authored Nov 14, 2024
1 parent 7d625fa commit c42de88
Showing 17 changed files with 230 additions and 7 deletions.
Binary file added dist/py_wikipls-0.0.1a1-py3-none-any.whl
Binary file added dist/py_wikipls-0.0.1a1.tar.gz
8 changes: 4 additions & 4 deletions pyproject.toml
@@ -3,8 +3,8 @@ requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "py-wiki-pls"
version = "0.0.1"
name = "py-wikipls"
version = "0.0.1a1"
authors = [
{ name="Yonathan Katz", email="[email protected]" },
]
@@ -18,5 +18,5 @@ classifiers = [
]

[project.urls]
Homepage = "https://github.com/SpanishCat/py-wiki-pls"
Issues = "https://github.com/SpanishCat/py-wiki-pls/issues"
Homepage = "https://github.com/SpanishCat/py-wikipls"
Issues = "https://github.com/SpanishCat/py-wikipls/issues"
Binary file modified requirements.txt
12 changes: 12 additions & 0 deletions src/py_wikipls.egg-info/PKG-INFO
@@ -0,0 +1,12 @@
Metadata-Version: 2.1
Name: py-wikipls
Version: 0.0.1a1
Summary: A package for requesting data from Wikipedia using the REST API.
Author-email: Yonathan Katz <[email protected]>
Project-URL: Homepage, https://github.com/SpanishCat/py-wikipls
Project-URL: Issues, https://github.com/SpanishCat/py-wikipls/issues
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.8
Description-Content-Type: text/markdown
9 changes: 9 additions & 0 deletions src/py_wikipls.egg-info/SOURCES.txt
@@ -0,0 +1,9 @@
pyproject.toml
src/py_wikipls.egg-info/PKG-INFO
src/py_wikipls.egg-info/SOURCES.txt
src/py_wikipls.egg-info/dependency_links.txt
src/py_wikipls.egg-info/top_level.txt
src/wikipls/__init__.py
src/wikipls/article.py
src/wikipls/utils.py
tests/test1.py
1 change: 1 addition & 0 deletions src/py_wikipls.egg-info/dependency_links.txt
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions src/py_wikipls.egg-info/top_level.txt
@@ -0,0 +1 @@
wikipls
1 change: 1 addition & 0 deletions src/wikipls/__init__.py
@@ -0,0 +1 @@
from .article import *
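The star import re-exports the public names from article.py (and, through it, the helpers from utils.py) at the package level. A hedged sketch of what that enables, assuming the package is installed as wikipls:

import wikipls

song = wikipls.Article("Faded_(Alan_Walker_song)")  # Article is reachable without importing wikipls.article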
Binary file added src/wikipls/__pycache__/__init__.cpython-310.pyc
Binary file added src/wikipls/__pycache__/article.cpython-310.pyc
Binary file added src/wikipls/__pycache__/utils.cpython-310.pyc
104 changes: 104 additions & 0 deletions src/wikipls/article.py
@@ -0,0 +1,104 @@
# -*- coding: hebrew -*-
from typing import Any

from .utils import *

# Config
TEST_DATE = date(2024, 11, 1)


class Article:
    def __init__(self, name: str):
        """
        :param name: Case-sensitive
        """
        self.details: dict[str, Any] = get_page_data(name)

        # Map details to class
        self.id: int = self.details["id"]
        self.title: str = self.details["title"]
        self.key: str = self.details["key"]
        self.content_model: str = self.details["content_model"]
        self.license: dict = self.details["license"]
        self.latest: dict = self.details["latest"]
        self.html_url: str = self.details["html_url"]

    def __repr__(self):
        return f"Article({self.title}, {self.id})"

    def __eq__(self, other):
        return self.id == other.id and self.key == other.key

    def get_page(self, date_: date, lang: str = "en"):
        return Page(self, date_, lang)

    # todo Revisions


class Page:
    """
    The difference between a wikipls.Page and a wikipls.Article:
    Article - The collection of all versions of an article, across all languages and dates; a 'collection' of Pages.
    Page - One specific version of an article, on a specific date and in a specific language.
    """

    memory: dict = {}

    def __init__(self, article: Article, date_: date, lang="en"):
        self.from_article: Article = article
        self.name = self.from_article.key
        self.date: date = date_
        self.lang: str = lang

        self.details: dict[str, Any] = get_page_data(article.key)

        # Map details to class
        self.id: int = self.details["id"]
        self.title: str = self.details["title"]
        self.key: str = self.details["key"]
        self.content_model: str = self.details["content_model"]
        self.license: dict = self.details["license"]
        self.latest: dict = self.details["latest"]
        self.html_url: str = self.details["html_url"]

    def __repr__(self):
        return f"Page({self.title}, {self.date}, {self.id})"

    def __eq__(self, other):
        return self.id == other.id and self.key == other.key

    @property
    def views(self) -> int:
        if "views" not in self.memory:
            self.memory["views"]: int = get_views(self.name, self.date, self.lang)
        return self.memory["views"]

    @property
    def html(self) -> str:
        if "html" not in self.memory:
            self.memory["html"]: str = get_html(self.name)
        return self.memory["html"]

    @property
    def summery(self) -> str:
        if "summery" not in self.memory:
            self.memory["summery"]: str = get_summary(self.name)
        return self.memory["summery"]

    @property
    def media(self) -> tuple[dict, ...]:
        if "media" not in self.memory:
            self.memory["media"]: tuple[dict, ...] = get_media(self.name)
        return self.memory["media"]

    @property
    def as_pdf(self) -> bytes:
        if "pdf_code" not in self.memory:
            self.memory["pdf_code"]: bytes = get_pdf(self.name)
        return self.memory["pdf_code"]

    @property
    def data(self) -> dict[str, Any]:
        if "data" not in self.memory:
            self.memory["data"]: dict = get_page_data(self.name)
        return self.memory["data"]
95 changes: 95 additions & 0 deletions src/wikipls/utils.py
@@ -0,0 +1,95 @@
from typing import overload

import requests
import json
import urllib.parse

from datetime import date

LANG = "en"
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64"} # todo Check wiki's docs and change headers


def to_timestamp(date_: date) -> str:
    return date_.strftime("%Y%m%d")


@overload
def get_views(name: str, date_: date, lang: str = LANG) -> int: ...
@overload
def get_views(name: str, date_: str, lang: str = LANG) -> int: ...


def get_views(name: str, date_: str | date, lang: str = LANG) -> int:
    if isinstance(date_, date):
        date_ = to_timestamp(date_)
    elif not isinstance(date_, str):
        raise AttributeError("date_ must be a string or a datetime.date object")

    url = u"https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/" \
          u"{}.wikipedia.org/all-access/all-agents/{}/daily/{}/{}" \
        .format(lang.lower(), urllib.parse.quote(name), date_, date_)

    response = response_for(url)

    return response["items"][0]["views"]


def get_html(name: str) -> str:
    response = requests.get(f"https://en.wikipedia.org/api/rest_v1/page/html/{name}")

    if response.status_code == 200:
        return response.content.decode("utf-8")


def get_summary(name: str) -> str:
    response = response_for(f"https://en.wikipedia.org/api/rest_v1/page/summary/{name}")

    if response:
        return response["extract"]


def get_media(name: str) -> tuple[dict, ...]:
    response = response_for(f"https://en.wikipedia.org/api/rest_v1/page/media-list/{name}")

    if response:
        return tuple(response["items"])


def get_segments(name: str) -> str:
    # todo Add strict=False option that'll raise an error if response is None
    response = response_for(f"https://en.wikipedia.org/api/rest_v1/page/segments/{name}")

    if response:
        return response["segmentedContent"]


def get_pdf(name: str) -> bytes:
    response = requests.get(f"https://en.wikipedia.org/api/rest_v1/page/pdf/{name}")

    if response.status_code == 200:
        return response.content


def get_page_data(name: str) -> dict:
    response = response_for(f"https://api.wikimedia.org/core/v1/wikipedia/en/page/{name}/bare")
    return response


def response_for(url: str) -> dict | None:
    response = requests.get(url, headers=HEADERS)
    result = json.loads(response.text)

    if response.status_code == 200:
        return result
    elif response.status_code == 400:
        raise AttributeError(f"One or more of the arguments given is invalid. "
                             f"\n{result['title']}: {result['detail']}")
    elif response.status_code == 404:
        if 'title' in result and 'detail' in result:
            raise Exception(f"No page was found. \n{result['title']}: {result['detail']}")
        elif 'messageTranslations' in result and 'en' in result['messageTranslations']:
            raise Exception(result["messageTranslations"]["en"])
    else:
        result = json.loads(response.text)
        print(f"New error: {response.status_code}, {result['title']}: {result['detail']}")
2 changes: 1 addition & 1 deletion tests/class_memory_test.py
@@ -1,4 +1,4 @@
-import src.wiki_pls as wikipy
+import src.wikipls as wikipy
from time import perf_counter

page_key = "Faded_(Alan_Walker_song)"
2 changes: 1 addition & 1 deletion tests/print_details.py
@@ -1,4 +1,4 @@
-from src.wiki_pls import *
+from src.wikipls import *

a = Article("Pumped_Up_Kicks")
print(f"{a.details=}")
2 changes: 1 addition & 1 deletion tests/test1.py
@@ -1,4 +1,4 @@
-from src.wiki_pls import *
+from src.wikipls import *

a = Article("Faded_(Alan_Walker_song)")

