Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DOM validator & make all str -> array #185

Merged
merged 2 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions .github/workflows/scripts/technology_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import string
from typing import Final, Any, Type, Optional

from bs4 import BeautifulSoup


class MissingRequiredFieldException(Exception):
def __init__(self, msg: str):
Expand Down Expand Up @@ -150,11 +152,6 @@ def get_type(self) -> list[Type]:
return [list]


class StringOrArrayOrDictValidator(AbstractValidator):
    """Validator whose type list is ``str``, ``list`` and ``dict``."""

    def get_type(self) -> list[Type]:
        """Return the type list ``[str, list, dict]``."""
        accepted_types: list[Type] = [str, list, dict]
        return accepted_types


class DictValidator(RegexValidator):
def get_type(self) -> list[Type]:
return [dict]
Expand All @@ -176,6 +173,19 @@ def _validate(self, tech_name: str, data: Any) -> bool:
return True


class DomValidator(AbstractValidator):
    """Check the ``dom`` field by running each of its CSS selectors.

    The value may be a list of selector strings or a dict keyed by
    selector strings. Only the part of each string before the first
    literal ``\\;`` separator is treated as the selector.
    """

    def _validate(self, tech_name: str, data: Any) -> bool:
        """Run every selector in *data* against an empty document.

        Args:
            tech_name: Name of the technology being validated (not used
                by this check).
            data: The ``dom`` value; a list of selectors or a dict whose
                keys are selectors.

        Returns:
            ``False`` when *data* is neither a list nor a dict, ``True``
            once every selector has been run.

        Raises:
            Whatever ``BeautifulSoup.select`` raises for a selector it
            rejects; the error is deliberately left to propagate, as in
            the original implementation.
        """
        if not isinstance(data, (list, dict)):
            return False
        # One empty document is enough: only the selector matters here,
        # never the matches, so the parser is built once and reused.
        empty_document = BeautifulSoup("", "html.parser")
        # Iterating a dict yields its keys, so lists and dicts share one loop.
        for selector in data:
            empty_document.select(selector.split(r"\;")[0])
        return True


class IconValidator(StringValidator):
def __init__(self, icons: list[str], required: bool = False):
super().__init__(required)
Expand Down Expand Up @@ -237,7 +247,7 @@ def __init__(self, file_name: str):
"excludes": ArrayValidator(), # TODO ^
"requiresCategory": CategoryValidator(self._CATEGORIES),
"cookies": DictValidator(contains_regex=True),
"dom": StringOrArrayOrDictValidator(), # TODO query selector validator
"dom": DomValidator(),
"dns": DictValidator(contains_regex=True),
"js": DictValidator(contains_regex=True),
"headers": DictValidator(contains_regex=True),
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: run tech validator
run: python3 .github/workflows/scripts/technology_validator.py
run: python3 -m pip install bs4 && python3 .github/workflows/scripts/technology_validator.py
env:
TECH_FILE_NAME: ${{ matrix.file_name }}

Expand Down
16 changes: 12 additions & 4 deletions src/technologies/_.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@
14
],
"description": "30namaPlayer is a modified version of Video.js to work with videos on HTML using javascript.",
"dom": "section[class*='player30nama']",
"dom": [
"section[class*='player30nama']"
],
"icon": "30namaPlayer.png",
"website": "https://30nama.com/"
},
Expand All @@ -58,7 +60,9 @@
36
],
"description": "33Across is a technology company focused on solving the challenge of consumer attention for automated advertising.",
"dom": "iframe[src*='.33across.com'], link[href*='.33across.com'], link[href*='.tynt.com']",
"dom": [
"iframe[src*='.33across.com'], link[href*='.33across.com'], link[href*='.tynt.com']"
],
"icon": "33Across.png",
"js": {
"Tynt": ""
Expand Down Expand Up @@ -149,7 +153,9 @@
31
],
"description": "5centsCDN is a content delivery networks service provider.",
"dom": "link[href*='.5centscdn.com/']",
"dom": [
"link[href*='.5centscdn.com/']"
],
"headers": {
"x-cdn": "^5centsCDN$"
},
Expand Down Expand Up @@ -207,7 +213,9 @@
105
],
"description": "<model-viewer> is an open-source web component developed by Google and maintained through GitHub. <model-viewer> aims at putting 3D content on the web easily with a few lines of HTML code. This was first introduced with Chrome 72 in July 2019 and enables users to view 3D in the browser and mobile devices.",
"dom": "model-viewer",
"dom": [
"model-viewer"
],
"icon": "model-viewer.svg",
"oss": true,
"scriptSrc": [
Expand Down
Loading