Skip to content

Commit

Permalink
Merge pull request #185 from enthec/dom
Browse files Browse the repository at this point in the history
DOM validator & make all str -> array
  • Loading branch information
enthec-opensource authored Jul 25, 2024
2 parents e934f2a + e228654 commit cff5879
Show file tree
Hide file tree
Showing 28 changed files with 2,384 additions and 796 deletions.
22 changes: 16 additions & 6 deletions .github/workflows/scripts/technology_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import string
from typing import Final, Any, Type, Optional

from bs4 import BeautifulSoup


class MissingRequiredFieldException(Exception):
def __init__(self, msg: str):
Expand Down Expand Up @@ -150,11 +152,6 @@ def get_type(self) -> list[Type]:
return [list]


class StringOrArrayOrDictValidator(AbstractValidator):
    """Validator whose ``get_type`` reports ``str``, ``list`` and ``dict``."""

    def get_type(self) -> list[Type]:
        """Return the types ``str``, ``list`` and ``dict``, in that order."""
        accepted: list[Type] = [str, list, dict]
        return accepted


class DictValidator(RegexValidator):
def get_type(self) -> list[Type]:
return [dict]
Expand All @@ -176,6 +173,19 @@ def _validate(self, tech_name: str, data: Any) -> bool:
return True


class DomValidator(AbstractValidator):
    """Validator for the ``dom`` field of a technology entry.

    Accepts either a list of selector strings or a dict whose keys are
    selector strings; any other type is rejected.
    """

    def _validate(self, tech_name: str, data: Any) -> bool:
        """Pass every selector found in ``data`` to BeautifulSoup's ``select``.

        Args:
            tech_name: Name of the technology being validated (unused here).
            data: The ``dom`` value: a list of selectors, or a dict keyed by
                selector.

        Returns:
            ``False`` when ``data`` is neither a list nor a dict; ``True``
            once every selector has been handed to ``select``.

        NOTE(review): an unparsable selector is presumably reported by
        ``select`` raising; that exception is not caught here and propagates
        to the caller - confirm this is the intended failure mode.
        """
        if not isinstance(data, (list, dict)):
            return False

        # A single empty document is enough: only the selector is being
        # exercised, so there is no need to build a new parser per entry.
        soup = BeautifulSoup("", "html.parser")

        # Iterating a list yields its elements and iterating a dict yields its
        # keys, which are exactly the selector strings in both layouts.
        for selector in data:
            # Only the text before the first literal `\;` is checked.
            soup.select(selector.split(r"\;")[0])
        return True


class IconValidator(StringValidator):
def __init__(self, icons: list[str], required: bool = False):
super().__init__(required)
Expand Down Expand Up @@ -237,7 +247,7 @@ def __init__(self, file_name: str):
"excludes": ArrayValidator(), # TODO ^
"requiresCategory": CategoryValidator(self._CATEGORIES),
"cookies": DictValidator(contains_regex=True),
"dom": StringOrArrayOrDictValidator(), # TODO query selector validator
"dom": DomValidator(),
"dns": DictValidator(contains_regex=True),
"js": DictValidator(contains_regex=True),
"headers": DictValidator(contains_regex=True),
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
python-version: ${{ matrix.python-version }}

- name: run tech validator
run: python3 .github/workflows/scripts/technology_validator.py
run: python3 -m pip install bs4 && python3 .github/workflows/scripts/technology_validator.py
env:
TECH_FILE_NAME: ${{ matrix.file_name }}

Expand Down
16 changes: 12 additions & 4 deletions src/technologies/_.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@
14
],
"description": "30namaPlayer is a modified version of Video.js to work with videos on HTML using javascript.",
"dom": "section[class*='player30nama']",
"dom": [
"section[class*='player30nama']"
],
"icon": "30namaPlayer.png",
"website": "https://30nama.com/"
},
Expand All @@ -58,7 +60,9 @@
36
],
"description": "33Across is a technology company focused on solving the challenge of consumer attention for automated advertising.",
"dom": "iframe[src*='.33across.com'], link[href*='.33across.com'], link[href*='.tynt.com']",
"dom": [
"iframe[src*='.33across.com'], link[href*='.33across.com'], link[href*='.tynt.com']"
],
"icon": "33Across.png",
"js": {
"Tynt": ""
Expand Down Expand Up @@ -149,7 +153,9 @@
31
],
"description": "5centsCDN is a content delivery networks service provider.",
"dom": "link[href*='.5centscdn.com/']",
"dom": [
"link[href*='.5centscdn.com/']"
],
"headers": {
"x-cdn": "^5centsCDN$"
},
Expand Down Expand Up @@ -207,7 +213,9 @@
105
],
"description": "<model-viewer> is an open-source web component developed by Google and maintained through GitHub. <model-viewer> aims at putting 3D content on the web easily with a few lines of HTML code. This was first introduced with Chrome 72 in July 2019 and enables users to view 3D in the browser and mobile devices.",
"dom": "model-viewer",
"dom": [
"model-viewer"
],
"icon": "model-viewer.svg",
"oss": true,
"scriptSrc": [
Expand Down
Loading

0 comments on commit cff5879

Please sign in to comment.