diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 00000000000..faec9f095ab --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,8 @@ +FROM rssbridge/rss-bridge:latest + +RUN apt-get update && \ + apt-get install --yes --no-install-recommends \ + git && \ + pecl install xdebug && \ + pear install PHP_CodeSniffer && \ + docker-php-ext-enable xdebug \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000000..6e625b8a008 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,27 @@ +{ + "name": "rss-bridge dev", + "build": { "dockerfile": "Dockerfile" }, + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "php.validate.executablePath": "/usr/local/bin/php", + "phpSniffer.executablesFolder": "/usr/local/bin/", + "phpcs.executablePath": "/usr/local/bin/phpcs", + "phpcs.lintOnType": false + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": [ + "xdebug.php-debug", + "bmewburn.vscode-intelephense-client", + "philfontaine.autolaunch", + "eamodio.gitlens", + "shevaua.phpcs" + ] + } + }, + "forwardPorts": [3100, 9000, 9003], + "postCreateCommand": "cp .devcontainer/nginx.conf /etc/nginx/conf.d/default.conf && cp .devcontainer/xdebug.ini /usr/local/etc/php/conf.d/xdebug.ini && mkdir .vscode && cp .devcontainer/launch.json .vscode && echo '*' > whitelist.txt && chmod a+x \"$(pwd)\" && rm -rf /var/www/html && ln -s \"$(pwd)\" /var/www/html && nginx && php-fpm -D" +} \ No newline at end of file diff --git a/.devcontainer/launch.json b/.devcontainer/launch.json new file mode 100644 index 00000000000..e1b473b8ec2 --- /dev/null +++ b/.devcontainer/launch.json @@ -0,0 +1,49 @@ +{ + // Use IntelliSense to learn about possible attributes. 
+ // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Listen for Xdebug", + "type": "php", + "request": "launch", + "port": 9003, + "auto": true + }, + { + "name": "Launch currently open script", + "type": "php", + "request": "launch", + "program": "${file}", + "cwd": "${fileDirname}", + "port": 0, + "runtimeArgs": [ + "-dxdebug.start_with_request=yes" + ], + "env": { + "XDEBUG_MODE": "debug,develop", + "XDEBUG_CONFIG": "client_port=${port}" + } + }, + { + "name": "Launch Built-in web server", + "type": "php", + "request": "launch", + "runtimeArgs": [ + "-dxdebug.mode=debug", + "-dxdebug.start_with_request=yes", + "-S", + "localhost:0" + ], + "program": "", + "cwd": "${workspaceRoot}", + "port": 9003, + "serverReadyAction": { + "pattern": "Development Server \\(http://localhost:([0-9]+)\\) started", + "uriFormat": "http://localhost:%s", + "action": "openExternally" + } + } + ] +} \ No newline at end of file diff --git a/.devcontainer/nginx.conf b/.devcontainer/nginx.conf new file mode 100644 index 00000000000..0e5db6dcc9c --- /dev/null +++ b/.devcontainer/nginx.conf @@ -0,0 +1,17 @@ +server { + listen 3100 default_server; + root /workspaces/rss-bridge; + access_log /var/log/nginx/rssbridge.access.log; + error_log /var/log/nginx/rssbridge.error.log; + index index.php; + + location ~ /(\.|vendor|tests) { + deny all; + return 403; # Forbidden + } + + location ~ \.php$ { + include snippets/fastcgi-php.conf; + fastcgi_pass unix:/var/run/php/php8.2-fpm.sock; + } +} diff --git a/.devcontainer/xdebug.ini b/.devcontainer/xdebug.ini new file mode 100644 index 00000000000..1079f0b8807 --- /dev/null +++ b/.devcontainer/xdebug.ini @@ -0,0 +1,7 @@ +[xdebug] +xdebug.mode=develop,debug +xdebug.client_host=localhost +xdebug.client_port=9003 +xdebug.start_with_request=yes +xdebug.discover_client_host=false 
+xdebug.log='/var/www/html/xdebug.log' \ No newline at end of file diff --git a/.dockerignore b/.dockerignore index db313697054..90ca9f256aa 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,6 @@ .git +!.git/HEAD +!.git/refs/heads/* .gitattributes .github/* .travis.yml diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000000..d231c97e6d8 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,4 @@ +# Reformat code base to PSR12 +4f75591060d95208a301bc6bf460d875631b29cc +# Fix coding style missed by phpbcf +951092eef374db048b77bac85e75e3547bfac702 diff --git a/.gitattributes b/.gitattributes index 36544021208..280532568cc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,5 +1,6 @@ # Auto detect text files and perform LF normalization * text=auto +*.sh text eol=lf # Custom for Visual Studio *.cs diff=csharp @@ -46,8 +47,6 @@ phpcs.xml export-ignore phpcompatibility.xml export-ignore tests/ export-ignore cache/.gitkeep export-ignore -bridges/DemoBridge.php export-ignore -bridges/FeedExpanderExampleBridge.php export-ignore ## Composer # diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 00000000000..7ebb4030e82 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1,7 @@ +# Visual Studio Code +.vscode/* + +# Generated files +comment*.md +comment*.txt +*.html diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index ec47ab0dbee..69976e4169d 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,49 +1,7 @@ ### Pull request policy -* [Fix one issue per pull request](https://github.com/RSS-Bridge/rss-bridge/wiki/Pull-request-policy#fix-one-issue-per-pull-request) -* [Respect the coding style policy](https://github.com/RSS-Bridge/rss-bridge/wiki/Pull-request-policy#respect-the-coding-style-policy) -* [Properly name your commits](https://github.com/RSS-Bridge/rss-bridge/wiki/Pull-request-policy#properly-name-your-commits) - * When fixing a bridge (located in the 
`bridges` directory), write `[BridgeName] Feature`
(i.e. `[YoutubeBridge] Fix typo in video titles`). - * When fixing other files, use `[FileName] Feature`
(i.e. `[index.php] Add multilingual support`). - * When fixing a general problem that applies to multiple files, write `category: feature`
(i.e. `bridges: Fix various typos`). - -Note that all pull-requests must pass all tests before they can be merged. +See the [Pull request policy page on the documentation](https://rss-bridge.github.io/rss-bridge/For_Developers/Pull_Request_policy.html) for more information on the pull request policy. ### Coding style -* [Whitespace](https://github.com/RSS-Bridge/rss-bridge/wiki/Whitespace) - * [Add a new line at the end of a file](https://github.com/RSS-Bridge/rss-bridge/wiki/Whitespace#add-a-new-line-at-the-end-of-a-file) - * [Do not add a whitespace before a semicolon](https://github.com/RSS-Bridge/rss-bridge/wiki/Whitespace#add-a-new-line-at-the-end-of-a-file) - * [Do not add whitespace at start or end of a file or end of a line](https://github.com/RSS-Bridge/rss-bridge/wiki/Whitespace#do-not-add-whitespace-at-start-or-end-of-a-file-or-end-of-a-line) -* [Indentation](https://github.com/RSS-Bridge/rss-bridge/wiki/Indentation) - * [Use tabs for indentation](https://github.com/RSS-Bridge/rss-bridge/wiki/Indentation#use-tabs-for-indentation) -* [Maximum line length](https://github.com/RSS-Bridge/rss-bridge/wiki/Maximum-line-length) - * [The maximum line length should not exceed 80 characters](https://github.com/RSS-Bridge/rss-bridge/wiki/Maximum-line-length#the-maximum-line-length-should-not-exceed-80-characters) -* [Strings](https://github.com/RSS-Bridge/rss-bridge/wiki/Strings) - * [Whenever possible use single quoted strings](https://github.com/RSS-Bridge/rss-bridge/wiki/Strings#whenever-possible-use-single-quote-strings) - * [Add spaces around the concatenation operator](https://github.com/RSS-Bridge/rss-bridge/wiki/Strings#add-spaces-around-the-concatenation-operator) - * [Use a single string instead of concatenating](https://github.com/RSS-Bridge/rss-bridge/wiki/Strings#use-a-single-string-instead-of-concatenating) -* [Constants](https://github.com/RSS-Bridge/rss-bridge/wiki/Constants) - * [Use UPPERCASE for 
constants](https://github.com/RSS-Bridge/rss-bridge/wiki/Constants#use-uppercase-for-constants) -* [Keywords](https://github.com/RSS-Bridge/rss-bridge/wiki/Keywords) - * [Use lowercase for `true`, `false` and `null`](https://github.com/RSS-Bridge/rss-bridge/wiki/Keywords#use-lowercase-for-true-false-and-null) -* [Operators](https://github.com/RSS-Bridge/rss-bridge/wiki/Operators) - * [Operators must have a space around them](https://github.com/RSS-Bridge/rss-bridge/wiki/Operators#operators-must-have-a-space-around-them) -* [Functions](https://github.com/RSS-Bridge/rss-bridge/wiki/Functions) - * [Parameters with default values must appear last in functions](https://github.com/RSS-Bridge/rss-bridge/wiki/Functions#parameters-with-default-values-must-appear-last-in-functions) - * [Calling functions](https://github.com/RSS-Bridge/rss-bridge/wiki/Functions#calling-functions) - * [Do not add spaces after opening or before closing bracket](https://github.com/RSS-Bridge/rss-bridge/wiki/Functions#do-not-add-spaces-after-opening-or-before-closing-bracket) -* [Structures](https://github.com/RSS-Bridge/rss-bridge/wiki/Structures) - * [Structures must always be formatted as multi-line blocks](https://github.com/RSS-Bridge/rss-bridge/wiki/Structures#structures-must-always-be-formatted-as-multi-line-blocks) -* [If-Statement](https://github.com/RSS-Bridge/rss-bridge/wiki/if-Statement) - * [Use `elseif` instead of `else if`](https://github.com/RSS-Bridge/rss-bridge/wiki/if-Statement#use-elseif-instead-of-else-if) - * [Do not write empty statements](https://github.com/RSS-Bridge/rss-bridge/wiki/if-Statement#do-not-write-empty-statements) - * [Do not write unconditional if-statements](https://github.com/RSS-Bridge/rss-bridge/wiki/if-Statement#do-not-write-unconditional-if-statements) -* [Classes](https://github.com/RSS-Bridge/rss-bridge/wiki/Classes) - * [Use PascalCase for class names](https://github.com/RSS-Bridge/rss-bridge/wiki/Classes#use-pascalcase-for-class-names) - * [Do not 
use final statements inside final classes](https://github.com/RSS-Bridge/rss-bridge/wiki/Classes#do-not-use-final-statements-inside-final-classes) - * [Do not override methods to call their parent](https://github.com/RSS-Bridge/rss-bridge/wiki/Classes#do-not-override-methods-to-call-their-parent) - * [abstract and final declarations MUST precede the visibility declaration](https://github.com/RSS-Bridge/rss-bridge/wiki/Classes#abstract-and-final-declarations-must-precede-the-visibility-declaration) - * [static declaration MUST come after the visibility declaration](https://github.com/RSS-Bridge/rss-bridge/wiki/Classes#static-declaration-must-come-after-the-visibility-declaration) -* [Casting](https://github.com/RSS-Bridge/rss-bridge/wiki/Casting) - * [Do not add spaces when casting](https://github.com/RSS-Bridge/rss-bridge/wiki/Casting#do-not-add-spaces-when-casting) +See the [Coding style policy page on the documentation](https://rss-bridge.github.io/rss-bridge/For_Developers/Coding_style_policy.html) for more information on the coding style of the project. diff --git a/.github/ISSUE_TEMPLATE/bridge-request.md b/.github/ISSUE_TEMPLATE/bridge-request.md index a0080b8b8d7..088cc3d6a5c 100644 --- a/.github/ISSUE_TEMPLATE/bridge-request.md +++ b/.github/ISSUE_TEMPLATE/bridge-request.md @@ -49,9 +49,9 @@ Please describe what you expect from the bridge. Whenever possible provide sampl - _Default limit_: 5 - [ ] Load full articles - _Cache articles_ (articles are stored in a local cache on first request): yes - - _Cache timeout_ (max = 24 hours): 24 hours + - _Cache timeout_ : 24 hours - [X] Balance requests (RSS-Bridge uses cached versions to reduce bandwith usage) - - _Timeout_ (default = 5 minutes, max = 24 hours): 5 minutes + - _Timeout_ (default = 5 minutes): 5 minutes @@ -60,5 +60,5 @@ Please describe what you expect from the bridge. Whenever possible provide sampl Keep in mind that opening a request does not guarantee the bridge being implemented! 
That depends entirely on the interest and time of others to make the bridge for you. -You can also implement your own bridge (with support of the community if needed). Find more information in the [RSS-Bridge Wiki](https://github.com/RSS-Bridge/rss-bridge/wiki/For-developers) developer section. +You can also implement your own bridge (with support of the community if needed). Find more information in the [RSS-Bridge Documentation](https://rss-bridge.github.io/rss-bridge/For_Developers/index.html) developer section. --> diff --git a/.github/prtester-requirements.txt b/.github/prtester-requirements.txt new file mode 100644 index 00000000000..4fb08b5752a --- /dev/null +++ b/.github/prtester-requirements.txt @@ -0,0 +1,2 @@ +beautifulsoup4>=4.10.0 +requests>=2.26.0 \ No newline at end of file diff --git a/.github/prtester.py b/.github/prtester.py new file mode 100644 index 00000000000..c5c5be2274f --- /dev/null +++ b/.github/prtester.py @@ -0,0 +1,208 @@ +import argparse +import requests +import re +from bs4 import BeautifulSoup +from datetime import datetime +from typing import Iterable +import os +import glob +import urllib + +# This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge +# +# This will scrape the whitelisted bridges in the current state (port 3000) and the PR state (port 3001) of +# RSS-Bridge, generate a feed for each of the bridges and save the output as html files. +# It also add a tag with the url of em's public instance, so viewing +# the HTML file locally will actually work as designed. 
+ +ARTIFACT_FILE_EXTENSION = '.html' + +class Instance: + name = '' + url = '' + +def main(instances: Iterable[Instance], with_upload: bool, with_reduced_upload: bool, title: str, output_file: str): + start_date = datetime.now() + + prid = os.getenv('PR') + artifact_base_url = f'https://rss-bridge.github.io/rss-bridge-tests/prs/{prid}' + artifact_directory = os.getcwd() + for file in glob.glob(f'*{ARTIFACT_FILE_EXTENSION}', root_dir=artifact_directory): + os.remove(file) + + table_rows = [] + for instance in instances: + page = requests.get(instance.url) # Use python requests to grab the rss-bridge main page + soup = BeautifulSoup(page.content, "html.parser") # use bs4 to turn the page into soup + bridge_cards = soup.select('.bridge-card') # get a soup-formatted list of all bridges on the rss-bridge page + table_rows += testBridges( + instance=instance, + bridge_cards=bridge_cards, + with_upload=with_upload, + with_reduced_upload=with_reduced_upload, + artifact_directory=artifact_directory, + artifact_base_url=artifact_base_url) # run the main scraping code with the list of bridges + with open(file=output_file, mode='w+', encoding='utf-8') as file: + table_rows_value = '\n'.join(sorted(table_rows)) + file.write(f''' +## {title} +| Bridge | Context | Status | +| - | - | - | +{table_rows_value} + +*last change: {start_date.strftime("%A %Y-%m-%d %H:%M:%S")}* + '''.strip()) + +def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, with_reduced_upload: bool, artifact_directory: str, artifact_base_url: str) -> Iterable: + instance_suffix = '' + if instance.name: + instance_suffix = f' ({instance.name})' + table_rows = [] + for bridge_card in bridge_cards: + bridgeid = bridge_card.get('id') + bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata + print(f'{bridgeid}{instance_suffix}') + bridge_name = bridgeid.replace('Bridge', '') + context_forms = bridge_card.find_all("form") + form_number = 1 + for 
context_form in context_forms: + # a bridge can have multiple contexts, named 'forms' in html + # this code will produce a fully working url that should create a working feed when called + # this will create an example feed for every single context, to test them all + context_parameters = {} + error_messages = [] + context_name = '*untitled*' + context_name_element = context_form.find_previous_sibling('h5') + if context_name_element and context_name_element.text.strip() != '': + context_name = context_name_element.text + parameters = context_form.find_all("input") + lists = context_form.find_all("select") + # this for/if mess cycles through all available input parameters, checks if it required, then pulls + # the default or examplevalue and then combines it all together into the url parameters + # if an example or default value is missing for a required attribute, it will throw an error + # any non-required fields are not tested!!! + for parameter in parameters: + parameter_type = parameter.get('type') + parameter_name = parameter.get('name') + if parameter_type == 'hidden': + context_parameters[parameter_name] = parameter.get('value') + if parameter_type == 'number' or parameter_type == 'text': + if parameter.has_attr('required'): + if parameter.get('placeholder') == '': + if parameter.get('value') == '': + error_messages.append(f'Missing example or default value for parameter "{parameter_name}"') + else: + context_parameters[parameter_name] = parameter.get('value') + else: + context_parameters[parameter_name] = parameter.get('placeholder') + # same thing, just for checkboxes. 
If a checkbox is checked per default, it gets added to the url parameters + if parameter_type == 'checkbox': + if parameter.has_attr('checked'): + context_parameters[parameter_name] = 'on' + for listing in lists: + selectionvalue = '' + listname = listing.get('name') + cleanlist = [] + options = listing.find_all('option') + for option in options: + if 'optgroup' in option.name: + cleanlist.extend(option) + else: + cleanlist.append(option) + firstselectionentry = 1 + for selectionentry in cleanlist: + if firstselectionentry: + selectionvalue = selectionentry.get('value') + firstselectionentry = 0 + else: + if 'selected' in selectionentry.attrs: + selectionvalue = selectionentry.get('value') + break + context_parameters[listname] = selectionvalue + artifact_url = 'about:blank' + if error_messages: + status = '<br>'.join(map(lambda m: f'❌ `{m}`', error_messages)) + else: + # if all example/default values are present, form the full request url, run the request, add a <base> tag with + # the url of em's public instance to the response text (so that relative paths work, e.g. to the static css file) and + # then save it to a html file. + context_parameters.update({ + 'action': 'display', + 'bridge': bridgeid, + 'format': 'Html', + }) + request_url = f'{instance.url}/?{urllib.parse.urlencode(context_parameters)}' + response = requests.get(request_url) + page_text = response.text.replace('','') + page_text = page_text.encode("utf_8") + soup = BeautifulSoup(page_text, "html.parser") + status_messages = [] + if response.status_code != 200: + status_messages += [f'❌ `HTTP status {response.status_code} {response.reason}`'] + else: + feed_items = soup.select('.feeditem') + feed_items_length = len(feed_items) + if feed_items_length <= 0: + status_messages += [f'⚠️ `The feed has no items`'] + elif feed_items_length == 1 and len(soup.select('.error')) > 0: + status_messages += [f'❌ `{getFirstLine(feed_items[0].text)}`'] + status_messages += map(lambda e: f'❌ `{getFirstLine(e.text)}`', soup.select('.error .error-type') + soup.select('.error .error-message')) + for item_element in soup.select('.feeditem'): # remove all feed items to not accidentally selected
<pre> tags from item content
+                    item_element.decompose()
+                status_messages += map(lambda e: f'⚠️ `{getFirstLine(e.text)}`', soup.find_all('pre'))
+                status_messages = list(dict.fromkeys(status_messages)) # remove duplicates
+                status = '<br>'.join(status_messages) + status_is_ok = status == ''; + if status_is_ok: + status = '✔️' + if with_upload and (not with_reduced_upload or not status_is_ok): + filename = f'{bridge_name} {form_number}{instance_suffix}{ARTIFACT_FILE_EXTENSION}' + filename = re.sub(r'[^a-z0-9 \_\-\.]', '', filename, flags=re.I).replace(' ', '_') + with open(file=f'{artifact_directory}/{filename}', mode='wb') as file: + file.write(page_text) + artifact_url = f'{artifact_base_url}/{filename}' + table_rows.append(f'| {bridge_name} | [{form_number} {context_name}{instance_suffix}]({artifact_url}) | {status} |') + form_number += 1 + return table_rows + +def getFirstLine(value: str) -> str: + # trim whitespace and remove text that can break the table or is simply unnecessary + clean_value = re.sub(r'^\[[^\]]+\]\s*rssbridge\.|[\|`]', '', value.strip()) + first_line = next(iter(clean_value.splitlines()), '') + max_length = 250 + if (len(first_line) > max_length): + first_line = first_line[:max_length] + '...' + return first_line + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--instances', nargs='+') + parser.add_argument('--no-upload', action='store_true') + parser.add_argument('--reduced-upload', action='store_true') + parser.add_argument('--title', default='Pull request artifacts') + parser.add_argument('--output-file', default=os.getcwd() + '/comment.txt') + args = parser.parse_args() + instances = [] + if args.instances: + for instance_arg in args.instances: + instance_arg_parts = instance_arg.split('::') + instance = Instance() + instance.name = instance_arg_parts[1].strip() if len(instance_arg_parts) >= 2 else '' + instance.url = instance_arg_parts[0].strip().rstrip("/") + instances.append(instance) + else: + instance = Instance() + instance.name = 'current' + instance.url = 'http://localhost:3000' + instances.append(instance) + instance = Instance() + instance.name = 'pr' + instance.url = 'http://localhost:3001' + instances.append(instance) + 
main( + instances=instances, + with_upload=not args.no_upload, + with_reduced_upload=args.reduced_upload and not args.no_upload, + title=args.title, + output_file=args.output_file + ); diff --git a/.github/workflows/dockerbuild.yml b/.github/workflows/dockerbuild.yml new file mode 100644 index 00000000000..3964555823a --- /dev/null +++ b/.github/workflows/dockerbuild.yml @@ -0,0 +1,61 @@ +name: Build Image on Commit and Release + +on: + push: + branches: + - 'master' + tags: + - '20*' + +env: + DOCKERHUB_SLUG: rssbridge/rss-bridge + GHCR_SLUG: ghcr.io/rss-bridge/rss-bridge + +jobs: + bake: + runs-on: ubuntu-latest + steps: + - + name: Checkout + uses: actions/checkout@v3 + - + name: Docker meta + id: docker_meta + uses: docker/metadata-action@v5 + with: + images: | + ${{ env.DOCKERHUB_SLUG }} + ${{ env.GHCR_SLUG }} + tags: | + type=raw,value=latest + type=sha + type=ref,event=tag,enable=${{ startsWith(github.ref, 'refs/tags/20') }} + type=raw,value=stable,enable=${{ startsWith(github.ref, 'refs/tags/20') }} + - + name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - + name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - + name: Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + - + name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - + name: Build and push + uses: docker/bake-action@v5 + with: + files: | + ./docker-bake.hcl + ${{ steps.docker_meta.outputs.bake-file }} + targets: image-all + push: true diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 00000000000..e0201022e73 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,27 @@ +name: Documentation + +on: + push: + paths: + - 'docs/**' + +jobs: + documentation: + runs-on: ubuntu-latest + steps: + - 
uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Setup PHP + uses: shivammathur/setup-php@v2 + with: + php-version: 8.0 + - name: Install dependencies + run: composer global require daux/daux.io + - name: Generate documentation + run: daux generate + - name: Deploy same repository 🚀 + uses: JamesIves/github-pages-deploy-action@v4 + with: + folder: "static" + branch: gh-pages diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000000..206b53de0fb --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,49 @@ +name: Lint + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + phpcs: + runs-on: ubuntu-20.04 + strategy: + matrix: + php-versions: ['7.4'] + steps: + - uses: actions/checkout@v4 + - uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + tools: phpcs + - run: phpcs . --standard=phpcs.xml --warning-severity=0 --extensions=php -p + + phpcompatibility: + runs-on: ubuntu-20.04 + strategy: + matrix: + php-versions: ['7.4'] + steps: + - uses: actions/checkout@v4 + - uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + - run: composer global config --no-plugins allow-plugins.dealerdirect/phpcodesniffer-composer-installer true + - run: composer global require dealerdirect/phpcodesniffer-composer-installer + - run: composer global require phpcompatibility/php-compatibility + - run: ~/.composer/vendor/bin/phpcs . 
--standard=phpcompatibility.xml --warning-severity=0 --extensions=php -p + + executable_php_files_check: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v4 + - run: | + if find -name "*.php" -executable -type f -print -exec false {} + + then + echo 'Good, no executable php scripts found' + else + echo 'Please unmark php scripts above as non-executable' + exit 1 + fi diff --git a/.github/workflows/prhtmlgenerator.yml b/.github/workflows/prhtmlgenerator.yml new file mode 100644 index 00000000000..163d51e3a7c --- /dev/null +++ b/.github/workflows/prhtmlgenerator.yml @@ -0,0 +1,126 @@ +name: 'PR Testing' + +on: + pull_request_target: + branches: [ master ] + +jobs: + check-bridges: + name: Check if bridges were changed + runs-on: ubuntu-latest + outputs: + BRIDGES: ${{ steps.check1.outputs.BRIDGES }} + steps: + - name: Check number of bridges + id: check1 + run: | + PR=${{github.event.number}}; + wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch; + bridgeamount=$(cat $PR.patch | grep "\bbridges/[A-Za-z0-9]*Bridge\.php\b" | sed "s=.*\bbridges/\([A-Za-z0-9]*\)Bridge\.php\b.*=\1=g" | sort | uniq | wc -l); + echo "BRIDGES=$bridgeamount" >> "$GITHUB_OUTPUT" + test-pr: + name: Generate HTML + runs-on: ubuntu-latest + needs: check-bridges + if: needs.check-bridges.outputs.BRIDGES > 0 + env: + PYTHONUNBUFFERED: 1 + # Needs additional permissions https://github.com/actions/first-interaction/issues/10#issuecomment-1041402989 + steps: + - name: Check out self + uses: actions/checkout@v4 + with: + ref: ${{github.event.pull_request.head.ref}} + repository: ${{github.event.pull_request.head.repo.full_name}} + - name: Check out rss-bridge + run: | + PR=${{github.event.number}}; + wget -O requirements.txt https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester-requirements.txt; + wget https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref 
}}/.github/prtester.py; + wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch; + touch DEBUG; + cat $PR.patch | grep "\bbridges/[A-Za-z0-9]*Bridge\.php\b" | sed "s=.*\bbridges/\([A-Za-z0-9]*\)Bridge\.php\b.*=\1=g" | sort | uniq > whitelist.txt + - name: Start Docker - Current + run: | + docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3000:80 ghcr.io/rss-bridge/rss-bridge:latest + - name: Start Docker - PR + run: | + docker build -t prbuild .; + docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3001:80 prbuild + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: '3.13' + cache: 'pip' + - name: Install requirements + run: | + cd $GITHUB_WORKSPACE + pip install -r requirements.txt + - name: Run bridge tests + id: testrun + run: | + mkdir results; + python prtester.py; + body="$(cat comment.txt)"; + body="${body//'%'/'%25'}"; + body="${body//$'\n'/'%0A'}"; + body="${body//$'\r'/'%0D'}"; + echo "bodylength=${#body}" >> $GITHUB_OUTPUT + env: + PR: ${{ github.event.number }} + - name: Upload generated tests + uses: actions/upload-artifact@v4 + id: upload-generated-tests + with: + name: tests + path: '*.html' + - name: Find Comment + if: ${{ steps.testrun.outputs.bodylength > 130 }} + uses: peter-evans/find-comment@v3 + id: fc + with: + issue-number: ${{ github.event.pull_request.number }} + comment-author: 'github-actions[bot]' + body-includes: Pull request artifacts + - name: Create or update comment + if: ${{ steps.testrun.outputs.bodylength > 130 }} + uses: peter-evans/create-or-update-comment@v4 + with: + comment-id: ${{ steps.fc.outputs.comment-id }} + issue-number: ${{ github.event.pull_request.number }} + body-file: comment.txt + edit-mode: replace + upload_tests: + name: Upload tests + runs-on: ubuntu-latest + needs: test-pr + steps: + - uses: actions/checkout@v4 + with: + repository: 
'RSS-Bridge/rss-bridge-tests' + ref: 'main' + token: ${{ secrets.RSSTESTER_ACTION }} + + - name: Setup git config + run: | + git config --global user.name "GitHub Actions" + git config --global user.email "<>" + + - name: Download tests + uses: actions/download-artifact@v4 + with: + name: tests + + - name: Move tests + run: | + cd prs + mkdir -p ${{github.event.number}} + cd ${{github.event.number}} + mv -f $GITHUB_WORKSPACE/*.html . + + - name: Commit and push generated tests + run: | + export COMMIT_MESSAGE="Added tests for PR ${{github.event.number}}" + git add . + git commit -m "$COMMIT_MESSAGE" + git push diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000000..93f07b0f0cd --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,23 @@ +name: Tests + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + phpunit8: + runs-on: ubuntu-20.04 + strategy: + matrix: + php-versions: ['7.4', '8.0', '8.1'] + steps: + - uses: actions/checkout@v4 + - uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-versions }} + env: + update: true + - run: composer install + - run: composer test diff --git a/.gitignore b/.gitignore index 680260c7044..6ed95489e41 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,6 @@ data/ *.pydevproject .project .metadata -bin/ tmp/ *.tmp *.bak @@ -213,6 +212,7 @@ pip-log.txt # Unit test / coverage reports .coverage +.phpunit.result.cache .tox #Translations @@ -228,6 +228,10 @@ pip-log.txt /whitelist.txt DEBUG config.ini.php +config/* +!config/nginx.conf +!config/php-fpm.conf +!config/php.ini ###################### ## VisualStudioCode ## diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 841ac5dbb5a..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,46 +0,0 @@ -dist: trusty -language: php - -install: - - composer global require dealerdirect/phpcodesniffer-composer-installer; - - composer global require 
phpcompatibility/php-compatibility; - - if [[ "$PHPUNIT" ]]; then - composer global require phpunit/phpunit ^$PHPUNIT; - fi - -script: - - phpenv rehash - # Run PHP_CodeSniffer on all versions - - ~/.config/composer/vendor/bin/phpcs . --standard=phpcs.xml --warning-severity=0 --extensions=php -p; - # Check PHP compatibility for the lowest and highest supported version - - if [[ $TRAVIS_PHP_VERSION == "5.6" || $TRAVIS_PHP_VERSION == "7.3" ]]; then - ~/.config/composer/vendor/bin/phpcs . --standard=phpcompatibility.xml --extensions=php -p; - fi - # Run unit tests on highest major version - - if [[ ${TRAVIS_PHP_VERSION:0:1} == "7" ]]; then - ~/.config/composer/vendor/bin/phpunit --configuration=phpunit.xml --include-path=lib/; - fi - -php: - - 7.3 - -env: - - PHPUNIT=6 - - PHPUNIT=7 - - PHPUNIT=8 - -matrix: - fast_finish: true - - include: - - php: 5.6 - env: PHPUNIT= - - php: 7.0 - - php: 7.1 - - php: 7.2 - - allow_failures: - - php: 7.3 - env: PHPUNIT=7 - - php: 7.3 - env: PHPUNIT=8 diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md new file mode 100644 index 00000000000..d27421aa969 --- /dev/null +++ b/CONTRIBUTORS.md @@ -0,0 +1,225 @@ +# Contributors + +* [16mhz](https://github.com/16mhz) +* [adamchainz](https://github.com/adamchainz) +* [Ahiles3005](https://github.com/Ahiles3005) +* [akirk](https://github.com/akirk) +* [Albirew](https://github.com/Albirew) +* [aledeg](https://github.com/aledeg) +* [alex73](https://github.com/alex73) +* [alexAubin](https://github.com/alexAubin) +* [Alkarex](https://github.com/Alkarex) +* [AmauryCarrade](https://github.com/AmauryCarrade) +* [arnd-s](https://github.com/arnd-s) +* [ArthurHoaro](https://github.com/ArthurHoaro) +* [Astalaseven](https://github.com/Astalaseven) +* [Astyan-42](https://github.com/Astyan-42) +* [austinhuang0131](https://github.com/austinhuang0131) +* [axor-mst](https://github.com/axor-mst) +* [ayacoo](https://github.com/ayacoo) +* [az5he6ch](https://github.com/az5he6ch) +* [b1nj](https://github.com/b1nj) +* 
[benasse](https://github.com/benasse) +* [Binnette](https://github.com/Binnette) +* [BoboTiG](https://github.com/BoboTiG) +* [Bockiii](https://github.com/Bockiii) +* [brtsos](https://github.com/brtsos) +* [captn3m0](https://github.com/captn3m0) +* [chemel](https://github.com/chemel) +* [Chouchen](https://github.com/Chouchen) +* [ckiw](https://github.com/ckiw) +* [cn-tools](https://github.com/cn-tools) +* [cnlpete](https://github.com/cnlpete) +* [corenting](https://github.com/corenting) +* [couraudt](https://github.com/couraudt) +* [csisoap](https://github.com/csisoap) +* [da2x](https://github.com/da2x) +* [dabenzel](https://github.com/dabenzel) +* [Daiyousei](https://github.com/Daiyousei) +* [dawidsowa](https://github.com/dawidsowa) +* [DevonHess](https://github.com/DevonHess) +* [dhuschde](https://github.com/dhuschde) +* [disk0x](https://github.com/disk0x) +* [DJCrashdummy](https://github.com/DJCrashdummy) +* [Djuuu](https://github.com/Djuuu) +* [DnAp](https://github.com/DnAp) +* [dominik-th](https://github.com/dominik-th) +* [Draeli](https://github.com/Draeli) +* [Dreckiger-Dan](https://github.com/Dreckiger-Dan) +* [drego85](https://github.com/drego85) +* [drklee3](https://github.com/drklee3) +* [DRogueRonin](https://github.com/DRogueRonin) +* [dvikan](https://github.com/dvikan) +* [eggwhalefrog](https://github.com/eggwhalefrog) +* [em92](https://github.com/em92) +* [eMerzh](https://github.com/eMerzh) +* [EtienneM](https://github.com/EtienneM) +* [f0086](https://github.com/f0086) +* [fanch317](https://github.com/fanch317) +* [fatuuse](https://github.com/fatuuse) +* [fivefilters](https://github.com/fivefilters) +* [floviolleau](https://github.com/floviolleau) +* [fluffy-critter](https://github.com/fluffy-critter) +* [fmachen](https://github.com/fmachen) +* [Frenzie](https://github.com/Frenzie) +* [fulmeek](https://github.com/fulmeek) +* [ggiessen](https://github.com/ggiessen) +* [gileri](https://github.com/gileri) +* [Ginko-Aloe](https://github.com/Ginko-Aloe) +* 
[girlpunk](https://github.com/girlpunk) +* [Glandos](https://github.com/Glandos) +* [gloony](https://github.com/gloony) +* [GregThib](https://github.com/GregThib) +* [griffaurel](https://github.com/griffaurel) +* [Grummfy](https://github.com/Grummfy) +* [gsantner](https://github.com/gsantner) +* [guigot](https://github.com/guigot) +* [hollowleviathan](https://github.com/hollowleviathan) +* [hpacleb](https://github.com/hpacleb) +* [hunhejj](https://github.com/hunhejj) +* [husim0](https://github.com/husim0) +* [IceWreck](https://github.com/IceWreck) +* [imagoiq](https://github.com/imagoiq) +* [j0k3r](https://github.com/j0k3r) +* [JackNUMBER](https://github.com/JackNUMBER) +* [jacquesh](https://github.com/jacquesh) +* [jakubvalenta](https://github.com/jakubvalenta) +* [JasonGhent](https://github.com/JasonGhent) +* [jcgoette](https://github.com/jcgoette) +* [jdesgats](https://github.com/jdesgats) +* [jdigilio](https://github.com/jdigilio) +* [JeremyRand](https://github.com/JeremyRand) +* [JimDog546](https://github.com/JimDog546) +* [jNullj](https://github.com/jNullj) +* [Jocker666z](https://github.com/Jocker666z) +* [johnnygroovy](https://github.com/johnnygroovy) +* [johnpc](https://github.com/johnpc) +* [joni1993](https://github.com/joni1993) +* [jtojnar](https://github.com/jtojnar) +* [KamaleiZestri](https://github.com/KamaleiZestri) +* [kkoyung](https://github.com/kkoyung) +* [klimplant](https://github.com/klimplant) +* [KN4CK3R](https://github.com/KN4CK3R) +* [kolarcz](https://github.com/kolarcz) +* [kranack](https://github.com/kranack) +* [kraoc](https://github.com/kraoc) +* [krisu5](https://github.com/krisu5) +* [l1n](https://github.com/l1n) +* [laBecasse](https://github.com/laBecasse) +* [lagaisse](https://github.com/lagaisse) +* [lalannev](https://github.com/lalannev) +* [langfingaz](https://github.com/langfingaz) +* [lassana](https://github.com/lassana) +* [ldidry](https://github.com/ldidry) +* [Leomaradan](https://github.com/Leomaradan) +* 
[leyrer](https://github.com/leyrer) +* [liamka](https://github.com/liamka) +* [Limero](https://github.com/Limero) +* [LogMANOriginal](https://github.com/LogMANOriginal) +* [lorenzos](https://github.com/lorenzos) +* [lukasklinger](https://github.com/lukasklinger) +* [m0zes](https://github.com/m0zes) +* [Mar-Koeh](https://github.com/Mar-Koeh) +* [marcus-at-localhost](https://github.com/marcus-at-localhost) +* [marius8510000-bot](https://github.com/marius8510000-bot) +* [matthewseal](https://github.com/matthewseal) +* [mcbyte-it](https://github.com/mcbyte-it) +* [mdemoss](https://github.com/mdemoss) +* [melangue](https://github.com/melangue) +* [metaMMA](https://github.com/metaMMA) +* [mibe](https://github.com/mibe) +* [mickaelBert](https://github.com/mickaelBert) +* [mightymt](https://github.com/mightymt) +* [mitsukarenai](https://github.com/mitsukarenai) +* [Monocularity](https://github.com/Monocularity) +* [MonsieurPoutounours](https://github.com/MonsieurPoutounours) +* [mr-flibble](https://github.com/mr-flibble) +* [mro](https://github.com/mro) +* [mschwld](https://github.com/mschwld) +* [muekoeff](https://github.com/muekoeff) +* [mw80](https://github.com/mw80) +* [mxmehl](https://github.com/mxmehl) +* [Mynacol](https://github.com/Mynacol) +* [nel50n](https://github.com/nel50n) +* [niawag](https://github.com/niawag) +* [Niehztog](https://github.com/Niehztog) +* [NikNikYkt](https://github.com/NikNikYkt) +* [Nono-m0le](https://github.com/Nono-m0le) +* [NotsoanoNimus](https://github.com/NotsoanoNimus) +* [obsiwitch](https://github.com/obsiwitch) +* [Ololbu](https://github.com/Ololbu) +* [ORelio](https://github.com/ORelio) +* [otakuf](https://github.com/otakuf) +* [Park0](https://github.com/Park0) +* [Paroleen](https://github.com/Paroleen) +* [Patricol](https://github.com/Patricol) +* [paulchen](https://github.com/paulchen) +* [PaulVayssiere](https://github.com/PaulVayssiere) +* [pellaeon](https://github.com/pellaeon) +* 
[PeterDaveHello](https://github.com/PeterDaveHello) +* [Peterr-K](https://github.com/Peterr-K) +* [Piranhaplant](https://github.com/Piranhaplant) +* [pirnz](https://github.com/pirnz) +* [pit-fgfjiudghdf](https://github.com/pit-fgfjiudghdf) +* [pitchoule](https://github.com/pitchoule) +* [pmaziere](https://github.com/pmaziere) +* [Pofilo](https://github.com/Pofilo) +* [prysme01](https://github.com/prysme01) +* [pubak42](https://github.com/pubak42) +* [Qluxzz](https://github.com/Qluxzz) +* [quentinus95](https://github.com/quentinus95) +* [quickwick](https://github.com/quickwick) +* [rakoo](https://github.com/rakoo) +* [RawkBob](https://github.com/RawkBob) +* [regisenguehard](https://github.com/regisenguehard) +* [Riduidel](https://github.com/Riduidel) +* [rogerdc](https://github.com/rogerdc) +* [Roliga](https://github.com/Roliga) +* [ronansalmon](https://github.com/ronansalmon) +* [rremizov](https://github.com/rremizov) +* [s0lesurviv0r](https://github.com/s0lesurviv0r) +* [sal0max](https://github.com/sal0max) +* [sebsauvage](https://github.com/sebsauvage) +* [shutosg](https://github.com/shutosg) +* [simon816](https://github.com/simon816) +* [Simounet](https://github.com/Simounet) +* [somini](https://github.com/somini) +* [SpangleLabs](https://github.com/SpangleLabs) +* [SqrtMinusOne](https://github.com/SqrtMinusOne) +* [squeek502](https://github.com/squeek502) +* [StelFux](https://github.com/StelFux) +* [stjohnjohnson](https://github.com/stjohnjohnson) +* [Stopka](https://github.com/Stopka) +* [Strubbl](https://github.com/Strubbl) +* [sublimz](https://github.com/sublimz) +* [sunchaserinfo](https://github.com/sunchaserinfo) +* [SuperSandro2000](https://github.com/SuperSandro2000) +* [sysadminstory](https://github.com/sysadminstory) +* [t0stiman](https://github.com/t0stiman) +* [tameroski](https://github.com/tameroski) +* [teromene](https://github.com/teromene) +* [tgkenney](https://github.com/tgkenney) +* [thefranke](https://github.com/thefranke) +* 
[TheRadialActive](https://github.com/TheRadialActive) +* [theScrabi](https://github.com/theScrabi) +* [thezeroalpha](https://github.com/thezeroalpha) +* [thibaultcouraud](https://github.com/thibaultcouraud) +* [timendum](https://github.com/timendum) +* [TitiTestScalingo](https://github.com/TitiTestScalingo) +* [tomaszkane](https://github.com/tomaszkane) +* [tomershvueli](https://github.com/tomershvueli) +* [TotalCaesar659](https://github.com/TotalCaesar659) +* [tpikonen](https://github.com/tpikonen) +* [TReKiE](https://github.com/TReKiE) +* [triatic](https://github.com/triatic) +* [User123698745](https://github.com/User123698745) +* [VerifiedJoseph](https://github.com/VerifiedJoseph) +* [vitkabele](https://github.com/vitkabele) +* [WalterBarrett](https://github.com/WalterBarrett) +* [wtuuju](https://github.com/wtuuju) +* [xurxof](https://github.com/xurxof) +* [yamanq](https://github.com/yamanq) +* [yardenac](https://github.com/yardenac) +* [ymeister](https://github.com/ymeister) +* [yue-dongchen](https://github.com/yue-dongchen) +* [ZeNairolf](https://github.com/ZeNairolf) diff --git a/Dockerfile b/Dockerfile index fa9979d6787..1326dba0dbd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,72 @@ -FROM php:7-apache +FROM debian:12-slim AS rssbridge -ENV APACHE_DOCUMENT_ROOT=/app +LABEL description="RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one." 
+LABEL repository="https://github.com/RSS-Bridge/rss-bridge" +LABEL website="https://github.com/RSS-Bridge/rss-bridge" -RUN mv "$PHP_INI_DIR/php.ini-production" "$PHP_INI_DIR/php.ini" \ - && apt-get --yes update && apt-get --yes install libxml2-dev \ - && docker-php-ext-install -j$(nproc) simplexml \ - && sed -ri -e 's!/var/www/html!${APACHE_DOCUMENT_ROOT}!g' /etc/apache2/sites-available/*.conf \ - && sed -ri -e 's!/var/www/!${APACHE_DOCUMENT_ROOT}!g' /etc/apache2/apache2.conf /etc/apache2/conf-available/*.conf \ - && sed -ri -e 's/(MinProtocol\s*=\s*)TLSv1\.2/\1None/' /etc/ssl/openssl.cnf \ - && sed -ri -e 's/(CipherString\s*=\s*DEFAULT)@SECLEVEL=2/\1/' /etc/ssl/openssl.cnf +ARG DEBIAN_FRONTEND=noninteractive +RUN set -xe && \ + apt-get update && \ + apt-get install --yes --no-install-recommends \ + ca-certificates \ + nginx \ + nss-plugin-pem \ + php-curl \ + php-fpm \ + php-intl \ + # php-json is enabled by default with PHP 8.2 in Debian 12 + php-mbstring \ + php-memcached \ + # php-opcache is enabled by default with PHP 8.2 in Debian 12 + # php-openssl is enabled by default with PHP 8.2 in Debian 12 + php-sqlite3 \ + php-xml \ + php-zip \ + # php-zlib is enabled by default with PHP 8.2 in Debian 12 + # for downloading libcurl-impersonate + curl \ + && \ + # install curl-impersonate library + curlimpersonate_version=0.6.0 && \ + { \ + { \ + [ $(arch) = 'aarch64' ] && \ + archive="libcurl-impersonate-v${curlimpersonate_version}.aarch64-linux-gnu.tar.gz" && \ + sha512sum="d04b1eabe71f3af06aa1ce99b39a49c5e1d33b636acedcd9fad163bc58156af5c3eb3f75aa706f335515791f7b9c7a6c40ffdfa47430796483ecef929abd905d" \ + ; } \ + || { \ + [ $(arch) = 'armv7l' ] && \ + archive="libcurl-impersonate-v${curlimpersonate_version}.arm-linux-gnueabihf.tar.gz" && \ + sha512sum="05906b4efa1a6ed8f3b716fd83d476b6eea6bfc68e3dbc5212d65a2962dcaa7bd1f938c9096a7535252b11d1d08fb93adccc633585ff8cb8cec5e58bfe969bc9" \ + ; } \ + || { \ + [ $(arch) = 'x86_64' ] && \ + 
archive="libcurl-impersonate-v${curlimpersonate_version}.x86_64-linux-gnu.tar.gz" && \ + sha512sum="480bbe9452cd9aff2c0daaaf91f1057b3a96385f79011628a9237223757a9b0d090c59cb5982dc54ea0d07191657299ea91ca170a25ced3d7d410fcdff130ace" \ + ; } \ + } && \ + curl -LO "https://github.com/lwthiker/curl-impersonate/releases/download/v${curlimpersonate_version}/${archive}" && \ + echo "$sha512sum $archive" | sha512sum -c - && \ + mkdir -p /usr/local/lib/curl-impersonate && \ + tar xaf "$archive" -C /usr/local/lib/curl-impersonate --wildcards 'libcurl-impersonate-ff.so*' && \ + rm "$archive" && \ + apt-get purge --assume-yes curl && \ + rm -rf /var/lib/apt/lists/* -COPY --chown=www-data:www-data ./ /app/ \ No newline at end of file +ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so +ENV CURL_IMPERSONATE ff91esr + +# logs should go to stdout / stderr +RUN ln -sfT /dev/stderr /var/log/nginx/error.log; \ + ln -sfT /dev/stdout /var/log/nginx/access.log; \ + chown -R --no-dereference www-data:adm /var/log/nginx/ + +COPY ./config/nginx.conf /etc/nginx/sites-available/default +COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf +COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini + +COPY --chown=www-data:www-data ./ /app/ + +EXPOSE 80 + +ENTRYPOINT ["/app/docker-entrypoint.sh"] diff --git a/README.md b/README.md index a9db8eafb38..dadf7094939 100644 --- a/README.md +++ b/README.md @@ -1,257 +1,527 @@ +# RSS-Bridge + ![RSS-Bridge](static/logo_600px.png) -=== -[![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![Debian 
Release](https://img.shields.io/badge/dynamic/json.svg?logo=debian&label=debian%20release&url=https%3A%2F%2Fsources.debian.org%2Fapi%2Fsrc%2Frss-bridge%2F&query=%24.versions%5B0%5D.version&colorB=blue)](https://tracker.debian.org/pkg/rss-bridge) [![Guix Release](https://img.shields.io/badge/guix%20release-unknown-blue.svg)](https://www.gnu.org/software/guix/packages/R/) [![Build Status](https://travis-ci.org/RSS-Bridge/rss-bridge.svg?branch=master)](https://travis-ci.org/RSS-Bridge/rss-bridge) [![Docker Build Status](https://img.shields.io/docker/build/rssbridge/rss-bridge.svg?logo=docker)](https://hub.docker.com/r/rssbridge/rss-bridge/) -RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one. It can be used on webservers or as a stand-alone application in CLI mode. +RSS-Bridge is a PHP web application. + +It generates web feeds for websites that don't have one. + +Officially hosted instance: https://rss-bridge.org/bridge01/ + +IRC channel #rssbridge at https://libera.chat/ + +[Full documentation](https://rss-bridge.github.io/rss-bridge/index.html) + +Alternatively find another +[public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html). + +Requires minimum PHP 7.4. 
+ + +[![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) +[![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) +[![irc.libera.chat](https://img.shields.io/badge/irc.libera.chat-%23rssbridge-blue.svg)](https://web.libera.chat/#rssbridge) +[![Actions Status](https://img.shields.io/github/actions/workflow/status/RSS-Bridge/rss-bridge/tests.yml?branch=master&label=GitHub%20Actions&logo=github)](https://github.com/RSS-Bridge/rss-bridge/actions) + +||| +|:-:|:-:| +|![Screenshot #1](/static/screenshot-1.png?raw=true)|![Screenshot #2](/static/screenshot-2.png?raw=true)| +|![Screenshot #3](/static/screenshot-3.png?raw=true)|![Screenshot #4](/static/screenshot-4.png?raw=true)| +|![Screenshot #5](/static/screenshot-5.png?raw=true)|![Screenshot #6](/static/screenshot-6.png?raw=true)| + +## A subset of bridges (16/447) + +* `CssSelectorBridge`: [Scrape out a feed using CSS selectors](https://rss-bridge.org/bridge01/#bridge-CssSelectorBridge) +* `FeedMergeBridge`: [Combine multiple feeds into one](https://rss-bridge.org/bridge01/#bridge-FeedMergeBridge) +* `FeedReducerBridge`: [Reduce a noisy feed by some percentage](https://rss-bridge.org/bridge01/#bridge-FeedReducerBridge) +* `FilterBridge`: [Filter a feed by excluding/including items by keyword](https://rss-bridge.org/bridge01/#bridge-FilterBridge) +* `GettrBridge`: [Fetches the latest posts from a GETTR user](https://rss-bridge.org/bridge01/#bridge-GettrBridge) +* `MastodonBridge`: [Fetches statuses from a Mastodon (ActivityPub) instance](https://rss-bridge.org/bridge01/#bridge-MastodonBridge) +* `RedditBridge`: [Fetches posts from a user/subreddit (with filtering options)](https://rss-bridge.org/bridge01/#bridge-RedditBridge) +* `RumbleBridge`: [Fetches channel/user videos](https://rss-bridge.org/bridge01/#bridge-RumbleBridge) +* `SoundcloudBridge`: [Fetches music by 
username](https://rss-bridge.org/bridge01/#bridge-SoundcloudBridge) +* `TelegramBridge`: [Fetches posts from a public channel](https://rss-bridge.org/bridge01/#bridge-TelegramBridge) +* `ThePirateBayBridge`: [Fetches torrents by search/user/category](https://rss-bridge.org/bridge01/#bridge-ThePirateBayBridge) +* `TikTokBridge`: [Fetches posts by username](https://rss-bridge.org/bridge01/#bridge-TikTokBridge) +* `TwitchBridge`: [Fetches videos from channel](https://rss-bridge.org/bridge01/#bridge-TwitchBridge) +* `XPathBridge`: [Scrape out a feed using XPath expressions](https://rss-bridge.org/bridge01/#bridge-XPathBridge) +* `YoutubeBridge`: [Fetches videos by username/channel/playlist/search](https://rss-bridge.org/bridge01/#bridge-YoutubeBridge) +* `YouTubeCommunityTabBridge`: [Fetches posts from a channel's community tab](https://rss-bridge.org/bridge01/#bridge-YouTubeCommunityTabBridge) + +## Tutorial + +### How to install on traditional shared web hosting + +RSS-Bridge can basically be unzipped into a web folder. It should work immediately. + +Latest zip: +https://github.com/RSS-Bridge/rss-bridge/archive/refs/heads/master.zip (2MB) + +### How to install on Debian 12 (nginx + php-fpm) + +These instructions have been tested on a fresh Debian 12 VM from Digital Ocean (1vcpu-512mb-10gb, 5 USD/month). 
+ +```shell +timedatectl set-timezone Europe/Oslo + +apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl php-intl + +# Create a user account +useradd --shell /bin/bash --create-home rss-bridge + +cd /var/www + +# Create folder and change its ownership to rss-bridge +mkdir rss-bridge && chown rss-bridge:rss-bridge rss-bridge/ + +# Become rss-bridge +su rss-bridge + +# Clone master branch into existing folder +git clone https://github.com/RSS-Bridge/rss-bridge.git rss-bridge/ +cd rss-bridge + +# Copy over the default config (OPTIONAL) +cp -v config.default.ini.php config.ini.php + +# Recursively give full permissions to user/owner +chmod 700 --recursive ./ + +# Give read and execute to others on folder ./static +chmod o+rx ./ ./static + +# Recursively give read to others on folder ./static +chmod o+r --recursive ./static +``` + +Nginx config: + +```nginx +# /etc/nginx/sites-enabled/rss-bridge.conf + +server { + listen 80; + + # TODO: change to your own server name + server_name example.com; + + access_log /var/log/nginx/rss-bridge.access.log; + error_log /var/log/nginx/rss-bridge.error.log; + log_not_found off; + + # Intentionally not setting a root folder + + # Static content only served here + location /static/ { + alias /var/www/rss-bridge/static/; + } + + # Pass off to php-fpm only when location is EXACTLY == / + location = / { + root /var/www/rss-bridge/; + include snippets/fastcgi-php.conf; + fastcgi_read_timeout 45s; + fastcgi_pass unix:/run/php/rss-bridge.sock; + } -**Important**: RSS-Bridge is __not__ a feed reader or feed aggregator, but a tool to generate feeds that are consumed by feed readers and feed aggregators. Find a list of feed aggregators on [Wikipedia](https://en.wikipedia.org/wiki/Comparison_of_feed_aggregators). 
+ # Reduce log noise + location = /favicon.ico { + access_log off; + } -Supported sites/pages (examples) -=== + # Reduce log noise + location = /robots.txt { + access_log off; + } +} +``` -* `Bandcamp` : Returns last release from [bandcamp](https://bandcamp.com/) for a tag -* `Cryptome` : Returns the most recent documents from [Cryptome.org](http://cryptome.org/) -* `DansTonChat`: Most recent quotes from [danstonchat.com](http://danstonchat.com/) -* `DuckDuckGo`: Most recent results from [DuckDuckGo.com](https://duckduckgo.com/) -* `Facebook` : Returns the latest posts on a page or profile on [Facebook](https://facebook.com/) -* `FlickrExplore` : [Latest interesting images](http://www.flickr.com/explore) from Flickr -* `GoogleSearch` : Most recent results from Google Search -* `Identi.ca` : Identica user timeline (Should be compatible with other Pump.io instances) -* `Instagram`: Most recent photos from an Instagram user -* `OpenClassrooms`: Lastest tutorials from [fr.openclassrooms.com](http://fr.openclassrooms.com/) -* `Pinterest`: Most recent photos from user or search -* `ScmbBridge`: Newest stories from [secouchermoinsbete.fr](http://secouchermoinsbete.fr/) -* `ThePirateBay` : Returns the newest indexed torrents from [The Pirate Bay](https://thepiratebay.se/) with keywords -* `Twitter` : Return keyword/hashtag search or user timeline -* `Wikipedia`: highlighted articles from [Wikipedia](https://wikipedia.org/) in English, German, French or Esperanto -* `YouTube` : YouTube user channel, playlist or search +PHP FPM pool config: +```ini +; /etc/php/8.2/fpm/pool.d/rss-bridge.conf -And [many more](bridges/), thanks to the community! 
+[rss-bridge] -Output format -=== +user = rss-bridge +group = rss-bridge -RSS-Bridge is capable of producing several output formats: +listen = /run/php/rss-bridge.sock -* `Atom` : Atom feed, for use in feed readers -* `Html` : Simple HTML page -* `Json` : JSON, for consumption by other applications -* `Mrss` : MRSS feed, for use in feed readers -* `Plaintext` : Raw text, for consumption by other applications +listen.owner = www-data +listen.group = www-data -You can extend RSS-Bridge with your own format, using the [Format API](https://github.com/RSS-Bridge/rss-bridge/wiki/Format-API)! +; Create 10 workers standing by to serve requests +pm = static +pm.max_children = 10 -Screenshot -=== +; Respawn worker after 500 requests (workaround for memory leaks etc.) +pm.max_requests = 500 +``` -Welcome screen: +PHP ini config: +```ini +; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini -![Screenshot](https://github.com/RSS-Bridge/rss-bridge/wiki/images/screenshot_rss-bridge_welcome.png) +max_execution_time = 15 +memory_limit = 64M +``` -*** +Restart fpm and nginx: -RSS-Bridge hashtag (#rss-bridge) search on Twitter, in Atom format (as displayed by Firefox): +```shell +# Lint and restart php-fpm +php-fpm8.2 -t && systemctl restart php8.2-fpm -![Screenshot](https://github.com/RSS-Bridge/rss-bridge/wiki/images/screenshot_twitterbridge_atom.png) +# Lint and restart nginx +nginx -t && systemctl restart nginx +``` -Requirements -=== +### How to install from Composer -RSS-Bridge requires PHP 5.6 or higher with following extensions enabled: +Install the latest release. 
- - [`openssl`](https://secure.php.net/manual/en/book.openssl.php) - - [`libxml`](https://secure.php.net/manual/en/book.libxml.php) - - [`mbstring`](https://secure.php.net/manual/en/book.mbstring.php) - - [`simplexml`](https://secure.php.net/manual/en/book.simplexml.php) - - [`curl`](https://secure.php.net/manual/en/book.curl.php) - - [`json`](https://secure.php.net/manual/en/book.json.php) - - [`sqlite3`](http://php.net/manual/en/book.sqlite3.php) (only when using SQLiteCache) +```shell +cd /var/www +composer create-project -v --no-dev --no-scripts rss-bridge/rss-bridge +``` -Find more information on our [Wiki](https://github.com/rss-bridge/rss-bridge/wiki) +### How to install with Caddy -Enable / Disable bridges -=== +TODO. See https://github.com/RSS-Bridge/rss-bridge/issues/3785 -RSS-Bridge allows you to take full control over which bridges are displayed to the user. That way you can host your own RSS-Bridge service with your favorite collection of bridges! +### Install from Docker Hub: -Find more information on the [Wiki](https://github.com/RSS-Bridge/rss-bridge/wiki/Whitelisting) +Install by downloading the docker image from Docker Hub: -**Notice**: By default, RSS-Bridge will only show a small subset of bridges. Make sure to read up on [whitelisting](https://github.com/RSS-Bridge/rss-bridge/wiki/Whitelisting) to unlock the full potential of RSS-Bridge! +```bash +# Create container +docker create --name=rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rssbridge/rss-bridge +``` -Deploy -=== +You can put custom `config.ini.php` and bridges into `./config`. -Thanks to the community, hosting your own instance of RSS-Bridge is as easy as clicking a button! +**You must restart container for custom changes to take effect.** + +See `docker-entrypoint.sh` for details. 
+ +```bash +# Start container +docker start rss-bridge +``` + +Browse http://localhost:3000/ + +### Install by locally building from Dockerfile + +```bash +# Build image from Dockerfile +docker build -t rss-bridge . + +# Create container +docker create --name rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rss-bridge +``` + +You can put custom `config.ini.php` and bridges into `./config`. + +**You must restart container for custom changes to take effect.** + +See `docker-entrypoint.sh` for details. + +```bash +# Start container +docker start rss-bridge +``` + +Browse http://localhost:3000/ + +### Install with docker-compose (using Docker Hub) + +You can put custom `config.ini.php` and bridges into `./config`. + +**You must restart container for custom changes to take effect.** + +See `docker-entrypoint.sh` for details. + +```bash +docker-compose up +``` + +Browse http://localhost:3000/ + +### Other installation methods [![Deploy on Scalingo](https://cdn.scalingo.com/deploy/button.svg)](https://my.scalingo.com/deploy?source=https://github.com/sebsauvage/rss-bridge) [![Deploy to Heroku](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy) +[![Deploy to Cloudron](https://cloudron.io/img/button.svg)](https://www.cloudron.io/store/com.rssbridgeapp.cloudronapp.html) +[![Run on PikaPods](https://www.pikapods.com/static/run-button.svg)](https://www.pikapods.com/pods?run=rssbridge) -Getting involved -=== - -There are many ways for you to getting involved with RSS-Bridge. Here are a few things: - -- Share RSS-Bridge with your friends (Twitter, Facebook, ..._you name it_...) 
-- Report broken bridges or bugs by opening [Issues](https://github.com/RSS-Bridge/rss-bridge/issues) on GitHub -- Request new features or suggest ideas (via [Issues](https://github.com/RSS-Bridge/rss-bridge/issues)) -- Discuss bugs, features, ideas or [issues](https://github.com/RSS-Bridge/rss-bridge/issues) -- Add new bridges or improve the API -- Improve the [Wiki](https://github.com/RSS-Bridge/rss-bridge/wiki) -- Host an instance of RSS-Bridge for your personal use or make it available to the community :sparkling_heart: - -Authors -=== - -We are RSS-Bridge community, a group of developers continuing the project initiated by sebsauvage, webmaster of [sebsauvage.net](http://sebsauvage.net), author of [Shaarli](http://sebsauvage.net/wiki/doku.php?id=php:shaarli) and [ZeroBin](http://sebsauvage.net/wiki/doku.php?id=php:zerobin). - -**Contributors** (sorted alphabetically): - - -* [16mhz](https://github.com/16mhz) -* [adamchainz](https://github.com/adamchainz) -* [Ahiles3005](https://github.com/Ahiles3005) -* [Albirew](https://github.com/Albirew) -* [aledeg](https://github.com/aledeg) -* [alex73](https://github.com/alex73) -* [alexAubin](https://github.com/alexAubin) -* [AmauryCarrade](https://github.com/AmauryCarrade) -* [AntoineTurmel](https://github.com/AntoineTurmel) -* [ArthurHoaro](https://github.com/ArthurHoaro) -* [Astalaseven](https://github.com/Astalaseven) -* [Astyan-42](https://github.com/Astyan-42) -* [az5he6ch](https://github.com/az5he6ch) -* [azdkj532](https://github.com/azdkj532) -* [b1nj](https://github.com/b1nj) -* [benasse](https://github.com/benasse) -* [captn3m0](https://github.com/captn3m0) -* [chemel](https://github.com/chemel) -* [ckiw](https://github.com/ckiw) -* [cnlpete](https://github.com/cnlpete) -* [corenting](https://github.com/corenting) -* [couraudt](https://github.com/couraudt) -* [cyberjacob](https://github.com/cyberjacob) -* [da2x](https://github.com/da2x) -* [Daiyousei](https://github.com/Daiyousei) -* 
[dawidsowa](https://github.com/dawidsowa) -* [disk0x](https://github.com/disk0x) -* [DJCrashdummy](https://github.com/DJCrashdummy) -* [Djuuu](https://github.com/Djuuu) -* [DnAp](https://github.com/DnAp) -* [dominik-th](https://github.com/dominik-th) -* [Draeli](https://github.com/Draeli) -* [Dreckiger-Dan](https://github.com/Dreckiger-Dan) -* [em92](https://github.com/em92) -* [eMerzh](https://github.com/eMerzh) -* [EtienneM](https://github.com/EtienneM) -* [floviolleau](https://github.com/floviolleau) -* [fluffy-critter](https://github.com/fluffy-critter) -* [Frenzie](https://github.com/Frenzie) -* [fulmeek](https://github.com/fulmeek) -* [Ginko-Aloe](https://github.com/Ginko-Aloe) -* [Glandos](https://github.com/Glandos) -* [gloony](https://github.com/gloony) -* [GregThib](https://github.com/GregThib) -* [griffaurel](https://github.com/griffaurel) -* [Grummfy](https://github.com/Grummfy) -* [hunhejj](https://github.com/hunhejj) -* [husim0](https://github.com/husim0) -* [IceWreck](https://github.com/IceWreck) -* [j0k3r](https://github.com/j0k3r) -* [JackNUMBER](https://github.com/JackNUMBER) -* [jdigilio](https://github.com/jdigilio) -* [JeremyRand](https://github.com/JeremyRand) -* [Jocker666z](https://github.com/Jocker666z) -* [johnnygroovy](https://github.com/johnnygroovy) -* [killruana](https://github.com/killruana) -* [klimplant](https://github.com/klimplant) -* [kranack](https://github.com/kranack) -* [kraoc](https://github.com/kraoc) -* [l1n](https://github.com/l1n) -* [laBecasse](https://github.com/laBecasse) -* [lagaisse](https://github.com/lagaisse) -* [lalannev](https://github.com/lalannev) -* [ldidry](https://github.com/ldidry) -* [Leomaradan](https://github.com/Leomaradan) -* [Limero](https://github.com/Limero) -* [LogMANOriginal](https://github.com/LogMANOriginal) -* [lorenzos](https://github.com/lorenzos) -* [lukasklinger](https://github.com/lukasklinger) -* [m0zes](https://github.com/m0zes) -* [matthewseal](https://github.com/matthewseal) -* 
[mcbyte-it](https://github.com/mcbyte-it) -* [mdemoss](https://github.com/mdemoss) -* [melangue](https://github.com/melangue) -* [metaMMA](https://github.com/metaMMA) -* [mitsukarenai](https://github.com/mitsukarenai) -* [MonsieurPoutounours](https://github.com/MonsieurPoutounours) -* [mr-flibble](https://github.com/mr-flibble) -* [mro](https://github.com/mro) -* [mxmehl](https://github.com/mxmehl) -* [nel50n](https://github.com/nel50n) -* [niawag](https://github.com/niawag) -* [Nono-m0le](https://github.com/Nono-m0le) -* [ObsidianWitch](https://github.com/ObsidianWitch) -* [OliverParoczai](https://github.com/OliverParoczai) -* [oratosquilla-oratoria](https://github.com/oratosquilla-oratoria) -* [ORelio](https://github.com/ORelio) -* [PaulVayssiere](https://github.com/PaulVayssiere) -* [pellaeon](https://github.com/pellaeon) -* [Piranhaplant](https://github.com/Piranhaplant) -* [pit-fgfjiudghdf](https://github.com/pit-fgfjiudghdf) -* [pitchoule](https://github.com/pitchoule) -* [pmaziere](https://github.com/pmaziere) -* [Pofilo](https://github.com/Pofilo) -* [prysme01](https://github.com/prysme01) -* [quentinus95](https://github.com/quentinus95) -* [regisenguehard](https://github.com/regisenguehard) -* [Riduidel](https://github.com/Riduidel) -* [rogerdc](https://github.com/rogerdc) -* [Roliga](https://github.com/Roliga) -* [sebsauvage](https://github.com/sebsauvage) -* [shutosg](https://github.com/shutosg) -* [somini](https://github.com/somini) -* [squeek502](https://github.com/squeek502) -* [stjohnjohnson](https://github.com/stjohnjohnson) -* [Strubbl](https://github.com/Strubbl) -* [sublimz](https://github.com/sublimz) -* [sunchaserinfo](https://github.com/sunchaserinfo) -* [sysadminstory](https://github.com/sysadminstory) -* [tameroski](https://github.com/tameroski) -* [teromene](https://github.com/teromene) -* [thefranke](https://github.com/thefranke) -* [ThePadawan](https://github.com/ThePadawan) -* [TheRadialActive](https://github.com/TheRadialActive) -* 
[TitiTestScalingo](https://github.com/TitiTestScalingo) -* [triatic](https://github.com/triatic) -* [VerifiedJoseph](https://github.com/VerifiedJoseph) -* [WalterBarrett](https://github.com/WalterBarrett) -* [wtuuju](https://github.com/wtuuju) -* [xurxof](https://github.com/xurxof) -* [yardenac](https://github.com/yardenac) -* [ZeNairolf](https://github.com/ZeNairolf) - -Licenses -=== +The Heroku quick deploy currently does not work. It might work if you fork this repo and +modify the `repository` in `scalingo.json`. See https://github.com/RSS-Bridge/rss-bridge/issues/2688 -The source code for RSS-Bridge is [Public Domain](UNLICENSE). +Learn more in +[Installation](https://rss-bridge.github.io/rss-bridge/For_Hosts/Installation.html). -RSS-Bridge uses third party libraries with their own license: +## How-to + +### How to fix "Access denied." + +Output is from php-fpm. It is unable to read index.php. + + chown rss-bridge:rss-bridge /var/www/rss-bridge/index.php + +### How to password-protect the instance (token) + +Modify `config.ini.php`: + + [authentication] + + token = "hunter2" + +### How to remove all cache items + +As current user: + + bin/cache-clear + +As user rss-bridge: + + sudo -u rss-bridge bin/cache-clear + +As root: + + sudo bin/cache-clear + +### How to remove all expired cache items + + bin/cache-prune + +### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable" - * [`PHP Simple HTML DOM Parser`](http://simplehtmldom.sourceforge.net/) licensed under the [MIT License](http://opensource.org/licenses/MIT) - * [`php-urljoin`](https://github.com/fluffy-critter/php-urljoin) licensed under the [MIT License](http://opensource.org/licenses/MIT) +```shell +# Give rss-bridge ownership +chown rss-bridge:rss-bridge -R /var/www/rss-bridge/cache -Technical notes -=== +# Or, give www-data ownership +chown www-data:www-data -R /var/www/rss-bridge/cache + +# Or, give everyone write permission +chmod 777 -R /var/www/rss-bridge/cache + +# 
Or last ditch effort (CAREFUL) +rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/ +``` + +### How to fix "attempt to write a readonly database" + +The sqlite files (db, wal and shm) are not writeable. + + chown -v rss-bridge:rss-bridge cache/* + +### How to fix "Unable to prepare statement: 1, no such table: storage" + + rm cache/* + +### How to create a new bridge from scratch + +Create the new bridge in e.g. `bridges/BearBlogBridge.php`: + +```php +find('.blog-posts li') as $li) { + $a = $li->find('a', 0); + $this->items[] = [ + 'title' => $a->plaintext, + 'uri' => 'https://herman.bearblog.dev' . $a->href, + ]; + } + } +} +``` + +Learn more in [bridge api](https://rss-bridge.github.io/rss-bridge/Bridge_API/index.html). + +### How to enable all bridges + + enabled_bridges[] = * + +### How to enable some bridges + +``` +enabled_bridges[] = TwitchBridge +enabled_bridges[] = GettrBridge +``` + +### How to enable debug mode + +The +[debug mode](https://rss-bridge.github.io/rss-bridge/For_Developers/Debug_mode.html) +disables the majority of caching operations. + + enable_debug_mode = true + +### How to switch to memcached as cache backend + +``` +[cache] + +; Cache backend: file (default), sqlite, memcached, null +type = "memcached" +``` + +### How to switch to sqlite3 as cache backend + + type = "sqlite" + +### How to disable bridge errors (as feed items) + +When a bridge fails, RSS-Bridge will produce a feed with a single item describing the error. + +This way, feed readers pick it up and you are notified. + +If you don't want this behaviour, switch the error output to `http`: + + [error] + + ; Defines how error messages are returned by RSS-Bridge + ; + ; "feed" = As part of the feed (default) + ; "http" = As HTTP error message + ; "none" = No errors are reported + output = "http" + +### How to accumulate errors before finally reporting them + +Modify `report_limit` so that an error must occur 3 times before it is reported. 
+ + ; Defines how often an error must occur before it is reported to the user + report_limit = 3 + +The report count is reset to 0 each day. + +### How to password-protect the instance (HTTP Basic Auth) + + [authentication] + + enable = true + username = "alice" + password = "cat" + +This will typically require feed readers to be configured with the credentials. + +It may also be possible to manually include the credentials in the URL: + +https://alice:cat@rss-bridge.org/bridge01/?action=display&bridge=FabriceBellardBridge&format=Html + +### How to create a new output format + +See `formats/PlaintextFormat.php` for an example. + +### How to run unit tests and linter + +These commands require that you have installed the dev dependencies in `composer.json`. + +Run all tests: + + ./vendor/bin/phpunit + +Run a single test class: + + ./vendor/bin/phpunit --filter UrlTest + +Run linter: + + ./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./ + +https://github.com/squizlabs/PHP_CodeSniffer/wiki + +### How to spawn a minimal development environment + + php -S 127.0.0.1:9001 + +http://127.0.0.1:9001/ + +## Explanation + +We are the RSS-Bridge community, a group of developers continuing the project initiated by sebsauvage, +webmaster of +[sebsauvage.net](https://sebsauvage.net), author of +[Shaarli](https://sebsauvage.net/wiki/doku.php?id=php:shaarli) and +[ZeroBin](https://sebsauvage.net/wiki/doku.php?id=php:zerobin). + +See [CONTRIBUTORS.md](CONTRIBUTORS.md) + +RSS-Bridge uses caching to prevent services from banning your server for repeatedly updating feeds. +The specific cache duration can be different between bridges. + +RSS-Bridge allows you to take full control over which bridges are displayed to the user. +That way you can host your own RSS-Bridge service with your favorite collection of bridges! 
+ +Current maintainers (as of 2024): @dvikan and @Mynacol #2519 + +## Reference + +### Feed item structure + +This is the feed item structure that bridges are expected to produce. + +```php + $item = [ + 'uri' => 'https://example.com/blog/hello', + 'title' => 'Hello world', + // Publication date in unix timestamp + 'timestamp' => 1668706254, + 'author' => 'Alice', + 'content' => 'Here be item content', + 'enclosures' => [ + 'https://example.com/foo.png', + 'https://example.com/bar.png' + ], + 'categories' => [ + 'news', + 'tech', + ], + // Globally unique id + 'uid' => 'e7147580c8747aad', + ] +``` + +### Output formats + +* `Atom`: Atom feed, for use in feed readers +* `Html`: Simple HTML page +* `Json`: JSON, for consumption by other applications +* `Mrss`: MRSS feed, for use in feed readers +* `Plaintext`: Raw text, for consumption by other applications +* `Sfeed`: Text, TAB separated + +### Cache backends + +* `File` +* `SQLite` +* `Memcached` +* `Array` +* `Null` + +### Licenses + +The source code for RSS-Bridge is [Public Domain](UNLICENSE). + +RSS-Bridge uses third party libraries with their own license: - * RSS-Bridge uses caching to prevent services from banning your server for repeatedly updating feeds. The specific cache duration can be different between bridges. Cached files are deleted automatically after 24 hours. - * You can implement your own bridge, [following these instructions](https://github.com/RSS-Bridge/rss-bridge/wiki/Bridge-API). - * You can enable debug mode to disable caching. 
Find more information on the [Wiki](https://github.com/RSS-Bridge/rss-bridge/wiki/Debug-mode) + * [`Parsedown`](https://github.com/erusev/parsedown) licensed under the [MIT License](https://opensource.org/licenses/MIT) + * [`PHP Simple HTML DOM Parser`](https://simplehtmldom.sourceforge.io/docs/1.9/index.html) licensed under the [MIT License](https://opensource.org/licenses/MIT) + * [`php-urljoin`](https://github.com/fluffy-critter/php-urljoin) licensed under the [MIT License](https://opensource.org/licenses/MIT) + * [`Laravel framework`](https://github.com/laravel/framework/) licensed under the [MIT License](https://opensource.org/licenses/MIT) -Rant -=== +## Rant *Dear so-called "social" websites.* @@ -261,6 +531,6 @@ You're not social when you hamper sharing by removing feeds. You're happy to hav We want to share with friends, using open protocols: RSS, Atom, XMPP, whatever. Because no one wants to have *your* service with *your* applications using *your* API force-feeding them. Friends must be free to choose whatever software and service they want. -We are rebuilding bridges you have wilfully destroyed. +We are rebuilding bridges you have willfully destroyed. Get your shit together: Put RSS/Atom back in. diff --git a/actions/ConnectivityAction.php b/actions/ConnectivityAction.php index 69272ddade1..e4e1e7c2724 100644 --- a/actions/ConnectivityAction.php +++ b/actions/ConnectivityAction.php @@ -1,15 +1,4 @@ userData['bridge'])) { - $this->returnEntryPage(); - return; - } - - $bridgeName = $this->userData['bridge']; - - $this->reportBridgeConnectivity($bridgeName); - - } - - /** - * Generates a report about the bridge connectivity status and sends it back - * to the user. 
- * - * The report is generated as Json-formatted string in the format - * { - * "bridge": "", - * "successful": true/false - * } - * - * @param string $bridgeName Name of the bridge to generate the report for - * @return void - */ - private function reportBridgeConnectivity($bridgeName) { - - $bridgeFac = new \BridgeFactory(); - $bridgeFac->setWorkingDir(PATH_LIB_BRIDGES); - - if(!$bridgeFac->isWhitelisted($bridgeName)) { - header('Content-Type: text/html'); - returnServerError('Bridge is not whitelisted!'); - } - - header('Content-Type: text/json'); - - $retVal = array( - 'bridge' => $bridgeName, - 'successful' => false, - 'http_code' => 200, - ); - - $bridge = $bridgeFac->create($bridgeName); - - if($bridge === false) { - echo json_encode($retVal); - return; - } - - $curl_opts = array( - CURLOPT_CONNECTTIMEOUT => 5 - ); - - try { - $reply = getContents($bridge::URI, array(), $curl_opts, true); - - if($reply) { - $retVal['successful'] = true; - if (isset($reply['header'])) { - if (strpos($reply['header'], 'HTTP/1.1 301 Moved Permanently') !== false) { - $retVal['http_code'] = 301; - } - } - } - } catch(Exception $e) { - $retVal['successful'] = false; - } - - echo json_encode($retVal); - - } - - private function returnEntryPage() { - echo << - - - - - - - - - -
-
-
-
- - -
- - -EOD; - } +class ConnectivityAction implements ActionInterface +{ + private BridgeFactory $bridgeFactory; + + public function __construct( + BridgeFactory $bridgeFactory + ) { + $this->bridgeFactory = $bridgeFactory; + } + + public function __invoke(Request $request): Response + { + if (!Debug::isEnabled()) { + return new Response('This action is only available in debug mode!', 403); + } + + $bridgeName = $request->get('bridge'); + if (!$bridgeName) { + return new Response(render_template('connectivity.html.php')); + } + $bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName); + if (!$bridgeClassName) { + return new Response('Bridge not found', 404); + } + return $this->reportBridgeConnectivity($bridgeClassName); + } + + private function reportBridgeConnectivity($bridgeClassName) + { + if (!$this->bridgeFactory->isEnabled($bridgeClassName)) { + throw new \Exception('Bridge is not whitelisted!'); + } + + $bridge = $this->bridgeFactory->create($bridgeClassName); + $curl_opts = [ + CURLOPT_CONNECTTIMEOUT => 5, + CURLOPT_FOLLOWLOCATION => true, + ]; + $result = [ + 'bridge' => $bridgeClassName, + 'successful' => false, + 'http_code' => null, + ]; + try { + $response = getContents($bridge::URI, [], $curl_opts, true); + $result['http_code'] = $response->getCode(); + if (in_array($result['http_code'], [200])) { + $result['successful'] = true; + } + } catch (\Exception $e) { + } + + return new Response(Json::encode($result), 200, ['content-type' => 'text/json']); + } } diff --git a/actions/DetectAction.php b/actions/DetectAction.php index 86605de41d8..8d3d6263913 100644 --- a/actions/DetectAction.php +++ b/actions/DetectAction.php @@ -1,53 +1,51 @@ userData['url'] - or returnClientError('You must specify a url!'); - - $format = $this->userData['format'] - or returnClientError('You must specify a format!'); - - $bridgeFac = new \BridgeFactory(); - $bridgeFac->setWorkingDir(PATH_LIB_BRIDGES); - - foreach($bridgeFac->getBridgeNames() as $bridgeName) { 
- - if(!$bridgeFac->isWhitelisted($bridgeName)) { - continue; - } - - $bridge = $bridgeFac->create($bridgeName); - - if($bridge === false) { - continue; - } - - $bridgeParams = $bridge->detectParameters($targetURL); - - if(is_null($bridgeParams)) { - continue; - } - - $bridgeParams['bridge'] = $bridgeName; - $bridgeParams['format'] = $format; - - header('Location: ?action=display&' . http_build_query($bridgeParams), true, 301); - die(); - - } - - returnClientError('No bridge found for given URL: ' . $targetURL); - } +class DetectAction implements ActionInterface +{ + private BridgeFactory $bridgeFactory; + + public function __construct( + BridgeFactory $bridgeFactory + ) { + $this->bridgeFactory = $bridgeFactory; + } + + public function __invoke(Request $request): Response + { + $url = $request->get('url'); + $format = $request->get('format'); + + if (!$url) { + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a url'])); + } + if (!$format) { + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format'])); + } + + foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) { + if (!$this->bridgeFactory->isEnabled($bridgeClassName)) { + continue; + } + + $bridge = $this->bridgeFactory->create($bridgeClassName); + + $bridgeParams = $bridge->detectParameters($url); + + if (!$bridgeParams) { + continue; + } + + $query = [ + 'action' => 'display', + 'bridge' => $bridgeClassName, + 'format' => $format, + ]; + $query = array_merge($query, $bridgeParams); + return new Response('', 301, ['location' => '?' . http_build_query($query)]); + } + + return new Response(render(__DIR__ . '/../templates/error.html.php', [ + 'message' => 'No bridge found for given URL: ' . 
$url, + ])); + } } diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 89930cfbe6a..10af8ad7257 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -1,258 +1,231 @@ getCode(); - if ($returnCode === 301 || $returnCode === 302) { - # Don't pass redirect codes to the exterior - $returnCode = 508; - } - return $returnCode; - } - - public function execute() { - $bridge = array_key_exists('bridge', $this->userData) ? $this->userData['bridge'] : null; - - $format = $this->userData['format'] - or returnClientError('You must specify a format!'); - - $bridgeFac = new \BridgeFactory(); - $bridgeFac->setWorkingDir(PATH_LIB_BRIDGES); - - // whitelist control - if(!$bridgeFac->isWhitelisted($bridge)) { - throw new \Exception('This bridge is not whitelisted', 401); - die; - } - - // Data retrieval - $bridge = $bridgeFac->create($bridge); - - $noproxy = array_key_exists('_noproxy', $this->userData) - && filter_var($this->userData['_noproxy'], FILTER_VALIDATE_BOOLEAN); - - if(defined('PROXY_URL') && PROXY_BYBRIDGE && $noproxy) { - define('NOPROXY', true); - } - - // Cache timeout - $cache_timeout = -1; - if(array_key_exists('_cache_timeout', $this->userData)) { - - if(!CUSTOM_CACHE_TIMEOUT) { - unset($this->userData['_cache_timeout']); - $uri = parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) . '?' . http_build_query($this->userData); - header('Location: ' . 
$uri, true, 301); - die(); - } - - $cache_timeout = filter_var($this->userData['_cache_timeout'], FILTER_VALIDATE_INT); - - } else { - $cache_timeout = $bridge->getCacheTimeout(); - } - - // Remove parameters that don't concern bridges - $bridge_params = array_diff_key( - $this->userData, - array_fill_keys( - array( - 'action', - 'bridge', - 'format', - '_noproxy', - '_cache_timeout', - '_error_time' - ), '') - ); - - // Remove parameters that don't concern caches - $cache_params = array_diff_key( - $this->userData, - array_fill_keys( - array( - 'action', - 'format', - '_noproxy', - '_cache_timeout', - '_error_time' - ), '') - ); - - // Initialize cache - $cacheFac = new CacheFactory(); - $cacheFac->setWorkingDir(PATH_LIB_CACHES); - $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); - $cache->setScope(''); - $cache->purgeCache(86400); // 24 hours - $cache->setKey($cache_params); - - $items = array(); - $infos = array(); - $mtime = $cache->getTime(); - - if($mtime !== false - && (time() - $cache_timeout < $mtime) - && !Debug::isEnabled()) { // Load cached data - - // Send "Not Modified" response if client supports it - // Implementation based on https://stackoverflow.com/a/10847262 - if(isset($_SERVER['HTTP_IF_MODIFIED_SINCE'])) { - $stime = strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']); - - if($mtime <= $stime) { // Cached data is older or same - header('Last-Modified: ' . gmdate('D, d M Y H:i:s ', $mtime) . 'GMT', true, 304); - die(); - } - } - - $cached = $cache->loadData(); - - if(isset($cached['items']) && isset($cached['extraInfos'])) { - foreach($cached['items'] as $item) { - $items[] = new \FeedItem($item); - } - - $infos = $cached['extraInfos']; - } - - } else { // Collect new data - - try { - $bridge->setDatas($bridge_params); - $bridge->collectData(); - - $items = $bridge->getItems(); - - // Transform "legacy" items to FeedItems if necessary. - // Remove this code when support for "legacy" items ends! 
- if(isset($items[0]) && is_array($items[0])) { - $feedItems = array(); - - foreach($items as $item) { - $feedItems[] = new \FeedItem($item); - } - - $items = $feedItems; - } - - $infos = array( - 'name' => $bridge->getName(), - 'uri' => $bridge->getURI(), - 'icon' => $bridge->getIcon() - ); - } catch(Error $e) { - error_log($e); - - if(logBridgeError($bridge::NAME, $e->getCode()) >= Configuration::getConfig('error', 'report_limit')) { - if(Configuration::getConfig('error', 'output') === 'feed') { - $item = new \FeedItem(); - - // Create "new" error message every 24 hours - $this->userData['_error_time'] = urlencode((int)(time() / 86400)); - - // Error 0 is a special case (i.e. "trying to get property of non-object") - if($e->getCode() === 0) { - $item->setTitle( - 'Bridge encountered an unexpected situation! (' - . $this->userData['_error_time'] - . ')' - ); - } else { - $item->setTitle( - 'Bridge returned error ' - . $e->getCode() - . '! (' - . $this->userData['_error_time'] - . ')' - ); - } - - $item->setURI( - (isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '') - . '?' - . http_build_query($this->userData) - ); - - $item->setTimestamp(time()); - $item->setContent(buildBridgeException($e, $bridge)); - - $items[] = $item; - } elseif(Configuration::getConfig('error', 'output') === 'http') { - header('Content-Type: text/html', true, get_return_code($e)); - die(buildTransformException($e, $bridge)); - } - } - } catch(Exception $e) { - error_log($e); - - if(logBridgeError($bridge::NAME, $e->getCode()) >= Configuration::getConfig('error', 'report_limit')) { - if(Configuration::getConfig('error', 'output') === 'feed') { - $item = new \FeedItem(); - - // Create "new" error message every 24 hours - $this->userData['_error_time'] = urlencode((int)(time() / 86400)); - - $item->setURI( - (isset($_SERVER['REQUEST_URI']) ? parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH) : '') - . '?' - . 
http_build_query($this->userData) - ); - - $item->setTitle( - 'Bridge returned error ' - . $e->getCode() - . '! (' - . $this->userData['_error_time'] - . ')' - ); - $item->setTimestamp(time()); - $item->setContent(buildBridgeException($e, $bridge)); - - $items[] = $item; - } elseif(Configuration::getConfig('error', 'output') === 'http') { - header('Content-Type: text/html', true, get_return_code($e)); - die(buildTransformException($e, $bridge)); - } - } - } - - // Store data in cache - $cache->saveData(array( - 'items' => array_map(function($i){ return $i->toArray(); }, $items), - 'extraInfos' => $infos - )); - - } - - // Data transformation - try { - $formatFac = new FormatFactory(); - $formatFac->setWorkingDir(PATH_LIB_FORMATS); - $format = $formatFac->create($format); - $format->setItems($items); - $format->setExtraInfos($infos); - $format->setLastModified($cache->getTime()); - $format->display(); - } catch(Error $e) { - error_log($e); - header('Content-Type: text/html', true, $e->getCode()); - die(buildTransformException($e, $bridge)); - } catch(Exception $e) { - error_log($e); - header('Content-Type: text/html', true, $e->getCode()); - die(buildTransformException($e, $bridge)); - } - } +class DisplayAction implements ActionInterface +{ + private CacheInterface $cache; + private Logger $logger; + private BridgeFactory $bridgeFactory; + + public function __construct( + CacheInterface $cache, + Logger $logger, + BridgeFactory $bridgeFactory + ) { + $this->cache = $cache; + $this->logger = $logger; + $this->bridgeFactory = $bridgeFactory; + } + + public function __invoke(Request $request): Response + { + $bridgeName = $request->get('bridge'); + $format = $request->get('format'); + $noproxy = $request->get('_noproxy'); + + if (!$bridgeName) { + return new Response(render(__DIR__ . 
'/../templates/error.html.php', ['message' => 'Missing bridge name parameter']), 400); + } + $bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName); + if (!$bridgeClassName) { + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Bridge not found']), 404); + } + + if (!$format) { + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']), 400); + } + if (!$this->bridgeFactory->isEnabled($bridgeClassName)) { + return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'This bridge is not whitelisted']), 400); + } + + // Disable proxy (if enabled and per user's request) + if ( + Configuration::getConfig('proxy', 'url') + && Configuration::getConfig('proxy', 'by_bridge') + && $noproxy + ) { + // This const is only used once in getContents() + define('NOPROXY', true); + } + + $cacheKey = 'http_' . json_encode($request->toArray()); + + $bridge = $this->bridgeFactory->create($bridgeClassName); + + $response = $this->createResponse($request, $bridge, $format); + + if ($response->getCode() === 200) { + $ttl = $request->get('_cache_timeout'); + if (Configuration::getConfig('cache', 'custom_timeout') && $ttl) { + $ttl = (int) $ttl; + } else { + $ttl = $bridge->getCacheTimeout(); + } + $this->cache->set($cacheKey, $response, $ttl); + } + + return $response; + } + + private function createResponse(Request $request, BridgeAbstract $bridge, string $format) + { + $items = []; + + try { + $bridge->loadConfiguration(); + // Remove parameters that don't concern bridges + $remove = [ + 'token', + 'action', + 'bridge', + 'format', + '_noproxy', + '_cache_timeout', + '_error_time', + '_', // Some RSS readers add a cache-busting parameter (_=) to feed URLs, detect and ignore them. 
+ ]; + $requestArray = $request->toArray(); + $input = array_diff_key($requestArray, array_fill_keys($remove, '')); + $bridge->setInput($input); + $bridge->collectData(); + $items = $bridge->getItems(); + } catch (\Throwable $e) { + if ($e instanceof RateLimitException) { + // These are internally generated by bridges + $this->logger->info(sprintf('RateLimitException in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e))); + return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 429); + } + if ($e instanceof HttpException) { + if (in_array($e->getCode(), [429, 503])) { + // Log with debug, immediately reproduce and return + $this->logger->debug(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e))); + return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), $e->getCode()); + } + // Some other status code which we let fail normally (but don't log it) + } else { + // Log error if it's not an HttpException + $this->logger->error(sprintf('Exception in DisplayAction(%s)', $bridge->getShortName()), ['e' => $e]); + } + $errorOutput = Configuration::getConfig('error', 'output'); + $reportLimit = Configuration::getConfig('error', 'report_limit'); + $errorCount = 1; + if ($reportLimit > 1) { + $errorCount = $this->logBridgeError($bridge->getName(), $e->getCode()); + } + // Let clients know about the error if we are passed the report limit + if ($errorCount >= $reportLimit) { + if ($errorOutput === 'feed') { + // Render the exception as a feed item + $items = [$this->createFeedItemFromException($e, $bridge)]; + } elseif ($errorOutput === 'http') { + return new Response(render(__DIR__ . 
'/../templates/exception.html.php', ['e' => $e]), 500); + } elseif ($errorOutput === 'none') { + // Do nothing (produces an empty feed) + } + } + } + + $formatFactory = new FormatFactory(); + $format = $formatFactory->create($format); + + $format->setItems($items); + $format->setFeed($bridge->getFeed()); + $now = time(); + $format->setLastModified($now); + $headers = [ + 'last-modified' => gmdate('D, d M Y H:i:s ', $now) . 'GMT', + 'content-type' => $format->getMimeType() . '; charset=UTF-8', + ]; + $body = $format->render(); + + // This is supposed to remove non-utf8 byte sequences, but I'm unsure if it works + ini_set('mbstring.substitute_character', 'none'); + $body = mb_convert_encoding($body, 'UTF-8', 'UTF-8'); + + return new Response($body, 200, $headers); + } + + private function createFeedItemFromException($e, BridgeAbstract $bridge): array + { + $item = []; + + // Create a unique identifier every 24 hours + $uniqueIdentifier = urlencode((int)(time() / 86400)); + $title = sprintf('Bridge returned error %s! (%s)', $e->getCode(), $uniqueIdentifier); + + $item['title'] = $title; + $item['uri'] = get_current_url(); + $item['timestamp'] = time(); + + // Create an item identifier for feed readers e.g. "staysafetv twitch videos_19389" + $item['uid'] = $bridge->getName() . '_' . $uniqueIdentifier; + + $content = render_template(__DIR__ . '/../templates/bridge-error.html.php', [ + 'error' => render_template(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), + 'searchUrl' => self::createGithubSearchUrl($bridge), + 'issueUrl' => self::createGithubIssueUrl($bridge, $e), + 'maintainer' => $bridge->getMaintainer(), + ]); + $item['content'] = $content; + + return $item; + } + + private function logBridgeError($bridgeName, $code) + { + // todo: it's not really necessary to json encode $report + $cacheKey = 'error_reporting_' . $bridgeName . '_' . 
$code; + $report = $this->cache->get($cacheKey); + if ($report) { + $report = Json::decode($report); + $report['time'] = time(); + $report['count']++; + } else { + $report = [ + 'error' => $code, + 'time' => time(), + 'count' => 1, + ]; + } + $ttl = 86400 * 5; + $this->cache->set($cacheKey, Json::encode($report), $ttl); + return $report['count']; + } + + private static function createGithubIssueUrl(BridgeAbstract $bridge, \Throwable $e): string + { + $maintainer = $bridge->getMaintainer(); + if (str_contains($maintainer, ',')) { + $maintainers = explode(',', $maintainer); + } else { + $maintainers = [$maintainer]; + } + $maintainers = array_map('trim', $maintainers); + + $queryString = $_SERVER['QUERY_STRING'] ?? ''; + $query = [ + 'title' => $bridge->getName() . ' failed with: ' . $e->getMessage(), + 'body' => sprintf( + "```\n%s\n\n%s\n\nQuery string: %s\nVersion: %s\nOs: %s\nPHP version: %s\n```\nMaintainer: @%s", + create_sane_exception_message($e), + implode("\n", trace_to_call_points(trace_from_exception($e))), + $queryString, + Configuration::getVersion(), + PHP_OS_FAMILY, + phpversion() ?: 'Unknown', + implode(', @', $maintainers), + ), + 'labels' => 'Bridge-Broken', + 'assignee' => $maintainer[0], + ]; + + return 'https://github.com/RSS-Bridge/rss-bridge/issues/new?' . http_build_query($query); + } + + private static function createGithubSearchUrl($bridge): string + { + return sprintf( + 'https://github.com/RSS-Bridge/rss-bridge/issues?q=%s', + urlencode('is:issue is:open ' . 
$bridge->getName()) + ); + } } diff --git a/actions/FindfeedAction.php b/actions/FindfeedAction.php new file mode 100644 index 00000000000..e18c3e1db55 --- /dev/null +++ b/actions/FindfeedAction.php @@ -0,0 +1,95 @@ +bridgeFactory = $bridgeFactory; + } + + public function __invoke(Request $request): Response + { + $url = $request->get('url'); + $format = $request->get('format'); + + if (!$url) { + return new Response('You must specify a url', 400); + } + if (!$format) { + return new Response('You must specify a format', 400); + } + + $results = []; + foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) { + if (!$this->bridgeFactory->isEnabled($bridgeClassName)) { + continue; + } + + $bridge = $this->bridgeFactory->create($bridgeClassName); + + $bridgeParams = $bridge->detectParameters($url); + + if ($bridgeParams === null) { + continue; + } + + // It's allowed to have no 'context' in a bridge (only a default context without any name) + // In this case, the reference to the parameters are found in the first element of the PARAMETERS array + + $context = $bridgeParams['context'] ?? 0; + + $bridgeData = []; + // Construct the array of parameters + foreach ($bridgeParams as $key => $value) { + // 'context' is a special case : it's a bridge parameters, there is no "name" for this parameter + if ($key == 'context') { + $bridgeData[$key]['name'] = 'Context'; + $bridgeData[$key]['value'] = $value; + } else { + $bridgeData[$key]['name'] = $this->getParameterName($bridge, $context, $key); + $bridgeData[$key]['value'] = $value; + } + } + + $bridgeParams['bridge'] = $bridgeClassName; + $bridgeParams['format'] = $format; + $content = [ + 'url' => './?action=display&' . 
http_build_query($bridgeParams), + 'bridgeParams' => $bridgeParams, + 'bridgeData' => $bridgeData, + 'bridgeMeta' => [ + 'name' => $bridge::NAME, + 'description' => $bridge::DESCRIPTION, + 'parameters' => $bridge::PARAMETERS, + 'icon' => $bridge->getIcon(), + ], + ]; + $results[] = $content; + } + if ($results === []) { + return new Response(Json::encode(['message' => 'No bridge found for given url']), 404, ['content-type' => 'application/json']); + } + return new Response(Json::encode($results), 200, ['content-type' => 'application/json']); + } + + // Get parameter name in the actual context, or in the global parameter + private function getParameterName($bridge, $context, $key) + { + if (isset($bridge::PARAMETERS[$context][$key]['name'])) { + $name = $bridge::PARAMETERS[$context][$key]['name']; + } else if (isset($bridge::PARAMETERS['global'][$key]['name'])) { + $name = $bridge::PARAMETERS['global'][$key]['name']; + } else { + $name = 'Variable "' . $key . '" (No name provided)'; + } + return $name; + } +} diff --git a/actions/FrontpageAction.php b/actions/FrontpageAction.php new file mode 100644 index 00000000000..79ffb4f5762 --- /dev/null +++ b/actions/FrontpageAction.php @@ -0,0 +1,49 @@ +bridgeFactory = $bridgeFactory; + } + + public function __invoke(Request $request): Response + { + $token = $request->getAttribute('token'); + + $messages = []; + $activeBridges = 0; + + $bridgeClassNames = $this->bridgeFactory->getBridgeClassNames(); + + foreach ($this->bridgeFactory->getMissingEnabledBridges() as $missingEnabledBridge) { + $messages[] = [ + 'body' => sprintf('Warning : Bridge "%s" not found', $missingEnabledBridge), + 'level' => 'warning' + ]; + } + + $body = ''; + foreach ($bridgeClassNames as $bridgeClassName) { + if ($this->bridgeFactory->isEnabled($bridgeClassName)) { + $body .= BridgeCard::render($this->bridgeFactory, $bridgeClassName, $token); + $activeBridges++; + } + } + + $response = new Response(render(__DIR__ . 
'/../templates/frontpage.html.php', [ + 'messages' => $messages, + 'admin_email' => Configuration::getConfig('admin', 'email'), + 'admin_telegram' => Configuration::getConfig('admin', 'telegram'), + 'bridges' => $body, + 'active_bridges' => $activeBridges, + 'total_bridges' => count($bridgeClassNames), + ])); + + // TODO: The rendered template could be cached, but beware config changes that changes the html + return $response; + } +} diff --git a/actions/HealthAction.php b/actions/HealthAction.php new file mode 100644 index 00000000000..13365a3c83d --- /dev/null +++ b/actions/HealthAction.php @@ -0,0 +1,15 @@ + 200, + 'message' => 'all is good', + ]; + return new Response(Json::encode($response), 200, ['content-type' => 'application/json']); + } +} diff --git a/actions/ListAction.php b/actions/ListAction.php index 92aef0e0f5e..f6347f9c457 100644 --- a/actions/ListAction.php +++ b/actions/ListAction.php @@ -1,56 +1,36 @@ bridges = array(); - $list->total = 0; - - $bridgeFac = new \BridgeFactory(); - $bridgeFac->setWorkingDir(PATH_LIB_BRIDGES); - - foreach($bridgeFac->getBridgeNames() as $bridgeName) { - - $bridge = $bridgeFac->create($bridgeName); - - if($bridge === false) { // Broken bridge, show as inactive - - $list->bridges[$bridgeName] = array( - 'status' => 'inactive' - ); - - continue; - - } - - $status = $bridgeFac->isWhitelisted($bridgeName) ? 
'active' : 'inactive'; - - $list->bridges[$bridgeName] = array( - 'status' => $status, - 'uri' => $bridge->getURI(), - 'name' => $bridge->getName(), - 'icon' => $bridge->getIcon(), - 'parameters' => $bridge->getParameters(), - 'maintainer' => $bridge->getMaintainer(), - 'description' => $bridge->getDescription() - ); - - } - - $list->total = count($list->bridges); - - header('Content-Type: application/json'); - echo json_encode($list, JSON_PRETTY_PRINT); - } +class ListAction implements ActionInterface +{ + private BridgeFactory $bridgeFactory; + + public function __construct( + BridgeFactory $bridgeFactory + ) { + $this->bridgeFactory = $bridgeFactory; + } + + public function __invoke(Request $request): Response + { + $list = new \stdClass(); + $list->bridges = []; + $list->total = 0; + + foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) { + $bridge = $this->bridgeFactory->create($bridgeClassName); + + $list->bridges[$bridgeClassName] = [ + 'status' => $this->bridgeFactory->isEnabled($bridgeClassName) ? 
'active' : 'inactive', + 'uri' => $bridge->getURI(), + 'donationUri' => $bridge->getDonationURI(), + 'name' => $bridge->getName(), + 'icon' => $bridge->getIcon(), + 'parameters' => $bridge->getParameters(), + 'maintainer' => $bridge->getMaintainer(), + 'description' => $bridge->getDescription() + ]; + } + $list->total = count($list->bridges); + return new Response(Json::encode($list), 200, ['content-type' => 'application/json']); + } } diff --git a/app.json b/app.json index f18479957f5..f79d7138f87 100644 --- a/app.json +++ b/app.json @@ -1,8 +1,8 @@ { "service": "Heroku", - "name": "RSS-Bridge", + "name": "rss-bridge-heroku", "description": "RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites which don't have one.", - "repository": "https://github.com/RSS-Bridge/rss-bridge", + "repository": "https://github.com/RSS-Bridge/rss-bridge?1651005770", "keywords": ["php", "rss-bridge", "rss"] } diff --git a/bin/cache-clear b/bin/cache-clear new file mode 100755 index 00000000000..2ca84ce6774 --- /dev/null +++ b/bin/cache-clear @@ -0,0 +1,16 @@ +#!/usr/bin/env php +clear(); diff --git a/bin/cache-prune b/bin/cache-prune new file mode 100755 index 00000000000..bb72c4ac99a --- /dev/null +++ b/bin/cache-prune @@ -0,0 +1,24 @@ +#!/usr/bin/env php +prune(); diff --git a/bin/test b/bin/test new file mode 100755 index 00000000000..746924107a9 --- /dev/null +++ b/bin/test @@ -0,0 +1,20 @@ +#!/usr/bin/env php +debug('This is a test debug message'); + +$logger->info('This is a test info message'); + +$logger->error('This is a test error message'); diff --git a/bridges/ABCNewsBridge.php b/bridges/ABCNewsBridge.php new file mode 100644 index 00000000000..154eb489215 --- /dev/null +++ b/bridges/ABCNewsBridge.php @@ -0,0 +1,49 @@ + [ + 'type' => 'list', + 'name' => 'Region', + 'title' => 'Choose state', + 'values' => [ + 'ACT' => 'act', + 'NSW' => 'nsw', + 'NT' => 'nt', + 'QLD' => 'qld', + 'SA' => 'sa', + 'TAS' => 'tas', + 'VIC' => 'vic', + 'WA' => 'wa' + 
], + ] + ] + ]; + + public function collectData() + { + $url = sprintf('https://www.abc.net.au/news/%s', $this->getInput('topic')); + $dom = getSimpleHTMLDOM($url); + $dom = $dom->find('div[data-component="PaginationList"]', 0); + if (!$dom) { + throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); + } + $dom = defaultLinkTo($dom, $this->getURI()); + foreach ($dom->find('article[data-component="DetailCard"]') as $article) { + $a = $article->find('a', 0); + $this->items[] = [ + 'title' => $a->plaintext, + 'uri' => $a->href, + 'content' => $article->find('p', 0)->plaintext, + 'timestamp' => strtotime($article->find('time', 0)->datetime), + ]; + } + } +} diff --git a/bridges/ABCTabsBridge.php b/bridges/ABCTabsBridge.php deleted file mode 100644 index ef2c75b109c..00000000000 --- a/bridges/ABCTabsBridge.php +++ /dev/null @@ -1,42 +0,0 @@ -find('table#myTable', 0)->children(1); - - foreach ($table->find('tr') as $tab) { - $item = array(); - $item['author'] = $tab->find('td', 1)->plaintext - . ' - ' - . $tab->find('td', 2)->plaintext; - - $item['title'] = $tab->find('td', 1)->plaintext - . ' - ' - . $tab->find('td', 2)->plaintext; - - $item['content'] = 'Le ' - . $tab->find('td', 0)->plaintext - . '
Par: ' - . $tab->find('td', 5)->plaintext - . '
Type: ' - . $tab->find('td', 3)->plaintext; - - $item['id'] = static::URI - . $tab->find('td', 2)->find('a', 0)->getAttribute('href'); - - $item['uri'] = static::URI - . $tab->find('td', 2)->find('a', 0)->getAttribute('href'); - - $this->items[] = $item; - } - } -} diff --git a/bridges/ABolaBridge.php b/bridges/ABolaBridge.php new file mode 100644 index 00000000000..1f1c5da1954 --- /dev/null +++ b/bridges/ABolaBridge.php @@ -0,0 +1,116 @@ + [ + 'name' => 'News Feed', + 'type' => 'list', + 'title' => 'Feeds from the Portuguese sports newspaper A BOLA.PT', + 'values' => [ + 'Últimas' => 'Nnh/Noticias', + 'Seleção Nacional' => 'Selecao/Noticias', + 'Futebol Nacional' => [ + 'Notícias' => 'Nacional/Noticias', + 'Primeira Liga' => 'Nacional/Liga/Noticias', + 'Liga 2' => 'Nacional/Liga2/Noticias', + 'Liga 3' => 'Nacional/Liga3/Noticias', + 'Liga Revelação' => 'Nacional/Liga-Revelacao/Noticias', + 'Campeonato de Portugal' => 'Nacional/Campeonato-Portugal/Noticias', + 'Distritais' => 'Nacional/Distritais/Noticias', + 'Taça de Portugal' => 'Nacional/TPortugal/Noticias', + 'Futebol Feminino' => 'Nacional/FFeminino/Noticias', + 'Futsal' => 'Nacional/Futsal/Noticias', + ], + 'Futebol Internacional' => [ + 'Notícias' => 'Internacional/Noticias/Noticias', + 'Liga dos Campeões' => 'Internacional/Liga-dos-campeoes/Noticias', + 'Liga Europa' => 'Internacional/Liga-europa/Noticias', + 'Liga Conferência' => 'Internacional/Liga-conferencia/Noticias', + 'Liga das Nações' => 'Internacional/Liga-das-nacoes/Noticias', + 'UEFA Youth League' => 'Internacional/Uefa-Youth-League/Noticias', + ], + 'Mercado' => 'Mercado', + 'Modalidades' => 'Modalidades/Noticias', + 'Motores' => 'Motores/Noticias', + ] + ] + ] + ]; + + public function getIcon() + { + return 'https://abola.pt/img/icons/favicon-96x96.png'; + } + + public function getName() + { + return !is_null($this->getKey('feed')) ? self::NAME . ' | ' . 
$this->getKey('feed') : self::NAME; + } + + public function getURI() + { + return self::URI . $this->getInput('feed'); + } + + public function collectData() + { + $url = sprintf('https://abola.pt/%s', $this->getInput('feed')); + $dom = getSimpleHTMLDOM($url); + if ($this->getInput('feed') !== 'Mercado') { + $dom = $dom->find('div#body_Todas1_upNoticiasTodas', 0); + } else { + $dom = $dom->find('div#body_NoticiasMercado_upNoticiasTodas', 0); + } + if (!$dom) { + throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); + } + $dom = defaultLinkTo($dom, $this->getURI()); + foreach ($dom->find('div.media') as $key => $article) { + //Get thumbnail + $image = $article->find('.media-img', 0)->style; + $image = preg_replace('/background-image: url\(/i', '', $image); + $image = substr_replace($image, '', -4); + $image = preg_replace('/https:\/\//i', '', $image); + $image = preg_replace('/www\./i', '', $image); + $image = preg_replace('/\/\//', '/', $image); + $image = preg_replace('/\/\/\//', '//', $image); + $image = substr($image, 7); + $image = 'https://' . $image; + $image = preg_replace('/ptimg/', 'pt/img', $image); + $image = preg_replace('/\/\/bola/', 'www.abola', $image); + //Timestamp + $date = date('Y/m/d'); + if (!is_null($article->find("span#body_Todas1_rptNoticiasTodas_lblData_$key", 0))) { + $date = $article->find("span#body_Todas1_rptNoticiasTodas_lblData_$key", 0)->plaintext; + $date = preg_replace('/\./', '/', $date); + } + $time = $article->find("span#body_Todas1_rptNoticiasTodas_lblHora_$key", 0)->plaintext; + $date = explode('/', $date); + $time = explode(':', $time); + $year = $date[0]; + $month = $date[1]; + $day = $date[2]; + $hour = $time[0]; + $minute = $time[1]; + $timestamp = mktime($hour, $minute, 0, $month, $day, $year); + //Content + $image = '' . $article->find('h4 span', 0)->plaintext . ''; + $description = '

' . $article->find('.media-texto > span', 0)->plaintext . '

'; + $content = $image . '
' . $description; + $a = $article->find('.media-body > a', 0); + $this->items[] = [ + 'title' => $a->find('h4 span', 0)->plaintext, + 'uri' => $a->href, + 'content' => $content, + 'timestamp' => $timestamp, + ]; + } + } +} diff --git a/bridges/AO3Bridge.php b/bridges/AO3Bridge.php index 9a3b5c8ffa8..7e18b657f58 100644 --- a/bridges/AO3Bridge.php +++ b/bridges/AO3Bridge.php @@ -1,121 +1,238 @@ array( - 'url' => array( - 'name' => 'url', - 'required' => true, - // Example: F/F tag, complete works only - 'exampleValue' => self::URI - . 'works?work_search[complete]=T&tag_id=F*s*F', - ), - ), - 'Bookmarks' => array( - 'user' => array( - 'name' => 'user', - 'required' => true, - // Example: Nyaaru's bookmarks - 'exampleValue' => 'Nyaaru', - ), - ), - 'Work' => array( - 'id' => array( - 'name' => 'id', - 'required' => true, - // Example: latest chapters from A Better Past by LysSerris - 'exampleValue' => '18181853', - ), - ) - ); - - // Feed for lists of works (e.g. recent works, search results, filtered tags, - // bookmarks, series, collections). - private function collectList($url) { - $html = getSimpleHTMLDOM($url) - or returnServerError('could not request AO3'); - $html = defaultLinkTo($html, self::URI); - - foreach($html->find('.index.group > li') as $element) { - $item = array(); - - $title = $element->find('div h4 a', 0); - if (!isset($title)) continue; // discard deleted works - $item['title'] = $title->plaintext; - $item['content'] = $element; - $item['uri'] = $title->href; - - $strdate = $element->find('div p.datetime', 0)->plaintext; - $item['timestamp'] = strtotime($strdate); - - $chapters = $element->find('dl dd.chapters', 0); - // bookmarked series and external works do not have a chapters count - $chapters = (isset($chapters) ? $chapters->plaintext : 0); - $item['uid'] = $item['uri'] . "/$strdate/$chapters"; - - $this->items[] = $item; - } - } - - // Feed for recent chapters of a specific work. - private function collectWork($id) { - $url = self::URI . 
"/works/$id/navigate"; - $html = getSimpleHTMLDOM($url) - or returnServerError('could not request AO3'); - $html = defaultLinkTo($html, self::URI); - - $this->title = $html->find('h2 a', 0)->plaintext; - - foreach($html->find('ol.index.group > li') as $element) { - $item = array(); - - $item['title'] = $element->find('a', 0)->plaintext; - $item['content'] = $element; - $item['uri'] = $element->find('a', 0)->href; - - $strdate = $element->find('span.datetime', 0)->plaintext; - $strdate = str_replace('(', '', $strdate); - $strdate = str_replace(')', '', $strdate); - $item['timestamp'] = strtotime($strdate); - - $item['uid'] = $item['uri'] . "/$strdate"; - - $this->items[] = $item; - } - - $this->items = array_reverse($this->items); - } - - public function collectData() { - switch($this->queriedContext) { - case 'Bookmarks': - $user = $this->getInput('user'); - $this->title = $user; - $url = self::URI - . '/users/' . $user - . '/bookmarks?bookmark_search[sort_column]=bookmarkable_date'; - return $this->collectList($url); - case 'List': return $this->collectList( - $this->getInput('url') - ); - case 'Work': return $this->collectWork( - $this->getInput('id') - ); - } - } - - public function getName() { - $name = parent::getName() . " $this->queriedContext"; - if (isset($this->title)) $name .= " - $this->title"; - return $name; - } - - public function getIcon() { - return self::URI . 
'/favicon.ico'; - } +class AO3Bridge extends BridgeAbstract +{ + const NAME = 'AO3'; + const URI = 'https://archiveofourown.org/'; + const CACHE_TIMEOUT = 1800; + const DESCRIPTION = 'Returns works or chapters from Archive of Our Own'; + const MAINTAINER = 'Obsidienne'; + const PARAMETERS = [ + 'List' => [ + 'url' => [ + 'name' => 'url', + 'required' => true, + // Example: F/F tag + 'exampleValue' => 'https://archiveofourown.org/tags/F*s*F/works', + ], + 'range' => [ + 'name' => 'Chapter Content', + 'title' => 'Chapter(s) to include in each work\'s feed entry', + 'defaultValue' => null, + 'type' => 'list', + 'values' => [ + 'None' => null, + 'First' => 'first', + 'Latest' => 'last', + 'Entire work' => 'all', + ], + ], + 'limit' => self::LIMIT, + ], + 'Bookmarks' => [ + 'user' => [ + 'name' => 'user', + 'required' => true, + // Example: Nyaaru's bookmarks + 'exampleValue' => 'Nyaaru', + ], + ], + 'Work' => [ + 'id' => [ + 'name' => 'id', + 'required' => true, + // Example: latest chapters from A Better Past by LysSerris + 'exampleValue' => '18181853', + ], + ] + ]; + private $title; + + public function collectData() + { + switch ($this->queriedContext) { + case 'Bookmarks': + $this->collectList($this->getURI()); + break; + case 'List': + $this->collectList($this->getURI()); + break; + case 'Work': + $this->collectWork($this->getURI()); + break; + } + } + + /** + * Feed for lists of works (e.g. recent works, search results, filtered tags, + * bookmarks, series, collections). + */ + private function collectList($url) + { + $version = 'v0.0.1'; + $headers = [ + "useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)" + ]; + $response = getContents($url, $headers); + + $html = \str_get_html($response); + $html = defaultLinkTo($html, self::URI); + + // Get list title. 
Will include page range + count in some cases + $heading = ($html->find('#main h2', 0)); + if ($heading->find('a.tag')) { + $heading = $heading->find('a.tag', 0); + } + $this->title = $heading->plaintext; + + $limit = $this->getInput('limit') ?? 3; + $count = 0; + foreach ($html->find('.index.group > li') as $element) { + $item = []; + + $title = $element->find('div h4 a', 0); + if (!isset($title)) { + continue; // discard deleted works + } + $item['title'] = $title->plaintext; + $item['uri'] = $title->href; + + $strdate = $element->find('div p.datetime', 0)->plaintext; + $item['timestamp'] = strtotime($strdate); + + // detach from rest of page because remove() is buggy + $element = str_get_html($element->outertext()); + $tags = $element->find('ul.required-tags', 0); + foreach ($tags->childNodes() as $tag) { + $item['categories'][] = html_entity_decode($tag->plaintext); + } + $tags->remove(); + $tags = $element->find('ul.tags', 0); + foreach ($tags->childNodes() as $tag) { + $item['categories'][] = html_entity_decode($tag->plaintext); + } + $tags->remove(); + + $item['content'] = implode('', $element->childNodes()); + + $chapters = $element->find('dl dd.chapters', 0); + // bookmarked series and external works do not have a chapters count + $chapters = (isset($chapters) ? $chapters->plaintext : 0); + $item['uid'] = $item['uri'] . 
"/$strdate/$chapters"; + + // Fetch workskin of desired chapter(s) in list + if ($this->getInput('range') && ($limit == 0 || $count++ < $limit)) { + $url = $item['uri']; + switch ($this->getInput('range')) { + case ('all'): + $url .= '?view_full_work=true'; + break; + case ('first'): + break; + case ('last'): + // only way to get this is using the navigate page unfortunately + $url .= '/navigate'; + $response = getContents($url, $headers); + $html = \str_get_html($response); + $html = defaultLinkTo($html, self::URI); + $url = $html->find('ol.index.group > li > a', -1)->href; + break; + } + $response = getContents($url, $headers); + + $html = \str_get_html($response); + $html = defaultLinkTo($html, self::URI); + // remove duplicate fic summary + if ($ficsum = $html->find('#workskin > .preface > .summary', 0)) { + $ficsum->remove(); + } + $item['content'] .= $html->find('#workskin', 0); + } + + // Use predictability of download links to generate enclosures + $wid = explode('/', $item['uri'])[4]; + foreach (['azw3', 'epub', 'mobi', 'pdf', 'html'] as $ext) { + $item['enclosures'][] = 'https://archiveofourown.org/downloads/' . $wid . '/work.' . $ext; + } + + $this->items[] = $item; + } + } + + /** + * Feed for recent chapters of a specific work. + */ + private function collectWork($url) + { + $version = 'v0.0.1'; + $headers = [ + "useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)" + ]; + $response = getContents($url . '/navigate', $headers); + + $html = \str_get_html($response); + $html = defaultLinkTo($html, self::URI); + + $response = getContents($url . 
'?view_full_work=true', $headers); + + $workhtml = \str_get_html($response); + $workhtml = defaultLinkTo($workhtml, self::URI); + + $this->title = $html->find('h2 a', 0)->plaintext; + + $nav = $html->find('ol.index.group > li'); + for ($i = 0; $i < count($nav); $i++) { + $item = []; + + $element = $nav[$i]; + $item['title'] = $element->find('a', 0)->plaintext; + $item['content'] = $workhtml->find('#chapter-' . ($i + 1), 0); + $item['uri'] = $element->find('a', 0)->href; + + $strdate = $element->find('span.datetime', 0)->plaintext; + $strdate = str_replace('(', '', $strdate); + $strdate = str_replace(')', '', $strdate); + $item['timestamp'] = strtotime($strdate); + + $item['uid'] = $item['uri'] . "/$strdate"; + + $this->items[] = $item; + } + + $this->items = array_reverse($this->items); + } + + public function getName() + { + $name = parent::getName() . " $this->queriedContext"; + if (isset($this->title)) { + $name .= " - $this->title"; + } + return $name; + } + + public function getIcon() + { + return self::URI . '/favicon.ico'; + } + + public function getURI() + { + $url = parent::getURI(); + switch ($this->queriedContext) { + case 'Bookmarks': + $user = $this->getInput('user'); + $url = self::URI + . '/users/' . $user + . '/bookmarks?bookmark_search[sort_column]=bookmarkable_date'; + break; + case 'List': + $url = $this->getInput('url'); + break; + case 'Work': + $url = self::URI . '/works/' . 
$this->getInput('id'); + break; + } + return $url; + } } diff --git a/bridges/ARDAudiothekBridge.php b/bridges/ARDAudiothekBridge.php new file mode 100644 index 00000000000..02b6b00778d --- /dev/null +++ b/bridges/ARDAudiothekBridge.php @@ -0,0 +1,173 @@ +icon + * @const IMAGEEXTENSION + */ + const IMAGEEXTENSION = '.jpg'; + + const PARAMETERS = [ + [ + 'path' => [ + 'name' => 'Show Link or ID', + 'required' => true, + 'title' => 'Link to the show page or just its numeric suffix', + 'defaultValue' => 'https://www.ardaudiothek.de/sendung/kalk-welk/10777871/' + ], + 'limit' => self::LIMIT, + ] + ]; + + + /** + * Holds the title of the current show + * + * @var string + */ + private $title; + + /** + * Holds the URI of the show + * + * @var string + */ + private $uri; + + /** + * Holds the icon of the feed + * + */ + private $icon; + + public function collectData() + { + $path = $this->getInput('path'); + $limit = $this->getInput('limit'); + + $oldTz = date_default_timezone_get(); + date_default_timezone_set('Europe/Berlin'); + + $pathComponents = explode('/', $path); + if (empty($pathComponents)) { + returnClientError('Path may not be empty'); + } + if (count($pathComponents) < 2) { + $showID = $pathComponents[0]; + } else { + $lastKey = count($pathComponents) - 1; + $showID = $pathComponents[$lastKey]; + if (strlen($showID) === 0) { + $showID = $pathComponents[$lastKey - 1]; + } + } + + $url = self::APIENDPOINT . 'programsets/' . $showID . '/'; + $json1 = getContents($url); + $data1 = Json::decode($json1, false); + $processedJSON = $data1->data->programSet; + if (!$processedJSON) { + throw new \Exception('Unable to find show id: ' . $showID); + } + + $answerLength = 1; + $offset = 0; + $numberOfElements = 1; + + while ($answerLength != 0 && $offset < $numberOfElements && (is_null($limit) || $offset < $limit)) { + $json2 = getContents($url . '?offset=' . 
$offset); + $data2 = Json::decode($json2, false); + $processedJSON = $data2->data->programSet; + + $answerLength = count($processedJSON->items->nodes); + $offset = $offset + $answerLength; + $numberOfElements = $processedJSON->numberOfElements; + + foreach ($processedJSON->items->nodes as $audio) { + $item = []; + $item['uri'] = $audio->sharingUrl; + $item['title'] = $audio->title; + $imageSquare = str_replace(self::IMAGEWIDTHPLACEHOLDER, self::IMAGEWIDTH, $audio->image->url1X1); + $image = str_replace(self::IMAGEWIDTHPLACEHOLDER, self::IMAGEWIDTH, $audio->image->url); + $item['enclosures'] = [ + $audio->audios[0]->url, + $imageSquare + ]; + // synopsis in list is shortened, full synopsis is available using one request per item + $item['content'] = '

' . $audio->synopsis . '

'; + $item['timestamp'] = $audio->publicationStartDateAndTime; + $item['uid'] = $audio->id; + $item['author'] = $audio->programSet->publicationService->title; + + $category = $audio->programSet->editorialCategories->title ?? null; + if ($category) { + $item['categories'] = [$category]; + } + + $item['itunes'] = [ + 'duration' => $audio->duration, + ]; + + $this->items[] = $item; + } + } + $this->title = $processedJSON->title; + $this->uri = $processedJSON->sharingUrl; + $this->icon = str_replace(self::IMAGEWIDTHPLACEHOLDER, self::IMAGEWIDTH, $processedJSON->image->url1X1); + // add image file extension to URL so icon is shown in generated RSS feeds, see + // https://github.com/RSS-Bridge/rss-bridge/blob/4aed05c7b678b5673386d61374bba13637d15487/formats/MrssFormat.php#L76 + $this->icon = $this->icon . self::IMAGEEXTENSION; + + $this->items = array_slice($this->items, 0, $limit); + + date_default_timezone_set($oldTz); + } + + /** {@inheritdoc} */ + public function getURI() + { + if (!empty($this->uri)) { + return $this->uri; + } + return parent::getURI(); + } + + /** {@inheritdoc} */ + public function getName() + { + if (!empty($this->title)) { + return $this->title; + } + return parent::getName(); + } + + /** {@inheritdoc} */ + public function getIcon() + { + if (!empty($this->icon)) { + return $this->icon; + } + return parent::getIcon(); + } +} diff --git a/bridges/ARDMediathekBridge.php b/bridges/ARDMediathekBridge.php new file mode 100644 index 00000000000..da11dd642ae --- /dev/null +++ b/bridges/ARDMediathekBridge.php @@ -0,0 +1,114 @@ + [ + 'name' => 'Show Link or ID', + 'required' => true, + 'title' => 'Link to the show page or just its alphanumeric suffix', + 'defaultValue' => 'https://www.ardmediathek.de/sendung/45-min/Y3JpZDovL25kci5kZS8xMzkx/' + ] + ] + ]; + + public function collectData() + { + $oldTz = date_default_timezone_get(); + + date_default_timezone_set('Europe/Berlin'); + + $pathComponents = explode('/', $this->getInput('path')); + if 
(empty($pathComponents)) { + returnClientError('Path may not be empty'); + } + if (count($pathComponents) < 2) { + $showID = $pathComponents[0]; + } else { + $lastKey = count($pathComponents) - 1; + $showID = $pathComponents[$lastKey]; + if (strlen($showID) === 0) { + $showID = $pathComponents[$lastKey - 1]; + } + } + + $url = self::APIENDPOINT . $showID . '?pageSize=' . self::PAGESIZE; + $rawJSON = getContents($url); + $processedJSON = json_decode($rawJSON); + + foreach ($processedJSON->teasers as $video) { + $item = []; + // there is also ->links->self->id, ->links->self->urlId, ->links->target->id, ->links->target->urlId + $item['uri'] = self::VIDEOLINKPREFIX . $video->id . '/'; + // there is also ->mediumTitle and ->shortTitle + $item['title'] = $video->longTitle; + // in the test, aspect16x9 was the only child of images, not sure whether that is always true + $item['enclosures'] = [ + str_replace(self::IMAGEWIDTHPLACEHOLDER, self::IMAGEWIDTH, $video->images->aspect16x9->src) + ]; + $item['content'] = '

'; + $item['timestamp'] = $video->broadcastedOn; + $item['uid'] = $video->id; + $item['author'] = $video->publicationService->name; + $this->items[] = $item; + } + + $this->title = $processedJSON->title; + + date_default_timezone_set($oldTz); + } + + /** {@inheritdoc} */ + public function getName() + { + if (!empty($this->title)) { + return $this->title; + } + return parent::getName(); + } +} diff --git a/bridges/ASRockNewsBridge.php b/bridges/ASRockNewsBridge.php new file mode 100644 index 00000000000..1a3279784a0 --- /dev/null +++ b/bridges/ASRockNewsBridge.php @@ -0,0 +1,63 @@ +find('div.inner > a') as $index => $a) { + $item = []; + + $articlePath = $a->href; + + $articlePageHtml = getSimpleHTMLDOMCached($articlePath, self::CACHE_TIMEOUT); + + $articlePageHtml = defaultLinkTo($articlePageHtml, self::URI); + + $contents = $articlePageHtml->find('div.Contents', 0); + + $item['uri'] = $articlePath; + $item['title'] = $contents->find('h3', 0)->innertext; + + $contents->find('h3', 0)->outertext = ''; + + $item['content'] = $contents->innertext; + $item['timestamp'] = $this->extractDate($a->plaintext); + + $img = $a->find('img', 0); + if ($img) { + $item['enclosures'][] = $img->src; + } + + $this->items[] = $item; + + if (count($this->items) >= 10) { + break; + } + } + } + + private function extractDate($text) + { + $dateRegex = '/^([0-9]{4}\/[0-9]{1,2}\/[0-9]{1,2})/'; + + $text = trim($text); + + if (preg_match($dateRegex, $text, $matches)) { + return $matches[1]; + } + + return ''; + } +} diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index 7e0fb6b2a6d..f7bbd58ef0a 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -1,24 +1,37 @@ collectExpandableDatas(static::URI . 
'spip.php?page=backend'); - } + const PARAMETERS = [ + [ + 'limit' => [ + 'name' => 'limit', + 'type' => 'number', + 'defaultValue' => -1, + ] + ] + ]; - protected function parseItem($newsItem){ - $item = parent::parseItem($newsItem); + public function collectData() + { + $url = 'https://www.acrimed.org/spip.php?page=backend'; + $limit = $this->getInput('limit'); + $this->collectExpandableDatas($url, $limit); + } - $articlePage = getSimpleHTMLDOM($newsItem->link); - $article = sanitize($articlePage->find('article.article1', 0)->innertext); - $article = defaultLinkTo($article, static::URI); - $item['content'] = $article; + protected function parseItem(array $item) + { + $articlePage = getSimpleHTMLDOM($item['uri']); + $article = sanitize($articlePage->find('article.article1', 0)->innertext); + $article = defaultLinkTo($article, static::URI); + $item['content'] = $article; - return $item; - } + return $item; + } } diff --git a/bridges/ActivisionResearchBridge.php b/bridges/ActivisionResearchBridge.php new file mode 100644 index 00000000000..88af4b46499 --- /dev/null +++ b/bridges/ActivisionResearchBridge.php @@ -0,0 +1,45 @@ +find('div[id="home-blog-feed"]', 0); + if (!$dom) { + throw new \Exception(sprintf('Unable to find css selector on `%s`', $url)); + } + $dom = defaultLinkTo($dom, $this->getURI()); + foreach ($dom->find('div[class="blog-entry"]') as $article) { + $a = $article->find('a', 0); + + $blogimg = extractFromDelimiters($article->find('div[class="blog-img"]', 0)->style, 'url(', ')'); + + $title = htmlspecialchars_decode($article->find('div[class="title"]', 0)->plaintext); + $author = htmlspecialchars_decode($article->find('div[class="author]', 0)->plaintext); + $date = $article->find('div[class="pubdate"]', 0)->plaintext; + + $entry = getSimpleHTMLDOMCached($a->href, static::CACHE_TIMEOUT * 7 * 4); + $entry = defaultLinkTo($entry, $this->getURI()); + + $content = $entry->find('div[class="blog-body"]', 0); + $tagsremove = ['script', 'iframe', 'input', 
'form']; + $content = sanitize($content, $tagsremove); + $content = '' . $content; + + $this->items[] = [ + 'title' => $title, + 'author' => $author, + 'uri' => $a->href, + 'content' => $content, + 'timestamp' => strtotime($date), + ]; + } + } +} diff --git a/bridges/AirBreizhBridge.php b/bridges/AirBreizhBridge.php new file mode 100644 index 00000000000..272c74ee121 --- /dev/null +++ b/bridges/AirBreizhBridge.php @@ -0,0 +1,56 @@ + [ + 'theme' => [ + 'name' => 'Thematique', + 'type' => 'list', + 'values' => [ + 'Tout' => '', + 'Rapport d\'activite' => 'rapport-dactivite', + 'Etude' => 'etudes', + 'Information' => 'information', + 'Autres documents' => 'autres-documents', + 'Plan Régional de Surveillance de la qualité de l’air' => 'prsqa', + 'Transport' => 'transport' + ] + ] + ] + ]; + + public function getIcon() + { + return 'https://www.airbreizh.asso.fr/voy_content/uploads/2017/11/favicon.png'; + } + + public function collectData() + { + $html = ''; + $html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . $this->getInput('theme')); + + foreach ($html->find('article') as $article) { + $item = []; + // Title + $item['title'] = $article->find('h2', 0)->plaintext; + // Author + $item['author'] = 'Air Breizh'; + // Image + $imagelink = $article->find('.card__image', 0)->find('img', 0)->getAttribute('src'); + // Content preview + $item['content'] = ' +
' + . $article->find('.card__text', 0)->plaintext; + // URL + $item['uri'] = $article->find('.publi__buttons', 0)->find('a', 0)->getAttribute('href'); + // ID + $item['id'] = $article->find('.publi__buttons', 0)->find('a', 0)->getAttribute('href'); + $this->items[] = $item; + } + } +} diff --git a/bridges/AlbionOnlineBridge.php b/bridges/AlbionOnlineBridge.php new file mode 100644 index 00000000000..4b191b18171 --- /dev/null +++ b/bridges/AlbionOnlineBridge.php @@ -0,0 +1,76 @@ + [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => true, + 'title' => 'Maximum number of items to return', + 'defaultValue' => 5, + ], + 'language' => [ + 'name' => 'Language', + 'type' => 'list', + 'values' => [ + 'English' => 'en', + 'Deutsch' => 'de', + 'Polski' => 'pl', + 'Français' => 'fr', + 'Русский' => 'ru', + 'Português' => 'pt', + 'Español' => 'es', + ], + 'title' => 'Language of changelog posts', + 'defaultValue' => 'en', + ], + 'full' => [ + 'name' => 'Full changelog', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Enable to receive the full changelog post for each item' + ], + ]]; + + public function collectData() + { + $api = 'https://albiononline.com/'; + // Example: https://albiononline.com/en/changelog/1/5 + $url = $api . $this->getInput('language') . '/changelog/1/' . $this->getInput('postcount'); + + $html = getSimpleHTMLDOM($url); + + foreach ($html->find('li') as $data) { + $item = []; + $item['uri'] = self::URI . $data->find('a', 0)->getAttribute('href'); + $item['title'] = trim(explode('|', $data->find('span', 0)->plaintext)[0]); + // Time below work only with en lang. Need to think about solution. 
May be separate request like getFullChangelog, but to english list for all language + //print_r( date_parse_from_format( 'M j, Y' , 'Sep 9, 2020') ); + //$item['timestamp'] = $this->extractDate($a->plaintext); + $item['author'] = 'albiononline.com'; + if ($this->getInput('full')) { + $item['content'] = $this->getFullChangelog($item['uri']); + } else { + //$item['content'] = trim(preg_replace('/\s+/', ' ', $data->find('span', 0)->plaintext)); + // Just use title, no info at all or use title and date, see above + $item['content'] = $item['title']; + } + $item['uid'] = hash('sha256', $item['title']); + $this->items[] = $item; + } + } + + private function getFullChangelog($url) + { + $html = getSimpleHTMLDOMCached($url); + $html = defaultLinkTo($html, self::URI); + return $html->find('div.small-12.columns', 1)->innertext; + } +} diff --git a/bridges/AlfaBankByBridge.php b/bridges/AlfaBankByBridge.php new file mode 100644 index 00000000000..7c13c14dbfa --- /dev/null +++ b/bridges/AlfaBankByBridge.php @@ -0,0 +1,87 @@ + [ + 'business' => [ + 'name' => 'Альфа Бизнес', + 'type' => 'list', + 'title' => 'В зависимости от выбора, возращает уведомления для" . + " клиентов физ. лиц либо для клиентов-юридических лиц и ИП', + 'values' => [ + 'Новости' => 'news', + 'Новости бизнеса' => 'newsBusiness' + ], + 'defaultValue' => 'news' + ], + 'fullContent' => [ + 'name' => 'Включать содержимое', + 'type' => 'checkbox', + 'title' => 'Если выбрано, содержимое уведомлений вставляется в поток (работает медленно)' + ] + ] + ]; + + public function collectData() + { + $business = $this->getInput('business') == 'newsBusiness'; + $fullContent = $this->getInput('fullContent') == 'on'; + + $mainPageUrl = self::URI . '/about/articles/uvedomleniya/'; + if ($business) { + $mainPageUrl .= '?business=true'; + } + $html = getSimpleHTMLDOM($mainPageUrl); + $limit = 0; + + foreach ($html->find('a.notifications__item') as $element) { + if ($limit < 10) { + $item = []; + $item['uid'] = 'urn:sha1:' . 
hash('sha1', $element->getAttribute('data-notification-id')); + $item['title'] = $element->find('div.item-title', 0)->innertext; + $item['timestamp'] = DateTime::createFromFormat( + 'd M Y', + $this->ruMonthsToEn($element->find('div.item-date', 0)->innertext) + )->getTimestamp(); + + $itemUrl = self::URI . $element->href; + if ($business) { + $itemUrl = str_replace('?business=true', '', $itemUrl); + } + $item['uri'] = $itemUrl; + + if ($fullContent) { + $itemHtml = getSimpleHTMLDOM($itemUrl); + if ($itemHtml) { + $item['content'] = $itemHtml->find('div.now-p__content-text', 0)->innertext; + } + } + + $this->items[] = $item; + $limit++; + } + } + } + + public function getIcon() + { + return static::URI . '/local/images/favicon.ico'; + } + + private function ruMonthsToEn($date) + { + $ruMonths = [ + 'Января', 'Февраля', 'Марта', 'Апреля', 'Мая', 'Июня', + 'Июля', 'Августа', 'Сентября', 'Октября', 'Ноября', 'Декабря' ]; + $enMonths = [ + 'January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 'October', 'November', 'December' ]; + return str_replace($ruMonths, $enMonths, $date); + } +} diff --git a/bridges/AllSidesBridge.php b/bridges/AllSidesBridge.php new file mode 100644 index 00000000000..d71195efe54 --- /dev/null +++ b/bridges/AllSidesBridge.php @@ -0,0 +1,85 @@ + [ + 'limit' => [ + 'name' => 'Number of posts to return', + 'type' => 'number', + 'defaultValue' => 10, + 'required' => false, + 'title' => 'Zero or negative values return all posts (ignored if not fetching full article)', + ], + 'fetch' => [ + 'name' => 'Fetch full article content', + 'type' => 'checkbox', + 'defaultValue' => 'checked', + ], + ], + 'Headline Roundups' => [], + ]; + + private const ROUNDUPS_URI = self::URI . 
'/headline-roundups'; + + public function collectData() + { + switch ($this->queriedContext) { + case 'Headline Roundups': + $index = getSimpleHTMLDOM(self::ROUNDUPS_URI); + defaultLinkTo($index, self::ROUNDUPS_URI); + $entries = $index->find('table.views-table > tbody > tr'); + + $limit = (int) $this->getInput('limit'); + $fetch = (bool) $this->getInput('fetch'); + + if ($limit > 0 && $fetch) { + $entries = array_slice($entries, 0, $limit); + } + + foreach ($entries as $entry) { + $item = [ + 'title' => $entry->find('.views-field-name', 0)->text(), + 'uri' => $entry->find('a', 0)->href, + 'timestamp' => $entry->find('.date-display-single', 0)->content, + 'author' => 'AllSides Staff', + ]; + + if ($fetch) { + $article = getSimpleHTMLDOMCached($item['uri']); + defaultLinkTo($article, $item['uri']); + + $item['content'] = $article->find('.story-id-page-description', 0); + + foreach ($article->find('.page-tags a') as $tag) { + $item['categories'][] = $tag->text(); + } + } + + $this->items[] = $item; + } + break; + } + } + + public function getName() + { + if ($this->queriedContext) { + return self::NAME . 
" - {$this->queriedContext}"; + } + return self::NAME; + } + + public function getURI() + { + switch ($this->queriedContext) { + case 'Headline Roundups': + return self::ROUNDUPS_URI; + } + return self::URI; + } +} diff --git a/bridges/AllegroBridge.php b/bridges/AllegroBridge.php new file mode 100644 index 00000000000..55e9f116040 --- /dev/null +++ b/bridges/AllegroBridge.php @@ -0,0 +1,157 @@ + [ + 'name' => 'Search URL', + 'title' => 'Copy the URL from your browser\'s address bar after searching for your items and paste it here', + 'exampleValue' => 'https://allegro.pl/kategoria/swieze-warzywa-cebula-318660', + 'required' => true, + ], + 'cookie' => [ + 'name' => 'The complete cookie value', + 'title' => 'Paste the value of the cookie value from your browser if you want to prevent Allegro imposing rate limits', + 'required' => false, + ], + 'includeSponsoredOffers' => [ + 'type' => 'checkbox', + 'name' => 'Include Sponsored Offers', + 'defaultValue' => 'checked' + ], + 'includePromotedOffers' => [ + 'type' => 'checkbox', + 'name' => 'Include Promoted Offers', + 'defaultValue' => 'checked' + ] + ]]; + + public function getName() + { + $url = $this->getInput('url'); + if (!$url) { + return parent::getName(); + } + $parsedUrl = parse_url($url, PHP_URL_QUERY); + if (!$parsedUrl) { + return parent::getName(); + } + parse_str($parsedUrl, $fields); + + if (array_key_exists('string', $fields)) { + $f = urldecode($fields['string']); + } else { + $f = false; + } + if ($f) { + return $f; + } + + return parent::getName(); + } + + public function getURI() + { + return $this->getInput('url') ?? parent::getURI(); + } + + public function collectData() + { + # make sure we order by the most recently listed offers + $url = preg_replace('/([?&])order=[^&]+(&|$)/', '$1', $this->getInput('url')); + $url .= (parse_url($url, PHP_URL_QUERY) ? '&' : '?') . 
'order=n'; + + $opts = []; + + // If a cookie is provided + if ($cookie = $this->getInput('cookie')) { + $opts[CURLOPT_COOKIE] = $cookie; + } + + $html = getSimpleHTMLDOM($url, [], $opts); + + # if no results found + if ($html->find('.mzmg_6m.m9qz_yo._6a66d_-fJr5')) { + return; + } + + $results = $html->find('article[data-analytics-view-custom-context="REGULAR"]'); + + if ($this->getInput('includeSponsoredOffers')) { + $results = array_merge($results, $html->find('article[data-analytics-view-custom-context="SPONSORED"]')); + } + + if ($this->getInput('includePromotedOffers')) { + $results = array_merge($results, $html->find('article[data-analytics-view-custom-context="PROMOTED"]')); + } + + foreach ($results as $post) { + $item = []; + + $item['uid'] = $post->{'data-analytics-view-value'}; + + $item_link = $post->find('a[href*="' . $item['uid'] . '"], a[href*="allegrolokalnie"]', 0); + + $item['uri'] = $item_link->href; + + $item['title'] = $item_link->find('img', 0)->alt; + + $image = $item_link->find('img', 0)->{'data-src'} ?: $item_link->find('img', 0)->src ?? false; + + if ($image) { + $item['enclosures'] = [$image . '#.image']; + } + + $price = $post->{'data-analytics-view-json-custom-price'}; + if ($price) { + $priceDecoded = json_decode(html_entity_decode($price)); + $price = $priceDecoded->amount . ' ' . $priceDecoded->currency; + } + + $descriptionPatterns = ['/<\s*dt[^>]*>\b/', '/<\/dt>/', '/<\s*dd[^>]*>\b/', '/<\/dd>/']; + $descriptionReplacements = ['', ': ', '', '  ']; + $description = $post->find('.m7er_k4.mpof_5r.mpof_z0_s', 0)->innertext; + $descriptionPretty = preg_replace($descriptionPatterns, $descriptionReplacements, $description); + + $pricingExtraInfo = array_filter($post->find('.mqu1_g3.mgn2_12'), function ($node) { + return empty($node->find('.mvrt_0')); + }); + + $pricingExtraInfo = $pricingExtraInfo[0]->plaintext ?? ''; + + $offerExtraInfo = array_map(function ($node) { + return str_contains($node->plaintext, 'zapłać później') ? 
'' : $node->outertext; + }, $post->find('div.mpof_ki.mwdn_1.mj7a_4.mgn2_12')); + + $isSmart = $post->find('img[alt="Smart!"]', 0) ?? false; + if ($isSmart) { + $pricingExtraInfo .= $isSmart->outertext; + } + + $item['categories'] = []; + $parameters = $post->find('dd'); + foreach ($parameters as $parameter) { + if (in_array(strtolower($parameter->innertext), ['brak', 'nie'])) { + continue; + } + + $item['categories'][] = $parameter->innertext; + } + + $item['content'] = $descriptionPretty + . '

' + . $price + . '
' + . implode('
', $offerExtraInfo) + . '
' + . $pricingExtraInfo + . '

'; + + $this->items[] = $item; + } + } +} diff --git a/bridges/AllocineFRBridge.php b/bridges/AllocineFRBridge.php index 17da9031edf..e7b2adb2785 100644 --- a/bridges/AllocineFRBridge.php +++ b/bridges/AllocineFRBridge.php @@ -1,85 +1,107 @@ array( - 'name' => 'category', - 'type' => 'list', - 'exampleValue' => 'Faux Raccord', - 'title' => 'Select your category', - 'values' => array( - 'Faux Raccord' => 'faux-raccord', - 'Top 5' => 'top-5', - 'Tueurs en Séries' => 'tueurs-en-serie' - ) - ) - )); +class AllocineFRBridge extends BridgeAbstract +{ + const MAINTAINER = 'superbaillot.net'; + const NAME = 'Allo Cine Bridge'; + const CACHE_TIMEOUT = 25200; // 7h + const URI = 'https://www.allocine.fr'; + const DESCRIPTION = 'Bridge for allocine.fr'; + const PARAMETERS = [ [ + 'category' => [ + 'name' => 'Emission', + 'type' => 'list', + 'title' => 'Sélectionner l\'emission', + 'values' => [ + 'Faux Raccord' => 'faux-raccord', + 'Fanzone' => 'fanzone', + 'Game In Ciné' => 'game-in-cine', + 'Pour la faire courte' => 'pour-la-faire-courte', + 'Home Cinéma' => 'home-cinema', + 'PILS - Par Ici Les Sorties' => 'pils-par-ici-les-sorties', + 'AlloCiné : l\'émission, sur LeStream' => 'allocine-lemission-sur-lestream', + 'Give Me Five' => 'give-me-five', + 'Aviez-vous remarqué ?' => 'aviez-vous-remarque', + 'Et paf, il est mort' => 'et-paf-il-est-mort', + 'The Big Fan Theory' => 'the-big-fan-theory', + 'Clichés' => 'cliches', + 'Complètement...' 
=> 'completement', + '#Fun Facts' => 'fun-facts', + 'Origin Story' => 'origin-story', + ] + ] + ]]; - public function getURI(){ - if(!is_null($this->getInput('category'))) { + public function getURI() + { + if (!is_null($this->getInput('category'))) { + $categories = [ + 'faux-raccord' => '/video/programme-12284/', + 'fanzone' => '/video/programme-12298/', + 'game-in-cine' => '/video/programme-12288/', + 'pour-la-faire-courte' => '/video/programme-20960/', + 'home-cinema' => '/video/programme-12287/', + 'pils-par-ici-les-sorties' => '/video/programme-25789/', + 'allocine-lemission-sur-lestream' => '/video/programme-25123/', + 'give-me-five' => '/video/programme-21919/saison-34518/', + 'aviez-vous-remarque' => '/video/programme-19518/', + 'et-paf-il-est-mort' => '/video/programme-25113/', + 'the-big-fan-theory' => '/video/programme-20403/', + 'cliches' => '/video/programme-24834/', + 'completement' => '/video/programme-23859/', + 'fun-facts' => '/video/programme-23040/', + 'origin-story' => '/video/programme-25667/' + ]; - switch($this->getInput('category')) { - case 'faux-raccord': - $uri = static::URI . 'video/programme-12284/saison-32180/'; - break; - case 'top-5': - $uri = static::URI . 'video/programme-12299/saison-29561/'; - break; - case 'tueurs-en-serie': - $uri = static::URI . 'video/programme-12286/saison-22938/'; - break; - } + $category = $this->getInput('category'); + if (array_key_exists($category, $categories)) { + return static::URI . $this->getLastSeasonURI($categories[$category]); + } else { + returnClientError('Emission inconnue'); + } + } - return $uri; - } + return parent::getURI(); + } - return parent::getURI(); - } + private function getLastSeasonURI($category) + { + $html = getSimpleHTMLDOMCached(static::URI . 
$category, 86400); + $seasonLink = $html->find('section[class=section-wrap section]', 0)->find('div[class=cf]', 0)->find('a', 0); + $URI = $seasonLink->href; + return $URI; + } - public function getName(){ - if(!is_null($this->getInput('category'))) { - return self::NAME . ' : ' - . array_search( - $this->getInput('category'), - self::PARAMETERS[$this->queriedContext]['category']['values'] - ); - } + public function getName() + { + if (!is_null($this->getInput('category'))) { + return self::NAME . ' : ' . $this->getKey('category'); + } - return parent::getName(); - } + return parent::getName(); + } - public function collectData(){ + public function collectData() + { + $html = getSimpleHTMLDOM($this->getURI()); - $html = getSimpleHTMLDOM($this->getURI()) - or returnServerError('Could not request ' . $this->getURI() . ' !'); + foreach ($html->find('div[class=gd-col-left]', 0)->find('div[class*=video-card]') as $element) { + $item = []; - $category = array_search( - $this->getInput('category'), - self::PARAMETERS[$this->queriedContext]['category']['values'] - ); + $title = $element->find('a[class*=meta-title-link]', 0); + $content = trim(defaultLinkTo($element->outertext, static::URI)); - foreach($html->find('.media-meta-list figure.media-meta-fig') as $element) { - $item = array(); + // Replace image 'src' with the one in 'data-src' + $content = preg_replace('@src="data:image/gif;base64,[A-Za-z0-9+\/]*"@', '', $content); + $content = preg_replace('@data-src=@', 'src=', $content); - $title = $element->find('div.titlebar h3.title a', 0); - $content = trim($element->innertext); - $figCaption = strpos($content, $category); + // Remove date in the content to prevent content update while the video is getting older + $content = preg_replace('@
.*[^<]*[^<]*
@', '', $content); - if($figCaption !== false) { - $content = str_replace('src="/', 'src="' . static::URI, $content); - $content = str_replace('href="/', 'href="' . static::URI, $content); - $content = str_replace('src=\'/', 'src=\'' . static::URI, $content); - $content = str_replace('href=\'/', 'href=\'' . static::URI, $content); - $item['content'] = $content; - $item['title'] = trim($title->innertext); - $item['uri'] = static::URI . $title->href; - $this->items[] = $item; - } - } - } + $item['content'] = $content; + $item['title'] = trim($title->innertext); + $item['uri'] = static::URI . '/' . substr($title->href, 1); + $this->items[] = $item; + } + } } diff --git a/bridges/AllocineFRSortiesBridge.php b/bridges/AllocineFRSortiesBridge.php new file mode 100644 index 00000000000..a75187bec87 --- /dev/null +++ b/bridges/AllocineFRSortiesBridge.php @@ -0,0 +1,66 @@ +getURI()); + + foreach ($html->find('section.section.section-wrap', 0)->find('li.mdl') as $element) { + $item = []; + + $thumb = $element->find('figure.thumbnail', 0); + $meta = $element->find('div.meta-body', 0); + $synopsis = $element->find('div.synopsis', 0); + $date = $element->find('span.date', 0); + + $title = $element->find('a[class*=meta-title-link]', 0); + $content = trim(defaultLinkTo($thumb->outertext . $meta->outertext . $synopsis->outertext, static::URI)); + + // Replace image 'src' with the one in 'data-src' + $content = preg_replace('@src="data:image/gif;base64,[A-Za-z0-9=+\/]*"@', '', $content); + $content = preg_replace('@data-src=@', 'src=', $content); + + $item['content'] = $content; + $item['title'] = trim($title->innertext); + $item['timestamp'] = $this->frenchPubDateToTimestamp($date->plaintext); + $item['uri'] = static::BASE_URI . '/' . 
substr($title->href, 1); + $this->items[] = $item; + } + } + + private function frenchPubDateToTimestamp($date) + { + return strtotime( + strtr( + strtolower($date), + [ + 'janvier' => 'jan', + 'février' => 'feb', + 'mars' => 'march', + 'avril' => 'apr', + 'mai' => 'may', + 'juin' => 'jun', + 'juillet' => 'jul', + 'août' => 'aug', + 'septembre' => 'sep', + 'octobre' => 'oct', + 'novembre' => 'nov', + 'décembre' => 'dec' + ] + ) + ); + } +} diff --git a/bridges/AmazonBridge.php b/bridges/AmazonBridge.php index bcd83dcce78..6d2aa424aa2 100644 --- a/bridges/AmazonBridge.php +++ b/bridges/AmazonBridge.php @@ -1,95 +1,105 @@ array( - 'name' => 'Keyword', - 'required' => true, - ), - 'sort' => array( - 'name' => 'Sort by', - 'type' => 'list', - 'values' => array( - 'Relevance' => 'relevanceblender', - 'Price: Low to High' => 'price-asc-rank', - 'Price: High to Low' => 'price-desc-rank', - 'Average Customer Review' => 'review-rank', - 'Newest Arrivals' => 'date-desc-rank', - ), - 'defaultValue' => 'relevanceblender', - ), - 'tld' => array( - 'name' => 'Country', - 'type' => 'list', - 'values' => array( - 'Australia' => 'com.au', - 'Brazil' => 'com.br', - 'Canada' => 'ca', - 'China' => 'cn', - 'France' => 'fr', - 'Germany' => 'de', - 'India' => 'in', - 'Italy' => 'it', - 'Japan' => 'co.jp', - 'Mexico' => 'com.mx', - 'Netherlands' => 'nl', - 'Spain' => 'es', - 'United Kingdom' => 'co.uk', - 'United States' => 'com', - ), - 'defaultValue' => 'com', - ), - )); - - public function getName(){ - if(!is_null($this->getInput('tld')) && !is_null($this->getInput('q'))) { - return 'Amazon.' . $this->getInput('tld') . ': ' . $this->getInput('q'); - } - - return parent::getName(); - } - - public function collectData() { - - $uri = 'https://www.amazon.' . $this->getInput('tld') . '/'; - $uri .= 's/?field-keywords=' . urlencode($this->getInput('q')) . '&sort=' . 
$this->getInput('sort'); - - $html = getSimpleHTMLDOM($uri) - or returnServerError('Could not request Amazon.'); - - foreach($html->find('li.s-result-item') as $element) { - - $item = array(); - - // Title - $title = $element->find('h2', 0); - if (is_null($title)) { - continue; - } - - $item['title'] = html_entity_decode($title->innertext, ENT_QUOTES); - - // Url - $uri = $title->parent()->getAttribute('href'); - $uri = substr($uri, 0, strrpos($uri, '/')); - - $item['uri'] = substr($uri, 0, strrpos($uri, '/')); - - // Content - $image = $element->find('img', 0); - $price = $element->find('span.s-price', 0); - $price = ($price) ? $price->innertext : ''; - - $item['content'] = '
' . $price; - - $this->items[] = $item; - } - } +class AmazonBridge extends BridgeAbstract +{ + const MAINTAINER = 'Alexis CHEMEL'; + const NAME = 'Amazon'; + const URI = 'https://www.amazon.com/'; + const CACHE_TIMEOUT = 3600; // 1h + const DESCRIPTION = 'Returns products from Amazon search'; + + const PARAMETERS = [[ + 'q' => [ + 'name' => 'Keyword', + 'required' => true, + 'exampleValue' => 'watch', + ], + 'sort' => [ + 'name' => 'Sort by', + 'type' => 'list', + 'values' => [ + 'Relevance' => 'relevanceblender', + 'Price: Low to High' => 'price-asc-rank', + 'Price: High to Low' => 'price-desc-rank', + 'Average Customer Review' => 'review-rank', + 'Newest Arrivals' => 'date-desc-rank', + ], + 'defaultValue' => 'relevanceblender', + ], + 'tld' => [ + 'name' => 'Country', + 'type' => 'list', + 'values' => [ + 'Australia' => 'com.au', + 'Brazil' => 'com.br', + 'Canada' => 'ca', + 'China' => 'cn', + 'France' => 'fr', + 'Germany' => 'de', + 'India' => 'in', + 'Italy' => 'it', + 'Japan' => 'co.jp', + 'Mexico' => 'com.mx', + 'Netherlands' => 'nl', + 'Poland' => 'pl', + 'Spain' => 'es', + 'Sweden' => 'se', + 'Turkey' => 'com.tr', + 'United Kingdom' => 'co.uk', + 'United States' => 'com', + ], + 'defaultValue' => 'com', + ], + ]]; + + public function collectData() + { + $baseUrl = sprintf('https://www.amazon.%s', $this->getInput('tld')); + + $url = sprintf( + '%s/s/?field-keywords=%s&sort=%s', + $baseUrl, + urlencode($this->getInput('q')), + $this->getInput('sort') + ); + + $dom = getSimpleHTMLDOM($url); + + $elements = $dom->find('div.s-result-item'); + + foreach ($elements as $element) { + $item = []; + + $title = $element->find('h2', 0); + if (!$title) { + continue; + } + + $item['title'] = $title->innertext; + + $itemUrl = $element->find('a', 0)->href; + $item['uri'] = urljoin($baseUrl, $itemUrl); + + $image = $element->find('img', 0); + if ($image) { + $item['content'] = '
'; + } + + $price = $element->find('span.a-price > .a-offscreen', 0); + if ($price) { + $item['content'] .= $price->innertext; + } + + $this->items[] = $item; + } + } + + public function getName() + { + if (!is_null($this->getInput('tld')) && !is_null($this->getInput('q'))) { + return 'Amazon.' . $this->getInput('tld') . ': ' . $this->getInput('q'); + } + + return parent::getName(); + } } diff --git a/bridges/AmazonPriceTrackerBridge.php b/bridges/AmazonPriceTrackerBridge.php index 950178a7e24..5f93eb49b64 100644 --- a/bridges/AmazonPriceTrackerBridge.php +++ b/bridges/AmazonPriceTrackerBridge.php @@ -1,186 +1,253 @@ array( - 'name' => 'ASIN', - 'required' => true, - 'exampleValue' => 'B071GB1VMQ', - // https://stackoverflow.com/a/12827734 - 'pattern' => 'B[\dA-Z]{9}|\d{9}(X|\d)', - ), - 'tld' => array( - 'name' => 'Country', - 'type' => 'list', - 'values' => array( - 'Australia' => 'com.au', - 'Brazil' => 'com.br', - 'Canada' => 'ca', - 'China' => 'cn', - 'France' => 'fr', - 'Germany' => 'de', - 'India' => 'in', - 'Italy' => 'it', - 'Japan' => 'co.jp', - 'Mexico' => 'com.mx', - 'Netherlands' => 'nl', - 'Spain' => 'es', - 'United Kingdom' => 'co.uk', - 'United States' => 'com', - ), - 'defaultValue' => 'com', - ), - )); - - protected $title; - - /** - * Generates domain name given a amazon TLD - */ - private function getDomainName() { - return 'https://www.amazon.' . $this->getInput('tld'); - } - - /** - * Generates URI for a Amazon product page - */ - public function getURI() { - if (!is_null($this->getInput('asin'))) { - return $this->getDomainName() . '/dp/' . $this->getInput('asin') . 
'/'; - } - return parent::getURI(); - } - - /** - * Scrapes the product title from the html page - * returns the default title if scraping fails - */ - private function getTitle($html) { - $titleTag = $html->find('#productTitle', 0); - - if (!$titleTag) { - return $this->getDefaultTitle(); - } else { - return trim(html_entity_decode($titleTag->innertext, ENT_QUOTES)); - } - } - - /** - * Title used by the feed if none could be found - */ - private function getDefaultTitle() { - return 'Amazon.' . $this->getInput('tld') . ': ' . $this->getInput('asin'); - } - - /** - * Returns name for the feed - * Uses title (already scraped) if it has one - */ - public function getName() { - if (isset($this->title)) { - return $this->title; - } else { - return parent::getName(); - } - } - - private function parseDynamicImage($attribute) { - $json = json_decode(html_entity_decode($attribute), true); - - if ($json and count($json) > 0) { - return array_keys($json)[0]; - } - } - - /** - * Returns a generated image tag for the product - */ - private function getImage($html) { - $imageSrc = $html->find('#main-image-container img', 0); - - if ($imageSrc) { - $hiresImage = $imageSrc->getAttribute('data-old-hires'); - $dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image'); - $image = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute); - } - $image = $image ?: 'https://placekitten.com/200/300'; - - return << [ + 'name' => 'ASIN', + 'required' => true, + 'exampleValue' => 'B071GB1VMQ', + // https://stackoverflow.com/a/12827734 + 'pattern' => 'B[\dA-Z]{9}|\d{9}(X|\d)', + ], + 'tld' => [ + 'name' => 'Country', + 'type' => 'list', + 'values' => [ + 'Australia' => 'com.au', + 'Brazil' => 'com.br', + 'Canada' => 'ca', + 'China' => 'cn', + 'France' => 'fr', + 'Germany' => 'de', + 'India' => 'in', + 'Italy' => 'it', + 'Japan' => 'co.jp', + 'Mexico' => 'com.mx', + 'Netherlands' => 'nl', + 'Poland' => 'pl', + 'Spain' => 'es', + 'Sweden' => 'se', + 'Turkey' => 'com.tr', 
+ 'United Kingdom' => 'co.uk', + 'United States' => 'com', + ], + 'defaultValue' => 'com', + ], + ]]; + + const PRICE_SELECTORS = [ + '#priceblock_ourprice', + '.priceBlockBuyingPriceString', + '#newBuyBoxPrice', + '#tp_price_block_total_price_ww', + 'span.offer-price', + '.a-color-price', + ]; + + const WHITESPACE = " \t\n\r\0\x0B\xC2\xA0"; + + protected $title; + + /** + * Generates domain name given a amazon TLD + */ + private function getDomainName() + { + return 'https://www.amazon.' . $this->getInput('tld'); + } + + /** + * Generates URI for a Amazon product page + */ + public function getURI() + { + if (!is_null($this->getInput('asin'))) { + return $this->getDomainName() . '/dp/' . $this->getInput('asin'); + } + return parent::getURI(); + } + + /** + * Scrapes the product title from the html page + * returns the default title if scraping fails + */ + private function getTitle($html) + { + $titleTag = $html->find('#productTitle', 0); + + if (!$titleTag) { + return $this->getDefaultTitle(); + } else { + return trim(html_entity_decode($titleTag->innertext, ENT_QUOTES)); + } + } + + /** + * Title used by the feed if none could be found + */ + private function getDefaultTitle() + { + return 'Amazon.' . $this->getInput('tld') . ': ' . 
$this->getInput('asin'); + } + + /** + * Returns name for the feed + * Uses title (already scraped) if it has one + */ + public function getName() + { + if (isset($this->title)) { + return $this->title; + } else { + return parent::getName(); + } + } + + private function parseDynamicImage($attribute) + { + $json = json_decode(html_entity_decode($attribute), true); + + if ($json and count($json) > 0) { + return array_keys($json)[0]; + } + } + + /** + * Returns a generated image tag for the product + */ + private function getImage($html) + { + $image = 'https://placekitten.com/200/300'; + $imageSrc = $html->find('#main-image-container img', 0); + if ($imageSrc) { + $hiresImage = $imageSrc->getAttribute('data-old-hires'); + $dynamicImageAttribute = $imageSrc->getAttribute('data-a-dynamic-image'); + $image = $hiresImage ?: $this->parseDynamicImage($dynamicImageAttribute); + } + + return << EOT; - } - - /** - * Return \simple_html_dom object - * for the entire html of the product page - */ - private function getHtml() { - $uri = $this->getURI(); - - return getSimpleHTMLDOM($uri) ?: returnServerError('Could not request Amazon.'); - } - - private function scrapePriceFromMetrics($html) { - $asinData = $html->find('#cerberus-data-metrics', 0); - - // +
+ {$post['content']} +
+
+ {$imagesText} +
+EOD; + + return $item; + } +} diff --git a/bridges/PinterestBridge.php b/bridges/PinterestBridge.php index 48c0cfca496..8338fb25bf3 100644 --- a/bridges/PinterestBridge.php +++ b/bridges/PinterestBridge.php @@ -1,61 +1,67 @@ array( - 'u' => array( - 'name' => 'username', - 'required' => true - ), - 'b' => array( - 'name' => 'board', - 'required' => true - ) - ) - ); + const PARAMETERS = [ + 'By username and board' => [ + 'u' => [ + 'name' => 'username', + 'exampleValue' => 'VIGOIndustries', + 'required' => true + ], + 'b' => [ + 'name' => 'board', + 'exampleValue' => 'bathroom-remodels', + 'required' => true + ] + ] + ]; - public function getIcon() { - return 'https://s.pinimg.com/webapp/style/images/favicon-9f8f9adf.png'; - } + public function getIcon() + { + return 'https://s.pinimg.com/webapp/style/images/favicon-9f8f9adf.png'; + } - public function collectData() { - $this->collectExpandableDatas($this->getURI() . '.rss'); - $this->fixLowRes(); - } + public function collectData() + { + $this->collectExpandableDatas($this->getURI() . '.rss'); + $this->fixLowRes(); + } - private function fixLowRes() { + private function fixLowRes() + { + $newitems = []; + $pattern = '/https\:\/\/i\.pinimg\.com\/[a-zA-Z0-9]*x\//'; + foreach ($this->items as $item) { + $item['content'] = preg_replace($pattern, 'https://i.pinimg.com/originals/', $item['content']); + $item['enclosures'] = [ + $item['uri'], + ]; + $newitems[] = $item; + } + $this->items = $newitems; + } - $newitems = array(); - $pattern = '/https\:\/\/i\.pinimg\.com\/[a-zA-Z0-9]*x\//'; - foreach($this->items as $item) { + public function getURI() + { + if ($this->queriedContext === 'By username and board') { + return self::URI . '/' . urlencode($this->getInput('u')) . '/' . 
urlencode($this->getInput('b')); + } - $item['content'] = preg_replace($pattern, 'https://i.pinimg.com/originals/', $item['content']); - $newitems[] = $item; - } - $this->items = $newitems; + return parent::getURI(); + } - } + public function getName() + { + if ($this->queriedContext === 'By username and board') { + return $this->getInput('u') . ' - ' . $this->getInput('b') . ' - ' . self::NAME; + } - public function getURI() { - - if ($this->queriedContext === 'By username and board') { - return self::URI . '/' . urlencode($this->getInput('u')) . '/' . urlencode($this->getInput('b')); - } - - return parent::getURI(); - } - - public function getName() { - - if ($this->queriedContext === 'By username and board') { - return $this->getInput('u') . ' - ' . $this->getInput('b') . ' - ' . self::NAME; - } - - return parent::getName(); - } + return parent::getName(); + } } diff --git a/bridges/PirateCommunityBridge.php b/bridges/PirateCommunityBridge.php index fcf97b9c63c..5a617b0429d 100644 --- a/bridges/PirateCommunityBridge.php +++ b/bridges/PirateCommunityBridge.php @@ -1,88 +1,104 @@ array( - 'name' => 'Topic ID', - 'type' => 'number', - 'title' => 'Topic ID from topic URL. If the URL contains t=12 the ID is 12.', - 'required' => true - ))); - - private $feedName = ''; - - public function detectParameters($url){ - $parsed_url = parse_url($url); - - if($parsed_url['host'] !== 'raymanpc.com') - return null; - - parse_str($parsed_url['query'], $parsed_query); - - if($parsed_url['path'] === '/forum/viewtopic.php' - && array_key_exists('t', $parsed_query)) { - return array('t' => $parsed_query['t']); - } - - return null; - } - - public function getName() { - if(!empty($this->feedName)) - return $this->feedName; - - return parent::getName(); - } - - public function getURI(){ - if(!is_null($this->getInput('t'))) { - return self::URI - . 'forum/viewtopic.php?t=' - . $this->getInput('t') - . 
'&sd=d'; // sort posts decending by ate so first page has latest posts - } - - return parent::getURI(); - } - - public function collectData(){ - $html = getSimpleHTMLDOM($this->getURI()) - or returnServerError('Could not retrieve topic page at ' . $this->getURI()); - - $this->feedName = $html->find('head title', 0)->plaintext; - - foreach($html->find('.post') as $reply) { - $item = array(); - - $item['uri'] = $this->getURI() - . $reply->find('h3 a', 0)->getAttribute('href'); - - $item['title'] = $reply->find('h3 a', 0)->plaintext; - - $author_html = $reply->find('.author', 0); - // author_html contains the timestamp as text directly inside it, - // so delete all other child elements - foreach($author_html->children as $child) - $child->outertext = ''; - // Timestamps are always in UTC+1 - $item['timestamp'] = trim($author_html->innertext) . ' +01:00'; - - $item['author'] = $reply - ->find('.username, .username-coloured', 0) - ->plaintext; - - $item['content'] = defaultLinkTo($reply->find('.content', 0)->innertext, - $this->getURI()); - - $item['enclosures'] = array(); - foreach($reply->find('.attachbox img.postimage') as $img) - $item['enclosures'][] = urljoin($this->getURI(), $img->src); - - $this->items[] = $item; - } - } + +class PirateCommunityBridge extends BridgeAbstract +{ + const NAME = 'Pirate-Community Bridge'; + const URI = 'https://raymanpc.com/'; + const CACHE_TIMEOUT = 300; // 5min + const DESCRIPTION = 'Returns replies to topics'; + const MAINTAINER = 'Roliga'; + const PARAMETERS = [ [ + 't' => [ + 'name' => 'Topic ID', + 'type' => 'number', + 'exampleValue' => '12651', + 'title' => 'Topic ID from topic URL. If the URL contains t=12 the ID is 12.', + 'required' => true + ]]]; + + private $feedName = ''; + + public function detectParameters($url) + { + $parsed_url = parse_url($url); + + $host = $parsed_url['host'] ?? 
null; + + if ($host !== 'raymanpc.com') { + return null; + } + + parse_str($parsed_url['query'], $parsed_query); + + if ( + $parsed_url['path'] === '/forum/viewtopic.php' + && array_key_exists('t', $parsed_query) + ) { + return ['t' => $parsed_query['t']]; + } + + return null; + } + + public function getName() + { + if (!empty($this->feedName)) { + return $this->feedName; + } + + return parent::getName(); + } + + public function getURI() + { + if (!is_null($this->getInput('t'))) { + return self::URI + . 'forum/viewtopic.php?t=' + . $this->getInput('t') + . '&sd=d'; // sort posts decending by ate so first page has latest posts + } + + return parent::getURI(); + } + + public function collectData() + { + $html = getSimpleHTMLDOM($this->getURI()); + + $this->feedName = $html->find('head title', 0)->plaintext; + + foreach ($html->find('.post') as $reply) { + $item = []; + + $item['uri'] = $this->getURI() + . $reply->find('h3 a', 0)->getAttribute('href'); + + $item['title'] = $reply->find('h3 a', 0)->plaintext; + + $author_html = $reply->find('.author', 0); + // author_html contains the timestamp as text directly inside it, + // so delete all other child elements + foreach ($author_html->children as $child) { + $child->outertext = ''; + } + // Timestamps are always in UTC+1 + $item['timestamp'] = trim($author_html->innertext) . 
' +01:00'; + + $item['author'] = $reply + ->find('.username, .username-coloured', 0) + ->plaintext; + + $item['content'] = defaultLinkTo( + $reply->find('.content', 0)->innertext, + $this->getURI() + ); + + $item['enclosures'] = []; + foreach ($reply->find('.attachbox img.postimage') as $img) { + $item['enclosures'][] = urljoin($this->getURI(), $img->src); + } + + $this->items[] = $item; + } + } } diff --git a/bridges/PixivBridge.php b/bridges/PixivBridge.php index 4af2da5a116..e464b12d03c 100644 --- a/bridges/PixivBridge.php +++ b/bridges/PixivBridge.php @@ -1,72 +1,351 @@ array( - 'name' => 'Tag to search', - 'exampleValue' => 'example', - 'required' => true - ), - )); - - public function collectData(){ - - $html = getContents(static::URI . 'search.php?word=' . urlencode($this->getInput('tag'))) - or returnClientError('Unable to query pixiv.net'); - $regex = '/getTimestamp(); - - $item['content'] = ""; - $this->items[] = $item; - } - } - - private function cacheImage($url, $illustId) { - - $url = str_replace('_master1200', '', $url); - $url = str_replace('c/240x240/img-master/', 'img-original/', $url); - $path = PATH_CACHE . 'pixiv_img/'; - - if(!is_dir($path)) - mkdir($path, 0755, true); - - if(!is_file($path . '/' . $illustId . '.jpeg')) { - $headers = array('Referer: https://www.pixiv.net/member_illust.php?mode=medium&illust_id=' . $illustId); - $illust = getContents($url, $headers); - if(strpos($illust, '404 Not Found') !== false) { - $illust = getContents(str_replace('jpg', 'png', $url), $headers); - } - file_put_contents($path . '/' . $illustId . '.jpeg', $illust); - } - - return 'cache/pixiv_img/' . $illustId . 
'.jpeg'; - - } + +/** + * Good resource on API return values (Ex: illustType): + * https://hackage.haskell.org/package/pixiv-0.1.0/docs/Web-Pixiv-Types.html + */ +class PixivBridge extends BridgeAbstract +{ + const NAME = 'Pixiv Bridge'; + const URI = 'https://www.pixiv.net/'; + const DESCRIPTION = 'Returns the tag search from pixiv.net'; + const MAINTAINER = 'mruac'; + const CONFIGURATION = [ + 'cookie' => [ + 'required' => false, + 'defaultValue' => null + ], + 'proxy_url' => [ + 'required' => false, + 'defaultValue' => null + ] + ]; + + const PARAMETERS = [ + 'global' => [ + 'posts' => [ + 'name' => 'Post Limit', + 'type' => 'number', + 'defaultValue' => '10' + ], + 'fullsize' => [ + 'name' => 'Full-size Image', + 'type' => 'checkbox' + ], + 'mode' => [ + 'name' => 'Post Type', + 'type' => 'list', + 'values' => [ + 'All Works' => 'all', + 'Illustrations' => 'illustrations/', + 'Manga' => 'manga/', + 'Novels' => 'novels/' + ] + ], + 'mature' => [ + 'name' => 'Include R-18 works', + 'type' => 'checkbox' + ], + 'ai' => [ + 'name' => 'Include AI-Generated works', + 'type' => 'checkbox' + ] + ], + 'Tag' => [ + 'tag' => [ + 'name' => 'Query to search', + 'exampleValue' => 'オリジナル', + 'required' => true + ] + ], + 'User' => [ + 'userid' => [ + 'name' => 'User ID from profile URL', + 'exampleValue' => '11', + 'required' => true + ] + ] + ]; + + // maps from URLs to json keys by context + const JSON_KEY_MAP = [ + 'Tag' => [ + 'illustrations/' => 'illust', + 'manga/' => 'manga', + 'novels/' => 'novel' + ], + 'User' => [ + 'illustrations/' => 'illusts', + 'manga/' => 'manga', + 'novels/' => 'novels' + ] + ]; + + // Hold the username for getName() + private $username = null; + + public function getName() + { + switch ($this->queriedContext) { + case 'Tag': + $context = 'Tag'; + $query = $this->getInput('tag'); + break; + case 'User': + $context = 'User'; + $query = $this->username ?? 
$this->getInput('userid'); + break; + default: + return parent::getName(); + } + return 'Pixiv ' . $this->getKey('mode') . " from {$context} {$query}"; + } + + public function getURI() + { + switch ($this->queriedContext) { + case 'Tag': + $uri = static::URI . 'tags/' . urlencode($this->getInput('tag') ?? ''); + break; + case 'User': + $uri = static::URI . 'users/' . $this->getInput('userid'); + break; + default: + return parent::getURI(); + } + if ($this->getInput('mode') != 'all') { + $uri = $uri . '/' . $this->getInput('mode'); + } + return $uri; + } + + private function getSearchURI($mode) + { + switch ($this->queriedContext) { + case 'Tag': + $query = urlencode($this->getInput('tag')); + $uri = static::URI . 'ajax/search/top/' . $query; + break; + case 'User': + $uri = static::URI . 'ajax/user/' . $this->getInput('userid') + . '/profile/top'; + break; + default: + returnClientError('Invalid Context'); + } + return $uri; + } + + private function getDataFromJSON($json, $json_key) + { + $key = $json_key; + if ( + $this->queriedContext === 'Tag' && + $this->getOption('cookie') !== null + ) { + switch ($json_key) { + case 'illust': + case 'manga': + $key = 'illustManga'; + break; + } + } + $json = $json['body'][$key]; + // Tags context contains subkey + if ($this->queriedContext === 'Tag') { + $json = $json['data']; + if ($this->getOption('cookie') !== null) { + switch ($json_key) { + case 'illust': + $json = array_reduce($json, function ($acc, $i) { + if ($i['illustType'] === 0) { + $acc[] = $i; + } + return $acc; + }, []); + break; + case 'manga': + $json = array_reduce($json, function ($acc, $i) { + if ($i['illustType'] === 1) { + $acc[] = $i; + }return $acc; + }, []); + break; + } + } + } + return $json; + } + + private function collectWorksArray() + { + $content = $this->getData($this->getSearchURI($this->getInput('mode')), true, true); + if ($this->getInput('mode') == 'all') { + $total = []; + foreach (self::JSON_KEY_MAP[$this->queriedContext] as $mode => 
$json_key) { + $current = $this->getDataFromJSON($content, $json_key); + $total = array_merge($total, $current); + } + $content = $total; + } else { + $json_key = self::JSON_KEY_MAP[$this->queriedContext][$this->getInput('mode')]; + $content = $this->getDataFromJSON($content, $json_key); + } + return $content; + } + + public function collectData() + { + $this->checkOptions(); + $proxy_url = $this->getOption('proxy_url'); + $proxy_url = $proxy_url ? rtrim($proxy_url, '/') : null; + + $content = $this->collectWorksArray(); + $content = array_filter($content, function ($v, $k) { + return !array_key_exists('isAdContainer', $v); + }, ARRAY_FILTER_USE_BOTH); + + // Sort by updateDate to get newest works + usort($content, function ($a, $b) { + return $b['updateDate'] <=> $a['updateDate']; + }); + + //exclude AI generated works if unchecked. + if ($this->getInput('ai') !== true) { + $content = array_filter($content, function ($v) { + $isAI = $v['aiType'] === 2; + return !$isAI; + }); + } + + //exclude R-18 works if unchecked. + if ($this->getInput('mature') !== true) { + $content = array_filter($content, function ($v) { + $isMature = $v['xRestrict'] > 0; + return !$isMature; + }); + } + + $content = array_slice($content, 0, $this->getInput('posts')); + + foreach ($content as $result) { + // Store username for getName() + if (!$this->username) { + $this->username = $result['userName']; + } + + $item = []; + $item['uid'] = $result['id']; + + $subpath = array_key_exists('illustType', $result) ? 'artworks/' : 'novel/show.php?id='; + $item['uri'] = static::URI . $subpath . $result['id']; + + $item['title'] = $result['title']; + $item['author'] = $result['userName']; + $item['timestamp'] = $result['updateDate']; + $item['categories'] = $result['tags']; + + if ($proxy_url) { + //use proxy image host if set. + if ($this->getInput('fullsize')) { + $ajax_uri = static::URI . 'ajax/illust/' . 
$result['id']; + $imagejson = $this->getData($ajax_uri, true, true); + $img_url = preg_replace('/https:\/\/i\.pximg\.net/', $proxy_url, $imagejson['body']['urls']['original']); + } else { + $img_url = preg_replace('/https:\/\/i\.pximg\.net/', $proxy_url, $result['url']); + } + } else { + $img_url = $result['url']; + } + + // Currently, this might result in broken image due to their strict referrer check + $item['content'] = sprintf('', $img_url, $img_url); + + // Additional content items + if (array_key_exists('pageCount', $result)) { + $item['content'] .= '
Page Count: ' . $result['pageCount']; + } else { + $item['content'] .= '
Word Count: ' . $result['wordCount']; + } + + $this->items[] = $item; + } + } + + private function checkOptions() + { + $proxy = $this->getOption('proxy_url'); + if ($proxy) { + if ( + !(strlen($proxy) > 0 && preg_match('/https?:\/\/.*/', $proxy)) + ) { + returnServerError('Invalid proxy_url value set. The proxy must include the HTTP/S at the beginning of the url.'); + } + } + + $cookie = $this->getCookie(); + if ($cookie) { + $isAuth = $this->loadCacheValue('is_authenticated'); + if (!$isAuth) { + $res = $this->getData('https://www.pixiv.net/ajax/webpush', true, true); + if ($res['error'] === false) { + $this->saveCacheValue('is_authenticated', true); + } + } + } + } + + private function checkCookie(array $headers) + { + if (array_key_exists('set-cookie', $headers)) { + foreach ($headers['set-cookie'] as $value) { + if (str_starts_with($value, 'PHPSESSID=')) { + parse_str(strtr($value, ['&' => '%26', '+' => '%2B', ';' => '&']), $cookie); + if ($cookie['PHPSESSID'] != $this->getCookie()) { + $this->saveCacheValue('cookie', $cookie['PHPSESSID']); + } + break; + } + } + } + } + + private function getCookie() + { + // checks if cookie is set, if not initialise it with the cookie from the config + $value = $this->loadCacheValue('cookie'); + if (!$value) { + $value = $this->getOption('cookie'); + + // 30 days + 1 day to let cookie chance to renew + $this->saveCacheValue('cookie', $this->getOption('cookie'), 2678400); + } + return $value; + } + + //Cache getContents by default + private function getData(string $url, bool $cache = true, bool $getJSON = false, array $httpHeaders = [], array $curlOptions = []) + { + $cookie_str = $this->getCookie(); + if ($cookie_str) { + $curlOptions[CURLOPT_COOKIE] = 'PHPSESSID=' . 
$cookie_str; + } + + if ($cache) { + $response = $this->loadCacheValue($url); + if (!$response || is_array($response)) { + $response = getContents($url, $httpHeaders, $curlOptions, true); + $this->saveCacheValue($url, $response); + } + } else { + $response = getContents($url, $httpHeaders, $curlOptions, true); + } + + $this->checkCookie($response->getHeaders()); + + if ($getJSON) { + return json_decode($response->getBody(), true); + } + return $response->getBody(); + } } diff --git a/bridges/PlantUMLReleasesBridge.php b/bridges/PlantUMLReleasesBridge.php index 664805606b4..bc1cca2056f 100644 --- a/bridges/PlantUMLReleasesBridge.php +++ b/bridges/PlantUMLReleasesBridge.php @@ -7,61 +7,43 @@ */ class PlantUMLReleasesBridge extends BridgeAbstract { - const MAINTAINER = 'Riduidel'; - - const NAME = 'PlantUML Releases'; - - const AUTHOR = 'PlantUML team'; - - // URI is no more valid, since we can address the whole gq galaxy - const URI = 'http://plantuml.com/fr/changes'; - - const CACHE_TIMEOUT = 7200; // 2h - const DESCRIPTION = 'PlantUML releases bridge, showing for each release the changelog'; - - const DEFAULT_DOMAIN = 'plantuml.com'; - - const PARAMETERS = array( array( - )); - - const REPLACED_ATTRIBUTES = array( - 'href' => 'href', - 'src' => 'src', - 'data-original' => 'src' - ); - - private function getDomain() { - $domain = $this->getInput('domain'); - if (empty($domain)) - $domain = self::DEFAULT_DOMAIN; - if (strpos($domain, '://') === false) - $domain = 'https://' . $domain; - return $domain; - } - - public function getURI() - { - return self::URI; - } - - public function collectData() - { - $html = getSimpleHTMLDOM($this->getURI()) or returnServerError('Could not request ' . $this->getURI()); - - // Since GQ don't want simple class scrapping, let's do it the hard way and ... discover content ! 
- $main = $html->find('div[id=root]', 0); - foreach ($main->find('h2') as $release) { - $item = array(); - $item['author'] = self::AUTHOR; - $release_text = $release->innertext; - if (preg_match('/(.+) \((.*)\)/', $release_text, $matches)) { - $item['title'] = $matches[1]; - // And now, build the date from the date text - $item['timestamp'] = strtotime($matches[2]); - } - $item['uri'] = $this->getURI(); - $item['content'] = $release->next_sibling (); - $this->items[] = $item; - } - } + const MAINTAINER = 'Riduidel'; + const NAME = 'PlantUML Releases'; + const AUTHOR = 'PlantUML team'; + const URI = 'https://plantuml.com/changes'; + + const CACHE_TIMEOUT = 7200; // 2h + const DESCRIPTION = 'PlantUML releases bridge, showing for each release the changelog'; + const ITEM_LIMIT = 10; + + public function getURI() + { + return self::URI; + } + + public function collectData() + { + $html = defaultLinkTo(getSimpleHTMLDOM($this->getURI()), self::URI); + + $num_items = 0; + $main = $html->find('div[id=root]', 0); + foreach ($main->find('h2') as $release) { + // Limit to $ITEM_LIMIT number of results + if ($num_items++ >= self::ITEM_LIMIT) { + break; + } + $item = []; + $item['author'] = self::AUTHOR; + $release_text = $release->innertext; + if (preg_match('/(.+) \((.*)\)/', $release_text, $matches)) { + $item['title'] = $matches[1]; + $item['timestamp'] = preg_replace('/(\d+) (\w{3})\w*, (\d+)/', '${1} ${2} ${3}', $matches[2]); + } else { + $item['title'] = $release_text; + } + $item['uri'] = $this->getURI(); + $item['content'] = $release->next_sibling(); + $this->items[] = $item; + } + } } diff --git a/bridges/PokemonNewsBridge.php b/bridges/PokemonNewsBridge.php new file mode 100644 index 00000000000..3dacb163108 --- /dev/null +++ b/bridges/PokemonNewsBridge.php @@ -0,0 +1,47 @@ +find('.news-list ul li') as $item) { + $title = $item->find('h3', 0)->plaintext; + $description = $item->find('p.hidden-mobile', 0); + $dateString = $item->find('p.date', 0)->plaintext; + // e.g. 
September 15, 2022 + $createdAt = date_create_from_format('F d, Y', $dateString); + // todo: + $tagsString = $item->find('p.tags', 0)->plaintext; + $path = $item->find('a', 0)->href; + $imagePath = $item->find('img', 0)->src; + $tags = explode('&', $tagsString); + $tags = array_map('trim', $tags); + + $this->items[] = [ + 'title' => $title, + 'uri' => sprintf('https://www.pokemon.com%s', $path), + 'timestamp' => $createdAt ? $createdAt->getTimestamp() : time(), + 'categories' => $tags, + 'content' => sprintf( + '

%s', + $imagePath, + $description ? $description->plaintext : '' + ), + ]; + } + } +} diff --git a/bridges/PokemonTVBridge.php b/bridges/PokemonTVBridge.php new file mode 100644 index 00000000000..a4c0a4bea47 --- /dev/null +++ b/bridges/PokemonTVBridge.php @@ -0,0 +1,152 @@ + [ + 'name' => 'Language', + 'type' => 'list', + 'title' => 'Select your language', + 'values' => [ + 'Danish' => 'dk', + 'Dutch' => 'nl', + 'English (UK)' => 'uk', + 'English (US)' => 'us', + 'Finish' => 'fi', + 'French' => 'fr', + 'German' => 'de', + 'Italian' => 'it', + 'Latin America' => 'el', + 'Norwegian' => 'no', + 'Portoguese' => 'br', + 'Russian' => 'ru', + 'Spanish' => 'es', + 'Swedish' => 'se' + ], + 'defaultValue' => 'English (US)' + ], + 'filtername' => [ + 'name' => 'Series Name Filter', + 'exampleValue' => 'Ultra', + 'required' => false + ], + 'filterseason' => [ + 'name' => 'Series Season Filter', + 'exampleValue' => '22', + 'required' => false + ] + ]]; + + public function collectData() + { + $link = 'https://www.pokemon.com/api/pokemontv/v2/channels/' . $this->getInput('language'); + + $html = getSimpleHTMLDOM($link); + $parsed_json = json_decode($html); + + $filtername = $this->getInput('filtername'); + $filterseason = $this->getInput('filterseason'); + + foreach ($parsed_json as $element) { + if (strlen($filtername) >= 1) { + if (!(stristr($element->{'channel_name'}, $filtername) !== false)) { + continue; + } + } + foreach ($element->{'media'} as $mediaelement) { + if (strlen($filterseason) >= 1) { + if ($mediaelement->{'season'} != $filterseason) { + continue; + } + } + switch ($element->media_type) { + case 'movie': + case 'junior': + case 'original': + case 'non-animation': + $itemtitle = $element->channel_name; + break; + case 'episode': + $season = str_pad($mediaelement->{'season'}, 2, '0', STR_PAD_LEFT); + $episode = str_pad($mediaelement->{'episode'}, 2, '0', STR_PAD_LEFT); + $itemtitle = $element->{'channel_name'} . ' - S' . $season . 'E' . 
$episode; + break; + default: + $itemtitle = ''; + } + $streamurl = 'https://watch.pokemon.com/' . $this->getCountryCode() . '/#/player?id=' . $mediaelement->{'id'}; + $item = []; + $item['uri'] = $streamurl; + $item['title'] = $itemtitle; + $item['timestamp'] = $mediaelement->{'last_modified'}; + $item['content'] = '

' . $itemtitle . ' ' . $mediaelement->{'title'} + . '





' + . $mediaelement->{'description'} + . '

Download'; + $this->items[] = $item; + } + } + } + + private function getCountryCode() + { + switch ($this->getInput('language')) { + case 'us': + return 'en-us'; + break; + case 'de': + return 'de-de'; + break; + case 'fr': + return 'fr-fr'; + break; + case 'es': + return 'es-es'; + break; + case 'el': + return 'es-xl'; + break; + case 'it': + return 'it-it'; + break; + case 'dk': + return 'da-dk'; + break; + case 'fi': + return 'fi-fi'; + break; + case 'br': + return 'pt-br'; + break; + case 'uk': + return 'en-gb'; + break; + case 'ru': + return 'ru-ru'; + break; + case 'nl': + return 'nl-nl'; + break; + case 'no': + return 'nb-no'; + break; + case 'se': + return 'sv-se'; + break; + } + } + + public function getIcon() + { + return 'https://assets.pokemon.com/static2/_ui/img/favicon.ico'; + } +} diff --git a/bridges/PornhubBridge.php b/bridges/PornhubBridge.php new file mode 100644 index 00000000000..788ef58a048 --- /dev/null +++ b/bridges/PornhubBridge.php @@ -0,0 +1,112 @@ + [ + 'name' => 'User name', + 'exampleValue' => 'asa-akira', + 'required' => true, + ], + 'type' => [ + 'name' => 'User type', + 'type' => 'list', + 'values' => [ + 'user' => 'users', + 'model' => 'model', + 'pornstar' => 'pornstar', + ], + 'defaultValue' => 'pornstar', + ], + 'sort' => [ + 'name' => 'Sort by', + 'type' => 'list', + 'values' => [ + 'Most recent' => '?', + 'Most views' => '?o=mv', + 'Top rated' => '?o=tr', + 'Longest' => '?o=lg', + ], + 'defaultValue' => '?', + ], + 'show_images' => [ + 'name' => 'Show thumbnails', + 'type' => 'checkbox', + ], + ]]; + + public function getName() + { + if (!is_null($this->getInput('type')) && !is_null($this->getInput('q'))) { + return 'PornHub ' . $this->getInput('type') . ':' . $this->getInput('q'); + } + + return parent::getName(); + } + + public function collectData() + { + $uri = 'https://www.pornhub.com/' . $this->getInput('type') . '/'; + switch ($this->getInput('type')) { // select proper permalink format per user type... 
+ case 'model': + $uri .= urlencode($this->getInput('q')) . '/videos' . $this->getInput('sort'); + break; + case 'users': + $uri .= urlencode($this->getInput('q')) . '/videos/public' . $this->getInput('sort'); + break; + case 'pornstar': + $uri .= urlencode($this->getInput('q')) . '/videos/upload' . $this->getInput('sort'); + break; + } + + $show_images = $this->getInput('show_images'); + + $html = getSimpleHTMLDOM($uri, [ + 'cookie: accessAgeDisclaimerPH=1' + ]); + + foreach ($html->find('div.videoUList ul.videos li.videoblock') as $element) { + $item = []; + + $item['author'] = $this->getInput('q'); + + // Title + $title = $element->find('a', 0)->getAttribute('title'); + if (is_null($title)) { + continue; + } + $item['title'] = $title; + + // Url + $url = $element->find('a', 0)->href; + $item['uri'] = 'https://www.pornhub.com' . $url; + + // Duration + $marker = $element->find('div.marker-overlays var', 0); + $duration = $marker->innertext ?? ''; + + // Content + $videoImage = $element->find('img', 0); + $image = $videoImage->getAttribute('data-src') ?: $videoImage->getAttribute('src'); + if ($show_images === true) { + $item['content'] = sprintf('
%s', $item['uri'], $image, $duration); + } + + $uploaded = explode('/', $image); + if (isset($uploaded[4])) { + // date hack, guess upload YYYYMMDD from thumbnail URL (format: https://ci.phncdn.com/videos/201907/25/--- ) + $uploadTimestamp = strtotime($uploaded[4] . $uploaded[5]); + $item['timestamp'] = $uploadTimestamp; + } else { + // The thumbnail url did not have a date in it for some unknown reason + } + $this->items[] = $item; + } + } +} diff --git a/bridges/PresidenciaPTBridge.php b/bridges/PresidenciaPTBridge.php new file mode 100644 index 00000000000..2f55f26278e --- /dev/null +++ b/bridges/PresidenciaPTBridge.php @@ -0,0 +1,87 @@ + [ + '/atualidade/noticias' => [ + 'name' => 'Notícias', + 'type' => 'checkbox', + 'defaultValue' => 'checked', + ], + '/atualidade/mensagens' => [ + 'name' => 'Mensagens', + 'type' => 'checkbox', + 'defaultValue' => 'checked', + ], + '/atualidade/atividade-legislativa' => [ + 'name' => 'Atividade Legislativa', + 'type' => 'checkbox', + 'defaultValue' => 'checked', + ], + '/atualidade/notas-informativas' => [ + 'name' => 'Notas Informativas', + 'type' => 'checkbox', + 'defaultValue' => 'checked', + ] + ] + ]; + + const PT_MONTH_NAMES = [ + 'janeiro', + 'fevereiro', + 'março', + 'abril', + 'maio', + 'junho', + 'julho', + 'agosto', + 'setembro', + 'outubro', + 'novembro', + 'dezembro']; + + public function getIcon() + { + return 'https://www.presidencia.pt/Theme/favicon/apple-touch-icon.png'; + } + + public function collectData() + { + $contexts = $this->getParameters(); + + foreach (array_keys($contexts['Section']) as $k) { + if ($this->getInput($k)) { + $html = getSimpleHTMLDOMCached($this->getURI() . $k); + + foreach ($html->find('#atualidade-list article.card-block') as $element) { + $item = []; + + $link = $element->find('a', 0); + $etitle = $element->find('.article-title', 0); + $edts = $element->find('.date', 0); + $edt = $edts->innertext; + + $item['title'] = strip_tags($etitle->innertext); + $item['uri'] = self::URI . 
$link->href; + $item['description'] = $element; + $item['timestamp'] = str_ireplace( + array_map(function ($name) { + return ' de ' . $name . ' de '; + }, self::PT_MONTH_NAMES), + array_map(function ($num) { + return sprintf('-%02d-', $num); + }, range(1, count(self::PT_MONTH_NAMES))), + $edt + ); + + $this->items[] = $item; + } + } + } + } +} diff --git a/bridges/PriviblurBridge.php b/bridges/PriviblurBridge.php new file mode 100644 index 00000000000..6b442e75034 --- /dev/null +++ b/bridges/PriviblurBridge.php @@ -0,0 +1,80 @@ + [ + 'name' => 'URL', + 'exampleValue' => 'https://priviblur.fly.dev', + 'required' => true, + ] + ] + ]; + + private $title; + private $favicon = 'https://www.tumblr.com/favicon.ico'; + + public function collectData() + { + $url = $this->getURI(); + $html = getSimpleHTMLDOM($url); + $html = defaultLinkTo($html, $url); + $this->title = $html->find('head title', 0)->innertext; + + if ($html->find('#blog-header img.avatar', 0)) { + $icon = $html->find('#blog-header img.avatar', 0)->src; + $this->favicon = str_replace('pnj', 'png', $icon); + } + + $elements = $html->find('.post'); + foreach ($elements as $element) { + $item = []; + $item['author'] = $element->find('.primary-post-author .blog-name', 0)->innertext; + $item['comments'] = $element->find('.interaction-buttons > a', 1)->href; + $item['content'] = $element->find('.post-body', 0); + $item['timestamp'] = $element->find('.primary-post-author time', 0)->innertext; + $item['title'] = $item['author'] . ': ' . 
$item['timestamp']; + $item['uid'] = $item['comments']; // tumblr url is canonical + $item['uri'] = $element->find('.interaction-buttons > a', 0)->href; + + if ($element->find('.post-tags', 0)) { + $tags = html_entity_decode($element->find('.post-tags', 0)->plaintext); + $tags = explode('#', $tags); + $tags = array_map('trim', $tags); + array_shift($tags); + $item['categories'] = $tags; + } + + $heading = $element->find('h1', 0); + if ($heading) { + $item['title'] = $heading->innertext; + } + + $this->items[] = $item; + } + } + + public function getName() + { + $name = parent::getName(); + if (isset($this->title)) { + $name = $this->title; + } + return $name; + } + + public function getURI() + { + return $this->getInput('url') ?? parent::getURI(); + } + + public function getIcon() + { + return $this->favicon; + } +} diff --git a/bridges/QPlayBridge.php b/bridges/QPlayBridge.php deleted file mode 100644 index f2043267ebc..00000000000 --- a/bridges/QPlayBridge.php +++ /dev/null @@ -1,132 +0,0 @@ - array( - 'program' => array( - 'name' => 'Program Name', - 'type' => 'text', - 'required' => true, - ), - ), - 'Catalog' => array( - 'all_pages' => array( - 'name' => 'All Pages', - 'type' => 'checkbox', - 'defaultValue' => false, - ), - ), - ); - - public function getIcon() { - # This should be the favicon served on `self::URI` - return 'https://s3.amazonaws.com/unode1/assets/4957/r3T9Lm9LTLmpAEX6FlSA_apple-touch-icon.png'; - } - - public function getURI() { - switch ($this->queriedContext) { - case 'Program': - return self::URI . '/programs/' . $this->getInput('program'); - case 'Catalog': - return self::URI . '/catalog'; - } - return parent::getURI(); - } - - public function getName() { - switch ($this->queriedContext) { - case 'Program': - $html = getSimpleHTMLDOMCached($this->getURI()) - or returnServerError('Could not load content'); - - return $html->find('h1.program--title', 0)->innertext; - case 'Catalog': - return self::NAME . 
' | Programas'; - } - - return parent::getName(); - } - - /* This uses the uscreen platform, other sites can adapt this. https://www.uscreen.tv/ */ - public function collectData() { - switch ($this->queriedContext) { - case 'Program': - $program = $this->getInput('program'); - $html = getSimpleHTMLDOMCached($this->getURI()) - or returnServerError('Could not load content'); - - foreach($html->find('.cce--thumbnails-video-chapter') as $element) { - $cid = $element->getAttribute('data-id'); - $item['title'] = $element->find('.cce--chapter-title', 0)->innertext; - $item['content'] = $element->find('.cce--thumbnails-image-block', 0) - . $element->find('.cce--chapter-body', 0)->innertext; - $item['uri'] = $this->getURI() . '?cid=' . $cid; - - /* TODO: Suport login credentials? */ - /* # Get direct video URL */ - /* $json_source = getContents(self::URI . '/chapters/' . $cid, array('Cookie: _uscreen2_session=???;')) */ - /* or returnServerError('Could not request chapter JSON'); */ - /* $json = json_decode($json_source); */ - - /* $item['enclosures'] = [$json->fallback]; */ - - $this->items[] = $item; - } - - break; - case 'Catalog': - $json_raw = getContents($this->getCatalogURI(1)) - or returnServerError('Could not load catalog content'); - - $json = json_decode($json_raw); - $total_pages = $json->total_pages; - - foreach($this->parseCatalogPage($json) as $item) { - $this->items[] = $item; - } - - if ($this->getInput('all_pages') === true) { - foreach(range(2, $total_pages) as $page) { - $json_raw = getContents($this->getCatalogURI($page)) - or returnServerError('Could not load catalog content (all pages)'); - - $json = json_decode($json_raw); - - foreach($this->parseCatalogPage($json) as $item) { - $this->items[] = $item; - } - } - } - - break; - } - } - - private function getCatalogURI($page) { - return self::URI . '/catalog.json?page=' . 
$page; - } - - private function parseCatalogPage($json) { - $items = array(); - - foreach($json->records as $record) { - $item = array(); - - $item['title'] = $record->title; - $item['content'] = $record->description - . '
Duration: ' . $record->duration . '
'; - $item['timestamp'] = strtotime($record->release_date); - $item['uri'] = self::URI . $record->url; - $item['enclosures'] = array( - $record->main_poster, - ); - - $items[] = $item; - } - - return $items; - } -} diff --git a/bridges/QnapBridge.php b/bridges/QnapBridge.php new file mode 100644 index 00000000000..cf8dbcec3cf --- /dev/null +++ b/bridges/QnapBridge.php @@ -0,0 +1,34 @@ +Use offical feed instead: https://www.qnap.com/fr-fr/security-news/feed

+Unofficial feed for security news. +DESCRIPTION; + + const MAINTAINER = 'dvikan'; + + public function collectData() + { + $thisYear = date('Y'); + $url = sprintf('https://www.qnap.com/api/v1/articles/security-news?locale=fr-fr&year=%s&page=1', $thisYear); + $response = json_decode(getContents($url)); + foreach ($response->data as $post) { + $item = []; + $item['uri'] = sprintf('https://www.qnap.com%s', $post->url); + $item['title'] = $post->title; + $item['timestamp'] = \DateTime::createFromFormat('Y-m-d', $post->date)->format('U'); + $image = sprintf('', $post->image_url); + $item['content'] = $image . '

' . $post->desc; + $this->items[] = $item; + } + usort($this->items, function ($a, $b) { + return $a['timestamp'] < $b['timestamp']; + }); + } +} diff --git a/bridges/QwantzBridge.php b/bridges/QwantzBridge.php new file mode 100644 index 00000000000..b975bd43a81 --- /dev/null +++ b/bridges/QwantzBridge.php @@ -0,0 +1,36 @@ +collectExpandableDatas(self::URI . 'rssfeed.php'); + } + + protected function parseItem(array $item) + { + $item['author'] = 'Ryan North'; + + preg_match('/title="(.*?)"/', $item['content'], $matches); + $title = $matches[1] ?? ''; + + $content = str_get_html(html_entity_decode($item['content'])); + $comicURL = $content->find('img')[0]->{'src'}; + $subject = $content->find('a')[1]->{'href'}; + $subject = urldecode(substr($subject, strpos($subject, 'subject') + 8)); + $p = (string)$content->find('P')[0]; + + $item['content'] = "{$subject}

{$title}

{$p}"; + + return $item; + } + + public function getIcon() + { + return self::URI . 'favicon.ico'; + } +} diff --git a/bridges/QwenBlogBridge.php b/bridges/QwenBlogBridge.php new file mode 100644 index 00000000000..2af3f4011aa --- /dev/null +++ b/bridges/QwenBlogBridge.php @@ -0,0 +1,49 @@ + [ + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => true, + 'defaultValue' => 10 + ], + ] + ]; + + public function collectData() + { + $this->collectExpandableDatas(self::URI . 'index.xml', $this->getInput('limit')); + } + + protected function parseItem(array $item) + { + $dom = getSimpleHTMLDOM($item['uri']); + $content = $dom->find('div.post-content', 0); + if ($content == null) { + return $item; + } + + // Fix code blocks + foreach ($dom->find('pre.chroma') as $code_block) { + // Somehow there are tags in
??
+            $code_block_html = str_get_html($code_block->plaintext);
+            $code = '';
+            foreach ($code_block_html->find('span.line') as $line) {
+                $code .= $line->plaintext . "\n";
+            }
+            $code_block->outertext = '
' . $code . '
'; + } + + $item['content'] = $content; + return $item; + } +} diff --git a/bridges/QwerteeBridge.php b/bridges/QwerteeBridge.php new file mode 100644 index 00000000000..937fa98d7db --- /dev/null +++ b/bridges/QwerteeBridge.php @@ -0,0 +1,34 @@ +find('div.big-slides', 0)->find('div.big-slide') as $element) { + $title = $element->find('div.index-tee', 0)->getAttribute('data-name', 0); + $today = date('m/d/Y'); + $item = []; + $item['uri'] = self::URI; + $item['title'] = $title; + $item['uid'] = $title; + $item['timestamp'] = $today; + $item['content'] = ''; + + $this->items[] = $item; + } + } +} diff --git a/bridges/RTBFBridge.php b/bridges/RTBFBridge.php deleted file mode 100644 index 0f0acdc897b..00000000000 --- a/bridges/RTBFBridge.php +++ /dev/null @@ -1,66 +0,0 @@ - array( - 'name' => 'series id', - 'exampleValue' => 9500, - 'required' => true - ) - )); - - public function collectData(){ - $html = ''; - $limit = 10; - $count = 0; - - $html = getSimpleHTMLDOM($this->getURI()) - or returnServerError('Could not request RTBF.'); - - foreach($html->find('section[id!=widget-ml-avoiraussi-] .rtbf-media-grid article') as $element) { - if($count >= $limit) { - break; - } - - $item = array(); - $item['id'] = $element->getAttribute('data-id'); - $item['uri'] = self::URI . 'detail?id=' . $item['id']; - $thumbnailUriSrcSet = explode( - ',', - $element->find('figure .www-img-16by9 img', 0)->getAttribute('data-srcset') - ); - - $thumbnailUriLastSrc = end($thumbnailUriSrcSet); - $thumbnailUri = explode(' ', $thumbnailUriLastSrc)[0]; - $item['title'] = trim($element->find('h3', 0)->plaintext) - . ' - ' - . trim($element->find('h4', 0)->plaintext); - - $item['timestamp'] = strtotime($element->find('time', 0)->getAttribute('datetime')); - $item['content'] = ''; - $this->items[] = $item; - $count++; - } - } - - public function getURI(){ - if(!is_null($this->getInput('c'))) { - return self::URI . 'emissions/detail?id=' . $this->getInput('c'); - } - - return parent::getURI() . 
'emissions/'; - } - - public function getName(){ - if(!is_null($this->getInput('c'))) { - return $this->getInput('c') . ' - RTBF Bridge'; - } - - return parent::getName(); - } -} diff --git a/bridges/RadioFranceBridge.php b/bridges/RadioFranceBridge.php new file mode 100644 index 00000000000..b10e5151093 --- /dev/null +++ b/bridges/RadioFranceBridge.php @@ -0,0 +1,207 @@ + [ + 'name' => 'Domain to use', + 'required' => true, + 'defaultValue' => self::DEFAULT_DOMAIN + ], + 'page' => [ + 'name' => 'Initial page to load', + 'required' => true, + 'exampleValue' => 'franceinter/podcasts/burne-out' + ] + ]]; + + private function getDomain() + { + $domain = $this->getInput('domain'); + if (empty($domain)) { + $domain = self::DEFAULT_DOMAIN; + } + if (strpos($domain, '://') === false) { + $domain = 'https://' . $domain; + } + return $domain; + } + + public function getURI() + { + return $this->getDomain() . '/' . $this->getInput('page'); + } + + public function collectData() + { + $html = getSimpleHTMLDOM($this->getURI()); + + // An array of dom nodes + $documentsList = $html->find('.DocumentsList', 0); + $documentsListWrapper = $documentsList->find('.DocumentsList-wrapper', 0); + $cardList = $documentsListWrapper->find('.CardMedia'); + + foreach ($cardList as $card) { + $item = []; + $title_link = $card->find('.ConceptTitle a', 0); + $item['title'] = $title_link->plaintext; + $uri = $title_link->getAttribute('href', 0); + switch (substr($uri, 0, 1)) { + case 'h': // absolute uri + $item['uri'] = $uri; + break; + case '/': // domain relative uri + $item['uri'] = $this->getDomain() . $uri; + break; + default: + $item['uri'] = $this->getDomain() . '/' . $uri; + } + // Finally, obtain the mp3 from some weird Radio France API (url obtained by reading network calls, no less) + $media_url = self::APIENDPOINT . '?value=' . 
$uri; + $rawJSON = getSimpleHTMLDOMCached($media_url); + $processedJSON = json_decode($rawJSON); + $model_content = $processedJSON->content; + if (empty($model_content->manifestations)) { + error_log("Seems like $uri has no manifestation"); + } else { + $item['enclosures'] = [ $model_content->manifestations[0]->url ]; + + $item['content'] = ''; + if (isset($model_content->visual)) { + $item['content'] .= "visual->src}\" + alt=\"{$model_content->visual->legend}\" + style=\"float:left; width:400px; margin: 1em;\"/>"; + } + if (isset($model_content->standFirst)) { + $item['content'] .= $model_content->standFirst; + } + if (isset($model_content->bodyJson)) { + if (!empty($item['content'])) { + $item['content'] .= '
'; + } + $pseudo_html_array = array_map([$this, 'convertJsonElementToHTML'], $model_content->bodyJson); + $pseudo_html_text = array_reduce( + $pseudo_html_array, + function ($text, $element) { + return $text . "\n" . $element; + }, + '' + ); + $item['content'] .= $pseudo_html_text; + } + if (isset($model_content->producers)) { + $item['author'] = $this->readAuthorsNamesFrom($model_content->producers); + } elseif (isset($model_content->staff)) { + $item['author'] = $this->readAuthorsNamesFrom($model_content->staff); + } + $time = $card->find('time', 0); + $timevalue = $time->getAttribute('datetime'); + $item['timestamp'] = strtotime($timevalue); + + $this->items[] = $item; + } + } + } + + private function readAuthorsNamesFrom($persons_array) + { + $persons_names = array_map(function ($person_element) { + return $person_element->name; + }, $persons_array); + return array_reduce($persons_names, function ($a, $b) { + if (!empty($a)) { + $a .= ', '; + } + return $a . $b; + }, ''); + } + + private function convertJsonElementToHTML($jsonElement) + { + $childText = isset($jsonElement->children) ? $this->convertJsonChildrenToHTML($jsonElement->children) : ''; + $valueText = isset($jsonElement->value) ? $jsonElement->value : ''; + switch ($jsonElement->type) { + case 'text': + return "{$childText}{$valueText}"; + case 'heading': + $level = $jsonElement->level; + return "{$childText}{$valueText}"; + case 'list': + $tag = 'ul'; + if (isset($jsonElement->ordered)) { + if ($jsonElement->ordered) { + $tag = 'ol'; + } + } + return "<$tag>\n" . $childText . "\n"; + case 'list_item': + return "
  • {$childText}{$valueText}
  • \n"; + case 'bounce': + return ''; + case 'paragraph': + return "

    {$childText}{$valueText}

    \n"; + case 'quote': + return "
    {$childText}{$valueText}
    \n"; + case 'link': + return "data->href}\">{$childText}{$valueText}\n"; + case 'audio': + return ''; + case 'embed': + return $jsonElement->data->html; + default: + return $jsonElement->value; + } + } + + private function convertJsonChildrenToHTML($children) + { + $converted = array_map([$this, 'convertJsonElementToHTML'], $children); + return array_reduce($converted, function ($a, $b) { + return $a . $b; + }, ''); + } + + private function removeAds($element) + { + $ads = $element->find('AdSlot'); + foreach ($ads as $ad) { + $ad->remove(); + } + return $element; + } + + /** + * Replaces all relative URIs with absolute ones + * @param $element A simplehtmldom element + * @return The $element->innertext with all URIs replaced + */ + private function replaceUriInHtmlElement($element) + { + $returned = $element->innertext; + foreach (self::REPLACED_ATTRIBUTES as $initial => $final) { + $returned = str_replace($initial . '="/', $final . '="' . self::URI . '/', $returned); + } + return $returned; + } +} diff --git a/bridges/RadioMelodieBridge.php b/bridges/RadioMelodieBridge.php index fb5aca6e226..917b6ef2120 100644 --- a/bridges/RadioMelodieBridge.php +++ b/bridges/RadioMelodieBridge.php @@ -1,87 +1,206 @@ find('div[class=displayList]', 0)->children(); - foreach($list as $element) { - if($element->tag == 'a') { - $articleURL = self::URI . 
$element->href; - $article = getSimpleHTMLDOM($articleURL); - $textDOM = $article->find('article', 0); - - // Initialise arrays - $item = array(); - $audio = array(); - $picture = array(); - - // Get the Main picture URL - $picture[] = $this->rewriteImage($article->find('div[id=pictureTitleSupport]', 0)->find('img', 0)->src); - $audioHTML = $article->find('audio'); - - // Add the audio element to the enclosure - foreach($audioHTML as $audioElement) { - $audioURL = $audioElement->src; - $audio[] = $audioURL; - } - - // Rewrite pictures URL - $imgs = $textDOM->find('img[src^="http://www.radiomelodie.com/image.php]'); - foreach($imgs as $img) { - $img->src = $this->rewriteImage($img->src); - $article->save(); - } - - // Remove Google Ads - $ads = $article->find('div[class=adInline]'); - foreach($ads as $ad) { - $ad->outertext = ''; - $article->save(); - } - - // Remove Radio Melodie Logo - $logoHTML = $article->find('div[id=logoArticleRM]', 0); - $logoHTML->outertext = ''; - $article->save(); - - $author = $article->find('p[class=AuthorName]', 0)->plaintext; - - $item['enclosures'] = array_merge($picture, $audio); - $item['author'] = $author; - $item['uri'] = $articleURL; - $item['title'] = $article->find('meta[property=og:title]', 0)->content; - $date = $article->find('p[class*=date]', 0)->plaintext; - - // Header Image - $header = ''; - - // Remove the Date and Author part - $textDOM->find('div[class=AuthorDate]', 0)->outertext = ''; - $article->save(); - $text = $textDOM->innertext; - $item['content'] = '

    ' . $item['title'] . '

    ' . $date . '
    ' . $header . $text; - $this->items[] = $item; - } - } - } - - /* - * Function to rewrite image URL to use the real Image URL and not the resized one (which is very slow) - */ - private function rewriteImage($url) - { - $parts = explode('?', $url); - parse_str(html_entity_decode($parts[1]), $params); - return self::URI . '/' . $params['image']; - - } + +class RadioMelodieBridge extends BridgeAbstract +{ + const NAME = 'Radio Melodie Actu'; + const URI = 'https://www.radiomelodie.com'; + const DESCRIPTION = 'Retourne les actualités publiées par Radio Melodie'; + const MAINTAINER = 'sysadminstory'; + + public function getIcon() + { + return self::URI . '/img/favicon.png'; + } + + public function collectData() + { + $html = getSimpleHTMLDOM(self::URI . '/actu/'); + $list = $html->find('div[class=listArticles]', 0)->children(); + + foreach ($list as $element) { + if ($element->tag == 'a') { + $articleURL = self::URI . $element->href; + $article = getSimpleHTMLDOM($articleURL); + $this->rewriteAudioPlayers($article); + // Reload the modified content + $article = str_get_html($article->save()); + $textDOM = $article->find('article', 0); + + // Remove HTML code for the article title + $textDOM->find('h1', 0)->outertext = ''; + + // Fix the CSS for the author + $textDOM->find('div[class=author]', 0)->find('img', 0) + ->setAttribute('style', 'width: 60px; margin: 0 15px; display: inline-block; vertical-align: top;'); + + + // Initialise arrays + $item = []; + $audio = []; + $picture = []; + + // Get the Main picture URL + $picture[] = self::URI . 
$article->find('figure[class*=photoviewer]', 0)->find('img', 0)->src; + $audioHTML = $article->find('audio'); + + // Add the audio element to the enclosure + foreach ($audioHTML as $audioElement) { + $audioURL = $audioElement->src; + $audio[] = $audioURL; + } + + // Rewrite pictures URL + $imgs = $textDOM->find('img[src^="http://www.radiomelodie.com/image.php]'); + foreach ($imgs as $img) { + $img->src = $this->rewriteImage($img->src); + $article->save(); + } + + // Remove Google Ads + $ads = $article->find('div[class=adInline]'); + foreach ($ads as $ad) { + $ad->outertext = ''; + $article->save(); + } + + // Extract the author + $author = $article->find('div[class=author]', 0)->children(1)->children(0)->plaintext; + + // Handle date to timestamp + $dateHTML = $article->find('div[class=author]', 0)->children(1)->plaintext; + + preg_match('/([a-z]{4,10}[ ]{1,2}[0-9]{1,2} [\p{L}]{3,10} [0-9]{4} à [0-9]{2}:[0-9]{2})/mus', $dateHTML, $matches); + $dateText = $matches[1]; + + $timestamp = $this->parseDate($dateText); + + $item['enclosures'] = array_merge($picture, $audio); + $item['author'] = $author; + $item['uri'] = $articleURL; + $item['title'] = $article->find('meta[property=og:title]', 0)->content; + if ($timestamp !== false) { + $item['timestamp'] = $timestamp; + } + + // Remove the share article part + $textDOM->find('div[class=share]', 0)->outertext = ''; + $textDOM->find('div[class=share]', 1)->outertext = ''; + + // Rewrite relative Links + $textDOM = defaultLinkTo($textDOM, self::URI . '/'); + + $article->save(); + $text = $textDOM->innertext; + $item['content'] = '

    ' . $item['title'] . '

    ' . $dateText . '
    ' . $text; + $this->items[] = $item; + } + } + } + + /* + * Function to rewrite image URL to use the real Image URL and not the resized one (which is very slow) + */ + private function rewriteImage($url) + { + $parts = explode('?', $url); + parse_str(html_entity_decode($parts[1]), $params); + return self::URI . '/' . $params['image']; + } + + /* + * Function to rewrite Audio Players to use the