diff --git a/Dockerfile b/Dockerfile index f504b51f138..2f1f4f3d93a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,7 +38,7 @@ ENV CURL_IMPERSONATE ff91esr COPY ./config/nginx.conf /etc/nginx/sites-available/default COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf -COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.conf +COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini COPY --chown=www-data:www-data ./ /app/ diff --git a/README.md b/README.md index 7037095ec19..34efc8de3e7 100644 --- a/README.md +++ b/README.md @@ -2,16 +2,18 @@ ![RSS-Bridge](static/logo_600px.png) -RSS-Bridge is a web application. +RSS-Bridge is a PHP web application. It generates web feeds for websites that don't have one. Officially hosted instance: https://rss-bridge.org/bridge01/ +IRC channel #rssbridge at https://libera.chat/ + + [![LICENSE](https://img.shields.io/badge/license-UNLICENSE-blue.svg)](UNLICENSE) [![GitHub release](https://img.shields.io/github/release/rss-bridge/rss-bridge.svg?logo=github)](https://github.com/rss-bridge/rss-bridge/releases/latest) [![irc.libera.chat](https://img.shields.io/badge/irc.libera.chat-%23rssbridge-blue.svg)](https://web.libera.chat/#rssbridge) -[![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/#rssbridge:libera.chat) [![Actions Status](https://img.shields.io/github/actions/workflow/status/RSS-Bridge/rss-bridge/tests.yml?branch=master&label=GitHub%20Actions&logo=github)](https://github.com/RSS-Bridge/rss-bridge/actions) ||| @@ -49,54 +51,147 @@ Check out RSS-Bridge right now on https://rss-bridge.org/bridge01/ Alternatively find another [public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html). +Requires minimum PHP 7.4. + ## Tutorial -### Install with composer or git +### How to install on traditional shared web hosting -Requires minimum PHP 7.4. +RSS-Bridge can basically be unzipped in a web folder. Should be working instantly. 
-```shell -apt install nginx php-fpm php-mbstring php-simplexml php-curl -``` +Latest zip as of Sep 2023: https://github.com/RSS-Bridge/rss-bridge/archive/refs/tags/2023-09-24.zip -```shell -cd /var/www -composer create-project -v --no-dev rss-bridge/rss-bridge -``` +### How to install on Debian 12 (nginx + php-fpm) + +These instructions have been tested on a fresh Debian 12 VM from Digital Ocean (1vcpu-512mb-10gb, 5 USD/month). ```shell +timedatectl set-timezone Europe/Oslo + +apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl + +# Create a new user account +useradd --shell /bin/bash --create-home rss-bridge + cd /var/www -git clone https://github.com/RSS-Bridge/rss-bridge.git -``` -Config: +# Create folder and change ownership +mkdir rss-bridge && chown rss-bridge:rss-bridge rss-bridge/ -```shell -# Give the http user write permission to the cache folder -chown www-data:www-data /var/www/rss-bridge/cache +# Become user +su rss-bridge + +# Fetch latest master +git clone https://github.com/RSS-Bridge/rss-bridge.git rss-bridge/ +cd rss-bridge + +# Copy over the default config +cp -v config.default.ini.php config.ini.php -# Optionally copy over the default config file -cp config.default.ini.php config.ini.php +# Give full permissions only to owner (rss-bridge) +chmod 700 -R ./ + +# Give read and execute to others (nginx and php-fpm) +chmod o+rx ./ ./static + +# Give read to others (nginx) +chmod o+r -R ./static ``` -Example config for nginx: +Nginx config: ```nginx -# /etc/nginx/sites-enabled/rssbridge +# /etc/nginx/sites-enabled/rss-bridge.conf + server { listen 80; server_name example.com; - root /var/www/rss-bridge; - index index.php; + access_log /var/log/nginx/rss-bridge.access.log; + error_log /var/log/nginx/rss-bridge.error.log; + + # Intentionally not setting a root folder here + + # autoindex is off by default but feels good to explicitly turn off + autoindex off; - location ~ \.php$ { + # Static content only served here + location /static/ 
{ + alias /var/www/rss-bridge/static/; + } + + # Pass off to php-fpm only when location is exactly / + location = / { + root /var/www/rss-bridge/; include snippets/fastcgi-php.conf; - fastcgi_read_timeout 60s; - fastcgi_pass unix:/run/php/php-fpm.sock; + fastcgi_pass unix:/run/php/rss-bridge.sock; + } + + # Reduce spam + location = /favicon.ico { + access_log off; + log_not_found off; + } + + # Reduce spam + location = /robots.txt { + access_log off; + log_not_found off; } } ``` +PHP FPM pool config: +```ini +; /etc/php/8.2/fpm/pool.d/rss-bridge.conf + +[rss-bridge] + +user = rss-bridge +group = rss-bridge + +listen = /run/php/rss-bridge.sock + +listen.owner = www-data +listen.group = www-data + +pm = static +pm.max_children = 10 +pm.max_requests = 500 +``` + +PHP ini config: +```ini +; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini + +max_execution_time = 20 +memory_limit = 64M +``` + +Restart fpm and nginx: + +```shell +# Lint and restart php-fpm +php-fpm8.2 -t +systemctl restart php8.2-fpm + +# Lint and restart nginx +nginx -t +systemctl restart nginx +``` + +### How to install from Composer + +Install the latest release. + +```shell +cd /var/www +composer create-project -v --no-dev rss-bridge/rss-bridge +``` + +### How to install with Caddy + +TODO. See https://github.com/RSS-Bridge/rss-bridge/issues/3785 + ### Install from Docker Hub: Install by downloading the docker image from Docker Hub: @@ -154,6 +249,7 @@ Browse http://localhost:3000/ [![Deploy on Scalingo](https://cdn.scalingo.com/deploy/button.svg)](https://my.scalingo.com/deploy?source=https://github.com/sebsauvage/rss-bridge) [![Deploy to Heroku](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy) [![Deploy to Cloudron](https://cloudron.io/img/button.svg)](https://www.cloudron.io/store/com.rssbridgeapp.cloudronapp.html) +[![Run on PikaPods](https://www.pikapods.com/static/run-button.svg)](https://www.pikapods.com/pods?run=rssbridge) The Heroku quick deploy currently does not work. 
It might possibly work if you fork this repo and modify the `repository` in `scalingo.json`. See https://github.com/RSS-Bridge/rss-bridge/issues/2688 @@ -163,6 +259,22 @@ Learn more in ## How-to +### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable" + +```shell +# Give rss-bridge ownership +chown rss-bridge:rss-bridge -R /var/www/rss-bridge/cache + +# Or, give www-data ownership +chown www-data:www-data -R /var/www/rss-bridge/cache + +# Or, give everyone write permission +chmod 777 -R /var/www/rss-bridge/cache + +# Or last ditch effort (CAREFUL) +rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/ +``` + ### How to create a new bridge from scratch Create the new bridge in e.g. `bridges/BearBlogBridge.php`: diff --git a/actions/DisplayAction.php b/actions/DisplayAction.php index 6f73ccc6b74..080da52ea59 100644 --- a/actions/DisplayAction.php +++ b/actions/DisplayAction.php @@ -19,6 +19,7 @@ public function execute(array $request) 'message' => 'RSS-Bridge is down for maintenance.', ]), 503); } + $cacheKey = 'http_' . 
json_encode($request); /** @var Response $cachedResponse */ $cachedResponse = $this->cache->get($cacheKey); @@ -80,16 +81,19 @@ public function execute(array $request) $this->cache->set($cacheKey, $response, $ttl); } - if (in_array($response->getCode(), [429, 503])) { - $this->cache->set($cacheKey, $response, 60 * 15 + rand(1, 60 * 10)); // average 20m + if (in_array($response->getCode(), [403, 429, 503])) { + // Cache these responses for about ~20 mins on average + $this->cache->set($cacheKey, $response, 60 * 15 + rand(1, 60 * 10)); } if ($response->getCode() === 500) { $this->cache->set($cacheKey, $response, 60 * 15); } + if (rand(1, 100) === 2) { $this->cache->prune(); } + return $response; } @@ -182,6 +186,7 @@ private function createFeedItemFromException($e, BridgeAbstract $bridge): FeedIt private function logBridgeError($bridgeName, $code) { + // todo: it's not really necessary to json encode $report $cacheKey = 'error_reporting_' . $bridgeName . '_' . $code; $report = $this->cache->get($cacheKey); if ($report) { diff --git a/actions/FindfeedAction.php b/actions/FindfeedAction.php index 25fe4714f8c..fe5ceef99f1 100644 --- a/actions/FindfeedAction.php +++ b/actions/FindfeedAction.php @@ -56,7 +56,7 @@ public function execute(array $request) $bridgeParams['bridge'] = $bridgeClassName; $bridgeParams['format'] = $format; $content = [ - 'url' => get_home_page_url() . '?action=display&' . http_build_query($bridgeParams), + 'url' => './?action=display&' . http_build_query($bridgeParams), 'bridgeParams' => $bridgeParams, 'bridgeData' => $bridgeData, 'bridgeMeta' => [ diff --git a/actions/FrontpageAction.php b/actions/FrontpageAction.php index 64281b1e9e2..ad48927d731 100644 --- a/actions/FrontpageAction.php +++ b/actions/FrontpageAction.php @@ -31,6 +31,7 @@ public function execute(array $request) } } + // todo: cache this renderered template return render(__DIR__ . 
'/../templates/frontpage.html.php', [ 'messages' => $messages, 'admin_email' => Configuration::getConfig('admin', 'email'), diff --git a/bridges/ARDAudiothekBridge.php b/bridges/ARDAudiothekBridge.php index 2c1958f3d0e..02b6b00778d 100644 --- a/bridges/ARDAudiothekBridge.php +++ b/bridges/ARDAudiothekBridge.php @@ -63,11 +63,13 @@ class ARDAudiothekBridge extends BridgeAbstract public function collectData() { - $oldTz = date_default_timezone_get(); + $path = $this->getInput('path'); + $limit = $this->getInput('limit'); + $oldTz = date_default_timezone_get(); date_default_timezone_set('Europe/Berlin'); - $pathComponents = explode('/', $this->getInput('path')); + $pathComponents = explode('/', $path); if (empty($pathComponents)) { returnClientError('Path may not be empty'); } @@ -82,17 +84,21 @@ public function collectData() } $url = self::APIENDPOINT . 'programsets/' . $showID . '/'; - $rawJSON = getContents($url); - $processedJSON = json_decode($rawJSON)->data->programSet; + $json1 = getContents($url); + $data1 = Json::decode($json1, false); + $processedJSON = $data1->data->programSet; + if (!$processedJSON) { + throw new \Exception('Unable to find show id: ' . $showID); + } - $limit = $this->getInput('limit'); $answerLength = 1; $offset = 0; $numberOfElements = 1; while ($answerLength != 0 && $offset < $numberOfElements && (is_null($limit) || $offset < $limit)) { - $rawJSON = getContents($url . '?offset=' . $offset); - $processedJSON = json_decode($rawJSON)->data->programSet; + $json2 = getContents($url . '?offset=' . 
$offset); + $data2 = Json::decode($json2, false); + $processedJSON = $data2->data->programSet; $answerLength = count($processedJSON->items->nodes); $offset = $offset + $answerLength; @@ -119,6 +125,10 @@ public function collectData() $item['categories'] = [$category]; } + $item['itunes'] = [ + 'duration' => $audio->duration, + ]; + $this->items[] = $item; } } diff --git a/bridges/ArsTechnicaBridge.php b/bridges/ArsTechnicaBridge.php index 5b3283b519b..2c631871caf 100644 --- a/bridges/ArsTechnicaBridge.php +++ b/bridges/ArsTechnicaBridge.php @@ -35,32 +35,34 @@ public function collectData() protected function parseItem(array $item) { - $item_html = getSimpleHTMLDOMCached($item['uri'] . '&'); + $item_html = getSimpleHTMLDOMCached($item['uri']); $item_html = defaultLinkTo($item_html, self::URI); - $item['content'] = $item_html->find('.amp-wp-article-content', 0); + $item['content'] = $item_html->find('.article-content', 0); + + $pages = $item_html->find('nav.page-numbers > .numbers > a', -2); + if (null !== $pages) { + for ($i = 2; $i <= $pages->innertext; $i++) { + $page_url = $item['uri'] . '&page=' . 
$i; + $page_html = getSimpleHTMLDOMCached($page_url); + $page_html = defaultLinkTo($page_html, self::URI); + $item['content'] .= $page_html->find('.article-content', 0); + } + $item['content'] = str_get_html($item['content']); + } // remove various ars advertising $item['content']->find('#social-left', 0)->remove(); foreach ($item['content']->find('.ars-component-buy-box') as $ad) { $ad->remove(); } - foreach ($item['content']->find('i-amphtml-sizer') as $ad) { + foreach ($item['content']->find('.ad_wrapper') as $ad) { $ad->remove(); } foreach ($item['content']->find('.sidebar') as $ad) { $ad->remove(); } - foreach ($item['content']->find('a') as $link) { //remove amp redirect links - $url = $link->getAttribute('href'); - if (str_contains($url, 'go.redirectingat.com')) { - $url = extractFromDelimiters($url, 'url=', '&'); - $url = urldecode($url); - $link->setAttribute('href', $url); - } - } - - $item['content'] = backgroundToImg(str_replace('data-amp-original-style="background-image', 'style="background-image', $item['content'])); + $item['content'] = backgroundToImg($item['content']); $item['uid'] = explode('=', $item['uri'])[1]; diff --git a/bridges/Arte7Bridge.php b/bridges/Arte7Bridge.php index 239fc6ad838..5898e881d49 100644 --- a/bridges/Arte7Bridge.php +++ b/bridges/Arte7Bridge.php @@ -156,6 +156,10 @@ public function collectData() . $element['mainImage']['url'] . 
'" />'; + $item['itunes'] = [ + 'duration' => $durationSeconds, + ]; + $this->items[] = $item; } } diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php index 34442abda8f..4a63c84773c 100644 --- a/bridges/CNETBridge.php +++ b/bridges/CNETBridge.php @@ -1,6 +1,6 @@ 'list', 'values' => [ 'All articles' => '', - 'Apple' => 'apple', - 'Google' => 'google', - 'Microsoft' => 'tags-microsoft', - 'Computers' => 'topics-computers', - 'Mobile' => 'topics-mobile', - 'Sci-Tech' => 'topics-sci-tech', - 'Security' => 'topics-security', - 'Internet' => 'topics-internet', - 'Tech Industry' => 'topics-tech-industry' + 'Tech' => 'tech', + 'Money' => 'personal-finance', + 'Home' => 'home', + 'Wellness' => 'health', + 'Energy' => 'home/energy-and-utilities', + 'Deals' => 'deals', + 'Computing' => 'tech/computing', + 'Mobile' => 'tech/mobile', + 'Science' => 'science', + 'Services' => 'tech/services-and-software' ] - ] + ], + 'limit' => self::LIMIT ] ]; - private function cleanArticle($article_html) - { - $offset_p = strpos($article_html, '

'); - $offset_figure = strpos($article_html, '', '', $article_html); - $article_html = str_replace('', '', $article_html); - $article_html = StripWithDelimiters($article_html, ''); - $article_html = stripWithDelimiters($article_html, 'innertext, 'ImageObject","url":"', '"'); + if ($imageObject !== false) { + $enclosure = $imageObject; + } + } - if (is_null($article_thumbnail)) { - $article_thumbnail = extractFromDelimiters($element->innertext, 'find('div.c-shortcodeGallery') as $cleanup) { + $cleanup->outertext = ''; } - if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, self::URI . 'news/') !== false) { - $article_html = getSimpleHTMLDOMCached($article_uri) or $article_html = null; - - if (!is_null($article_html)) { - if (empty($article_thumbnail)) { - $article_thumbnail = $article_html->find('div.originalImage', 0); - } - if (empty($article_thumbnail)) { - $article_thumbnail = $article_html->find('span.imageContainer', 0); - } - if (is_object($article_thumbnail)) { - $article_thumbnail = $article_thumbnail->find('img', 0)->src; - } - - $article_content .= trim( - $this->cleanArticle( - extractFromDelimiters( - $article_html, - 'find('figure') as $figure) { + $img = $figure->find('img', 0); + if ($img) { + $figure->outertext = $img->outertext; } + } + + $content = $content->innertext; + + if ($enclosure) { + $content = "

" . $content; + } + + if ($headline) { + $content = '

' . $headline->plaintext . '


' . $content; + } + + $item = []; + $item['uri'] = $article_uri; + $item['title'] = $title; + $item['author'] = $author; + $item['content'] = $content; - $item = []; - $item['uri'] = $article_uri; - $item['title'] = $article_title; - $item['author'] = $article_author; - $item['timestamp'] = $article_timestamp; - $item['enclosures'] = [$article_thumbnail]; - $item['content'] = $article_content; - $this->items[] = $item; + if (!is_null($date)) { + $item['timestamp'] = $date; } + + if (!is_null($enclosure)) { + $item['enclosures'] = [$enclosure]; + } + + $this->items[] = $item; } } } diff --git a/bridges/CarThrottleBridge.php b/bridges/CarThrottleBridge.php index 913b686caec..70d7b54e140 100644 --- a/bridges/CarThrottleBridge.php +++ b/bridges/CarThrottleBridge.php @@ -9,8 +9,7 @@ class CarThrottleBridge extends BridgeAbstract public function collectData() { - $news = getSimpleHTMLDOMCached(self::URI . 'news') - or returnServerError('could not retrieve page'); + $news = getSimpleHTMLDOMCached(self::URI . 'news'); $this->items[] = []; @@ -22,8 +21,7 @@ public function collectData() $item['uri'] = self::URI . 
$titleElement->getAttribute('href'); $item['title'] = $titleElement->innertext; - $articlePage = getSimpleHTMLDOMCached($item['uri']) - or returnServerError('could not retrieve page'); + $articlePage = getSimpleHTMLDOMCached($item['uri']); $authorDiv = $articlePage->find('div.author div'); if ($authorDiv) { diff --git a/bridges/CeskaTelevizeBridge.php b/bridges/CeskaTelevizeBridge.php index 003cd4c76f0..be00d6640e7 100644 --- a/bridges/CeskaTelevizeBridge.php +++ b/bridges/CeskaTelevizeBridge.php @@ -57,9 +57,9 @@ public function collectData() $this->feedName .= " ({$category})"; } - foreach ($html->find('#episodeListSection a[data-testid=next-link]') as $element) { + foreach ($html->find('#episodeListSection a[data-testid=card]') as $element) { $itemTitle = $element->find('h3', 0); - $itemContent = $element->find('div[class^=content-]', 0); + $itemContent = $element->find('p[class^=content-]', 0); $itemDate = $element->find('div[class^=playTime-] span', 0); $itemThumbnail = $element->find('img', 0); $itemUri = self::URI . 
$element->getAttribute('href'); diff --git a/bridges/CodebergBridge.php b/bridges/CodebergBridge.php index 2a450477340..79dd706cdd9 100644 --- a/bridges/CodebergBridge.php +++ b/bridges/CodebergBridge.php @@ -79,9 +79,9 @@ class CodebergBridge extends BridgeAbstract public function collectData() { - $html = getSimpleHTMLDOM($this->getURI()); - - $html = defaultLinkTo($html, $this->getURI()); + $url = $this->getURI(); + $html = getSimpleHTMLDOM($url); + $html = defaultLinkTo($html, $url); switch ($this->queriedContext) { case 'Commits': @@ -205,22 +205,22 @@ private function extractCommits($html) */ private function extractIssues($html) { - $div = $html->find('div.issue.list', 0); + $issueList = $html->find('div#issue-list', 0); - foreach ($div->find('li.item') as $li) { + foreach ($issueList->find('div.flex-item') as $div) { $item = []; - $number = trim($li->find('a.index,ml-0.mr-2', 0)->plaintext); + $number = trim($div->find('a.index,ml-0.mr-2', 0)->plaintext); - $item['title'] = $li->find('a.title', 0)->plaintext . ' (' . $number . ')'; - $item['uri'] = $li->find('a.title', 0)->href; + $item['title'] = $div->find('a.issue-title', 0)->plaintext . ' (' . $number . 
')'; + $item['uri'] = $div->find('a.issue-title', 0)->href; - $time = $li->find('relative-time.time-since', 0); + $time = $div->find('relative-time.time-since', 0); if ($time) { $item['timestamp'] = $time->datetime; } - $item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext; + //$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext; // Fetch issue page $issuePage = getSimpleHTMLDOMCached($item['uri'], 3600); @@ -228,7 +228,7 @@ private function extractIssues($html) $item['content'] = $issuePage->find('div.timeline-item.comment.first', 0)->find('div.render-content.markup', 0); - foreach ($li->find('a.ui.label') as $label) { + foreach ($div->find('a.ui.label') as $label) { $item['categories'][] = $label->plaintext; } diff --git a/bridges/CssSelectorBridge.php b/bridges/CssSelectorBridge.php index f6ab8d15588..8fba52858ef 100644 --- a/bridges/CssSelectorBridge.php +++ b/bridges/CssSelectorBridge.php @@ -336,9 +336,11 @@ protected function entryHtmlRetrieveMetadata($entry_html) ], 'timestamp' => [ 'article:published_time', + 'og:article:published_time', 'releaseDate', 'releasedate', 'article:modified_time', + 'og:article:modified_time', 'lastModified', 'lastmodified' ], @@ -351,8 +353,9 @@ protected function entryHtmlRetrieveMetadata($entry_html) 'thumbnailimg' ], 'author' => [ - 'author', 'article:author', + 'og:article:author', + 'author', 'article:author:username', 'profile:first_name', 'profile:last_name', diff --git a/bridges/DagensNyheterDirektBridge.php b/bridges/DagensNyheterDirektBridge.php new file mode 100644 index 00000000000..4d1629fbbd5 --- /dev/null +++ b/bridges/DagensNyheterDirektBridge.php @@ -0,0 +1,62 @@ +find('article') as $element) { + $link = $element->find('button', 0)->getAttribute('data-link'); + $datetime = $element->getAttribute('data-publication-time'); + $url = self::BASEURL . 
$link; + $title = $element->find('h2', 0)->plaintext; + $author = $element->find('div.ds-byline__titles', 0)->plaintext; + // Debug::log($link); + // Debug::log($datetime); + // Debug::log($title); + // Debug::log($url); + // Debug::log($author); + + $article_content = $element->find('div.direkt-post__content', 0); + $article_html = ''; + + $figure = $element->find('figure', 0); + + if ($figure) { + $article_html = $figure->find('img', 0) . '

' . $figure->find('figcaption', 0) . '

'; + } + + foreach ($article_content->find('p') as $p) { + $article_html = $article_html . $p; + } + + $this->items[] = [ + 'uri' => $url, + 'title' => $title, + 'author' => trim($author), + 'timestamp' => $datetime, + 'content' => trim($article_html), + ]; + + if (count($this->items) > self::LIMIT) { + break; + } + } + } +} diff --git a/bridges/DealabsBridge.php b/bridges/DealabsBridge.php index a904c3ff495..4d39502ca9a 100644 --- a/bridges/DealabsBridge.php +++ b/bridges/DealabsBridge.php @@ -1910,6 +1910,7 @@ class DealabsBridge extends PepperBridgeAbstract 'context-talk' => 'Surveillance Discussion', 'uri-group' => 'groupe/', 'uri-deal' => 'bons-plans/', + 'uri-merchant' => 'search/bons-plans?merchant-id=', 'request-error' => 'Impossible de joindre Dealabs', 'thread-error' => 'Impossible de déterminer l\'ID de la discussion. Vérifiez l\'URL que vous avez entré', 'no-results' => 'Il n'y a rien à afficher pour le moment :(', diff --git a/bridges/DemosBerlinBridge.php b/bridges/DemosBerlinBridge.php new file mode 100644 index 00000000000..05fd2335d45 --- /dev/null +++ b/bridges/DemosBerlinBridge.php @@ -0,0 +1,62 @@ + [ + 'name' => 'Tage', + 'type' => 'number', + 'title' => 'Einträge für die nächsten Tage zurückgeben', + 'required' => true, + 'defaultValue' => 7, + ] + ]]; + + public function getIcon() + { + return 'https://www.berlin.de/i9f/r1/images/favicon/favicon.ico'; + } + + public function collectData() + { + $json = getContents('https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/index/all.json'); + $jsonFile = json_decode($json, true); + + $daysInterval = DateInterval::createFromDateString($this->getInput('days') . 
' day'); + $maxTargetDate = date_add(new DateTime('now'), $daysInterval); + + foreach ($jsonFile['index'] as $entry) { + $entryDay = implode('-', array_reverse(explode('.', $entry['datum']))); // dd.mm.yyyy to yyyy-mm-dd + $ts = (new DateTime())->setTimestamp(strtotime($entryDay)); + if ($ts <= $maxTargetDate) { + $item = []; + $item['uri'] = 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/detail/' . $entry['id']; + $item['timestamp'] = $entryDay . ' ' . $entry['von']; + $item['title'] = $entry['thema']; + $location = $entry['strasse_nr'] . ' ' . $entry['plz']; + $locationQuery = http_build_query(['query' => $location]); + $item['content'] = <<{$entry['thema']} +

📅

+
+ 📍 {$location} + +

{$entry['aufzugsstrecke']}

+ HTML; + $item['uid'] = $this->getSanitizedHash($entry['datum'] . '-' . $entry['von'] . '-' . $entry['bis'] . '-' . $entry['thema']); + + $this->items[] = $item; + } + } + } + + private function getSanitizedHash($string) + { + return hash('sha1', preg_replace('/[^a-zA-Z0-9]/', '', strtolower($string))); + } +} diff --git a/bridges/EZTVBridge.php b/bridges/EZTVBridge.php index 73318f0c713..25a88124266 100644 --- a/bridges/EZTVBridge.php +++ b/bridges/EZTVBridge.php @@ -96,7 +96,7 @@ protected function getEztvUri() protected function getItemFromTorrent($torrent) { $item = []; - $item['uri'] = $torrent->episode_url; + $item['uri'] = $torrent->episode_url ?? $torrent->torrent_url; $item['author'] = $torrent->imdb_id; $item['timestamp'] = $torrent->date_released_unix; $item['title'] = $torrent->title; diff --git a/bridges/EdfPricesBridge.php b/bridges/EdfPricesBridge.php new file mode 100644 index 00000000000..f67ed30b1c7 --- /dev/null +++ b/bridges/EdfPricesBridge.php @@ -0,0 +1,106 @@ + [ + 'name' => 'Choisir un contrat', + 'type' => 'list', + // we can add later HCHP, EJP, base + 'values' => ['Tempo' => '/energie/edf/tarifs/tempo'], + ] + ] + ]; + const CACHE_TIMEOUT = 7200; // 2h + + /** + * @param simple_html_dom $html + * @param string $contractUri + * @return void + */ + private function tempo(simple_html_dom $html, string $contractUri): void + { + // current color and next + $daysDom = $html->find('#calendrier', 0)->nextSibling()->find('.card--ejp'); + if ($daysDom && count($daysDom) === 2) { + foreach ($daysDom as $dayDom) { + $day = trim($dayDom->find('.card__title', 0)->innertext) . '/' . (new \DateTime('now'))->format(('Y')); + $dayColor = $dayDom->find('.card-ejp__icon span', 0)->innertext; + + $text = $day . ' - ' . $dayColor; + $item['uri'] = self::URI . 
$contractUri; + $item['title'] = $text; + $item['author'] = self::MAINTAINER; + $item['content'] = $text; + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + } + } + + // colors + $ulDom = $html->find('#tarif-de-l-offre-edf-tempo-current-date-html-year', 0)->nextSibling()->nextSibling()->nextSibling(); + $elementsDom = $ulDom->find('li'); + if ($elementsDom && count($elementsDom) === 3) { + foreach ($elementsDom as $elementDom) { + $item = []; + + $matches = []; + preg_match_all('/Jour (.*) : Heures (.*) : (.*) € \/ Heures (.*) : (.*) €/um', $elementDom->innertext, $matches, PREG_SET_ORDER, 0); + + if ($matches && count($matches[0]) === 6) { + for ($i = 0; $i < 2; $i++) { + $text = 'Jour ' . $matches[0][1] . ' - Heures ' . $matches[0][2 + 2 * $i] . ' : ' . $matches[0][3 + 2 * $i] . '€'; + $item['uri'] = self::URI . $contractUri; + $item['title'] = $text; + $item['author'] = self::MAINTAINER; + $item['content'] = $text; + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + } + } + } + } + + // powers + $ulPowerContract = $ulDom->nextSibling()->nextSibling(); + $elementsPowerContractDom = $ulPowerContract->find('li'); + if ($elementsPowerContractDom && count($elementsPowerContractDom) === 4) { + foreach ($elementsPowerContractDom as $elementPowerContractDom) { + $item = []; + + $matches = []; + preg_match_all('/(.*) kVA : (.*) €/um', $elementPowerContractDom->innertext, $matches, PREG_SET_ORDER, 0); + + if ($matches && count($matches[0]) === 3) { + $text = $matches[0][1] . ' kVA : ' . $matches[0][2] . '€'; + $item['uri'] = self::URI . $contractUri; + $item['title'] = $text; + $item['author'] = self::MAINTAINER; + $item['content'] = $text; + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + } + } + } + } + + public function collectData() + { + $contract = $this->getKey('contract'); + $contractUri = $this->getInput('contract'); + $html = getSimpleHTMLDOM(self::URI . 
$contractUri); + + if ($contract === 'Tempo') { + $this->tempo($html, $contractUri); + } + } +} diff --git a/bridges/FarsideNitterBridge.php b/bridges/FarsideNitterBridge.php new file mode 100644 index 00000000000..b167347acf8 --- /dev/null +++ b/bridges/FarsideNitterBridge.php @@ -0,0 +1,103 @@ + [ + 'name' => 'username', + 'required' => true, + 'exampleValue' => 'NASA' + ], + 'noreply' => [ + 'name' => 'Without replies', + 'type' => 'checkbox', + 'title' => 'Only return initial tweets' + ], + 'noretweet' => [ + 'name' => 'Without retweets', + 'required' => false, + 'type' => 'checkbox', + 'title' => 'Hide retweets' + ], + 'linkbacktotwitter' => [ + 'name' => 'Link back to twitter', + 'required' => false, + 'type' => 'checkbox', + 'title' => 'Rewrite links back to twitter.com' + ] + ], + ]; + + public function detectParameters($url) + { + if (preg_match('/^(https?:\/\/)?(www\.)?(nitter\.net|twitter\.com)\/([^\/?\n]+)/', $url, $matches) > 0) { + return [ + 'username' => $matches[4], + 'noreply' => true, + 'noretweet' => true, + 'linkbacktotwitter' => true + ]; + } + return null; + } + + public function collectData() + { + $this->getRSS(); + } + + private function getRSS($attempt = 0) + { + try { + $this->collectExpandableDatas(self::URI . $this->getInput('username') . '/rss'); + } catch (\Exception $e) { + if ($attempt >= self::MAX_RETRIES) { + throw $e; + } else { + $this->getRSS($attempt++); + } + } + } + + protected function parseItem(array $item) + { + if ($this->getInput('noreply') && substr($item['title'], 0, 5) == 'R to ') { + return; + } + if ($this->getInput('noretweet') && substr($item['title'], 0, 6) == 'RT by ') { + return; + } + $item['title'] = truncate($item['title']); + if (preg_match('/(\/status\/.+)/', $item['uri'], $matches) > 0) { + if ($this->getInput('linkbacktotwitter')) { + $item['uri'] = self::HOST . $this->getInput('username') . $matches[1]; + } else { + $item['uri'] = self::URI . $this->getInput('username') . 
$matches[1]; + } + } + return $item; + } + + public function getName() + { + if (preg_match('/(.+) \//', parent::getName(), $matches) > 0) { + return $matches[1]; + } + return parent::getName(); + } + + public function getURI() + { + if ($this->getInput('linkbacktotwitter')) { + return self::HOST . $this->getInput('username'); + } else { + return self::URI . $this->getInput('username'); + } + } +} diff --git a/bridges/FreeTelechargerBridge.php b/bridges/FreeTelechargerBridge.php index 8362b4ff74c..f0e5d35a5bb 100644 --- a/bridges/FreeTelechargerBridge.php +++ b/bridges/FreeTelechargerBridge.php @@ -3,7 +3,7 @@ class FreeTelechargerBridge extends BridgeAbstract { const NAME = 'Free-Telecharger'; - const URI = 'https://www.free-telecharger.live/'; + const URI = 'https://www.free-telecharger.art/'; const DESCRIPTION = 'Suivi de série sur Free-Telecharger'; const MAINTAINER = 'sysadminstory'; const PARAMETERS = [ @@ -12,43 +12,46 @@ class FreeTelechargerBridge extends BridgeAbstract 'name' => 'URL de la série', 'type' => 'text', 'required' => true, - 'title' => 'URL d\'une série sans le https://www.free-telecharger.live/', + 'title' => 'URL d\'une série sans le https://www.free-telecharger.art/', 'pattern' => 'series.*\.html', - 'exampleValue' => 'series-vf-hd/145458-the-last-of-us-saison-1-web-dl-720p.html' + 'exampleValue' => 'series-vf-hd/151432-wolf-saison-1-complete-web-dl-720p.html' ], ] ]; const CACHE_TIMEOUT = 3600; + private string $showTitle; + private string $showTechDetails; + public function collectData() { - $html = getSimpleHTMLDOM(self::URI . $this->getInput('url')); + $html = getSimpleHTMLDOM(self::URI . 
$this->getInput('url')); - // Find all block content of the page - $blocks = $html->find('div[class=block1]'); + // Find all block content of the page + $blocks = $html->find('div[class=block1]'); - // Global Infos block - $infosBlock = $blocks[0]; - // Links block - $linksBlock = $blocks[2]; + // Global Infos block + $infosBlock = $blocks[0]; + // Links block + $linksBlock = $blocks[2]; - // Extract Global Show infos - $this->showTitle = trim($infosBlock->find('div[class=titre1]', 0)->find('font', 0)->plaintext); - $this->showTechDetails = trim($infosBlock->find('div[align=center]', 0)->find('b', 0)->plaintext); + // Extract Global Show infos + $this->showTitle = trim($infosBlock->find('div[class=titre1]', 0)->find('font', 0)->plaintext); + $this->showTechDetails = trim($infosBlock->find('div[align=center]', 0)->find('b', 0)->plaintext); - // Get Episodes names and links - $episodes = $linksBlock->find('div[id=link]', 0)->find('font[color=#ff6600]'); - $links = $linksBlock->find('div[id=link]', 0)->find('a'); + // Get Episodes names and links + $episodes = $linksBlock->find('div[id=link]', 0)->find('font[color=#e93100]'); + $links = $linksBlock->find('div[id=link]', 0)->find('a'); foreach ($episodes as $index => $episode) { - $item = []; // Create an empty item - $item['title'] = $this->showTitle . ' ' . $this->showTechDetails . ' - ' . ltrim(trim($episode->plaintext), '-'); - $item['uri'] = $links[$index]->href; - $item['content'] = '' . $item['title'] . ''; - $item['uid'] = hash('md5', $item['uri']); + $item = []; // Create an empty item + $item['title'] = $this->showTitle . ' ' . $this->showTechDetails . ' - ' . ltrim(trim($episode->plaintext), '-'); + $item['uri'] = $links[$index]->href; + $item['content'] = '' . $item['title'] . 
''; + $item['uid'] = hash('md5', $item['uri']); - $this->items[] = $item; // Add this item to the list + $this->items[] = $item; // Add this item to the list } } @@ -57,7 +60,7 @@ public function getName() switch ($this->queriedContext) { case 'Suivi de publication de série': return $this->showTitle . ' ' . $this->showTechDetails . ' - ' . self::NAME; - break; + break; default: return self::NAME; } @@ -68,7 +71,7 @@ public function getURI() switch ($this->queriedContext) { case 'Suivi de publication de série': return self::URI . $this->getInput('url'); - break; + break; default: return self::URI; } @@ -76,14 +79,14 @@ public function getURI() public function detectParameters($url) { - // Example: https://www.free-telecharger.live/series-vf-hd/145458-the-last-of-us-saison-1-web-dl-720p.html + // Example: https://www.free-telecharger.art/series-vf-hd/151432-wolf-saison-1-complete-web-dl-720p.html $params = []; - $regex = '/^https:\/\/www.*\.free-telecharger\.live\/(series.*\.html)/'; + $regex = '/^https:\/\/www.*\.free-telecharger\.art\/(series.*\.html)/'; if (preg_match($regex, $url, $matches) > 0) { - $params['context'] = 'Suivi de publication de série'; - $params['url'] = urldecode($matches[1]); - return $params; + $params['context'] = 'Suivi de publication de série'; + $params['url'] = urldecode($matches[1]); + return $params; } return null; diff --git a/bridges/GatesNotesBridge.php b/bridges/GatesNotesBridge.php index 24ba9b2ec17..0d9199680f2 100644 --- a/bridges/GatesNotesBridge.php +++ b/bridges/GatesNotesBridge.php @@ -23,12 +23,14 @@ public function collectData() $cleanedContent = str_replace([ '', '', - '\r\n', ], '', $rawContent); - $cleanedContent = str_replace('\"', '"', $cleanedContent); - $cleanedContent = trim($cleanedContent, '"'); + // $cleanedContent = str_replace('\"', '"', $cleanedContent); + // $cleanedContent = trim($cleanedContent, '"'); $json = Json::decode($cleanedContent, false); + if (is_string($json)) { + throw new \Exception('wtf? ' . 
$json); + } foreach ($json as $article) { $item = []; diff --git a/bridges/GettrBridge.php b/bridges/GettrBridge.php index 74804043049..d3b9b899aa3 100644 --- a/bridges/GettrBridge.php +++ b/bridges/GettrBridge.php @@ -33,7 +33,15 @@ public function collectData() $user, min($this->getInput('limit'), 20) ); - $data = json_decode(getContents($api), false); + try { + $json = getContents($api); + } catch (HttpException $e) { + if ($e->getCode() === 400 && str_contains($e->response->getBody(), 'E_USER_NOTFOUND')) { + throw new \Exception('User not found: ' . $user); + } + throw $e; + } + $data = json_decode($json, false); foreach ($data->result->aux->post as $post) { $this->items[] = [ diff --git a/bridges/GolemBridge.php b/bridges/GolemBridge.php index 6699e433617..599d713a0ee 100644 --- a/bridges/GolemBridge.php +++ b/bridges/GolemBridge.php @@ -82,11 +82,6 @@ protected function parseItem(array $item) // URI without RSS feed reference $item['uri'] = $articlePage->find('head meta[name="twitter:url"]', 0)->content; - $author = $articlePage->find('article header .authors .authors__name', 0); - if ($author) { - $item['author'] = $author->plaintext; - } - $categories = $articlePage->find('ul.tags__list li'); foreach ($categories as $category) { $trimmedcategories[] = trim(html_entity_decode($category->plaintext)); @@ -121,9 +116,6 @@ private function extractContent($page) // reload html, as remove() is buggy $article = str_get_html($article->outertext); - if ($pageHeader = $article->find('header.paged-cluster-header h1', 0)) { - $item .= $pageHeader; - } $header = $article->find('header', 0); foreach ($header->find('p, figure') as $element) { @@ -137,7 +129,7 @@ private function extractContent($page) $img->src = $img->getAttribute('data-src-full'); } - foreach ($content->find('p, h1, h3, img[src*="."]') as $element) { + foreach ($content->find('p, h1, h2, h3, img[src*="."]') as $element) { $item .= $element; } diff --git a/bridges/GoogleScholarBridge.php 
b/bridges/GoogleScholarBridge.php index 981355dd32a..11dc123b22b 100644 --- a/bridges/GoogleScholarBridge.php +++ b/bridges/GoogleScholarBridge.php @@ -2,7 +2,7 @@ class GoogleScholarBridge extends BridgeAbstract { - const NAME = 'Google Scholar v2'; + const NAME = 'Google Scholar'; const URI = 'https://scholar.google.com/'; const DESCRIPTION = 'Search for publications or follow authors on Google Scholar.'; const MAINTAINER = 'nicholasmccarthy'; @@ -193,6 +193,11 @@ public function collectData() $articleUrl = $articleTitleElement->find('a', 0)->href; $articleTitle = $articleTitleElement->plaintext; + // Break the loop if 'Check for Updates' is found in the article title + if (strpos($articleTitle, 'Check for updates') !== false) { + break; + } + $articleDateElement = $publication->find('div[class="gs_a"]', 0); $articleDate = $articleDateElement ? $articleDateElement->plaintext : ''; diff --git a/bridges/HotUKDealsBridge.php b/bridges/HotUKDealsBridge.php index 69301c42ae2..a7e622500e7 100644 --- a/bridges/HotUKDealsBridge.php +++ b/bridges/HotUKDealsBridge.php @@ -3274,6 +3274,7 @@ class HotUKDealsBridge extends PepperBridgeAbstract 'context-talk' => 'Discussion Monitoring', 'uri-group' => 'tag/', 'uri-deal' => 'deals/', + 'uri-merchant' => 'search/deals?merchant-id=', 'request-error' => 'Could not request HotUKDeals', 'thread-error' => 'Unable to determine the thread ID. 
Check the URL you entered', 'no-results' => 'Ooops, looks like we could', diff --git a/bridges/IdealoBridge.php b/bridges/IdealoBridge.php new file mode 100644 index 00000000000..89c5f87df90 --- /dev/null +++ b/bridges/IdealoBridge.php @@ -0,0 +1,180 @@ + [ + 'name' => 'Idealo.de Link to productpage', + 'required' => true, + 'exampleValue' => 'https://www.idealo.de/preisvergleich/OffersOfProduct/202007367_-s7-pro-ultra-roborock.html' + ], + 'ExcludeNew' => [ + 'name' => 'Priceupdate: Do not track new items', + 'type' => 'checkbox', + 'value' => 'c' + ], + 'ExcludeUsed' => [ + 'name' => 'Priceupdate: Do not track used items', + 'type' => 'checkbox', + 'value' => 'uc' + ], + 'MaxPriceNew' => [ + 'name' => 'Pricealarm: Maximum price for new Product', + 'type' => 'number' + ], + 'MaxPriceUsed' => [ + 'name' => 'Pricealarm: Maximum price for used Product', + 'type' => 'number' + ], + ] + ]; + + public function getIcon() + { + return 'https://cdn.idealo.com/storage/ids-assets/ico/favicon.ico'; + } + + public function collectData() + { + $link = $this->getInput('Link'); + $html = getSimpleHTMLDOM($link); + + // Get Productname + $titleobj = $html->find('.oopStage-title', 0); + $Productname = $titleobj->find('span', 0)->plaintext; + + // Create product specific Cache Keys with the link + $KeyNEW = $link; + $KeyNEW .= 'NEW'; + + $KeyUSED = $link; + $KeyUSED .= 'USED'; + + // Load previous Price + $OldPriceNew = $this->loadCacheValue($KeyNEW); + $OldPriceUsed = $this->loadCacheValue($KeyUSED); + + // First button is new. Found at oopStage-conditionButton-wrapper-text class (.) 
+ $FirstButton = $html->find('.oopStage-conditionButton-wrapper-text', 0); + if ($FirstButton) { + $PriceNew = $FirstButton->find('strong', 0)->plaintext; + } + + // Second Button is used + $SecondButton = $html->find('.oopStage-conditionButton-wrapper-text', 1); + if ($SecondButton) { + $PriceUsed = $SecondButton->find('strong', 0)->plaintext; + } + + // Only continue if a price has changed + if ($PriceNew != $OldPriceNew || $PriceUsed != $OldPriceUsed) { + // Get Product Image + $image = $html->find('.datasheet-cover-image', 0)->src; + + // Generate Content + if ($PriceNew > 1) { + $content = "

Price New:
$PriceNew

"; + $content .= "

Price Newbefore:
$OldPriceNew

"; + } + + if ($this->getInput('MaxPriceNew') != '') { + $content .= sprintf('

Max Price Used:
%s,00 €

', $this->getInput('MaxPriceNew')); + } + + if ($PriceUsed > 1) { + $content .= "

Price Used:
$PriceUsed

"; + $content .= "

Price Used before:
$OldPriceUsed

"; + } + + if ($this->getInput('MaxPriceUsed') != '') { + $content .= sprintf('

Max Price Used:
%s,00 €

', $this->getInput('MaxPriceUsed')); + } + + $content .= ""; + + + $now = date('d.m.j H:m'); + + $Pricealarm = 'Pricealarm %s: %s %s %s'; + + // Currently under Max new price + if ($this->getInput('MaxPriceNew') != '') { + if ($PriceNew < $this->getInput('MaxPriceNew')) { + $title = sprintf($Pricealarm, 'Used', $PriceNew, $Productname, $now); + $item = [ + 'title' => $title, + 'uri' => $link, + 'content' => $content, + 'uid' => md5($title) + ]; + $this->items[] = $item; + } + } + + // Currently under Max used price + if ($this->getInput('MaxPriceUsed') != '') { + if ($PriceUsed < $this->getInput('MaxPriceUsed')) { + $title = sprintf($Pricealarm, 'Used', $PriceUsed, $Productname, $now); + $item = [ + 'title' => $title, + 'uri' => $link, + 'content' => $content, + 'uid' => md5($title) + ]; + $this->items[] = $item; + } + } + + // General Priceupdate + if ($this->getInput('MaxPriceUsed') == '' && $this->getInput('MaxPriceNew') == '') { + // check if a relevant pricechange happened + if ( + (!$this->getInput('ExcludeNew') && $PriceNew != $OldPriceNew ) || + (!$this->getInput('ExcludeUsed') && $PriceUsed != $OldPriceUsed ) + ) { + $title .= 'Priceupdate! 
'; + + if (!$this->getInput('ExcludeNew')) { + if ($PriceNew < $OldPriceNew) { + $title .= 'NEW:⬇ '; // Arrow Down Emoji + } + if ($PriceNew > $OldPriceNew) { + $title .= 'NEW:⬆ '; // Arrow Up Emoji + } + } + + + if (!$this->getInput('ExcludeUsed')) { + if ($PriceUsed < $OldPriceUsed) { + $title .= 'USED:⬇ '; // Arrow Down Emoji + } + if ($PriceUsed > $OldPriceUsed) { + $title .= 'USED:⬆ '; // Arrow Up Emoji + } + } + $title .= $Productname; + $title .= ' '; + $title .= $now; + + $item = [ + 'title' => $title, + 'uri' => $link, + 'content' => $content, + 'uid' => md5($title) + ]; + $this->items[] = $item; + } + } + } + + // Save current price + $this->saveCacheValue($KeyNEW, $PriceNew); + $this->saveCacheValue($KeyUSED, $PriceUsed); + } +} diff --git a/bridges/ImgsedBridge.php b/bridges/ImgsedBridge.php index e605cf4fbb2..12466c6b03b 100644 --- a/bridges/ImgsedBridge.php +++ b/bridges/ImgsedBridge.php @@ -206,14 +206,12 @@ private function parseDate($content) { // Parse date, and transform the date into a timetamp, even in a case of a relative date $date = date_create(); - $dateString = str_replace(' ago', '', $content); - // Special case : 'a day' is not a valid interval in PHP, so replace it with it's PHP equivalenbt : '1 day' - if ($dateString == 'a day') { - $dateString = '1 day'; - } - if ($dateString === 'an hour') { - $dateString = '1 hour'; - } + + // Content trimmed to be sure that the "article" is at the beginning of the string and remove "ago" to make it a valid PHP date interval + $dateString = trim(str_replace(' ago', '', $content)); + + // Replace the article "an" or "a" by the number "1" to be a valid PHP date interval + $dateString = preg_replace('/^((an|a) )/m', '1 ', $dateString); $relativeDate = date_interval_create_from_date_string($dateString); if ($relativeDate) { diff --git a/bridges/ItakuBridge.php b/bridges/ItakuBridge.php index f49865eca14..0577752cc55 100644 --- a/bridges/ItakuBridge.php +++ b/bridges/ItakuBridge.php @@ -201,7 +201,7 @@ 
public function collectData() 'rating_e' => $this->getInput('rating_e') ]; - $tag_arr = explode(' ', $this->getInput('tags')); + $tag_arr = explode(' ', $this->getInput('tags') ?? ''); foreach ($tag_arr as $str) { switch ($str[0]) { case '-': @@ -446,6 +446,9 @@ private function getOwnerID($username) private function getPost($id, array $metadata = null) { + if (isset($metadata) && sizeof($metadata['gallery_images']) < $metadata['num_images']) { + $metadata = null; //force re-fetch of metadata + } $uri = self::URI . '/posts/' . $id; $url = self::URI . '/api/posts/' . $id . '/?format=json'; $data = $metadata ?? $this->getData($url, true, true) diff --git a/bridges/KleinanzeigenBridge.php b/bridges/KleinanzeigenBridge.php new file mode 100644 index 00000000000..e0535b59c9c --- /dev/null +++ b/bridges/KleinanzeigenBridge.php @@ -0,0 +1,150 @@ + [ + 'query' => [ + 'name' => 'query', + 'required' => false, + 'title' => 'query term', + ], + 'location' => [ + 'name' => 'location', + 'required' => false, + 'title' => 'e.g. Berlin', + ], + 'radius' => [ + 'name' => 'radius', + 'required' => false, + 'type' => 'number', + 'title' => 'search radius in kilometers', + 'defaultValue' => 10, + ], + 'pages' => [ + 'name' => 'pages', + 'required' => true, + 'type' => 'number', + 'title' => 'how many pages to fetch', + 'defaultValue' => 2, + ] + ], + 'By profile' => [ + 'userid' => [ + 'name' => 'user id', + 'required' => true, + 'type' => 'number', + 'exampleValue' => 12345678 + ], + 'pages' => [ + 'name' => 'pages', + 'required' => true, + 'type' => 'number', + 'title' => 'how many pages to fetch', + 'defaultValue' => 2, + ] + ], + ]; + + public function getIcon() + { + return 'https://www.kleinanzeigen.de/favicon.ico'; + } + + public function getName() + { + switch ($this->queriedContext) { + case 'By profile': + return 'Kleinanzeigen Profil'; + case 'By search': + return 'Kleinanzeigen ' . $this->getInput('query') . ' / ' . 
$this->getInput('location'); + default: + return parent::getName(); + } + } + + public function collectData() + { + if ($this->queriedContext === 'By profile') { + for ($i = 1; $i <= $this->getInput('pages'); $i++) { + $html = getSimpleHTMLDOM(self::URI . '/s-bestandsliste.html?userId=' . $this->getInput('userid') . '&pageNum=' . $i . '&sortingField=SORTING_DATE'); + + $foundItem = false; + foreach ($html->find('article.aditem') as $element) { + $this->addItem($element); + $foundItem = true; + } + if (!$foundItem) { + break; + } + } + } + + if ($this->queriedContext === 'By search') { + $locationID = ''; + if ($this->getInput('location')) { + $json = getContents(self::URI . '/s-ort-empfehlungen.json?' . http_build_query(['query' => $this->getInput('location')])); + $jsonFile = json_decode($json, true); + $locationID = str_replace('_', '', array_key_first($jsonFile)); + } + for ($i = 1; $i <= $this->getInput('pages'); $i++) { + $searchUrl = self::URI . '/s-walled-garden/'; + if ($i != 1) { + $searchUrl .= 'seite:' . $i . '/'; + } + if ($this->getInput('query')) { + $searchUrl .= urlencode($this->getInput('query')) . '/k0'; + } + if ($locationID) { + $searchUrl .= 'l' . $locationID; + } + if ($this->getInput('radius')) { + $searchUrl .= 'r' . $this->getInput('radius'); + } + + $html = getSimpleHTMLDOM($searchUrl); + + // end of list if returned page is not the expected one + if ($html->find('.pagination-current', 0)->plaintext != $i) { + break; + } + + foreach ($html->find('ul#srchrslt-adtable article.aditem') as $element) { + $this->addItem($element); + } + } + } + } + + private function addItem($element) + { + $item = []; + + $item['uid'] = $element->getAttribute('data-adid'); + $item['uri'] = self::URI . 
$element->getAttribute('data-href'); + + $item['title'] = $element->find('h2', 0)->plaintext; + $item['timestamp'] = $element->find('div.aditem-main--top--right', 0)->plaintext; + $imgUrl = str_replace( + 'rule=$_2.JPG', + 'rule=$_57.JPG', + str_replace( + 'rule=$_35.JPG', + 'rule=$_57.JPG', + $element->find('img', 0) ? $element->find('img', 0)->getAttribute('src') : '' + ) + ); //enhance img quality + $textContainer = $element->find('div.aditem-main', 0); + $textContainer->find('a', 0)->href = self::URI . $textContainer->find('a', 0)->href; // add domain to url + $item['content'] = '' . + $textContainer->outertext; + + $this->items[] = $item; + } +} diff --git a/bridges/KoFiBridge.php b/bridges/KoFiBridge.php index c16005907fc..da8f1e7da34 100644 --- a/bridges/KoFiBridge.php +++ b/bridges/KoFiBridge.php @@ -27,12 +27,15 @@ public function collectData() if (isset($titleWrapper[0])) { $item = []; $item['title'] = $element->find('div.content-link-text div')[0]->plaintext; - // $item['timestamp'] = strtotime($element->find('div.feeditem-time', 0)->plaintext); - $item['uri'] = self::URI . $element->find('div.fi-post-item-large a')[0]->href; + $uri = $element->find('div.content-link-text div')[2]->find('a')[0]->onclick; + $uri = trim(str_replace('window.location =', '', $uri)); + $uri = trim(str_replace(''', '', $uri)); + $uri = trim(str_replace(';', '', $uri)); + $item['uri'] = self::URI . 
$uri; + if (isset($element->find('div.fi-post-item-large div.content-link-post img')[0])) { $item['enclosures'][] = $element->find('div.fi-post-item-large div.content-link-post img')[0]->src; } - // $item['content'] = $element->find('div.content-link-text div#content-link', 0)->plaintext; $html = getSimpleHTMLDOM($item['uri']); $feedItemTime = $html->find('div.feeditem-time', 0); diff --git a/bridges/MangaReaderBridge.php b/bridges/MangaReaderBridge.php new file mode 100644 index 00000000000..1fa0c62dc54 --- /dev/null +++ b/bridges/MangaReaderBridge.php @@ -0,0 +1,44 @@ + [ + 'name' => 'Manga URL', + 'type' => 'text', + 'required' => true, + 'title' => 'The URL of the manga on MangaReader', + 'pattern' => '^https:\/\/mangareader\.to\/[^\/]+$', + 'exampleValue' => 'https://mangareader.to/bleach-1623', + ], + 'lang' => [ + 'name' => 'Chapter Language', + 'title' => 'two-letter language code (example "en", "jp", "fr")', + 'exampleValue' => 'en', + 'required' => true, + 'pattern' => '^[a-z][a-z]$', + ] + ] + ]; + + public function collectData() + { + $url = $this->getInput('url'); + $lang = $this->getInput('lang'); + $dom = getSimpleHTMLDOM($url); + $chapters = $dom->getElementById($lang . '-chapters'); + + foreach ($chapters->getElementsByTagName('li') as $chapter) { + $a = $chapter->getElementsByTagName('a')[0]; + $item = []; + $item['title'] = $a->getAttribute('title'); + $item['uri'] = self::URI . 
$a->getAttribute('href'); + $this->items[] = $item; + } + } +} diff --git a/bridges/MarktplaatsBridge.php b/bridges/MarktplaatsBridge.php index 70a369d9542..6ba993e7885 100644 --- a/bridges/MarktplaatsBridge.php +++ b/bridges/MarktplaatsBridge.php @@ -14,6 +14,51 @@ class MarktplaatsBridge extends BridgeAbstract 'required' => true, 'title' => 'The search string for marktplaats', ], + 'c' => [ + 'name' => 'Category', + 'type' => 'list', + 'values' => [ + 'Select a category' => '', + 'Antiek en Kunst' => '1', + 'Audio, Tv en Foto' => '31', + 'Auto's' => '91', + 'Auto-onderdelen' => '2600', + 'Auto diversen' => '48', + 'Boeken' => '201', + 'Caravans en Kamperen' => '289', + 'Cd's en Dvd's' => '1744', + 'Computers en Software' => '322', + 'Contacten en Berichten' => '378', + 'Diensten en Vakmensen' => '1098', + 'Dieren en Toebehoren' => '395', + 'Doe-het-zelf en Verbouw' => '239', + 'Fietsen en Brommers' => '445', + 'Hobby en Vrije tijd' => '1099', + 'Huis en Inrichting' => '504', + 'Huizen en Kamers' => '1032', + 'Kinderen en Baby's' => '565', + 'Kleding | Dames' => '621', + 'Kleding | Heren' => '1776', + 'Motoren' => '678', + 'Muziek en Instrumenten' => '728', + 'Postzegels en Munten' => '1784', + 'Sieraden, Tassen en Uiterlijk' => '1826', + 'Spelcomputers en Games' => '356', + 'Sport en Fitness' => '784', + 'Telecommunicatie' => '820', + 'Tickets en Kaartjes' => '1984', + 'Tuin en Terras' => '1847', + 'Vacatures' => '167', + 'Vakantie' => '856', + 'Verzamelen' => '895', + 'Watersport en Boten' => '976', + 'Witgoed en Apparatuur' => '537', + 'Zakelijke goederen' => '1085', + 'Diversen' => '428', + ], + 'required' => false, + 'title' => 'The category to search in', + ], 'z' => [ 'name' => 'zipcode', 'type' => 'text', @@ -57,7 +102,15 @@ class MarktplaatsBridge extends BridgeAbstract 'type' => 'checkbox', 'required' => false, 'title' => 'Include the raw data behind the content', - ] + ], + 'sc' => [ + 'name' => 'Sub category', + 'type' => 'number', + 'required' => 
false, + 'exampleValue' => '12345', + 'title' => 'Sub category has to be given by id as the list is too big to show here. + Only use subcategories that belong to the main category. Both have to be correct', + ], ] ]; const CACHE_TIMEOUT = 900; @@ -80,6 +133,12 @@ public function collectData() $excludeGlobal = true; } } + if (!empty($this->getInput('c'))) { + $query .= '&l1CategoryId=' . $this->getInput('c'); + } + if (!is_null($this->getInput('sc'))) { + $query .= '&l2CategoryId=' . $this->getInput('sc'); + } $url = 'https://www.marktplaats.nl/lrp/api/search?query=' . urlencode($this->getInput('q')) . $query; $jsonString = getSimpleHTMLDOM($url); $jsonObj = json_decode($jsonString); @@ -97,15 +156,15 @@ public function collectData() $item['enclosures'] = $listing->imageUrls; if (is_array($listing->imageUrls)) { foreach ($listing->imageUrls as $imgurl) { - $item['content'] .= "
\n"; + $item['content'] .= "
\n"; } } else { - $item['content'] .= "
\n"; + $item['content'] .= "
\n"; } } if (!is_null($this->getInput('r'))) { if ($this->getInput('r')) { - $item['content'] .= "
\n
\n
\n" . json_encode($listing); + $item['content'] .= "
\n
\n
\n" . json_encode($listing) . "
$url"; } } $item['content'] .= "
\n
\nPrice: " . $listing->priceInfo->priceCents / 100; @@ -130,4 +189,80 @@ public function getName() } return parent::getName(); } + + /** + * Method can be used to scrape the subcategories from marktplaats + */ + private static function scrapeSubCategories() + { + $main = []; + $main['Select a category'] = ''; + $marktplaatsHTML = file_get_html('https://www.marktplaats.nl'); + foreach ($marktplaatsHTML->find('select[id=categoryId] option') as $opt) { + if (!str_contains($opt->innertext, 'categorie')) { + $main[$opt->innertext] = $opt->value; + $ids[] = $opt->value; + } + } + + $result = []; + foreach ($ids as $id) { + $url = 'https://www.marktplaats.nl/lrp/api/search?l1CategoryId=' . $id; + $jsonstring = getContents($url); + $jsondata = json_decode((string)$jsonstring); + if (isset($jsondata->searchCategoryOptions)) { + $categories = $jsondata->searchCategoryOptions; + if (isset($jsondata->categoriesById->$id)) { + $maincategory = $jsondata->categoriesById->$id; + $array = []; + foreach ($categories as $categorie) { + $array[$categorie->fullName] = $categorie->id; + } + $result[$maincategory->fullName] = $array; + } + } else { + print($jsonstring); + } + } + $combinedResult = [ + 'main' => $main, + 'sub' => $result + ]; + return $combinedResult; + } + + /** + * Helper method to construct the array that could be used for categories + * + * @param $array + * @param $indent + * @return void + */ + private static function printArrayAsCode($array, $indent = 0) + { + foreach ($array as $key => $value) { + if (is_array($value)) { + echo str_repeat(' ', $indent) . "'$key' => [" . PHP_EOL; + self::printArrayAsCode($value, $indent + 1); + echo str_repeat(' ', $indent) . '],' . PHP_EOL; + } else { + $value = str_replace('\'', '\\\'', $value); + $key = str_replace('\'', '\\\'', $key); + echo str_repeat(' ', $indent) . "'$key' => '$value'," . 
PHP_EOL; + } + } + } + + private static function printScrapeArray() + { + $array = (MarktplaatsBridge::scrapeSubCategories()); + + echo '$myArray = [' . PHP_EOL; + self::printArrayAsCode($array['main'], 1); + echo '];' . PHP_EOL; + + echo '$myArray = [' . PHP_EOL; + self::printArrayAsCode($array['sub'], 1); + echo '];' . PHP_EOL; + } } diff --git a/bridges/MotatosBridge.php b/bridges/MotatosBridge.php new file mode 100644 index 00000000000..6833521a794 --- /dev/null +++ b/bridges/MotatosBridge.php @@ -0,0 +1,102 @@ + [ + 'name' => 'Region', + 'type' => 'list', + 'title' => 'Choose country', + 'values' => [ + 'Austria' => 'at', + 'Denmark' => 'dk', + 'Finland' => 'fi', + 'Germany' => 'de', + 'Sweden' => 'se', + ], + ], + ]]; + + public function getName() + { + switch ($this->getInput('region')) { + case 'at': + return 'Motatos'; + case 'dk': + return 'Motatos'; + case 'de': + return 'Motatos'; + case 'fi': + return 'Matsmart'; + case 'se': + return 'Matsmart'; + default: + return self::NAME; + } + } + + public function getURI() + { + switch ($this->getInput('region')) { + case 'at': + return 'https://www.motatos.at/neu-im-shop'; + case 'dk': + return 'https://www.motatos.dk/nye-varer'; + case 'de': + return 'https://www.motatos.de/neu-im-shop'; + case 'fi': + return 'https://www.matsmart.fi/uusimmat'; + case 'se': + return 'https://www.matsmart.se/nyinkommet'; + default: + return self::URI; + } + } + + public function getIcon() + { + return 'https://www.motatos.de/favicon.ico'; + } + + private function getApiUrl() + { + switch ($this->getInput('region')) { + case 'at': + return 'https://api.findify.io/v4/4359f7b3-17e0-4f74-9fdb-e6606dfed25c/smart-collection/new-arrivals'; + case 'dk': + return 'https://api.findify.io/v4/3709426e-621a-49df-bd61-ac8543452022/smart-collection/new-arrivals'; + case 'de': + return 'https://api.findify.io/v4/2a044754-6cda-4541-b159-39133b75386c/smart-collection/new-arrivals'; + case 'fi': + return 
'https://api.findify.io/v4/63946f89-2a82-4839-a412-883b79144f7b/smart-collection/new-arrivals'; + case 'se': + return 'https://api.findify.io/v4/3ae86b36-a1bd-4442-a3d9-2af6845908e6/smart-collection/new-arrivals'; + } + } + + public function collectData() + { + // motatos uses this api to dynamically load more items on page scroll + $json = getContents($this->getApiUrl() . '?t_client=0&user={%22uid%22:%220%22,%22sid%22:%220%22}'); + $jsonFile = json_decode($json, true); + + foreach ($jsonFile['items'] as $entry) { + $item = []; + $item['uid'] = $entry['custom_fields']['uuid'][0]; + $item['uri'] = $entry['product_url']; + $item['timestamp'] = $entry['created_at'] / 1000; + $item['title'] = $entry['title']; + $item['content'] = <<{$entry['title']} + +

{$entry['price'][0]}€

+ HTML; + $this->items[] = $item; + } + } +} diff --git a/bridges/MydealsBridge.php b/bridges/MydealsBridge.php index 0ef9c201701..d7e074a9aac 100644 --- a/bridges/MydealsBridge.php +++ b/bridges/MydealsBridge.php @@ -2,9 +2,9 @@ class MydealsBridge extends PepperBridgeAbstract { - const NAME = 'Mydeals bridge'; + const NAME = 'Mydealz bridge'; const URI = 'https://www.mydealz.de/'; - const DESCRIPTION = 'Zeigt die Deals von mydeals.de'; + const DESCRIPTION = 'Zeigt die Deals von mydealz.de'; const MAINTAINER = 'sysadminstory'; const PARAMETERS = [ 'Suche nach Stichworten' => [ @@ -2021,9 +2021,10 @@ class MydealsBridge extends PepperBridgeAbstract 'context-talk' => 'Überwachung Diskussion', 'uri-group' => 'gruppe/', 'uri-deal' => 'deals/', + 'uri-merchant' => 'search/gutscheine?merchant-id=', 'request-error' => 'Could not request mydeals', 'thread-error' => 'Die ID der Diskussion kann nicht ermittelt werden. Überprüfen Sie die eingegebene URL', - 'no-results' => 'Ups, wir konnten keine Deals zu', + 'no-results' => 'Ups, wir konnten nichts', 'relative-date-indicator' => [ 'vor', 'seit' @@ -2068,7 +2069,9 @@ class MydealsBridge extends PepperBridgeAbstract 'relative-date-alt-prefixes' => [ 'aktualisiert vor ', 'kommentiert vor ', - 'heiß seit ' + 'eingestellt vor ', + 'heiß seit ', + 'vor ' ], 'relative-date-ignore-suffix' => [ '/von.*$/' diff --git a/bridges/NintendoBridge.php b/bridges/NintendoBridge.php new file mode 100644 index 00000000000..1f463e91a00 --- /dev/null +++ b/bridges/NintendoBridge.php @@ -0,0 +1,486 @@ + [ + 'category' => [ + 'name' => 'Category', + 'type' => 'list', + 'values' => [ + 'All' => 'all', + 'Mario Kart 8 Deluxe' => 'mk8d', + 'Splatoon 2' => 's2', + 'Super Mario 3D All-Stars' => 'sm3as', + 'Super Mario 3D World + Bowser’s Fury' => 'sm3wbf', + 'Super Mario Bros. Wonder' => 'smbw', + 'Super Mario Maker 2' => 'smm2', + 'Super Mario Odyssey' => 'smo', + 'Super Smash Bros. 
Ultimate' => 'ssbu', + 'Switch Firmware' => 'sf', + 'The Legend of Zelda: Link’s Awakening' => 'tlozla', + 'The Legend of Zelda: Skyward Sword HD' => 'tlozss', + 'The Legend of Zelda: Tears of the Kingdom' => 'tloztotk', + 'Xenoblade Chronicles 2' => 'xc2', + ], + 'defaultValue' => 'mk8d', + 'title' => 'Select category' + ], + 'country' => [ + 'name' => 'Country', + 'type' => 'list', + 'values' => [ + 'België' => 'be/nl', + 'Belgique' => 'be/fr', + 'Deutschland' => 'de', + 'España' => 'es', + 'France' => 'fr', + 'Italia' => 'it', + 'Nederland' => 'nl', + 'Österreich' => 'at', + 'Portugal' => 'pt', + 'Schweiz' => 'ch/de', + 'Suisse' => 'ch/fr', + 'Svizzera' => 'ch/it', + 'UK & Ireland' => 'co.uk', + 'South Africa' => 'co.za' + ], + 'defaultValue' => 'co.uk', + 'title' => 'Select your country' + ] + ] + ]; + + const CACHE_TIMEOUT = 3600; + + const FEED_SOURCE_URL = [ + 'mk8d' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/How-to-Update-Mario-Kart-8-Deluxe-1482895.html', + 's2' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/How-to-Update-Splatoon-2-1482897.html', + 'sm3as' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/How-to-Update-Super-Mario-3D-All-Stars-1844226.html', + 'sm3wbf' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/How-to-Update-Super-Mario-3D-World-Bowser-s-Fury-1920668.html', + 'smbw' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/How-to-Update-Super-Mario-Bros-Wonder-2485410.html', + 'smm2' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/How-to-Update-Super-Mario-Maker-2-1586745.html', + 'smo' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/How-to-Update-Super-Mario-Odyssey-1482901.html', + 'ssbu' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/How-to-Update-Super-Smash-Bros-Ultimate-1484130.html', + 'sf' => 
'https://www.nintendo.co.uk/Support/Nintendo-Switch/System-Updates/Nintendo-Switch-System-Updates-and-Change-History-1445507.html', + 'tlozla' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/How-to-Update-The-Legend-of-Zelda-Link-s-Awakening-1666739.html', + 'tlozss' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/How-to-Update-The-Legend-of-Zelda-Skyward-Sword-HD-2022801.html', + 'tloztotk' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/How-to-Update-The-Legend-of-Zelda-Tears-of-the-Kingdom-2388231.html', + 'xc2' => 'https://www.nintendo.co.uk/Support/Nintendo-Switch/Game-Updates/Xenoblade-Chronicles-2-Update-History-1482911.html', + ]; + const XPATH_EXPRESSION_ITEM = '//div[@class="col-xs-12 content"]/div[starts-with(@id,"v") and @class="collapse"]'; + const XPATH_EXPRESSION_ITEM_FIRMWARE = '//div[@id="latest" and @class="collapse" and @rel="1"]'; + const XPATH_EXPRESSION_ITEM_TITLE = '(.//h2[1] | .//strong[1])[1]/node()'; + const XPATH_EXPRESSION_ITEM_CONTENT = '.'; + const XPATH_EXPRESSION_ITEM_URI = '//link[@rel="canonical"]/@href'; + + //const XPATH_EXPRESSION_ITEM_AUTHOR = ''; + const XPATH_EXPRESSION_ITEM_TIMESTAMP_PART = 'substring-after(//a[@class="collapse_link collapsed" and @data-target="#{{id_here}}"]/text(), "{{label_here}}")'; + const XPATH_EXPRESSION_ITEM_TIMESTAMP = 'substring(' . self::XPATH_EXPRESSION_ITEM_TIMESTAMP_PART . ', 1, string-length(' + . self::XPATH_EXPRESSION_ITEM_TIMESTAMP_PART . 
') - 1)'; + + //const XPATH_EXPRESSION_ITEM_ENCLOSURES = ''; + //const XPATH_EXPRESSION_ITEM_CATEGORIES = ''; + const SETTING_FIX_ENCODING = false; + const SETTING_USE_RAW_ITEM_CONTENT = true; + + private const GAME_COUNTRY_DATE_SUBSTRING_PART = [ + 'mk8d' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', + 'fr' => 'atée du ', + 'it' => 'ubblicata il ', + 'nl' => 'itgebracht op ', + 'pt' => 'ançada no dia ', + 'en' => 'eleased ', + ], + 's2' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', + 'fr' => 'atée du ', + 'it' => 'ubblicata il ', + 'nl' => 'itgebracht op ', + 'pt' => 'ançada a ', + 'en' => 'eleased ', + ], + 'sm3as' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', + 'fr' => 'ubliée le ', + 'it' => 'istribuita il ', + 'nl' => 'itgebracht op ', + 'pt' => 'ançada a ', + 'en' => 'eleased ', + ], + 'sm3wbf' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', + 'fr' => 'atée du ', + 'it' => 'istribuita il ', + 'nl' => 'itgebracht op ', + 'pt' => 'ançada no dia ', + 'en' => 'eleased ', + ], + 'smbw' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', + 'fr' => 'atée du ', + 'it' => 'istribuita il ', + 'nl' => 'itgebracht op ', + 'pt' => 'ançada a ', + 'en' => 'eleased ', + ], + 'smm2' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', + 'fr' => 'ubliée le ', + 'it' => 'istribuita il ', + 'nl' => 'itgebracht op ', + 'pt' => 'ançada no dia ', + 'en' => 'eleased ', + ], + 'smo' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', + 'fr' => 'atée du ', + 'it' => 'istribuita il ', + 'nl' => 'itgebracht op ', + 'pt' => 'ançada no dia ', + 'en' => 'eleased ', + ], + 'ssbu' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', + 'fr' => 'atée du ', + 'it' => 'istribuita il ', + 'nl' => 'itgebracht op ', + 'pt' => 'ançada no dia ', + 'en' => 'eleased ', + ], + 'sf' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', 
+ 'fr' => 'ise en ligne le ', + 'it' => 'ubblicata il ', + 'nl' => 'itgebracht op ', + 'pt' => 'ançada no dia ', + 'en' => 'istributed ', + ], + 'tlozla' => [ + 'de' => 'eröffentlicht ', + 'es' => 'ublicada el ', + 'fr' => 'atée du ', + 'it' => 'istribuita il ', + 'nl' => 'itgegeven op ', + 'pt' => 'ançada a ', + 'en' => 'eleased ', + ], + 'tlozss' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', + 'fr' => 'atée du ', + 'it' => 'ubblicata l\'', + 'nl' => 'itgebracht op ', + 'pt' => 'ançada a ', + 'en' => 'eleased ', + ], + 'tloztotk' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', + 'fr' => 'ubliée le ', + 'it' => 'ubblicata il ', + 'nl' => 'erschenen op ', + 'pt' => 'ançada a ', + 'en' => 'eleased ', + ], + 'xc2' => [ + 'de' => 'eröffentlicht am ', + 'es' => 'isponible desde el ', + 'fr' => 'atée du ', + 'it' => 'istribuita il ', + 'nl' => 'itgebracht op ', + 'pt' => 'ançada a ', + 'en' => 'eleased ', + ], + ]; + + private const GAME_COUNTRY_DATE_FORMAT = [ + 'mk8d' => [ + 'de' => 'd.m.y', + 'es' => 'd-m-y', + 'fr' => 'd/m/Y', + 'it' => 'd/m/y', + 'nl' => 'd m Y', + 'pt' => 'd/m/y', + 'en' => 'd/m/y', + ], + 's2' => [ + 'de' => 'd.m.Y', + 'es' => 'd-m-Y', + 'fr' => 'd/m/y', + 'it' => 'd/m/y', + 'nl' => 'd/m/y', + 'pt' => 'd/m/y', + 'en' => 'd F Y', + ], + 'sm3as' => [ + 'de' => 'j. m Y', + 'es' => 'j \d\e m \d\e Y', + 'fr' => 'j m Y', + 'it' => 'j m Y', + 'nl' => 'j m Y', + 'pt' => 'j \d\e m \d\e Y', + 'en' => 'j F Y', + ], + 'sm3wbf' => [ + 'de' => 'd.m.y', + 'es' => 'd-m-y', + 'fr' => 'd/m/y', + 'it' => 'd/m/y', + 'nl' => 'd m Y', + 'pt' => 'd/m/y', + 'en' => 'F j, Y', + ], + 'smbw' => [ + 'de' => 'd. 
m Y', + 'es' => 'j \d\e m \d\e Y', + 'fr' => 'd/m/Y', + 'it' => 'j m Y', + 'nl' => 'd m Y', + 'pt' => 'j \d\e m \d\e Y', + 'en' => 'j F Y', + ], + 'smm2' => [ + 'de' => 'd.m.Y', + 'es' => 'd-m-Y', + 'fr' => 'd/m/Y', + 'it' => 'd/m/Y', + 'nl' => 'd m Y', + 'pt' => 'd/m/y', + 'en' => 'd/m/y', + ], + 'smo' => [ + 'de' => 'd.m.Y', + 'es' => 'd-m-Y', + 'fr' => 'd/m/Y', + 'it' => 'd/m/y', + 'nl' => 'd m Y', + 'pt' => 'd/m/y', + 'en' => 'd/m/y', + ], + 'ssbu' => [ + 'de' => 'd. m Y', + 'es' => 'j \d\e m \d\e Y', + 'fr' => 'j m Y', + 'it' => 'j m Y', + 'nl' => 'd m Y', + 'pt' => 'd/m/Y', + 'en' => 'j F Y', + ], + 'sf' => [ + 'de' => 'd.m.Y', + 'es' => 'd-m-y', + 'fr' => 'd/m/Y', + 'it' => 'd/m/Y', + 'nl' => 'd m Y', + 'pt' => 'd/m/Y', + 'en' => 'd/m/Y', + ], + 'tlozla' => [ + 'de' => 'd. m Y', + 'es' => 'j m \d\e Y', + 'fr' => 'd/m/y', + 'it' => 'j m Y', + 'nl' => 'd m Y', + 'pt' => 'j \d\e m \d\e Y', + 'en' => 'j F y', + ], + 'tlozss' => [ + 'de' => 'd. m Y', + 'es' => 'j \d\e m \d\e Y', + 'fr' => 'd/m/y', + 'it' => 'j m Y', + 'nl' => 'd m Y', + 'pt' => 'j \d\e m \d\e Y', + 'en' => 'j F Y', + ], + 'tloztotk' => [ + 'de' => 'd. 
m Y', + 'es' => 'j \d\e m \d\e Y', + 'fr' => 'j m Y', + 'it' => 'j m Y', + 'nl' => 'd m Y', + 'pt' => 'j \d\e m \d\e Y', + 'en' => 'j F Y', + ], + 'xc2' => [ + 'de' => 'd.m.y', + 'es' => 'd-m-y', + 'fr' => 'd/m/Y', + 'it' => 'd/m/y', + 'nl' => 'd m Y', + 'pt' => 'd/m/y', + 'en' => 'd/m/y', + ], + ]; + + private const FOREIGN_MONTH_NAMES = [ + 'nl' => ['01' => 'januari', '02' => 'februari', '03' => 'maart', '04' => 'april', '05' => 'mei', '06' => 'juni', '07' => 'juli', '08' => 'augustus', + '09' => 'september', '10' => 'oktober', '11' => 'november', '12' => 'december'], + 'fr' => ['01' => 'janvier', '02' => 'février', '03' => 'mars', '04' => 'avril', '05' => 'mai', '06' => 'juin', '07' => 'juillet', '08' => 'août', + '09' => 'septembre', '10' => 'octobre', '11' => 'novembre', '12' => 'décembre'], + 'de' => ['01' => 'Januar', '02' => 'Februar', '03' => 'März', '04' => 'April', '05' => 'Mai', '06' => 'Juni', '07' => 'Juli', '08' => 'August', + '09' => 'September', '10' => 'Oktober', '11' => 'November', '12' => 'Dezember'], + 'es' => ['01' => 'enero', '02' => 'febrero', '03' => 'marzo', '04' => 'abril', '05' => 'mayo', '06' => 'junio', '07' => 'julio', '08' => 'agosto', + '09' => 'septiembre', '10' => 'octubre', '11' => 'noviembre', '12' => 'diciembre'], + 'it' => ['01' => 'gennaio', '02' => 'febbraio', '03' => 'marzo', '04' => 'aprile', '05' => 'maggio', '06' => 'giugno', '07' => 'luglio', '08' => 'agosto', + '09' => 'settembre', '10' => 'ottobre', '11' => 'novembre', '12' => 'dicembre'], + 'pt' => ['01' => 'janeiro', '02' => 'fevereiro', '03' => 'março', '04' => 'abril', '05' => 'maio', '06' => 'junho', '07' => 'julho', '08' => 'agosto', + '09' => 'setembro', '10' => 'outubro', '11' => 'novembro', '12' => 'dezembro'], + ]; + const LANGUAGE_REWRITE = ['co.uk' => 'en', 'co.za' => 'en', 'at' => 'de']; + + private string $lastId = ''; + private ?string $currentCategory = ''; + + private function getCurrentCategory() + { + if (empty($this->currentCategory)) { + $category 
= $this->getInput('category'); + $this->currentCategory = empty($category) ? self::PARAMETERS['']['category']['defaultValue'] : $category; + } + return $this->currentCategory; + } + + public function getIcon() + { + return 'https://www.nintendo.co.uk/favicon.ico'; + } + + public function getURI() + { + $category = $this->getInput('category'); + return 'all' === $category ? self::URI : $this->getSourceUrl(); + } + + protected function provideFeedTitle(\DOMXPath $xpath) + { + $category = $this->getInput('category'); + $categoryName = array_search($category, self::PARAMETERS['']['category']['values']); + return 'all' === $category ? self::NAME : $categoryName . ' Software-Updates'; + } + + protected function getSourceUrl() + { + $country = $this->getInput('country'); + $category = $this->getCurrentCategory(); + return str_replace(self::PARAMETERS['']['country']['defaultValue'], $country, self::FEED_SOURCE_URL[$category]); + } + + protected function getExpressionItem() + { + $category = $this->getCurrentCategory(); + return 'sf' === $category ? self::XPATH_EXPRESSION_ITEM_FIRMWARE : self::XPATH_EXPRESSION_ITEM; + } + + protected function getExpressionItemTimestamp() + { + if (empty($this->lastId)) { + return null; + } + $country = $this->getInput('country'); + $category = $this->getCurrentCategory(); + $language = $this->getLanguageFromCountry($country); + return str_replace( + ['{{id_here}}', '{{label_here}}'], + [$this->lastId, static::GAME_COUNTRY_DATE_SUBSTRING_PART[$category][$language]], + static::XPATH_EXPRESSION_ITEM_TIMESTAMP + ); + } + + protected function getExpressionItemCategories() + { + $category = $this->getCurrentCategory(); + $categoryName = array_search($category, self::PARAMETERS['']['category']['values']); + return 'string("' . $categoryName . 
'")'; + } + + public function collectData() + { + $category = $this->getCurrentCategory(); + if ('all' === $category) { + $allItems = []; + foreach (self::PARAMETERS['']['category']['values'] as $catKey) { + if ('all' === $catKey) { + continue; + } + $this->currentCategory = $catKey; + $this->items = []; + parent::collectData(); + $allItems = [...$allItems, ...$this->items]; + } + $this->currentCategory = 'all'; + $this->items = $allItems; + } else { + parent::collectData(); + } + } + + protected function formatItemTitle($value) + { + if (false !== strpos($value, ' (')) { + $value = substr($value, 0, strpos($value, ' (')); + } + if ('all' === $this->getInput('category')) { + $category = $this->getCurrentCategory(); + $categoryName = array_search($category, self::PARAMETERS['']['category']['values']); + return $categoryName . ' ' . $value; + } + return $value; + } + + protected function formatItemContent($value) + { + $result = preg_match('~
(.*)
~', $value, $matches); + if (1 === $result) { + $this->lastId = $matches[1]; + return trim($matches[2]); + } + return $value; + } + + protected function formatItemTimestamp($value) + { + $country = $this->getInput('country'); + $category = $this->getCurrentCategory(); + $language = $this->getLanguageFromCountry($country); + + $aMonthNames = self::FOREIGN_MONTH_NAMES[$language] ?? null; + if (null !== $aMonthNames) { + $value = str_replace(array_values($aMonthNames), array_keys($aMonthNames), $value); + } + $value = str_replace('­', '-', $value); + $value = str_replace('--', '-', $value); + + $date = \DateTime::createFromFormat(self::GAME_COUNTRY_DATE_FORMAT[$category][$language], $value); + if (false === $date) { + $date = new \DateTime('now'); + } + return $date->getTimestamp(); + } + + protected function generateItemId(FeedItem $item) + { + return $this->getCurrentCategory() . '-' . $this->lastId; + } + + private function getLanguageFromCountry($country) + { + return (strpos($country, '/') !== false) ? substr($country, strpos($country, '/') + 1) : (self::LANGUAGE_REWRITE[$country] ?? $country); + } +} diff --git a/bridges/OglafBridge.php b/bridges/OglafBridge.php new file mode 100644 index 00000000000..1f4bc1aff9e --- /dev/null +++ b/bridges/OglafBridge.php @@ -0,0 +1,35 @@ + [ + 'name' => 'limit (max 20)', + 'type' => 'number', + 'defaultValue' => 10, + 'required' => true, + ] + ] + ]; + + public function collectData() + { + $url = self::URI . 
'feeds/rss/'; + $limit = min(20, $this->getInput('limit')); + $this->collectExpandableDatas($url, $limit); + } + + protected function parseItem($item) + { + $html = getSimpleHTMLDOMCached($item['uri']); + $comicImage = $html->find('img[id="strip"]', 0); + $item['content'] = $comicImage; + + return $item; + } +} diff --git a/bridges/PanneauPocketBridge.php b/bridges/PanneauPocketBridge.php index 8547a500c8c..464d56c5d92 100644 --- a/bridges/PanneauPocketBridge.php +++ b/bridges/PanneauPocketBridge.php @@ -12,6 +12,12 @@ class PanneauPocketBridge extends BridgeAbstract 'name' => 'Choisir une ville', 'type' => 'list', 'values' => self::CITIES, + ], + 'cityName' => [ + 'name' => 'Ville', + ], + 'cityId' => [ + 'name' => 'Identifiant', ] ] ]; @@ -113,8 +119,14 @@ class PanneauPocketBridge extends BridgeAbstract public function collectData() { - $matchedCity = array_search($this->getInput('cities'), self::CITIES); - $city = strtolower($this->getInput('cities') . '-' . $matchedCity); + $cityId = $this->getInput('cityId'); + if ($cityId != null) { + $cityName = $this->getInput('cityName'); + $city = strtolower($cityId . '-' . $cityName); + } else { + $matchedCity = array_search($this->getInput('cities'), self::CITIES); + $city = strtolower($this->getInput('cities') . '-' . 
$matchedCity); + } $url = sprintf('https://app.panneaupocket.com/ville/%s', urlencode($city)); $html = getSimpleHTMLDOM($url); @@ -136,6 +148,18 @@ public function collectData() } } + public function detectParameters($url) + { + $params = []; + $regex = '/\/ville\/(\d+)-([a-z0-9-]+)/'; + if (preg_match($regex, $url, $matches)) { + $params['cityId'] = $matches[1]; + $params['cityName'] = $matches[2]; + return $params; + } + return null; + } + /** * Produce self::CITIES array */ diff --git a/bridges/PepperBridgeAbstract.php b/bridges/PepperBridgeAbstract.php index 5d2e552b844..73bd194da8e 100644 --- a/bridges/PepperBridgeAbstract.php +++ b/bridges/PepperBridgeAbstract.php @@ -94,7 +94,7 @@ protected function collectDeals($url) ); // If there is no results, we don't parse the content because it display some random deals - $noresult = $html->find('h3[class=size--all-l]', 0); + $noresult = $html->find('h3[class*=text--b]', 0); if ($noresult != null && strpos($noresult->plaintext, $this->i8n('no-results')) !== false) { $this->items = []; } else { @@ -104,6 +104,9 @@ protected function collectDeals($url) $item['title'] = $this->getTitle($deal); $item['author'] = $deal->find('span.thread-username', 0)->plaintext; + // Get the JSON Data stored as vue + $jsonDealData = $this->getDealJsonData($deal); + $item['content'] = '
find('div[class=js-vue2]', 0)->getAttribute('data-vue2')); + return $data; + } + /** * Get the source of a Deal if it exists * @return string String of the deal source */ - private function getSource($deal) + private function getSource($jsonData) { - if (($origin = $deal->find('button[class*=text--color-greyShade]', 0)) != null) { - $path = str_replace(' ', '/', trim(Json::decode($origin->{'data-cloak-link'})['path'])); - $text = $origin->find('span[class*=cept-merchant-name]', 0); + if ($jsonData['props']['thread']['merchant'] != null) { + $path = $this->i8n('uri-merchant') . $jsonData['props']['thread']['merchant']['merchantId']; + $text = $jsonData['props']['thread']['merchant']['merchantName']; return '
' . $this->i8n('origin') . ' : ' . $text . '
'; } else { return ''; @@ -542,6 +564,10 @@ private function relativeDateToTimestamp($str) { $date = new DateTime(); + // The minimal amount of time substracted is a minute : the seconds in the resulting date would be related to the execution time of the script. + // This make no sense, so we set the seconds manually to "00". + $date->setTime($date->format('H'), $date->format('i'), 0); + // In case of update date, replace it by the regular relative date first word $str = str_replace($this->i8n('relative-date-alt-prefixes'), $this->i8n('local-time-relative')[0], $str); @@ -559,6 +585,8 @@ private function relativeDateToTimestamp($str) '' ]; $date->modify(str_replace($search, $replace, $str)); + + return $date->getTimestamp(); } diff --git a/bridges/RedditBridge.php b/bridges/RedditBridge.php index f761afaa378..618463a642d 100644 --- a/bridges/RedditBridge.php +++ b/bridges/RedditBridge.php @@ -1,10 +1,15 @@ cache->get($cacheKey)) { + $forbiddenKey = 'reddit_forbidden'; + if ($this->cache->get($forbiddenKey)) { + throw new HttpException('403 Forbidden', 403); + } + + $rateLimitKey = 'reddit_rate_limit'; + if ($this->cache->get($rateLimitKey)) { throw new HttpException('429 Too Many Requests', 429); } + try { $this->collectDataInternal(); } catch (HttpException $e) { + if ($e->getCode() === 403) { + // 403 Forbidden + // This can possibly mean that reddit has permanently blocked this server's ip address + $this->cache->set($forbiddenKey, true, 60 * 61); + } if ($e->getCode() === 429) { - $this->cache->set($cacheKey, true, 60 * 16); + $this->cache->set($rateLimitKey, true, 60 * 16); } throw $e; } @@ -157,7 +173,7 @@ private function collectDataInternal(): void $item['author'] = $data->author; $item['uid'] = $data->id; $item['timestamp'] = $data->created_utc; - $item['uri'] = $this->encodePermalink($data->permalink); + $item['uri'] = $this->urlEncodePathParts($data->permalink); $item['categories'] = []; @@ -177,13 +193,11 @@ private function collectDataInternal(): void 
if ($post->kind == 't1') { // Comment - $item['content'] - = htmlspecialchars_decode($data->body_html); + $item['content'] = htmlspecialchars_decode($data->body_html); } elseif ($data->is_self) { // Text post - $item['content'] - = htmlspecialchars_decode($data->selftext_html); + $item['content'] = htmlspecialchars_decode($data->selftext_html); } elseif (isset($data->post_hint) && $data->post_hint == 'link') { // Link with preview @@ -199,18 +213,11 @@ private function collectDataInternal(): void $embed = ''; } - $item['content'] = $this->template( - $data->url, - $data->thumbnail, - $data->domain - ) . $embed; - } elseif (isset($data->post_hint) ? $data->post_hint == 'image' : false) { + $item['content'] = $this->createFigureLink($data->url, $data->thumbnail, $data->domain) . $embed; + } elseif (isset($data->post_hint) && $data->post_hint == 'image') { // Single image - $item['content'] = $this->link( - $this->encodePermalink($data->permalink), - '' - ); + $item['content'] = $this->createLink($this->urlEncodePathParts($data->permalink), ''); } elseif ($data->is_gallery ?? false) { // Multiple images @@ -230,32 +237,18 @@ private function collectDataInternal(): void end($data->preview->images[0]->resolutions); $index = key($data->preview->images[0]->resolutions); - $item['content'] = $this->template( - $data->url, - $data->preview->images[0]->resolutions[$index]->url, - 'Video' - ); - } elseif (isset($data->media) ? 
$data->media->type == 'youtube.com' : false) { + $item['content'] = $this->createFigureLink($data->url, $data->preview->images[0]->resolutions[$index]->url, 'Video'); + } elseif (isset($data->media) && $data->media->type == 'youtube.com') { // Youtube link - - $item['content'] = $this->template( - $data->url, - $data->media->oembed->thumbnail_url, - 'YouTube' - ); + $item['content'] = $this->createFigureLink($data->url, $data->media->oembed->thumbnail_url, 'YouTube'); + //$item['content'] = htmlspecialchars_decode($data->media->oembed->html); } elseif (explode('.', $data->domain)[0] == 'self') { // Crossposted text post // TODO (optionally?) Fetch content of the original post. - - $item['content'] = $this->link( - $this->encodePermalink($data->permalink), - 'Crossposted from r/' - . explode('.', $data->domain)[1] - ); + $item['content'] = $this->createLink($this->urlEncodePathParts($data->permalink), 'Crossposted from r/' . explode('.', $data->domain)[1]); } else { // Link WITHOUT preview - - $item['content'] = $this->link($data->url, $data->domain); + $item['content'] = $this->createLink($data->url, $data->domain); } $this->items[] = $item; @@ -263,7 +256,7 @@ private function collectDataInternal(): void } // Sort the order to put the latest posts first, even for mixed subreddits usort($this->items, function ($a, $b) { - return $a['timestamp'] < $b['timestamp']; + return $b['timestamp'] <=> $a['timestamp']; }); } @@ -283,24 +276,19 @@ public function getName() } } - private function encodePermalink($link) + private function urlEncodePathParts($link) { - return self::URI . implode( - '/', - array_map('urlencode', explode('/', $link)) - ); + return self::URI . implode('/', array_map('urlencode', explode('/', $link))); } - private function template($href, $src, $caption) + private function createFigureLink($href, $src, $caption) { - return '
' - . $caption . '
'; + return sprintf('
%s
', $href, $caption, $src); } - private function link($href, $text) + private function createLink($href, $text) { - return '' . $text . ''; + return sprintf('%s', $href, $text); } public function detectParameters($url) diff --git a/bridges/ReporterreBridge.php b/bridges/ReporterreBridge.php index 18378d2480d..78c60d5f599 100644 --- a/bridges/ReporterreBridge.php +++ b/bridges/ReporterreBridge.php @@ -1,31 +1,20 @@ find('div[style=text-align:justify]') as $e) { - $text = $e->outertext; - } - - $html2->clear(); - unset($html2); - - $text = strip_tags($text, '


'); - return $text; - } + const DESCRIPTION = 'Returns the newest articles. See also their official feed https://reporterre.net/spip.php?page=backend-simple'; public function collectData() { - $html = getSimpleHTMLDOM(self::URI . 'spip.php?page=backend'); + //$url = self::URI . 'spip.php?page=backend'; + $url = self::URI . 'spip.php?page=backend-simple'; + $html = getSimpleHTMLDOM($url); $limit = 0; foreach ($html->find('item') as $element) { @@ -34,10 +23,27 @@ public function collectData() $item['title'] = html_entity_decode($element->find('title', 0)->plaintext); $item['timestamp'] = strtotime($element->find('dc:date', 0)->plaintext); $item['uri'] = $element->find('guid', 0)->innertext; - $item['content'] = html_entity_decode($this->extractContent($item['uri'])); + //$item['content'] = html_entity_decode($this->extractContent($item['uri'])); + $item['content'] = htmlspecialchars_decode($element->find('description', 0)->plaintext); $this->items[] = $item; $limit++; } } } + + private function extractContent($url) + { + $html2 = getSimpleHTMLDOM($url); + $html2 = defaultLinkTo($html2, self::URI); + + foreach ($html2->find('div[style=text-align:justify]') as $e) { + $text = $e->outertext; + } + + $html2->clear(); + unset($html2); + + $text = strip_tags($text, '


'); + return $text; + } } diff --git a/bridges/RumbleBridge.php b/bridges/RumbleBridge.php index 08b416bfe22..f6bfca7d193 100644 --- a/bridges/RumbleBridge.php +++ b/bridges/RumbleBridge.php @@ -39,16 +39,19 @@ public function collectData() } $dom = getSimpleHTMLDOM($url); - foreach ($dom->find('li.video-listing-entry') as $video) { - $datetime = $video->find('time', 0)->getAttribute('datetime'); - - $this->items[] = [ + foreach ($dom->find('ol.thumbnail__grid div.thumbnail__grid--item') as $video) { + $item = [ 'title' => $video->find('h3', 0)->plaintext, 'uri' => self::URI . $video->find('a', 0)->href, - 'timestamp' => (new \DateTimeImmutable($datetime))->getTimestamp(), 'author' => $account . '@rumble.com', 'content' => defaultLinkTo($video, self::URI)->innertext, ]; + $time = $video->find('time', 0); + if ($time) { + $publishedAt = new \DateTimeImmutable($time->getAttribute('datetime')); + $item['timestamp'] = $publishedAt->getTimestamp(); + } + $this->items[] = $item; } } diff --git a/bridges/SchweinfurtBuergerinformationenBridge.php b/bridges/SchweinfurtBuergerinformationenBridge.php index 349a9d8a84e..d1f5db158f5 100644 --- a/bridges/SchweinfurtBuergerinformationenBridge.php +++ b/bridges/SchweinfurtBuergerinformationenBridge.php @@ -107,9 +107,9 @@ private function generateItemFromArticle($id) ]; // Let's see if there are images in the content, and if yes, attach - // them as enclosures, but not images which are used for linking to an external site. + // them as enclosures, but not images which are used for linking to an external site and data URIs. 
foreach ($images as $image) { - if ($image->class != 'imgextlink') { + if ($image->class != 'imgextlink' && parse_url($image->src, PHP_URL_SCHEME) != 'data') { $item['enclosures'][] = $image->src; } } diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php index b823b55c23a..f6a2ea16142 100644 --- a/bridges/SensCritiqueBridge.php +++ b/bridges/SensCritiqueBridge.php @@ -57,7 +57,7 @@ public function collectData() } $html = getSimpleHTMLDOM($uri); // This selector name looks like it's automatically generated - $list = $html->find('div.Universes__WrapperProducts-sc-1qa2w66-0.eVdcAv', 0); + $list = $html->find('div[data-testid="row"]', 0); $this->extractDataFromList($list); } @@ -69,11 +69,19 @@ private function extractDataFromList($list) if ($list === null) { returnClientError('Cannot extract data from list'); } + foreach ($list->find('div[data-testid="product-list-item"]') as $movie) { + $synopsis = $movie->find('p[data-testid="synopsis"]', 0); + $item = []; $item['title'] = $movie->find('h2 a', 0)->plaintext; - // todo: fix image - $item['content'] = $movie->innertext; + $item['content'] = sprintf( + '

%s

%s

%s', + $movie->find('span[data-testid="poster-img-wrapper"]', 0)->{'data-srcname'}, + $movie->find('p[data-testid="other-infos"]', 0)->innertext, + $movie->find('p[data-testid="creators"]', 0)->innertext, + $synopsis ? sprintf('

%s

', $synopsis->innertext) : '' + ); $item['id'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/'); $item['uri'] = $this->getURI() . ltrim($movie->find('a', 0)->href, '/'); $this->items[] = $item; diff --git a/bridges/SitemapBridge.php b/bridges/SitemapBridge.php index bdf662eedd7..bbbb3e16616 100644 --- a/bridges/SitemapBridge.php +++ b/bridges/SitemapBridge.php @@ -131,7 +131,7 @@ protected function sitemapXmlToList($sitemap, $url_pattern = '', $limit = 0, $ke foreach ($sitemap->find('sitemap') as $nested_sitemap) { $url = $nested_sitemap->find('loc'); if (!empty($url)) { - $url = $url[0]->plaintext; + $url = trim($url[0]->plaintext); if (str_ends_with(strtolower($url), '.xml')) { $nested_sitemap_xml = $this->getSitemapXml($url, true); $nested_sitemap_links = $this->sitemapXmlToList($nested_sitemap_xml, $url_pattern, null, true); @@ -148,8 +148,8 @@ protected function sitemapXmlToList($sitemap, $url_pattern = '', $limit = 0, $ke $url = $item->find('loc'); $lastmod = $item->find('lastmod'); if (!empty($url) && !empty($lastmod)) { - $url = $url[0]->plaintext; - $lastmod = $lastmod[0]->plaintext; + $url = trim($url[0]->plaintext); + $lastmod = trim($lastmod[0]->plaintext); $timestamp = strtotime($lastmod); if (empty($url_pattern) || preg_match('/' . $url_pattern . '/', $url) === 1) { $links[$url] = $timestamp; diff --git a/bridges/SongkickBridge.php b/bridges/SongkickBridge.php new file mode 100644 index 00000000000..bfe29865300 --- /dev/null +++ b/bridges/SongkickBridge.php @@ -0,0 +1,92 @@ + [ + 'name' => 'Artist ID', + 'type' => 'text', + 'required' => true, + 'exampleValue' => '2506696-imagine-dragons', + ] + ] ]; + + const ARTIST_URI = 'https://www.songkick.com/artists/%s/'; + const CALENDAR_URI = self::ARTIST_URI . 'calendar'; + + private $name = ''; + + public function getURI() + { + return sprintf(self::ARTIST_URI, $this->getInput('artistid')); + } + + public function getName() + { + if (!empty($this->name)) { + return $this->name . ' - ' . 
parent::getName(); + } + return parent::getName(); + } + + public function getIcon() + { + return 'https://assets.sk-static.com/images/nw/furniture/songkick-logo.svg'; + } + + public function collectData() + { + $url = sprintf(self::CALENDAR_URI, $this->getInput('artistid')); + + $dom = getSimpleHTMLDOM($url); + + $jsonscript = $dom->find('div.microformat > script', 0); + + if (empty($this->name) && $jsonscript) { + $this->name = json_decode($jsonscript->innertext)[0]->name; + } + + $dom = $dom->find('div.container > div.row > div.primary', 0); + + if (!$dom) { + throw new Exception(sprintf('Unable to find css selector on `%s`', $url)); + } + $dom = defaultLinkTo($dom, $this->getURI()); + + foreach ($dom->find('div[@id="calendar-summary"] > ol > li') as $article) { + $detailsobj = json_decode($article->find('div.microformat > script', 0)->innertext)[0]; + + $a = $article->find('a', 0); + + $details = $a->find('div.event-details', 0); + $title = $details->find('.secondary-detail', 0)->plaintext; + $city = $details->find('.primary-detail', 0)->plaintext; + $event = $detailsobj->location->name; + + $content = 'City: ' . $city . '
Event: ' . $event . '
Date: ' . $article->title; + + $categories = []; + if ($details->hasClass('concert')) { + $categories[] = 'concert'; + } + if ($details->hasClass('festival')) { + $categories[] = 'festival'; + } + if (!is_null($details->find('.outdoor', 0))) { + $categories[] = 'outdoor'; + } + + $this->items[] = [ + 'title' => $title, + 'uri' => $a->href, + 'content' => $content, + 'categories' => $categories, + ]; + } + } +} diff --git a/bridges/ThreadsBridge.php b/bridges/ThreadsBridge.php new file mode 100644 index 00000000000..b7e5cd1abff --- /dev/null +++ b/bridges/ThreadsBridge.php @@ -0,0 +1,120 @@ + [ + 'u' => [ + 'name' => 'username', + 'required' => true, + 'exampleValue' => 'zuck', + 'title' => 'Insert a user name' + ], + 'limit' => [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Specify number of posts to fetch', + 'defaultValue' => 5 + ] + ] + ]; + + protected $feedName = self::NAME; + public function getName() + { + return $this->feedName; + } + + public function detectParameters($url) + { + // By username. Capture groups: 1 = scheme, 2 = "www.", 3 = optional "@", 4 = the username itself. + $regex = '/^(https?:\/\/)?(www\.)?threads\.net\/(@)?([^\/?\n]+)/'; + if (preg_match($regex, $url, $matches) > 0) { + $params['context'] = 'By username'; + $params['u'] = urldecode($matches[4]); + return $params; + } + return null; + } + + public function getURI() + { + return self::URI . '@' . $this->getInput('u'); + } + + // https://stackoverflow.com/a/3975706/421140 + // Found this in FlaschenpostBridge, modified to return an array and take an object. 
+ private function recursiveFind($haystack, $needle) + { + $found = []; + $iterator = new \RecursiveArrayIterator($haystack); + $recursive = new \RecursiveIteratorIterator( + $iterator, + \RecursiveIteratorIterator::SELF_FIRST + ); + foreach ($recursive as $key => $value) { + if ($key === $needle) { + $found[] = $value; + } + } + return $found; + } + + public function collectData() + { + $html = getSimpleHTMLDOMCached($this->getURI(), static::CACHE_TIMEOUT); + Debug::log(sprintf('Fetched: %s', $this->getURI())); + $jsonBlobs = $html->find('script[type="application/json"]'); + Debug::log(sprintf('%d JSON blobs found.', count($jsonBlobs))); + $gatheredCodes = []; + $limit = $this->getInput('limit'); + foreach ($jsonBlobs as $jsonBlob) { + // The structure of the JSON document is likely to change, but we're looking for a "code" inside a "post" + foreach ($this->recursiveFind($this->recursiveFind(json_decode($jsonBlob->innertext), 'post'), 'code') as $candidateCode) { + // code should be like CzZk4-USq1O or Cy3m1VnRiwP or Cywjyrdv9T6 or CzZk4-USq1O + if (grapheme_strlen($candidateCode) == 11 and !in_array($candidateCode, $gatheredCodes)) { + $gatheredCodes[] = $candidateCode; + if (count($gatheredCodes) >= $limit) { + break 2; + } + } + } + } + Debug::log(sprintf('Candidate codes found in JSON in script tags: %s', print_r($gatheredCodes, true))); + + $this->feedName = html_entity_decode($html->find('meta[property=og:title]', 0)->content); + // todo: meta[property=og:description] could populate the feed description + + foreach ($gatheredCodes as $postCode) { + $item = []; + // post URL is like: https://www.threads.net/@zuck/post/Czrr520PZfh + $item['uri'] = $this->getURI() . '/post/' . $postCode; + $articleHtml = getSimpleHTMLDOMCached($item['uri'], 15778800); // cache time: six months + + // Relying on meta tags ought to be more reliable. 
+ if ($articleHtml->find('meta[property=og:type]', 0)->content != 'article') { + continue; + } + $item['title'] = $articleHtml->find('meta[property=og:description]', 0)->content; + $item['content'] = $articleHtml->find('meta[property=og:description]', 0)->content; + $item['author'] = html_entity_decode($articleHtml->find('meta[property=og:title]', 0)->content); + + $imageUrl = $articleHtml->find('meta[property=og:image]', 0); + if ($imageUrl) { + $item['enclosures'][] = html_entity_decode($imageUrl->content); + } + + // todo: parse hashtags out of content for $item['categories'] + // todo: try to scrape out a timestamp for $item['timestamp'], it's not in the meta tags + + $this->items[] = $item; + } + } +} diff --git a/bridges/TikTokBridge.php b/bridges/TikTokBridge.php index 73a18b0468c..6590df66808 100644 --- a/bridges/TikTokBridge.php +++ b/bridges/TikTokBridge.php @@ -8,12 +8,12 @@ class TikTokBridge extends BridgeAbstract const MAINTAINER = 'VerifiedJoseph'; const PARAMETERS = [ 'By user' => [ - 'username' => [ - 'name' => 'Username', - 'type' => 'text', - 'required' => true, - 'exampleValue' => '@tiktok', - ] + 'username' => [ + 'name' => 'Username', + 'type' => 'text', + 'required' => true, + 'exampleValue' => '@tiktok', + ] ]]; const TEST_DETECT_PARAMETERS = [ @@ -24,53 +24,33 @@ class TikTokBridge extends BridgeAbstract const CACHE_TIMEOUT = 900; // 15 minutes - private $feedName = ''; - public function collectData() { - $html = getSimpleHTMLDOM($this->getURI()); + $html = getSimpleHTMLDOMCached('https://www.tiktok.com/embed/' . $this->processUsername()); - $title = $html->find('h1', 0)->plaintext ?? self::NAME; - $this->feedName = htmlspecialchars_decode($title); + $author = $html->find('span[data-e2e=creator-profile-userInfo-TUXText]', 0)->plaintext ?? self::NAME; - $var = $html->find('script[id=SIGI_STATE]', 0); - if (!$var) { - throw new \Exception('Unable to find tiktok user data for ' . 
$this->processUsername()); - } - $SIGI_STATE_RAW = $var->innertext; - $SIGI_STATE = Json::decode($SIGI_STATE_RAW, false); + $videos = $html->find('div[data-e2e=common-videoList-VideoContainer]'); - if (!isset($SIGI_STATE->ItemModule)) { - return; - } - - foreach ($SIGI_STATE->ItemModule as $key => $value) { + foreach ($videos as $video) { $item = []; - $link = 'https://www.tiktok.com/@' . $value->author . '/video/' . $value->id; - $image = $value->video->dynamicCover; - if (empty($image)) { - $image = $value->video->cover; - } - $views = $value->stats->playCount; - $hastags = []; - foreach ($value->textExtra as $tag) { - $hastags[] = $tag->hashtagName; - } - $hastags_str = ''; - foreach ($hastags as $tag) { - $hastags_str .= '
#' . $tag . ' '; - } + // Handle link "untracking": rebuild the URL from scheme, host and path only; parse_url() keeps the leading "/" on the path, so strip it before joining. + $linkParts = parse_url($video->find('a', 0)->href); + $link = $linkParts['scheme'] . '://' . $linkParts['host'] . '/' . ltrim($linkParts['path'] ?? '', '/'); + + $image = $video->find('video', 0)->poster; + $views = $video->find('div[data-e2e=common-Video-Count]', 0)->plaintext; + + $enclosures = [$image]; $item['uri'] = $link; - $item['title'] = $value->desc; - $item['timestamp'] = $value->createTime; - $item['author'] = '@' . $value->author; - $item['enclosures'][] = $image; - $item['categories'] = $hastags; + $item['title'] = 'Video'; + $item['author'] = '@' . $author; + $item['enclosures'] = $enclosures; $item['content'] = << - 

{$views} views


Hashtags: {$hastags_str} +

{$views} views


EOD; $this->items[] = $item; @@ -91,7 +71,7 @@ public function getName() { switch ($this->queriedContext) { case 'By user': - return $this->feedName . ' (' . $this->processUsername() . ') - TikTok'; + return $this->processUsername() . ' - TikTok'; default: return parent::getName(); } diff --git a/bridges/TrelloBridge.php b/bridges/TrelloBridge.php index a1b5cfb8567..cab2bde2880 100644 --- a/bridges/TrelloBridge.php +++ b/bridges/TrelloBridge.php @@ -648,7 +648,7 @@ public function collectData() $action->type ]; if (isset($action->data->card)) { - $item['categories'][] = $action->data->card->name; + $item['categories'][] = $action->data->card->name ?? $action->data->card->id; $item['uri'] = 'https://trello.com/c/' . $action->data->card->shortLink . '#action-' diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index 0d47692da33..980b4154877 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -29,11 +29,12 @@ class VkBridge extends BridgeAbstract 'https://vk.com/groupname/anythingelse' => ['u' => 'groupname'], 'https://vk.com/groupname?w=somethingelse' => ['u' => 'groupname'], 'https://vk.com/with_underscore' => ['u' => 'with_underscore'], + 'https://vk.com/vk.cats' => ['u' => 'vk.cats'], ]; protected $pageName; protected $tz = 0; - private $urlRegex = '/vk\.com\/([\w]+)/'; + private $urlRegex = '/vk\.com\/([\w.]+)/'; public function getURI() { @@ -314,6 +315,13 @@ public function collectData() $copy_quote->outertext = "
Reposted ($copy_quote_author):
$copy_quote_content"; } + foreach ($post->find('.PrimaryAttachment .PhotoPrimaryAttachment') as $pa) { + $img = $pa->find('.PhotoPrimaryAttachment__imageElement', 0); + if (is_object($img)) { + $pa->outertext = $img->outertext; + } + } + foreach ($post->find('.SecondaryAttachment') as $sa) { $sa_href = $sa->getAttribute('href'); if (!$sa_href) { @@ -515,7 +523,7 @@ private function getContents() } if (!preg_match('#^https?://vk.com/#', $uri)) { - returnServerError('Unexpected redirect location'); + returnServerError('Unexpected redirect location: ' . $uri); } $redirects++; diff --git a/bridges/WorldbankBridge.php b/bridges/WorldbankBridge.php new file mode 100644 index 00000000000..9b40e86e5da --- /dev/null +++ b/bridges/WorldbankBridge.php @@ -0,0 +1,52 @@ + [ + 'name' => 'Language', + 'type' => 'list', + 'defaultValue' => 'English', + 'values' => [ + 'English' => 'English', + 'French' => 'French', + ] + ], + 'limit' => [ + 'name' => 'limit (max 100)', + 'type' => 'number', + 'defaultValue' => 5, + 'required' => true, + ] + ] + ]; + + public function collectData() + { + $apiUrl = 'https://search.worldbank.org/api/v2/news?format=json&rows=' + . min(100, $this->getInput('limit')) + . '&lang_exact=' . 
$this->getInput('lang'); + + $jsonData = json_decode(getContents($apiUrl)); + + // Remove unnecessary data from the original object + if (isset($jsonData->documents->facets)) { + unset($jsonData->documents->facets); + } + + foreach ($jsonData->documents as $element) { + $this->items[] = [ + 'uid' => $element->id, + 'timestamp' => $element->lnchdt, + 'title' => $element->title->{'cdata!'}, + 'uri' => $element->url, + 'content' => $element->descr->{'cdata!'}, + ]; + } + } +} diff --git a/bridges/YGGTorrentBridge.php b/bridges/YGGTorrentBridge.php index f0c31f11dd5..018bcfc4f02 100644 --- a/bridges/YGGTorrentBridge.php +++ b/bridges/YGGTorrentBridge.php @@ -7,7 +7,7 @@ class YGGTorrentBridge extends BridgeAbstract { const MAINTAINER = 'teromene'; const NAME = 'Yggtorrent Bridge'; - const URI = 'https://www5.yggtorrent.fi'; + const URI = 'https://www3.yggtorrent.qa'; const DESCRIPTION = 'Returns torrent search from Yggtorrent'; const PARAMETERS = [ diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php index 993f8c90663..6a29e387158 100644 --- a/bridges/YoutubeBridge.php +++ b/bridges/YoutubeBridge.php @@ -164,7 +164,11 @@ private function collectDataInternal() $jsonData = $this->extractJsonFromHtml($html); // TODO: this method returns only first 100 video items // if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element - $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0]; + $jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0] ?? null; + if (!$jsonData) { + // playlist probably doesn't exist + throw new \Exception('Unable to find playlist: ' . 
$url_listing); + } $jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer; $jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents; $item_count = count($jsonData); diff --git a/caches/FileCache.php b/caches/FileCache.php index 2f4b3ad5ec6..7a0eb81d95e 100644 --- a/caches/FileCache.php +++ b/caches/FileCache.php @@ -49,11 +49,12 @@ public function set($key, $value, int $ttl = null): void { $item = [ 'key' => $key, - 'value' => $value, 'expiration' => $ttl === null ? 0 : time() + $ttl, + 'value' => $value, ]; $cacheFile = $this->createCacheFile($key); $bytes = file_put_contents($cacheFile, serialize($item), LOCK_EX); + // todo: Consider tightening the permissions of the created file. It usually allows others to read, depending on umask if ($bytes === false) { // Consider just logging the error here throw new \Exception(sprintf('Failed to write to: %s', $cacheFile)); diff --git a/config.default.ini.php b/config.default.ini.php index 52786aefbe4..201b1414fcd 100644 --- a/config.default.ini.php +++ b/config.default.ini.php @@ -47,7 +47,8 @@ enable_maintenance_mode = false [http] -timeout = 60 +; Operation timeout in seconds +timeout = 30 useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0" ; Max http response size in MB diff --git a/config/php.ini b/config/php.ini index 115f1c89f37..383afffb0b6 100644 --- a/config/php.ini +++ b/config/php.ini @@ -1,4 +1,4 @@ ; Inspired by https://github.com/docker-library/php/blob/master/8.2/bookworm/fpm/Dockerfile -; https://github.com/docker-library/php/issues/878#issuecomment-938595965' +; https://github.com/docker-library/php/issues/878#issuecomment-938595965 fastcgi.logging = Off diff --git a/docs/01_General/06_Public_Hosts.md b/docs/01_General/06_Public_Hosts.md index 9aa292a5744..4aa905dad49 100644 --- a/docs/01_General/06_Public_Hosts.md +++ b/docs/01_General/06_Public_Hosts.md @@ -20,6 +20,10 @@ | 
![](https://iplookup.flagfox.net/images/h16/NL.png) | https://feed.eugenemolotov.ru | ![](https://img.shields.io/website/https/feed.eugenemolotov.ru.svg) | [@em92](https://github.com/em92) | Hosted in Amsterdam, Netherlands | | ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rss-bridge.mediani.de | ![](https://img.shields.io/website/https/rss-bridge.mediani.de.svg) | [@sokai](https://github.com/sokai) | Hosted with Netcup, Germany | | ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.foxhaven.cyou| ![](https://img.shields.io/badge/website-up-brightgreen) | [@Aysilu](https://foxhaven.cyou) | Hosted with Timeweb (Maintained in Poland) | +| ![](https://iplookup.flagfox.net/images/h16/PL.png) | https://rss.m3wz.su| ![](https://img.shields.io/badge/website-up-brightgreen) | [@m3oweezed](https://m3wz.su/en/about) | Poland, Hosted with Timeweb Cloud | +| ![](https://iplookup.flagfox.net/images/h16/DE.png) | https://rb.ash.fail | ![](https://img.shields.io/website/https/rb.ash.fail.svg) | [@ash](https://ash.fail/contact.html) | Hosted with Hostaris, Germany | +| ![](https://iplookup.flagfox.net/images/h16/UA.png) | https://rss.noleron.com | ![](https://img.shields.io/website/https/rss.noleron.com) | [@ihor](https://noleron.com/about) | Hosted with Hosting Ukraine, Ukraine | + ## Inactive instances diff --git a/docs/10_Bridge_Specific/FacebookBridge.md b/docs/10_Bridge_Specific/FacebookBridge.md index c2a1fd0eb03..f24f8aa86a6 100644 --- a/docs/10_Bridge_Specific/FacebookBridge.md +++ b/docs/10_Bridge_Specific/FacebookBridge.md @@ -1,18 +1,18 @@ FacebookBridge =============== -Resume of the actual state of this bridge: +State of this bridge: +- Facebook Groups (and probably other sections too) do not work at all - No maintainer -- Need Cookies consent -- New design architecture deployed +- Needs cookie consent support for public pages +- Needs login support (see [this example](https://github.com/RSS-Bridge/rss-bridge/issues/1891) for 
Instagram) for private groups -Due [facebook-redesing](https://engineering.fb.com/2020/05/08/web/facebook-redesign/) +Due to the 2020 [Facebook redesign](https://engineering.fb.com/2020/05/08/web/facebook-redesign/) and the requirement to [accept cookies](https://www.facebook.com/business/help/348535683460989) -users start getting [Problems with Facebook on public RSS-Bridge instances](https://github.com/RSS-Bridge/rss-bridge/issues/2047 ) +users are getting [problems with Facebook on public RSS-Bridge instances](https://github.com/RSS-Bridge/rss-bridge/issues/2047). +Relevant Info +-------------- -[Facebook Cookies](https://www.facebook.com/policy/cookies/) - -"Datr" is a unique identifier for your browser and it has a lifespan of two years. - -"c_user" and "xs" cookies to verify the account and have a lifespan of 365 days - +- [Facebook Cookies](https://www.facebook.com/policy/cookies/) +- "Datr" is a unique identifier for your browser and it has a lifespan of two years. +- "c_user" and "xs" cookies to verify the account and have a lifespan of 365 days diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index 158f288e5a6..1fabef2e7f5 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -154,11 +154,13 @@ public function stringify() $entry->appendChild($itunesProperty); $itunesProperty->appendChild($document->createTextNode($itunesValue)); } - $itunesEnclosure = $document->createElement('enclosure'); - $entry->appendChild($itunesEnclosure); - $itunesEnclosure->setAttribute('url', $itemArray['enclosure']['url']); - $itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']); - $itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']); + if (isset($itemArray['enclosure'])) { + $itunesEnclosure = $document->createElement('enclosure'); + $entry->appendChild($itunesEnclosure); + $itunesEnclosure->setAttribute('url', $itemArray['enclosure']['url']); + $itunesEnclosure->setAttribute('length', 
$itemArray['enclosure']['length']); + $itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']); + } } elseif (!empty($entryUri)) { $entryLinkAlternate = $document->createElement('link'); $entry->appendChild($entryLinkAlternate); diff --git a/formats/MrssFormat.php b/formats/MrssFormat.php index b626033ab17..87c630efae2 100644 --- a/formats/MrssFormat.php +++ b/formats/MrssFormat.php @@ -150,12 +150,17 @@ public function stringify() $entry->appendChild($itunesProperty); $itunesProperty->appendChild($document->createTextNode($itunesValue)); } - $itunesEnclosure = $document->createElement('enclosure'); - $entry->appendChild($itunesEnclosure); - $itunesEnclosure->setAttribute('url', $itemArray['enclosure']['url']); - $itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']); - $itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']); - } elseif (!empty($itemUri)) { + + if (isset($itemArray['enclosure'])) { + $itunesEnclosure = $document->createElement('enclosure'); + $entry->appendChild($itunesEnclosure); + $itunesEnclosure->setAttribute('url', $itemArray['enclosure']['url']); + $itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']); + $itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']); + } + } + + if (!empty($itemUri)) { $entryLink = $document->createElement('link'); $entry->appendChild($entryLink); $entryLink->appendChild($document->createTextNode($itemUri)); diff --git a/index.php b/index.php index 123f6ecdb88..c2c546a184e 100644 --- a/index.php +++ b/index.php @@ -6,7 +6,12 @@ require_once __DIR__ . '/lib/bootstrap.php'; -Configuration::verifyInstallation(); +$errors = Configuration::checkInstallation(); +if ($errors) { + print '

' . implode("\n", $errors) . '
'; + exit(1); +} + $customConfig = []; if (file_exists(__DIR__ . '/config.ini.php')) { $customConfig = parse_ini_file(__DIR__ . '/config.ini.php', true, INI_SCANNER_TYPED); diff --git a/lib/BridgeAbstract.php b/lib/BridgeAbstract.php index ea6c131a181..8001ba4fba4 100644 --- a/lib/BridgeAbstract.php +++ b/lib/BridgeAbstract.php @@ -171,8 +171,8 @@ private function setInputWithContext(array $input, $queriedContext) { // Import and assign all inputs to their context foreach ($input as $name => $value) { - foreach (static::PARAMETERS as $context => $set) { - if (array_key_exists($name, static::PARAMETERS[$context])) { + foreach ($this->getParameters() as $context => $set) { + if (array_key_exists($name, $this->getParameters()[$context])) { $this->inputs[$context][$name]['value'] = $value; } } @@ -180,16 +180,16 @@ private function setInputWithContext(array $input, $queriedContext) // Apply default values to missing data $contexts = [$queriedContext]; - if (array_key_exists('global', static::PARAMETERS)) { + if (array_key_exists('global', $this->getParameters())) { $contexts[] = 'global'; } foreach ($contexts as $context) { - if (!isset(static::PARAMETERS[$context])) { + if (!isset($this->getParameters()[$context])) { // unknown context provided by client, throw exception here? or continue? 
} - foreach (static::PARAMETERS[$context] as $name => $properties) { + foreach ($this->getParameters()[$context] as $name => $properties) { if (isset($this->inputs[$context][$name]['value'])) { continue; } @@ -221,8 +221,8 @@ private function setInputWithContext(array $input, $queriedContext) } // Copy global parameter values to the guessed context - if (array_key_exists('global', static::PARAMETERS)) { - foreach (static::PARAMETERS['global'] as $name => $properties) { + if (array_key_exists('global', $this->getParameters())) { + foreach ($this->getParameters()['global'] as $name => $properties) { if (isset($input[$name])) { $value = $input[$name]; } else { @@ -263,8 +263,8 @@ public function getKey($input) if (!isset($this->inputs[$this->queriedContext][$input]['value'])) { return null; } - if (array_key_exists('global', static::PARAMETERS)) { - if (array_key_exists($input, static::PARAMETERS['global'])) { + if (array_key_exists('global', $this->getParameters())) { + if (array_key_exists($input, $this->getParameters()['global'])) { $context = 'global'; } } @@ -273,7 +273,7 @@ public function getKey($input) } $needle = $this->inputs[$this->queriedContext][$input]['value']; - foreach (static::PARAMETERS[$context][$input]['values'] as $first_level_key => $first_level_value) { + foreach ($this->getParameters()[$context][$input]['values'] as $first_level_key => $first_level_value) { if (!is_array($first_level_value) && $needle === (string)$first_level_value) { return $first_level_key; } elseif (is_array($first_level_value)) { @@ -290,7 +290,7 @@ public function detectParameters($url) { $regex = '/^(https?:\/\/)?(www\.)?(.+?)(\/)?$/'; if ( - empty(static::PARAMETERS) + empty($this->getParameters()) && preg_match($regex, $url, $urlMatches) > 0 && preg_match($regex, static::URI, $bridgeUriMatches) > 0 && $urlMatches[3] === $bridgeUriMatches[3] diff --git a/lib/Configuration.php b/lib/Configuration.php index d699178fac0..ac7d29bfbdc 100644 --- a/lib/Configuration.php +++ 
b/lib/Configuration.php @@ -15,15 +15,7 @@ private function __construct() { } - /** - * Verifies the current installation of RSS-Bridge and PHP. - * - * Returns an error message and aborts execution if the installation does - * not satisfy the requirements of RSS-Bridge. - * - * @return void - */ - public static function verifyInstallation() + public static function checkInstallation(): array { $errors = []; @@ -57,10 +49,7 @@ public static function verifyInstallation() if (!extension_loaded('json')) { $errors[] = 'json extension not loaded'; } - - if ($errors) { - throw new \Exception(sprintf('Configuration error: %s', implode(', ', $errors))); - } + return $errors; } public static function loadConfiguration(array $customConfig = [], array $env = []) diff --git a/lib/contents.php b/lib/contents.php index a3830ca713f..8676a2a8df8 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -63,9 +63,15 @@ function getContents( if ($cachedResponse) { $cachedLastModified = $cachedResponse->getHeader('last-modified'); if ($cachedLastModified) { - $cachedLastModified = new \DateTimeImmutable($cachedLastModified); - $config['if_not_modified_since'] = $cachedLastModified->getTimestamp(); + try { + // Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime + $cachedLastModified = new \DateTimeImmutable((is_numeric($cachedLastModified) ? '@' : '') . 
$cachedLastModified); + $config['if_not_modified_since'] = $cachedLastModified->getTimestamp(); + } catch (Exception $dateTimeParseFailue) { + // Ignore invalid 'Last-Modified' HTTP header value + } } + // todo: to be a nice citizen we should also check for Etag } $response = $httpClient->request($url, $config); @@ -95,19 +101,8 @@ function getContents( $response = $response->withBody($cachedResponse->getBody()); break; default: - $exceptionMessage = sprintf( - '%s resulted in %s %s %s', - $url, - $response->getCode(), - $response->getStatusLine(), - // If debug, include a part of the response body in the exception message - Debug::isEnabled() ? mb_substr($response->getBody(), 0, 500) : '', - ); - - if (CloudFlareException::isCloudFlareResponse($response)) { - throw new CloudFlareException($exceptionMessage, $response->getCode()); - } - throw new HttpException(trim($exceptionMessage), $response->getCode()); + $e = HttpException::fromResponse($response, $url); + throw $e; } if ($returnFull === true) { // todo: return the actual response object diff --git a/lib/html.php b/lib/html.php index 505221fc1aa..d65d1b20440 100644 --- a/lib/html.php +++ b/lib/html.php @@ -244,20 +244,41 @@ function convertLazyLoading($dom) $dom = str_get_html($dom); } + // Retrieve image URL from srcset attribute + // https://developer.mozilla.org/en-US/docs/Web/API/HTMLImageElement/srcset + // Example: convert "header640.png 640w, header960.png 960w, header1024.png 1024w" to "header1024.png" + $srcset_to_src = function ($srcset) { + $sources = explode(',', $srcset); + $last_entry = trim($sources[array_key_last($sources)]); + $url = explode(' ', $last_entry)[0]; + return $url; + }; + // Process standalone images, embeds and picture sources foreach ($dom->find('img, iframe, source') as $img) { if (!empty($img->getAttribute('data-src'))) { $img->src = $img->getAttribute('data-src'); } elseif (!empty($img->getAttribute('data-srcset'))) { - $img->src = explode(' ', 
$img->getAttribute('data-srcset'))[0]; + $img->src = $srcset_to_src($img->getAttribute('data-srcset')); } elseif (!empty($img->getAttribute('data-lazy-src'))) { $img->src = $img->getAttribute('data-lazy-src'); + } elseif (!empty($img->getAttribute('data-orig-file'))) { + $img->src = $img->getAttribute('data-orig-file'); } elseif (!empty($img->getAttribute('srcset'))) { - $img->src = explode(' ', $img->getAttribute('srcset'))[0]; + $img->src = $srcset_to_src($img->getAttribute('srcset')); } else { continue; // Proceed to next element without removing attributes } - foreach (['loading', 'decoding', 'srcset', 'data-src', 'data-srcset'] as $attr) { + + // Remove data attributes, no longer necessary + foreach ($img->getAllAttributes() as $attr => $val) { + if (str_starts_with($attr, 'data-')) { + $img->removeAttribute($attr); + } + } + + // Remove other attributes that may be processed by the client + foreach (['loading', 'decoding', 'srcset'] as $attr) { if ($img->hasAttribute($attr)) { $img->removeAttribute($attr); } @@ -274,7 +295,7 @@ function convertLazyLoading($dom) $img->tag = 'img'; } // Adding/removing node would change its position inside the parent element, - // So instead we rewrite the node in-place though the outertext attribute + // So instead we rewrite the node in-place through the outertext attribute $picture->outertext = $img->outertext; } } diff --git a/lib/http.php b/lib/http.php index c5c57d05c6b..bfa6b6bff7f 100644 --- a/lib/http.php +++ b/lib/http.php @@ -2,6 +2,29 @@ class HttpException extends \Exception { + public ?Response $response; + + public function __construct(string $message = '', int $statusCode = 0, ?Response $response = null) + { + parent::__construct($message, $statusCode); + $this->response = $response ?? 
new Response('', 0); + } + + public static function fromResponse(Response $response, string $url): HttpException + { + $message = sprintf( + '%s resulted in %s %s %s', + $url, + $response->getCode(), + $response->getStatusLine(), + // If debug, include a part of the response body in the exception message + Debug::isEnabled() ? mb_substr($response->getBody(), 0, 500) : '', + ); + if (CloudFlareException::isCloudFlareResponse($response)) { + return new CloudFlareException($message, $response->getCode(), $response); + } + return new HttpException(trim($message), $response->getCode(), $response); + } } final class CloudFlareException extends HttpException diff --git a/templates/exception.html.php b/templates/exception.html.php index dac0ad26a7a..e1dd97c112e 100644 --- a/templates/exception.html.php +++ b/templates/exception.html.php @@ -16,6 +16,13 @@

+ getCode() === 400): ?> +

400 Bad Request

+

+ This is usually caused by an incorrectly constructed http request. +

+ + getCode() === 404): ?>

404 Page Not Found

@@ -40,6 +47,22 @@

+ getCode() === 0): ?> +

+ See + + https://curl.haxx.se/libcurl/c/libcurl-errors.html + + for description of the curl error code. +

+ +

+ + https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/getCode()) ?> + +

+ + getCode() === 10): ?>

The rss feed is completely empty

diff --git a/tests/FeedItemTest.php b/tests/FeedItemTest.php index 0e7af222e06..3390e7b3534 100644 --- a/tests/FeedItemTest.php +++ b/tests/FeedItemTest.php @@ -41,7 +41,8 @@ public function testTimestamp() $this->assertSame(64800, $item->getTimestamp()); $item->setTimestamp('1st jan last year'); - // This will fail at 2024-01-01 hehe - $this->assertSame(1640995200, $item->getTimestamp()); + + // This will fail at 2025-01-01 hehe + $this->assertSame(1672531200, $item->getTimestamp()); } }