Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CSS Selector Feed Expander #3732

Merged
merged 2 commits into from
Oct 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions bridges/CssSelectorBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,12 @@ class CssSelectorBridge extends BridgeAbstract
]
];

- private $feedName = '';
+ protected $feedName = '';
+ protected $homepageUrl = '';

public function getURI()
{
- $url = $this->getInput('home_page');
+ $url = $this->homepageUrl;
if (empty($url)) {
$url = parent::getURI();
}
Expand All @@ -81,7 +82,7 @@ public function getName()

public function collectData()
{
- $url = $this->getInput('home_page');
+ $this->homepageUrl = $this->getInput('home_page');
$url_selector = $this->getInput('url_selector');
$url_pattern = $this->getInput('url_pattern');
$content_selector = $this->getInput('content_selector');
Expand All @@ -90,7 +91,7 @@ public function collectData()
$discard_thumbnail = $this->getInput('discard_thumbnail');
$limit = $this->getInput('limit') ?? 10;

- $html = defaultLinkTo(getSimpleHTMLDOM($url), $url);
+ $html = defaultLinkTo(getSimpleHTMLDOM($this->homepageUrl), $this->homepageUrl);
$this->feedName = $this->titleCleanup($this->getPageTitle($html), $title_cleanup);
$items = $this->htmlFindEntries($html, $url_selector, $url_pattern, $limit, $content_cleanup);

Expand Down
98 changes: 98 additions & 0 deletions bridges/CssSelectorFeedExpanderBridge.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
<?php

// Declare the helper only once; the guard presumably protects against a
// "cannot redeclare class" error if this file is loaded more than once.
if (class_exists('CssSelectorFeedExpanderBridgeInternal') === false) {
    /**
     * Minimal FeedExpander subclass used internally by CssSelectorFeedExpanderBridge.
     *
     * CssSelectorFeedExpanderBridge already extends CssSelectorBridge, so it
     * instantiates this class to get access to the feed parsing inherited
     * from FeedExpander (collectExpandableDatas($url)).
     */
    class CssSelectorFeedExpanderBridgeInternal extends FeedExpander
    {
        /**
         * Intentionally left empty: this helper is never run as a bridge.
         *
         * Callers invoke collectExpandableDatas($url), inherited from
         * FeedExpander, instead of this method.
         */
        public function collectData()
        {
            // No-op on purpose, see the method description.
        }
    }
}

/**
 * Expands a truncated RSS feed by loading each article and extracting its
 * content with a user supplied CSS selector.
 *
 * The feed itself is parsed by CssSelectorFeedExpanderBridgeInternal (a
 * FeedExpander), while the per-article extraction reuses
 * expandEntryWithSelector() inherited from CssSelectorBridge.
 */
class CssSelectorFeedExpanderBridge extends CssSelectorBridge
{
    const MAINTAINER = 'ORelio';
    const NAME = 'CSS Selector Feed Expander';
    const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
    const DESCRIPTION = 'Expand any site RSS feed using CSS selectors (Advanced Users)';
    const PARAMETERS = [
        [
            'feed' => [
                'name' => 'Feed: URL of truncated RSS feed',
                'exampleValue' => 'https://example.com/feed.xml',
                'required' => true
            ],
            'content_selector' => [
                'name' => 'Selector for each article content',
                // The closing marker's indentation is stripped from every
                // heredoc line, so the resulting text has no leading spaces.
                'title' => <<<EOT
                    This bridge works using CSS selectors, e.g. "div.article" will match <div class="article">.
                    Everything inside that element becomes feed item content.
                    EOT,
                'exampleValue' => 'article.content',
                'required' => true
            ],
            'content_cleanup' => [
                'name' => '[Optional] Content cleanup: List of items to remove',
                'title' => 'Selector for unnecessary elements to remove inside article contents.',
                'exampleValue' => 'div.ads, div.comments',
            ],
            'dont_expand_metadata' => [
                'name' => '[Optional] Don\'t expand metadata',
                'title' => "This bridge will attempt to fill missing fields using metadata from the webpage.\nCheck to disable.",
                'type' => 'checkbox',
            ],
            'discard_thumbnail' => [
                'name' => '[Optional] Discard thumbnail set by site author',
                'title' => 'Some sites set their logo as thumbnail for every article. Use this option to discard it.',
                'type' => 'checkbox',
            ],
            'limit' => self::LIMIT
        ]
    ];

    /**
     * Read the source feed, expand each of its items and store the results
     * in $this->items.
     *
     * Inputs read: feed, content_selector, content_cleanup,
     * dont_expand_metadata, discard_thumbnail and limit.
     *
     * Side effects: sets $this->homepageUrl to the root of the feed's host
     * and $this->feedName to the name reported by the feed parser.
     */
    public function collectData()
    {
        $url = $this->getInput('feed');
        $content_selector = $this->getInput('content_selector');
        $content_cleanup = $this->getInput('content_cleanup');
        $dont_expand_metadata = $this->getInput('dont_expand_metadata');
        $discard_thumbnail = $this->getInput('discard_thumbnail');
        // A missing or empty limit casts to 0, which means "no limit" below.
        $limit = (int) $this->getInput('limit');

        $feed_expander = new CssSelectorFeedExpanderBridgeInternal();
        $items = $feed_expander->collectExpandableDatas($url)->getItems();

        // The feed URL is not a web page, so use the site root as homepage.
        $this->homepageUrl = urljoin($url, '/');
        $this->feedName = $feed_expander->getName();

        // Honor the user supplied limit. Previously the value was read but
        // never applied, so every feed item was expanded regardless of it.
        // Truncating before the loop also avoids expanding discarded items.
        // NOTE(review): assumes getItems() returns a plain array - confirm.
        if ($limit > 0) {
            $items = array_slice($items, 0, $limit);
        }

        foreach ($items as $item_from_feed) {
            $item_expanded = $this->expandEntryWithSelector(
                $item_from_feed['uri'],
                $content_selector,
                $content_cleanup
            );

            if ($dont_expand_metadata) {
                // Take feed item, only replace content from expanded data
                $content = $item_expanded['content'];
                $item_expanded = $item_from_feed;
                $item_expanded['content'] = $content;
            } else {
                // Take expanded item, but give priority to metadata already in source item
                foreach ($item_from_feed as $field => $val) {
                    if ($field !== 'content') {
                        $item_expanded[$field] = $val;
                    }
                }
            }

            if ($discard_thumbnail && isset($item_expanded['enclosures'])) {
                unset($item_expanded['enclosures']);
            }

            $this->items[] = $item_expanded;
        }
    }
}
6 changes: 3 additions & 3 deletions bridges/SitemapBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class SitemapBridge extends CssSelectorBridge

public function collectData()
{
- $url = $this->getInput('home_page');
+ $this->homepageUrl = $this->getInput('home_page');
$url_pattern = $this->getInput('url_pattern');
$content_selector = $this->getInput('content_selector');
$content_cleanup = $this->getInput('content_cleanup');
Expand All @@ -73,8 +73,8 @@ public function collectData()
$discard_thumbnail = $this->getInput('discard_thumbnail');
$limit = $this->getInput('limit');

- $this->feedName = $this->titleCleanup($this->getPageTitle($url), $title_cleanup);
- $sitemap_url = empty($site_map) ? $url : $site_map;
+ $this->feedName = $this->titleCleanup($this->getPageTitle($this->homepageUrl), $title_cleanup);
+ $sitemap_url = empty($site_map) ? $this->homepageUrl : $site_map;
$sitemap_xml = $this->getSitemapXml($sitemap_url, !empty($site_map));
$links = $this->sitemapXmlToList($sitemap_xml, $url_pattern, empty($limit) ? 10 : $limit);

Expand Down