Skip to content

Commit

Permalink
[WeLiveSecurity] Fix content extraction
Browse files: browse the repository at this point in the history
  • Loading branch information
ORelio committed Oct 10, 2023
1 parent 47f52b5 commit 79f1461
Showing 1 changed file with 26 additions and 9 deletions.
35 changes: 26 additions & 9 deletions bridges/WeLiveSecurityBridge.php
Original file line number | Diff line number | Diff line change
Expand Up
@@ -16,19 +16,36 @@ protected function parseItem($item)
{
$item = parent::parseItem($item);

$article_html = getSimpleHTMLDOMCached($item['uri']);
if (!$article_html) {
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
$html = getSimpleHTMLDOMCached($item['uri']);
if (!$html) {
$item['content'] .= '<br /><p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
return $item;
}

$article_content = $article_html->find('div.formatted', 0)->innertext;
$article_content = stripWithDelimiters($article_content, '<script', '</script>');
$article_content = stripRecursiveHTMLSection($article_content, 'div', '<div class="comments');
$article_content = stripRecursiveHTMLSection($article_content, 'div', '<div class="similar-articles');
$article_content = stripRecursiveHTMLSection($article_content, 'span', '<span class="meta');
$item['content'] = trim($article_content);
$html = $html->find('.article-page', 0);
$content_html = $html->find('.article-body', 0);

// Remove social media footer
foreach ($content_html->find('blockquote') as $blockquote) {
if (str_starts_with(trim($blockquote->plaintext), 'Connect with us on')) {
$blockquote->outertext = '';
}
}

// Headline subtitle
$content = $content_html->innertext;
$subtitle = $html->find('.sub-title', 0);
if ($subtitle) {
$content = '<p><b>' . $subtitle->plaintext . '</b></p>' . $content;
}

// Author
$author = $html->find('.article-author', 0);
if ($author && !isset($item['author'])) {
$item['author'] = trim($author->plaintext);
}

$item['content'] = trim($content);
return $item;
}

Expand Down

0 comments on commit 79f1461

Please sign in to comment.