-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawl.rb
72 lines (64 loc) · 1.77 KB
/
crawl.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env ruby
require 'rss'
require 'faraday'
require 'faraday_middleware'
require 'json'
require 'toml-rb'
# Fetches one feed over HTTP and normalizes its items into Entry structs.
# RSS 2.0, Atom and RSS 1.0 (RDF) documents are recognized.
class Crawler
  # Normalized representation of a single feed item.
  Entry = Struct.new(:entry_url, :title, :abstract, :icon_url, :published_at)

  # @param feed_url [String] URL of the feed to download
  def initialize(feed_url)
    @feed_url = feed_url
  end

  # Downloads and parses the feed.
  #
  # Diagnostics are written to standard error only, so that standard output
  # stays free for whatever the caller prints there. Exceptions raised by the
  # HTTP client or the feed parser are not rescued here and propagate to the
  # caller.
  #
  # @return [Array<Entry>, nil] the feed's entries, or nil when the response
  #   status is not 200 or the document is not a recognized feed type
  def crawl
    response = connection.get
    $stderr.puts "status:#{response.status}\turl:#{@feed_url}"
    return unless response.status == 200

    # Second argument disables validation so slightly malformed feeds still parse.
    feed = RSS::Parser.parse(response.body, false)
    case feed
    when RSS::Rss
      feed.items.map { |item| rss_entry(item) }
    when RSS::Atom::Feed
      feed.items.map { |item| atom_entry(item) }
    when RSS::RDF
      feed.items.map { |item| rdf_entry(feed, item) }
    else
      $stderr.puts "Unsupported feed type: #{feed.class} from #{@feed_url}"
      nil
    end
  end

  private

  # Builds the HTTP connection: follows redirects, 2 second connect timeout.
  def connection
    Faraday.new(@feed_url) do |builder|
      builder.use FaradayMiddleware::FollowRedirects
      builder.adapter :net_http do |http|
        http.open_timeout = 2
      end
    end
  end

  # Converts an RSS 2.0 item.
  def rss_entry(item)
    Entry.new(
      item.link,
      item.title,
      item.description,
      item.enclosure&.url,
      item.date
    )
  end

  # Converts an Atom entry.
  def atom_entry(item)
    # A link without rel is equivalent to rel="alternate" (RFC 4287); prefer
    # it over replies/self/enclosure links, falling back to the first link.
    page_link = item.links.find { |link| link.rel.nil? || link.rel == 'alternate' } || item.link
    enclosure = item.links.find { |link| link.rel == 'enclosure' }

    Entry.new(
      page_link&.href,
      item.title&.content,
      item.summary&.content || item.content&.content,
      enclosure&.href,
      # <published> is optional in Atom; <updated> is the required timestamp.
      item.published&.content || item.updated&.content
    )
  end

  # Converts an RSS 1.0 (RDF) item.
  def rdf_entry(feed, item)
    Entry.new(
      item.link,
      item.title,
      item.description,
      # NOTE: untested -- no RSS 1.0 feed with an image could be found.
      feed.channel.image&.resource,
      item.date
    )
  end
end
# Read the feed list as TOML from standard input. Each top-level table is
# looked up for a :feed_url key; the table name itself is not used.
feeds = TomlRB.parse($stdin.read, symbolize_keys: true)

# Crawl every feed, drop the feeds that yielded nothing (nil), and merge the
# remaining per-feed entry lists into one list of plain hashes.
crawled = feeds.each_value.map { |feed| Crawler.new(feed[:feed_url]).crawl }
entries = crawled.compact.flatten.map(&:to_h)

# Emit the combined result as a single JSON document on standard output.
puts({ entries: entries }.to_json)