Skip to content

Commit

Permalink
Add 27 new user agents (#294)
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxGiting authored Oct 3, 2018
1 parent be0f5ba commit bac5e3f
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 5 deletions.
2 changes: 1 addition & 1 deletion raw/Crawlers.json

Large diffs are not rendered by default.

28 changes: 27 additions & 1 deletion raw/Crawlers.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
^PHP\/[0-9]
^RMA\/
^Ruby|Ruby\/[0-9]
^scrutiny\/
^VSE\/[0-9]
^WordPress\.com
^XRL\/[0-9]
Expand Down Expand Up @@ -69,10 +68,12 @@ Anarchie
AndroidDownloadManager
Anemone
AngleSharp\/
annotate_google
Ant\.com
Anturis Agent
AnyEvent-HTTP\/
Apache Droid
Apache OpenOffice
Apache-HttpAsyncClient\/
Apache-HttpClient\/
ApacheBench\/
Expand Down Expand Up @@ -115,6 +116,7 @@ BBBike
BCKLINKS
BDFetch
BegunAdvertising\/
Bidtellect\/
BigBozz
Bigfoot
biglotron
Expand All @@ -127,6 +129,8 @@ biz_Directory
Black\ Hole
Blackboard Safeassign
BlackWidow
BlockNote\.Net
Bloglines\/
Bloglovin
BlogPulseLive
BlogSearch
Expand Down Expand Up @@ -217,6 +221,7 @@ DataparkSearch
dataprovider
DataXu
Daum(oa)?[ \/][0-9]
DemandbasePublisherAnalyzer\/
Demon
DeuSu
developers\.google\.com\/\+\/web\/snippet\/
Expand Down Expand Up @@ -267,6 +272,7 @@ EMail\ Wolf
EmailWolf
Embed PHP Library
Embedly
endo\/
europarchive\.org
evc-batch
EventMachine HttpClient
Expand All @@ -293,15 +299,19 @@ Faveeo
Favicon downloader
FavOrg
Feed Wrangler
Feedable\/
Feedbin
FeedBooster
FeedBucket
FeedBunch\/[0-9]
FeedBurner
FeedChecker
Feedly
Feedreader
FeedshowOnline
Feedspot
Feedwind\/[0-9]
FeedZcollector
feeltiptop
Fetch API
Fetch\/[0-9]
Expand Down Expand Up @@ -418,6 +428,7 @@ HostTracker
ht:\/\/check
htdig
HTMLparser
htmlyse\.com
http-get
HTTP-Header-Abfrage
http-kit
Expand Down Expand Up @@ -451,6 +462,7 @@ IDwhois\/[0-9]
Iframely
igdeSpyder
IlTrovatore
ImageVisu\/
Image\ Fetch
Image\ Sucker
ImageEngine\/
Expand All @@ -460,6 +472,7 @@ InAGist
inbound\.li parser
InDesign%20CC
Indy\ Library
InetURL
infegy
infohelfer
InfoTekies
Expand Down Expand Up @@ -604,6 +617,7 @@ Miniflux
Mister\ PiX
mixdata dot com
mixed-content-scan
Mixmax-LinkPreview
mixnode
Mnogosearch
mogimogi
Expand Down Expand Up @@ -640,6 +654,7 @@ NetAnts
NETCRAFT
NetLyzer
NetMechanic
NetNewsWire
Netpursual
netresearch
NetShelter ContentScan
Expand Down Expand Up @@ -678,6 +693,7 @@ NYU
Ocelli\/[0-9]
Octopus
oegp
og-scraper\/
Offline Explorer
Offline\ Navigator
okhttp
Expand Down Expand Up @@ -743,6 +759,7 @@ PingSpot
pinterest\.com
Pixray
Pizilla
Plagger\/
PleaseCrawl
Ploetz \+ Zeller
Plukkie
Expand Down Expand Up @@ -790,6 +807,7 @@ Radian6
RankActive
RankFlex
RankSonicSiteAuditor
raynette_httprequest
Readability
RealDownload
RealPlayer%20Downloader
Expand All @@ -811,6 +829,7 @@ Robozilla\/[0-9]
ROI Hunter
RPT-HTTPClient
RSSOwl
RssReader\/
safe-agent-scanner
SalesIntelligent
Saleslift
Expand All @@ -819,6 +838,7 @@ SBIder
scalaj-http
scan\.lol
ScanAlert
ScrapeBox Page Scanner
Scoop
scooter
ScoutJet
Expand All @@ -827,9 +847,11 @@ Scrapy
Screaming
ScreenShotService\/[0-9]
Scrubby
Scrutiny\/
Search37\/
search\.thunderstone
Searchestate
SearchExpress
SearchSight
Seeker
semanticdiscovery
Expand Down Expand Up @@ -894,6 +916,7 @@ SlySearch
SmartDownload
SMRF URL Expander
SMUrlExpander
Snarfer\/
Snake
Snappy
SniffRSS
Expand Down Expand Up @@ -993,6 +1016,7 @@ ubermetrics-technologies
uclassify
uCrawlr\/
UdmSearch
unirest-java
UniversalFeedParser
Unshorten\.It
Untiny
Expand Down Expand Up @@ -1106,11 +1130,13 @@ WhoRunsCoinHive
Whynder Magnet
Windows-RSS-Platform
WinHttpRequest
WinPodder
wkhtmlto
wmtips
Woko
Word\/
WordPress\/
WordupinfoSearch
wotbox
WP Engine Install Performance API
wpif
Expand Down
28 changes: 27 additions & 1 deletion src/Fixtures/Crawlers.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ class Crawlers extends AbstractProvider
'^PHP\/[0-9]',
'^RMA\/',
'^Ruby|Ruby\/[0-9]',
'^scrutiny\/',
'^VSE\/[0-9]',
'^WordPress\.com',
'^XRL\/[0-9]',
Expand Down Expand Up @@ -90,10 +89,12 @@ class Crawlers extends AbstractProvider
'AndroidDownloadManager',
'Anemone',
'AngleSharp\/',
'annotate_google',
'Ant\.com',
'Anturis Agent',
'AnyEvent-HTTP\/',
'Apache Droid',
'Apache OpenOffice',
'Apache-HttpAsyncClient\/',
'Apache-HttpClient\/',
'ApacheBench\/',
Expand Down Expand Up @@ -136,6 +137,7 @@ class Crawlers extends AbstractProvider
'BCKLINKS',
'BDFetch',
'BegunAdvertising\/',
'Bidtellect\/',
'BigBozz',
'Bigfoot',
'biglotron',
Expand All @@ -148,6 +150,8 @@ class Crawlers extends AbstractProvider
'Black\ Hole',
'Blackboard Safeassign',
'BlackWidow',
'BlockNote\.Net',
'Bloglines\/',
'Bloglovin',
'BlogPulseLive',
'BlogSearch',
Expand Down Expand Up @@ -238,6 +242,7 @@ class Crawlers extends AbstractProvider
'dataprovider',
'DataXu',
'Daum(oa)?[ \/][0-9]',
'DemandbasePublisherAnalyzer\/',
'Demon',
'DeuSu',
'developers\.google\.com\/\+\/web\/snippet\/',
Expand Down Expand Up @@ -288,6 +293,7 @@ class Crawlers extends AbstractProvider
'EmailWolf',
'Embed PHP Library',
'Embedly',
'endo\/',
'europarchive\.org',
'evc-batch',
'EventMachine HttpClient',
Expand All @@ -314,15 +320,19 @@ class Crawlers extends AbstractProvider
'Favicon downloader',
'FavOrg',
'Feed Wrangler',
'Feedable\/',
'Feedbin',
'FeedBooster',
'FeedBucket',
'FeedBunch\/[0-9]',
'FeedBurner',
'FeedChecker',
'Feedly',
'Feedreader',
'FeedshowOnline',
'Feedspot',
'Feedwind\/[0-9]',
'FeedZcollector',
'feeltiptop',
'Fetch API',
'Fetch\/[0-9]',
Expand Down Expand Up @@ -439,6 +449,7 @@ class Crawlers extends AbstractProvider
'ht:\/\/check',
'htdig',
'HTMLparser',
'htmlyse\.com',
'http-get',
'HTTP-Header-Abfrage',
'http-kit',
Expand Down Expand Up @@ -472,6 +483,7 @@ class Crawlers extends AbstractProvider
'Iframely',
'igdeSpyder',
'IlTrovatore',
'ImageVisu\/',
'Image\ Fetch',
'Image\ Sucker',
'ImageEngine\/',
Expand All @@ -481,6 +493,7 @@ class Crawlers extends AbstractProvider
'inbound\.li parser',
'InDesign%20CC',
'Indy\ Library',
'InetURL',
'infegy',
'infohelfer',
'InfoTekies',
Expand Down Expand Up @@ -625,6 +638,7 @@ class Crawlers extends AbstractProvider
'Mister\ PiX',
'mixdata dot com',
'mixed-content-scan',
'Mixmax-LinkPreview',
'mixnode',
'Mnogosearch',
'mogimogi',
Expand Down Expand Up @@ -661,6 +675,7 @@ class Crawlers extends AbstractProvider
'NETCRAFT',
'NetLyzer',
'NetMechanic',
'NetNewsWire',
'Netpursual',
'netresearch',
'NetShelter ContentScan',
Expand Down Expand Up @@ -699,6 +714,7 @@ class Crawlers extends AbstractProvider
'Ocelli\/[0-9]',
'Octopus',
'oegp',
'og-scraper\/',
'Offline Explorer',
'Offline\ Navigator',
'okhttp',
Expand Down Expand Up @@ -764,6 +780,7 @@ class Crawlers extends AbstractProvider
'pinterest\.com',
'Pixray',
'Pizilla',
'Plagger\/',
'PleaseCrawl',
'Ploetz \+ Zeller',
'Plukkie',
Expand Down Expand Up @@ -811,6 +828,7 @@ class Crawlers extends AbstractProvider
'RankActive',
'RankFlex',
'RankSonicSiteAuditor',
'raynette_httprequest',
'Readability',
'RealDownload',
'RealPlayer%20Downloader',
Expand All @@ -832,6 +850,7 @@ class Crawlers extends AbstractProvider
'ROI Hunter',
'RPT-HTTPClient',
'RSSOwl',
'RssReader\/',
'safe-agent-scanner',
'SalesIntelligent',
'Saleslift',
Expand All @@ -840,6 +859,7 @@ class Crawlers extends AbstractProvider
'scalaj-http',
'scan\.lol',
'ScanAlert',
'ScrapeBox Page Scanner',
'Scoop',
'scooter',
'ScoutJet',
Expand All @@ -848,9 +868,11 @@ class Crawlers extends AbstractProvider
'Screaming',
'ScreenShotService\/[0-9]',
'Scrubby',
'Scrutiny\/',
'Search37\/',
'search\.thunderstone',
'Searchestate',
'SearchExpress',
'SearchSight',
'Seeker',
'semanticdiscovery',
Expand Down Expand Up @@ -915,6 +937,7 @@ class Crawlers extends AbstractProvider
'SmartDownload',
'SMRF URL Expander',
'SMUrlExpander',
'Snarfer\/',
'Snake',
'Snappy',
'SniffRSS',
Expand Down Expand Up @@ -1014,6 +1037,7 @@ class Crawlers extends AbstractProvider
'uclassify',
'uCrawlr\/',
'UdmSearch',
'unirest-java',
'UniversalFeedParser',
'Unshorten\.It',
'Untiny',
Expand Down Expand Up @@ -1127,11 +1151,13 @@ class Crawlers extends AbstractProvider
'Whynder Magnet',
'Windows-RSS-Platform',
'WinHttpRequest',
'WinPodder',
'wkhtmlto',
'wmtips',
'Woko',
'Word\/',
'WordPress\/',
'WordupinfoSearch',
'wotbox',
'WP Engine Install Performance API',
'wpif',
Expand Down
Loading

0 comments on commit bac5e3f

Please sign in to comment.