From 06dee0f47bce5f818a7a15dad8b5f9162d05fbd0 Mon Sep 17 00:00:00 2001
From: Ivan Merzlyakov
Date: Fri, 13 Dec 2019 18:26:53 +0300
Subject: [PATCH 1/2] Scraping developer data

---
 src/Scraper.php           | 20 ++++++++++++++++++++
 tests/resources/app1.json |  5 ++++-
 tests/resources/app2.json |  5 ++++-
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/src/Scraper.php b/src/Scraper.php
index a9e3c4b..9ad4e34 100644
--- a/src/Scraper.php
+++ b/src/Scraper.php
@@ -130,6 +130,9 @@ public function getApp($id, $lang = null, $country = null)
             'whatsnew' => null,
             'video_link' => null,
             'video_image' => null,
+            'author_website' => null,
+            'author_email' => null,
+            'privacy_policy_link' => null,
         );
 
         $info['id'] = $id;
@@ -186,6 +189,23 @@ public function getApp($id, $lang = null, $country = null)
                 $info['content_rating'] = $node->filter('div > .htlgb > div')->first()->text();
             }
         });
+        // Collect the developer contact details: website, e-mail and privacy policy.
+        // NOTE: the link labels are compared against English literals, so the website
+        // and privacy policy entries are only recognised when the labels are in English.
+        $developerInfoNodes = $crawler->filter('.hAyfc > .htlgb > .IQ1z0d > .htlgb > div > .hrTbp');
+        $developerInfoNodes->each(function ($node) use (&$info) {
+            $nodeText = $node->text();
+            $nodeText = str_replace("\xc2\xa0", ' ', $nodeText); // convert non-breaking spaces to regular spaces
+
+            if ($nodeText === 'Visit website') {
+                $info['author_website'] = $node->attr('href');
+            } elseif ($nodeText === 'Privacy Policy') {
+                $info['privacy_policy_link'] = $node->attr('href');
+            } elseif (strpos($nodeText, '@') !== false) {
+                // strpos() returns 0 for a match at offset 0, so compare against false explicitly.
+                $info['author_email'] = $nodeText;
+            }
+        });
         $whatsnewNode = $crawler->filter('[itemprop="description"] > span')->eq(1);
         if ($whatsnewNode->count()) {
             $whatsnew = $this->cleanDescription($whatsnewNode);
diff --git a/tests/resources/app1.json b/tests/resources/app1.json
index 7ca1786..5490e3c 100644
--- a/tests/resources/app1.json
+++ b/tests/resources/app1.json
@@ -46,5 +46,8 @@
     "content_rating": "Everyone 10+",
     "whatsnew": "What's new in version 1.11?\nRevamped villages! Discover new villages across different biomes, with job sites, buildings, designs, and construction opportunities. 
\nEarn the villagers\u2019 trust and explore a refined trading system, with brand new ways to exchange items!\nTerrible new threats! Pillager Outposts are emerging all around the Overworld, preparing their attack. Are you ready to fight back?",
     "video_link": "https:\/\/www.youtube.com\/embed\/gcf9FM4TbN4?ps=play&vq=large&rel=0&autohide=1&showinfo=0",
-    "video_image": "https:\/\/i.ytimg.com\/vi\/gcf9FM4TbN4\/hqdefault.jpg"
+    "video_image": "https:\/\/i.ytimg.com\/vi\/gcf9FM4TbN4\/hqdefault.jpg",
+    "author_website": "http:\/\/help.mojang.com",
+    "author_email": "android-help@mojang.com",
+    "privacy_policy_link": "https:\/\/privacy.microsoft.com\/en-us\/privacystatement"
 }
\ No newline at end of file
diff --git a/tests/resources/app2.json b/tests/resources/app2.json
index 4d64bcc..28299c0 100644
--- a/tests/resources/app2.json
+++ b/tests/resources/app2.json
@@ -28,5 +28,8 @@
     "content_rating": "12 \u5c81\u4ee5\u4e0a",
     "whatsnew": null,
     "video_link": null,
-    "video_image": null
+    "video_image": null,
+    "author_website": null,
+    "author_email": "android-support@instagram.com",
+    "privacy_policy_link": null
 }
\ No newline at end of file
From c235fd6c984015c191265818076652d62f253c5f Mon Sep 17 00:00:00 2001
From: Ivan Merzlyakov
Date: Fri, 13 Dec 2019 18:35:46 +0300
Subject: [PATCH 2/2] Updated readme

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index a52e624..46804a4 100644
--- a/README.md
+++ b/README.md
@@ -78,6 +78,9 @@ array (
   'supported_os' => '4.2 and up',
   'content_rating' => 'Everyone 10+',
   'whatsnew' => 'What\'s new in version 1.11?[...]',
+  'author_website' => 'http://help.mojang.com',
+  'author_email' => 'android-help@mojang.com',
+  'privacy_policy_link' => 'https://privacy.microsoft.com/en-us/privacystatement',
 )
 ```
 