Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Marktplaats categories added #3761

Merged
merged 5 commits into from
Oct 22, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 139 additions & 4 deletions bridges/MarktplaatsBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,51 @@ class MarktplaatsBridge extends BridgeAbstract
'required' => true,
'title' => 'The search string for marktplaats',
],
'c' => [
'name' => 'Category',
'type' => 'list',
'values' => [
'Select a category' => '',
'Antiek en Kunst' => '1',
'Audio, Tv en Foto' => '31',
'Auto's' => '91',
'Auto-onderdelen' => '2600',
'Auto diversen' => '48',
'Boeken' => '201',
'Caravans en Kamperen' => '289',
'Cd's en Dvd's' => '1744',
'Computers en Software' => '322',
'Contacten en Berichten' => '378',
'Diensten en Vakmensen' => '1098',
'Dieren en Toebehoren' => '395',
'Doe-het-zelf en Verbouw' => '239',
'Fietsen en Brommers' => '445',
'Hobby en Vrije tijd' => '1099',
'Huis en Inrichting' => '504',
'Huizen en Kamers' => '1032',
'Kinderen en Baby's' => '565',
'Kleding | Dames' => '621',
'Kleding | Heren' => '1776',
'Motoren' => '678',
'Muziek en Instrumenten' => '728',
'Postzegels en Munten' => '1784',
'Sieraden, Tassen en Uiterlijk' => '1826',
'Spelcomputers en Games' => '356',
'Sport en Fitness' => '784',
'Telecommunicatie' => '820',
'Tickets en Kaartjes' => '1984',
'Tuin en Terras' => '1847',
'Vacatures' => '167',
'Vakantie' => '856',
'Verzamelen' => '895',
'Watersport en Boten' => '976',
'Witgoed en Apparatuur' => '537',
'Zakelijke goederen' => '1085',
'Diversen' => '428',
],
'required' => false,
'title' => 'The category to search in',
],
'z' => [
'name' => 'zipcode',
'type' => 'text',
Expand Down Expand Up @@ -57,7 +102,15 @@ class MarktplaatsBridge extends BridgeAbstract
'type' => 'checkbox',
'required' => false,
'title' => 'Include the raw data behind the content',
]
],
'sc' => [
'name' => 'Sub category',
'type' => 'number',
'required' => false,
'exampleValue' => '12345',
'title' => 'Sub category has to be given by id as the list is too big to show here.
Only use subcategories that belong to the main category. Both have to be correct',
],
]
];
const CACHE_TIMEOUT = 900;
Expand All @@ -80,6 +133,12 @@ public function collectData()
$excludeGlobal = true;
}
}
if (!empty($this->getInput('c'))) {
$query .= '&l1CategoryId=' . $this->getInput('c');
}
if (!is_null($this->getInput('sc'))) {
$query .= '&l2CategoryId=' . $this->getInput('sc');
}
$url = 'https://www.marktplaats.nl/lrp/api/search?query=' . urlencode($this->getInput('q')) . $query;
$jsonString = getSimpleHTMLDOM($url);
$jsonObj = json_decode($jsonString);
Expand All @@ -97,15 +156,15 @@ public function collectData()
$item['enclosures'] = $listing->imageUrls;
if (is_array($listing->imageUrls)) {
foreach ($listing->imageUrls as $imgurl) {
$item['content'] .= "<br />\n<img src='https:" . $imgurl . "' />";
$item['content'] .= "<br />\n<img alt='' src='https:" . $imgurl . "' />";
}
} else {
$item['content'] .= "<br>\n<img src='https:" . $listing->imageUrls . "' />";
$item['content'] .= "<br>\n<img alt='' src='https:" . $listing->imageUrls . "' />";
}
}
if (!is_null($this->getInput('r'))) {
if ($this->getInput('r')) {
$item['content'] .= "<br />\n<br />\n<br />\n" . json_encode($listing);
$item['content'] .= "<br />\n<br />\n<br />\n" . json_encode($listing) . "<br />$url";
}
}
$item['content'] .= "<br>\n<br>\nPrice: " . $listing->priceInfo->priceCents / 100;
Expand All @@ -130,4 +189,80 @@ public function getName()
}
return parent::getName();
}

/**
 * Scrape the main categories and their sub categories from marktplaats.
 *
 * Developer helper, not used while collecting feed items: the main categories
 * are read from the category <select> on the home page, then the search API
 * is queried once per main category to list its sub categories.
 *
 * Responses without a `searchCategoryOptions` property are printed verbatim
 * so the developer can see what came back instead.
 *
 * @return array Two keys: 'main' maps category name => option value,
 *               'sub' maps main category full name => [sub category full name => id].
 */
private static function scrapeSubCategories(): array
{
    $main = ['Select a category' => ''];
    // Initialised up front: without this the loop below would iterate an
    // undefined variable whenever the page yields no usable <option>.
    $ids = [];

    $marktplaatsHTML = file_get_html('https://www.marktplaats.nl');
    // file_get_html() returns false on failure; calling ->find() on that is fatal.
    if ($marktplaatsHTML) {
        foreach ($marktplaatsHTML->find('select[id=categoryId] option') as $opt) {
            // Skip options whose label contains 'categorie'
            // (presumably the placeholder entry - verify against the live page).
            if (!str_contains($opt->innertext, 'categorie')) {
                $main[$opt->innertext] = $opt->value;
                $ids[] = $opt->value;
            }
        }
    }

    $result = [];
    foreach ($ids as $id) {
        $url = 'https://www.marktplaats.nl/lrp/api/search?l1CategoryId=' . $id;
        $jsonstring = getContents($url);
        $jsondata = json_decode((string)$jsonstring);

        if (!isset($jsondata->searchCategoryOptions)) {
            // Unexpected response: dump it for inspection and move on.
            print($jsonstring);
            continue;
        }

        // Only keep the sub categories when the main category itself is described.
        if (isset($jsondata->categoriesById->$id)) {
            $maincategory = $jsondata->categoriesById->$id;
            $array = [];
            foreach ($jsondata->searchCategoryOptions as $categorie) {
                $array[$categorie->fullName] = $categorie->id;
            }
            $result[$maincategory->fullName] = $array;
        }
    }

    return [
        'main' => $main,
        'sub' => $result,
    ];
}

/**
 * Helper method that echoes a (nested) array as PHP array-literal entries,
 * so the output can be pasted into the category lists of this bridge.
 *
 * Keys and scalar values are emitted as single-quoted string literals via
 * var_export(), which escapes both single quotes and backslashes. This also
 * covers the keys of nested arrays (e.g. "Auto's"), which would otherwise
 * produce invalid PHP.
 *
 * @param array $array  The array to print; values are scalars or nested arrays
 * @param int   $indent Number of indentation units to prefix each line with
 * @return void
 */
private static function printArrayAsCode($array, $indent = 0): void
{
    $padding = str_repeat(' ', $indent);

    foreach ($array as $key => $value) {
        // Cast first: numeric-looking keys come back from PHP as integers,
        // but the generated code should always contain quoted strings.
        $keyLiteral = var_export((string)$key, true);

        if (is_array($value)) {
            echo $padding . $keyLiteral . ' => [' . PHP_EOL;
            self::printArrayAsCode($value, $indent + 1);
            echo $padding . '],' . PHP_EOL;
        } else {
            $valueLiteral = var_export((string)$value, true);
            echo $padding . $keyLiteral . ' => ' . $valueLiteral . ',' . PHP_EOL;
        }
    }
}

/**
 * Scrape the categories and echo them as two PHP array assignments:
 * first the main categories, then the sub categories per main category.
 *
 * Both assignments are printed with the same variable name ($myArray).
 */
private static function printScrapeArray()
{
    $scraped = self::scrapeSubCategories();

    foreach (['main', 'sub'] as $section) {
        echo '$myArray = [' . PHP_EOL;
        self::printArrayAsCode($scraped[$section], 1);
        echo '];' . PHP_EOL;
    }
}
}