Skip to content

Commit

Permalink
Marktplaats categories added (#3761)
Browse files Browse the repository at this point in the history
* Update MarktplaatsBridge.php

* Update MarktplaatsBridge.php only main categories

As the whole list is too big, only the main categories are used for now.

* Renamed parameter 2 to sc

Renamed unused method to better reflect its usage

* Update MarktplaatsBridge.php Several fixes

Categories completed
Added a default empty one
Check if the input is not empty before using
Added helper methods to generate the categorylist

* Update MarktplaatsBridge.php

Set the methods to private for the CI
  • Loading branch information
Park0 authored Oct 22, 2023
1 parent a6a4502 commit f134808
Showing 1 changed file with 139 additions and 4 deletions.
143 changes: 139 additions & 4 deletions bridges/MarktplaatsBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,51 @@ class MarktplaatsBridge extends BridgeAbstract
'required' => true,
'title' => 'The search string for marktplaats',
],
'c' => [
'name' => 'Category',
'type' => 'list',
'values' => [
'Select a category' => '',
'Antiek en Kunst' => '1',
'Audio, Tv en Foto' => '31',
'Auto's' => '91',
'Auto-onderdelen' => '2600',
'Auto diversen' => '48',
'Boeken' => '201',
'Caravans en Kamperen' => '289',
'Cd's en Dvd's' => '1744',
'Computers en Software' => '322',
'Contacten en Berichten' => '378',
'Diensten en Vakmensen' => '1098',
'Dieren en Toebehoren' => '395',
'Doe-het-zelf en Verbouw' => '239',
'Fietsen en Brommers' => '445',
'Hobby en Vrije tijd' => '1099',
'Huis en Inrichting' => '504',
'Huizen en Kamers' => '1032',
'Kinderen en Baby's' => '565',
'Kleding | Dames' => '621',
'Kleding | Heren' => '1776',
'Motoren' => '678',
'Muziek en Instrumenten' => '728',
'Postzegels en Munten' => '1784',
'Sieraden, Tassen en Uiterlijk' => '1826',
'Spelcomputers en Games' => '356',
'Sport en Fitness' => '784',
'Telecommunicatie' => '820',
'Tickets en Kaartjes' => '1984',
'Tuin en Terras' => '1847',
'Vacatures' => '167',
'Vakantie' => '856',
'Verzamelen' => '895',
'Watersport en Boten' => '976',
'Witgoed en Apparatuur' => '537',
'Zakelijke goederen' => '1085',
'Diversen' => '428',
],
'required' => false,
'title' => 'The category to search in',
],
'z' => [
'name' => 'zipcode',
'type' => 'text',
Expand Down Expand Up @@ -57,7 +102,15 @@ class MarktplaatsBridge extends BridgeAbstract
'type' => 'checkbox',
'required' => false,
'title' => 'Include the raw data behind the content',
]
],
'sc' => [
'name' => 'Sub category',
'type' => 'number',
'required' => false,
'exampleValue' => '12345',
'title' => 'Sub category has to be given by id as the list is too big to show here.
Only use subcategories that belong to the main category. Both have to be correct',
],
]
];
const CACHE_TIMEOUT = 900;
Expand All @@ -80,6 +133,12 @@ public function collectData()
$excludeGlobal = true;
}
}
if (!empty($this->getInput('c'))) {
$query .= '&l1CategoryId=' . $this->getInput('c');
}
if (!is_null($this->getInput('sc'))) {
$query .= '&l2CategoryId=' . $this->getInput('sc');
}
$url = 'https://www.marktplaats.nl/lrp/api/search?query=' . urlencode($this->getInput('q')) . $query;
$jsonString = getSimpleHTMLDOM($url);
$jsonObj = json_decode($jsonString);
Expand All @@ -97,15 +156,15 @@ public function collectData()
$item['enclosures'] = $listing->imageUrls;
if (is_array($listing->imageUrls)) {
foreach ($listing->imageUrls as $imgurl) {
$item['content'] .= "<br />\n<img src='https:" . $imgurl . "' />";
$item['content'] .= "<br />\n<img alt='' src='https:" . $imgurl . "' />";
}
} else {
$item['content'] .= "<br>\n<img src='https:" . $listing->imageUrls . "' />";
$item['content'] .= "<br>\n<img alt='' src='https:" . $listing->imageUrls . "' />";
}
}
if (!is_null($this->getInput('r'))) {
if ($this->getInput('r')) {
$item['content'] .= "<br />\n<br />\n<br />\n" . json_encode($listing);
$item['content'] .= "<br />\n<br />\n<br />\n" . json_encode($listing) . "<br />$url";
}
}
$item['content'] .= "<br>\n<br>\nPrice: " . $listing->priceInfo->priceCents / 100;
Expand All @@ -130,4 +189,80 @@ public function getName()
}
return parent::getName();
}

/**
 * Scrapes the main categories and their subcategories from marktplaats.
 *
 * Development helper: the main categories are read from the category
 * <select> on the marktplaats homepage, after which the search API is
 * queried once per main category id to collect its subcategories.
 *
 * @return array Array with two keys:
 *               'main' => [option label => option value], starting with the
 *                         empty 'Select a category' entry;
 *               'sub'  => [main category fullName => [subcategory fullName => id]].
 * @throws \RuntimeException When the marktplaats homepage cannot be loaded.
 */
private static function scrapeSubCategories()
{
    $main = [];
    $main['Select a category'] = '';
    // Has to exist before the loops below: when no <option> matches, the
    // second foreach would otherwise iterate over an undefined variable.
    $ids = [];

    $marktplaatsHTML = file_get_html('https://www.marktplaats.nl');
    if (!is_object($marktplaatsHTML)) {
        // file_get_html() does not return a DOM object when loading fails;
        // calling find() on that result would be a fatal error.
        throw new \RuntimeException('Unable to load https://www.marktplaats.nl');
    }

    foreach ($marktplaatsHTML->find('select[id=categoryId] option') as $opt) {
        // Options whose text contains 'categorie' are skipped; every other
        // option is recorded as a main category.
        if (!str_contains($opt->innertext, 'categorie')) {
            $main[$opt->innertext] = $opt->value;
            $ids[] = $opt->value;
        }
    }

    $result = [];
    foreach ($ids as $id) {
        $url = 'https://www.marktplaats.nl/lrp/api/search?l1CategoryId=' . $id;
        $jsonstring = getContents($url);
        $jsondata = json_decode((string)$jsonstring);
        if (isset($jsondata->searchCategoryOptions)) {
            $categories = $jsondata->searchCategoryOptions;
            // Only keep the subcategories when the response also describes
            // the main category itself, as its fullName is used as the key.
            if (isset($jsondata->categoriesById->$id)) {
                $maincategory = $jsondata->categoriesById->$id;
                $array = [];
                foreach ($categories as $categorie) {
                    $array[$categorie->fullName] = $categorie->id;
                }
                $result[$maincategory->fullName] = $array;
            }
        } else {
            // Unexpected response: dump it so it can be inspected.
            print($jsonstring);
        }
    }
    $combinedResult = [
        'main' => $main,
        'sub' => $result
    ];
    return $combinedResult;
}

/**
 * Helper method to print an array as PHP array-literal source code, so the
 * output can be pasted into the category list.
 *
 * Every key and every non-array value is written as a single-quoted string
 * literal; nested arrays are printed recursively with one extra level of
 * indentation.
 *
 * @param array $array  The (possibly nested) array to print.
 * @param int   $indent Number of indentation units to put in front of each line.
 * @return void
 */
private static function printArrayAsCode($array, $indent = 0)
{
    $padding = str_repeat(' ', $indent);
    foreach ($array as $key => $value) {
        // var_export() of a string yields a single-quoted literal in which
        // both quotes and backslashes are escaped. The key is escaped for
        // nested arrays as well, otherwise a name such as "Auto's" would
        // produce invalid code.
        $keyCode = var_export((string) $key, true);
        if (is_array($value)) {
            echo $padding . $keyCode . ' => [' . PHP_EOL;
            self::printArrayAsCode($value, $indent + 1);
            echo $padding . '],' . PHP_EOL;
        } else {
            $valueCode = var_export((string) $value, true);
            echo $padding . $keyCode . ' => ' . $valueCode . ',' . PHP_EOL;
        }
    }
}

/**
 * Scrapes the categories and echoes them as PHP source code.
 *
 * Two array literals are printed one after the other, both assigned to
 * $myArray: first the main category list, then the subcategory list.
 *
 * @return void
 */
private static function printScrapeArray()
{
    $scraped = self::scrapeSubCategories();

    foreach (['main', 'sub'] as $section) {
        echo '$myArray = [' . PHP_EOL;
        self::printArrayAsCode($scraped[$section], 1);
        echo '];' . PHP_EOL;
    }
}
}

0 comments on commit f134808

Please sign in to comment.