-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.js
116 lines (114 loc) · 3.41 KB
/
parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
var parseString = require('xml2js').parseString;
var exec = require('child_process').exec;
var async = require('async');
var fs = require('fs');
var mongoose = require('mongoose');
// Connect mongoose to the MongoDB database named `parser` on localhost.
// The call happens at load time, before the model and the cron job are set up.
mongoose.connect('mongodb://localhost/parser');
var CronJob = require('cron').CronJob;
/**
 * Shops whose catalogue feeds are imported.
 *
 * Per-shop fields:
 *   name                  - label stored with every imported price entry
 *   url                   - address the feed is downloaded from
 *   url_prepend           - base URL put in front of picture paths
 *   pictures_prepend_need - whether picture paths need `url_prepend` added
 *   filename              - local file the downloaded feed is written to
 */
var config = [
  {
    name: 'Madrobots',
    url: 'http://madrobots.ru/htcCatalogNew.xml',
    url_prepend: 'http://madrobots.ru/',
    pictures_prepend_need: true,
    filename: 'mrob.txt'
  },
  {
    name: 'Designboom',
    url: 'http://designboom.ru/bitrix/catalog_export/yml.php',
    url_prepend: 'http://designboom.ru/',
    pictures_prepend_need: false,
    filename: 'dboom.txt'
  }
];
/**
 * Mongoose model for one catalogue item.
 *
 * An item carries its descriptive data (title, cover, description, photos,
 * categories), like/dislike counters that start at 0, and a list of `shops`
 * entries recording the price and URL of the item in a particular shop.
 */
var Item = mongoose.model('Item',
{
  title: String,
  cover: String,
  likes: {
    type: Number,
    default: 0
  },
  dislikes: {
    type: Number,
    default: 0
  },
  tags: [],
  desc: String,
  shops: [
    {
      name: String,
      price: Number,
      url: String,
      date: {
        type: String,
        // Pass the function, not its result: `Date.now()` would be evaluated
        // once while the schema is being defined and every entry would share
        // that single timestamp. With a function, the default is computed
        // when each entry is created. The field stays a String, as before.
        default: Date.now
      },
    }
  ],
  // vendor: String,
  category: [String],
  index: Number,
  photos: [String],
  offers: []
});
/**
 * Returns the first element of a parsed child list, or undefined when the
 * element was absent from the document (so the list itself is undefined).
 */
function firstOf(list) {
  return Array.isArray(list) ? list[0] : undefined;
}

/**
 * Builds a lookup of category id -> category name from the feed's
 * `categories` section. Entries without attributes are skipped.
 */
function buildCategoryMap(shopNode) {
  var categories = {};
  var nodes = (firstOf(shopNode.categories) || {}).category || [];
  nodes.forEach(function(node) {
    if (node && node['$']) {
      categories[node['$'].id] = node._;
    }
  });
  return categories;
}

/**
 * Collects the photo URLs of an offer: the comma-separated list held in the
 * offer's third `param` element (prefixed with the shop base URL when the
 * shop is configured that way), followed by the offer's first `picture`.
 */
function collectPictures(offer, shop) {
  var pictures = [];
  var list = ((offer.param || [])[2] || {})._;
  if (typeof list === 'string') {
    list.split(',').forEach(function(raw) {
      var picture = raw.replace(/\s/g, '');
      if (!picture) {
        // Skip empty fragments (e.g. a trailing comma) instead of storing
        // an empty or base-only URL.
        return;
      }
      pictures.push(shop.pictures_prepend_need ? shop.url_prepend.concat(picture) : picture);
    });
  }
  var cover = firstOf(offer.picture);
  if (cover) {
    pictures.push(cover);
  }
  return pictures;
}

/**
 * Stores one offer as a new Item unless an item with the same title already
 * exists. Offers without a `model` element are skipped because the model is
 * used as the title and as the lookup key.
 */
function importOffer(offer, shop, categories) {
  var title = firstOf(offer.model);
  if (!title) {
    return;
  }
  // Look up by the same string that is stored as the title below.
  Item.findOne({title: title}, function(findErr, item) {
    if (findErr) {
      // A failed lookup tells us nothing about whether the item exists, so
      // do not fall through and insert a possible duplicate.
      console.log('find', findErr);
      return;
    }
    if (item) {
      return;
    }
    var categoryNames = (offer.categoryId || []).map(function(id) {
      return categories[id];
    });
    new Item({
      title: title,
      index: (offer['$'] || {}).id,
      shops: [{
        name: shop.name,
        price: firstOf(offer.price),
        url: firstOf(offer.url)
      }],
      // `cover` is a String field: store the first picture URL, not the list.
      cover: firstOf(offer.picture),
      // NOTE(review): `vendor` is commented out in the Item schema in this
      // file, so this value may not be persisted - confirm.
      vendor: firstOf(offer.vendor),
      desc: firstOf(offer.description),
      // The Item schema declares this field as `category`.
      category: categoryNames,
      photos: collectPictures(offer, shop)
    }).save(function(saveErr) {
      if (saveErr) {
        console.log('saveitem', saveErr);
      }
    });
  });
}

/**
 * Downloads one shop's feed with curl, converts it from cp1251 to utf8 into
 * `shop.filename`, parses the XML and imports every offer it contains.
 * Failures at any stage are logged and end the import for this shop only.
 */
function importShop(shop) {
  // NOTE(review): the command is built by string concatenation and run
  // through a shell; this is only safe while `config` holds trusted values.
  var command = 'curl -X GET ' + shop.url + ' | iconv -f cp1251 -t utf8 -- > ' + shop.filename;
  exec(command, function(downloadErr) {
    if (downloadErr) {
      console.log('download', shop.name, downloadErr);
      return;
    }
    fs.readFile('./' + shop.filename, function(readErr, rs) {
      if (readErr) {
        console.log('read', shop.name, readErr);
        return;
      }
      parseString(rs.toString(), function(parseErr, result) {
        if (parseErr) {
          console.log('parse', shop.name, parseErr);
          return;
        }
        // An empty or unexpected document yields no `yml_catalog.shop` node.
        var shopNode = result && result.yml_catalog && firstOf(result.yml_catalog.shop);
        if (!shopNode) {
          console.log('parse', shop.name, 'feed has no yml_catalog/shop element');
          return;
        }
        var categories = buildCategoryMap(shopNode);
        var offers = (firstOf(shopNode.offers) || {}).offer || [];
        offers.forEach(function(offer) {
          importOffer(offer, shop, categories);
        });
      });
    });
  });
}

/*
 * Feed import job. The six-field pattern '20 * * * * *' has a leading
 * seconds field, so the job fires at second 20 of every minute, every day.
 * On each tick every shop in `config` is imported independently.
 */
var job = new CronJob('20 * * * * *', function() {
  config.forEach(function(shop) {
    importShop(shop);
  });
}, function () {
  // Nothing to do when the job stops; the database connection is left open.
  // mongoose.disconnect();
},
true, /* Start the job right now */
'America/Los_Angeles' /* Time zone of this job. */
);