forked from jmervine/phantomjs-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrep.js
executable file
·114 lines (93 loc) · 2.8 KB
/
grep.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/env phantomjs
/***********************************************************
* Author: @mervinej
* Licence: MIT
* Date: 12/02/2013
*
* Run with:
*
* $ phantomjs ./grep.js ./urls.txt ./strings.txt
*
* or
*
* $ phantomjs ./grep.js \
* "http://foo.com, http://foo.com/bar" \
* "string1, string2"
*
* '--json' returns JSON output for parsing with Phapper
* (http://github.com/jmervine/phapper).
*
* Warning: all searches are case-insensitive.
*
* Note: As a bonus, the page-load timing from the example
* script this tool was based on is still reported.
*
***********************************************************/
var webpage = require('webpage');
var system = require('system');
var util = require('../common/util');
var args = system.args.copyArgs();
/**
 * Print the command-line synopsis and ask PhantomJS to exit.
 *
 * This is only called when required arguments are missing, so the process
 * exits with status 1 to let calling scripts detect the failure.
 */
function usage() {
    console.log('Usage: grep.js <URL(s)>|<URL(s) file> <STRING(s)>|<STRING(s) file> [--json]');
    phantom.exit(1);
}
// Output mode: when set, results are collected and dumped once at the end
// instead of being printed per address.
// NOTE(review): presumably getArg() also removes the flag from `args`, so the
// two shift() calls below only ever see the positional arguments -- confirm
// against ../common/util.
var json = args.getArg(['--json', '-j'], false);

// First positional argument: URL list (inline or file); second: search strings.
var addresses = util.parsePaths(args.shift());
var strings = util.parsePaths(args.shift());

var finished = 0;  // number of page.open callbacks that have fired so far
var results = [];  // per-address records, only filled when --json is set

// Both lists are required. The null guards come before any `.length` access,
// and the search strings are validated as well as the addresses (the second
// check used to re-test `addresses`, leaving `strings` unchecked).
if (!addresses || addresses.length === 0 || !strings || strings.length === 0) {
    usage();
}
/**
 * Count the case-insensitive occurrences of a pattern in a page body.
 *
 * @param {string} body - HTML to search; a non-string body (for example when
 *     the page evaluation returned nothing) counts as zero matches.
 * @param {string} pattern - Regular-expression source, compiled with 'ig'.
 * @returns {number} Number of matches; 0 when nothing matches or when the
 *     pattern is not a valid regular expression.
 */
function countMatches(body, pattern) {
    if (typeof body !== 'string') {
        return 0;
    }

    var regex;
    try {
        regex = new RegExp(pattern, 'ig');
    } catch (e) {
        // An invalid pattern is deliberately reported as "not found" rather
        // than aborting the run for the remaining strings and addresses.
        return 0;
    }

    // With the 'g' flag, match() returns every occurrence, or null for none.
    var matches = body.match(regex);
    return matches ? matches.length : 0;
}

// Open every address in its own page, count each search string in the page
// body, and report either per address (plain text) or once at the end (--json).
addresses.forEach(function (address) {
    var startedAt = Date.now();
    var page = webpage.create();

    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('FAIL to load the address');
        } else {
            var elapsed = Date.now() - startedAt;
            var body = page.evaluate(function () {
                return document.body.innerHTML;
            });

            var found = [];
            strings.forEach(function (str) {
                found.push({ string: str, count: countMatches(body, str) });
            });

            if (json) {
                results.push({
                    address: address,
                    complete: elapsed,
                    matches: found
                });
            } else {
                console.log('Regarding: ' + address);
                console.log('> took ' + elapsed + ' msec');
                console.log(' ');
                console.log('Found:');
                found.forEach(function (item) {
                    console.log('- ' + item.string + ': ' + item.count);
                });
                console.log(' ');
            }
        }

        // Call close/release as a method so that `this` is the page; fall back
        // to release() when close() is not available on this page object.
        if (page.close) {
            page.close();
        } else {
            page.release();
        }

        // Exit only after every address has reported back, whether it loaded
        // or failed.
        finished++;
        if (finished === addresses.length) {
            if (json) {
                console.dir(results);
            }
            phantom.exit();
        }
    });
});