-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
107 lines (99 loc) · 3.03 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
const Crawler = require('crawler');
const Promise = require('bluebird');
const _ = require('lodash');
/**
* StaticCrawler - Based on node-crawler
* It's a thin wrapper that exposes a promise-based API
*/
class StaticCrawler {
  /**
   * Stores the crawler options. No node-crawler instance exists yet:
   * `this.crawler` stays `null` until setup() has been called.
   *
   * Usage:
   *   const crawler = new StaticCrawler(opts);
   *
   *   crawler.setup().then(() => {
   *     // make requests
   *     crawler.request(options).then((result) => {
   *       // do something good with the result.
   *     });
   *   });
   *
   * See [new Crawler(options)]
   * (https://github.com/bda-research/node-crawler)
   *
   * Note:
   * A `preRequest` hook does not have to call the callback it receives.
   * setup() wraps the hook, waits for its return value (plain value or
   * promise) and signals completion itself. This keeps the hook contract
   * consistent with the Dynamic Crawler API.
   *
   * @param {Object} [options={}] Crawler general options
   */
  constructor(options = {}) {
    this.options = options;
    this.crawler = null;
  }

  /**
   * setup - creates the underlying node-crawler instance.
   * Options can be passed at construction, here, or both; values passed
   * here win on conflict.
   *
   * The merged options are written to a fresh object, so the objects handed
   * in by the caller are never rewritten with the wrapped `preRequest` hook
   * or the internal `callback`.
   *
   * If the Crawler constructor throws, the exception propagates
   * synchronously out of this method (it is not turned into a rejection).
   *
   * @param {Object} [options={}] Crawler general options
   * @return {Promise} Resolved with the underlying crawler instance
   */
  setup(options = {}) {
    this.options = _.extend({}, this.options, options);

    const preReqFn = this.options.preRequest;
    // TODO: log errors raised by the preRequest hook instead of dropping them.
    if (_.isFunction(preReqFn)) {
      // The hook runs inside a promise executor so that a synchronous throw is
      // captured exactly like a rejected promise. Either way `cb` is always
      // invoked; otherwise the crawler would wait on this request forever.
      // The hook gets a no-op as its own callback: completion is derived from
      // its return value, not from that callback.
      this.options.preRequest = (opts, cb) =>
        new Promise((resolve) => resolve(preReqFn(opts, () => {})))
          .catch(() => {})
          .finally(cb);
    }

    // Single crawler-wide callback: it routes each outcome to the
    // resolve/reject pair that request() attached to that request's options.
    this.options.callback = (error, response, done) => {
      // Hand the slot back on the next tick, independent of what the
      // promise handlers below do.
      process.nextTick(done);
      if (error) {
        // Expose the originating request options on the error object.
        error.options = response.options;
        error.options.npolisReject(error);
      } else {
        response.options.npolisResolve(response);
      }
    };

    this.crawler = new Crawler(this.options);
    return Promise.resolve(this.crawler);
  }

  /**
   * request - promise version of crawler.queue for a single request.
   * See [crawler.queue(options)]
   * (https://github.com/bda-research/node-crawler)
   *
   * The caller's options object is copied before the internal resolver
   * hooks are attached, so one options object can safely be reused for
   * several in-flight requests.
   *
   * @param {Object|string} [options={}] Options for this request, or a bare
   *                                     URL string (treated as `{ uri: url }`)
   * @return {Promise} Resolved with the crawler response, or rejected with
   *                   the crawler error (carrying the request options as
   *                   `error.options`). Also rejected when setup() has not
   *                   been called yet or when an array is passed.
   */
  request(options = {}) {
    return new Promise((resolve, reject) => {
      // Throwing inside the executor turns into a rejection of this promise.
      if (!this.crawler) {
        throw new Error('StaticCrawler.request() called before setup()');
      }
      if (_.isArray(options)) {
        // One promise can only track one request.
        throw new TypeError('StaticCrawler.request() accepts a single request, not an array');
      }

      const base = _.isString(options) ? { uri: options } : options;
      const requestOptions = _.extend({}, base, {
        npolisResolve: resolve,
        npolisReject: reject,
      });
      this.crawler.queue(requestOptions);
    });
  }

  /**
   * destroy - empty STUB.
   * TODO implement destroy of static crawler to release all queued tasks.
   * @return {Promise} Currently resolves immediately without releasing
   *                   anything.
   */
  destroy() {
    return Promise.resolve();
  }
}
/**
 * CommonJS export: the module's value is the StaticCrawler class itself
 * (not an instance), so consumers instantiate it with `new`.
 * @type {typeof StaticCrawler}
 */
module.exports = StaticCrawler;