2015-09-01 22:44:55 +03:00
|
|
|
var Promise = require('promise');
|
|
|
|
var request = require('request');
|
|
|
|
|
|
|
|
/**
 * Fetch a URL with the `request` library and expose the outcome as a promise.
 *
 * @param {string} url - Address to fetch.
 * @param {Object} urlArgs - Query-string parameters, handed to `request` as `qs`.
 * @returns {Promise} Resolves with the response body when the status code is 200.
 *   Rejects with the error reported by `request`, or - when the request itself
 *   succeeded but the status is not 200 - with an Error whose `statusCode`
 *   property holds the status that was received.
 */
var requestPromise = function (url, urlArgs) {
    return new Promise(function (resolve, reject) {
        request({
            url: url,
            qs: urlArgs,
            json: false // do not JSON-parse the body
        }, function (error, response, body) {
            if (error) {
                reject(error);
                return;
            }

            // Without an error the failure reason has to be built here:
            // rejecting with the (empty) `error` value would leave callers
            // with nothing to log or inspect.
            var status = response ? response.statusCode : undefined;
            if (status !== 200) {
                var statusError = new Error(
                    'Request to ' + url + ' failed with HTTP status ' + status
                );
                statusError.statusCode = status;
                reject(statusError);
                return;
            }

            resolve(body);
        });
    });
};
|
|
|
|
|
|
|
|
module.exports = function(config) {
|
|
|
|
// http://stackoverflow.com/questions/13087888/getting-the-page-title-from-a-scraped-webpage
|
|
|
|
var getTitle = function(url) {
|
|
|
|
var urlOpts = {host: url, path: '/', port: '80'};
|
|
|
|
|
2015-09-02 21:13:39 +03:00
|
|
|
var re = /(<\s*title[^>]*>((.|\n)+?)<\s*\/\s*title)>/gi;
|
2015-09-01 22:44:55 +03:00
|
|
|
var urlArgs = {};
|
|
|
|
|
|
|
|
var promise = new Promise(function(resolve, reject) {
|
|
|
|
var urlPromise = requestPromise(url, urlArgs);
|
|
|
|
urlPromise.then(function(data) {
|
2015-09-02 21:13:39 +03:00
|
|
|
|
2015-09-01 22:44:55 +03:00
|
|
|
var match = re.exec(data);
|
2015-09-02 21:13:39 +03:00
|
|
|
|
2015-09-01 22:44:55 +03:00
|
|
|
if (match && match[2]) {
|
2015-09-02 21:20:20 +03:00
|
|
|
var title = match[2].trim(); // remove whitespace
|
|
|
|
title = title.replace(/\r?\n|\r/g, ''); // remove newlines
|
|
|
|
|
|
|
|
resolve(title);
|
2015-09-01 22:44:55 +03:00
|
|
|
} else {
|
|
|
|
reject();
|
|
|
|
}
|
|
|
|
});
|
|
|
|
});
|
|
|
|
return promise;
|
|
|
|
};
|
|
|
|
|
|
|
|
return getTitle;
|
|
|
|
};
|