kulmapaikka-ircbot/plugins/getUrl.js

49 lines
1.1 KiB
JavaScript
Raw Normal View History

2015-09-01 22:44:55 +03:00
var Promise = require('promise');
var request = require('request');
/**
 * Fetches a URL with the `request` library and exposes the result as a promise.
 *
 * @param {string} url - Address to fetch.
 * @param {Object} urlArgs - Key/value pairs passed to `request` as the query string (`qs`).
 * @returns {Promise<string>} Resolves with the raw (non-JSON-parsed) response body
 *     when the response status is 200. Rejects with the transport error reported by
 *     `request`, or with an Error carrying a `statusCode` property when the response
 *     status is anything other than 200.
 */
var requestPromise = function (url, urlArgs) {
    return new Promise(function (resolve, reject) {
        request({
            url: url,
            qs: urlArgs,
            json: false
        }, function (error, response, body) {
            // Transport-level failure: pass the original error through untouched.
            if (error) {
                reject(error);
                return;
            }

            // A completed request with a non-200 status has no `error` object,
            // so build one; otherwise callers would be rejected with `null`.
            if (!response || response.statusCode !== 200) {
                var statusCode = response ? response.statusCode : undefined;
                var httpError = new Error(
                    'Request to ' + url + ' failed with HTTP status ' + statusCode
                );
                httpError.statusCode = statusCode;
                reject(httpError);
                return;
            }

            resolve(body);
        });
    });
};
module.exports = function(config) {
// http://stackoverflow.com/questions/13087888/getting-the-page-title-from-a-scraped-webpage
var getTitle = function(url) {
var urlOpts = {host: url, path: '/', port: '80'};
2015-09-02 21:13:39 +03:00
var re = /(<\s*title[^>]*>((.|\n)+?)<\s*\/\s*title)>/gi;
2015-09-01 22:44:55 +03:00
var urlArgs = {};
var promise = new Promise(function(resolve, reject) {
var urlPromise = requestPromise(url, urlArgs);
urlPromise.then(function(data) {
2015-09-02 21:13:39 +03:00
2015-09-01 22:44:55 +03:00
var match = re.exec(data);
2015-09-02 21:13:39 +03:00
2015-09-01 22:44:55 +03:00
if (match && match[2]) {
2015-09-02 21:20:20 +03:00
var title = match[2].trim(); // remove whitespace
title = title.replace(/\r?\n|\r/g, ''); // remove newlines
resolve(title);
2015-09-01 22:44:55 +03:00
} else {
reject();
}
});
});
return promise;
};
return getTitle;
};