2019-08-13 19:37:22 +03:00
|
|
|
var Promise = require('promise')
|
|
|
|
var request = require('request')
|
2015-09-01 22:44:55 +03:00
|
|
|
|
|
|
|
/**
 * Issue an HTTP request through `request` and expose the outcome as a promise.
 *
 * @param {string} url - Address to fetch.
 * @param {Object} urlArgs - Key/value pairs passed to `request` as the `qs` option.
 * @returns {Promise} Resolves with the response body when the status code is 200.
 *   Rejects with the transport error reported by `request`, or with an Error
 *   carrying a `statusCode` property when the server answered with any other status.
 */
var requestPromise = function (url, urlArgs) {
  return new Promise(function (resolve, reject) {
    request({
      url: url,
      qs: urlArgs,
      json: false // leave the body unparsed
    }, function (error, response, body) {
      if (error) {
        reject(error)
        return
      }

      if (response.statusCode !== 200) {
        // Without an explicit Error here the rejection reason would be the
        // null `error` argument, leaving callers nothing to inspect or log.
        var statusError = new Error(
          'Request to ' + url + ' failed with status code ' + response.statusCode
        )
        statusError.statusCode = response.statusCode
        reject(statusError)
        return
      }

      resolve(body)
    })
  })
}
|
2015-09-01 22:44:55 +03:00
|
|
|
|
2019-08-13 19:37:22 +03:00
|
|
|
module.exports = function (config) {
|
|
|
|
// http://stackoverflow.com/questions/13087888/getting-the-page-title-from-a-scraped-webpage
|
|
|
|
var getTitle = function (url) {
|
|
|
|
var re = /(<\s*title[^>]*>((.|\n)+?)<\s*\/\s*title)>/gi
|
|
|
|
var urlArgs = {}
|
2015-09-01 22:44:55 +03:00
|
|
|
|
2019-08-13 19:37:22 +03:00
|
|
|
var promise = new Promise(function (resolve, reject) {
|
|
|
|
var urlPromise = requestPromise(url, urlArgs)
|
|
|
|
urlPromise.then(function (data) {
|
|
|
|
var match = re.exec(data)
|
2015-09-01 22:44:55 +03:00
|
|
|
|
2019-08-13 19:37:22 +03:00
|
|
|
if (match && match[2]) {
|
|
|
|
var title = match[2].trim() // remove whitespace
|
|
|
|
title = title.replace(/\r?\n|\r/g, '') // remove newlines
|
|
|
|
|
|
|
|
resolve(title)
|
|
|
|
} else {
|
|
|
|
reject(new Error())
|
|
|
|
}
|
|
|
|
})
|
|
|
|
})
|
|
|
|
return promise
|
|
|
|
}
|
|
|
|
|
|
|
|
return getTitle
|
|
|
|
}
|