lib/hapi-goldwasher.js
// jscs:disable requireCamelCaseOrUpperCaseIdentifiers
'use strict';
var Hoek = require('hoek');
var Joi = require('joi');
var Boom = require('boom');
var needle = require('needle');
var goldwasher = require('goldwasher');
var pack = require('../package.json');
var goldwasherPack = require('../node_modules/goldwasher/package.json');
/**
* Splits a string to an array, divided by ";"
* @param text
* @returns {Array}
*/
var stringToArray = function(text) {
return text
.split(';')
.map(function(word) {
return word.trim();
});
};
/**
* Gets options, merges with default options.
* @param options
* @returns {*}
*/
var getOptions = function(options) {
var schema = Joi.object().keys({
path: Joi.string(),
header: Joi.string(),
maxRedirects: Joi.number().integer(),
raw: Joi.boolean()
});
var defaults = {
path: '/goldwasher',
header: 'x-goldwasher-version',
maxRedirects: 5,
cors: false,
raw: false
};
Joi.validate(options, schema);
return Hoek.applyToDefaults(defaults, options);
};
/**
* Gets a default response, e.g. if called with no parameters
* @param server
* @param options
* @returns {{
* name: (options.name|*),
* description: *,
* docs: *,
* uri: *,
* parameters: {
* url: string,
* selector: string,
* search: string,
* output: string,
* limit: string,
* filterKeywords: string,
* filterTexts: string,
* filterLocale: string
* },
* goldwasher: {
* url: *,
* version: (string|exports.version|*),
* options: *,
* defaults: *
* },
* cors: *
* }}
*/
var getDefaultResponse = function(server, options) {
var text = {
commaSeparated: ' (comma-separated for multiple,' +
' remember url encoding)',
locale: ' (filter common stop words by language, "en" available)'
};
return {
name: options.name || pack.name,
description: options.description || pack.description,
docs: options.docs || pack.homepage,
uri: server.info.uri + options.path,
parameters: {
url: 'string (required)',
selector: 'string (jQuery selector)',
search: 'string' + text.commaSeparated,
output: 'string ("json", "xml", "atom" or "rss")',
limit: 'integer (limits number of results)',
filterKeywords: 'string' + text.commaSeparated,
filterTexts: 'string' + text.commaSeparated,
filterLocale: 'string' + text.locale,
contractAdjecent: 'boolean (contract adjecent targets)'
},
cors: options.cors,
goldwasher: {
url: goldwasherPack.homepage,
version: goldwasherPack.version,
options: goldwasherPack.goldwasher.options,
defaults: goldwasherPack.goldwasher.defaults
}
};
};
/**
* Gets the response.
* @param server
* @param options
* @param request
* @param callback
* @returns {*}
*/
var getResponse = function(server, options, request, callback) {
var needleOptions = {
follow_max: options.maxRedirects
};
// if no url is given, send back general information of how to use
if (!request.query.hasOwnProperty('url')) {
return callback(null, getDefaultResponse(server, options));
}
// if url is given, start scraping
needle.get(request.query.url, needleOptions, function(error, response, body) {
var queryOptions = Hoek.clone(request.query);
var result;
if (error) {
return callback(Boom.wrap(error, 404));
}
if (response.statusCode === 301 || response.statusCode === 302) {
return callback(Boom.notFound('Too many redirects.'));
}
if (response.headers['x-goldwasher-version']) {
return callback(Boom.forbidden('http://bit.ly/IqT6zt'));
}
if (options.raw && request.query.output === 'raw') {
return callback(null, body);
}
if (queryOptions.search) {
queryOptions.search = stringToArray(queryOptions.search);
}
if (queryOptions.filterKeywords) {
queryOptions.filterKeywords = stringToArray(queryOptions.filterKeywords);
}
if (queryOptions.filterTexts) {
queryOptions.filterTexts = stringToArray(queryOptions.filterTexts);
}
result = goldwasher(body, queryOptions);
return callback(null, result);
});
};
/**
* Registers the plugin.
* @param server
* @param options
* @param next
*/
exports.register = function(server, options, next) {
options = getOptions(options);
server.route({
method: 'GET',
path: options.path,
config: {
validate: {
query: {
filterKeywords: Joi.string(),
filterLocale: Joi.string().valid('en'),
filterTexts: Joi.string(),
limit: Joi.number().integer(),
output: Joi.string().valid('json', 'xml', 'atom', 'rss', 'raw'),
search: Joi.string(),
selector: Joi.string(),
url: Joi.string(),
contractAdjecent: Joi.boolean()
}
},
cors: options.cors
},
handler: function(request, reply) {
getResponse(server, options, request, function(error, result) {
if (error) {
return reply(error);
}
if (request.query.output === 'xml') {
return reply(result)
.type('text/xml')
.header(options.header, goldwasherPack.version);
}
if (request.query.output === 'atom') {
return reply(result)
.type('application/atom+xml')
.header(options.header, goldwasherPack.version);
}
if (request.query.output === 'rss') {
return reply(result)
.type('application/rss+xml')
.header(options.header, goldwasherPack.version);
}
return reply(result)
.header(options.header, goldwasherPack.version);
});
}
});
next();
};
/**
* Registers plugin attributes.
* @type {{pkg: exports}}
*/
exports.register.attributes = {
pkg: pack
};