lib/chanarchive.js
/*eslint no-use-before-define:0*/
// Disabling this one because of ChanArchiver.prototype.handleNext
'use strict';
/*
* chanarchive
* https://github.com/j3lte/chanarchive
*
* Copyright (c) 2014-2015 Jelte Lagendijk
* Licensed under the MIT license.
*/
var fs = require('fs');
var request = require('request');
var mkdirp = require('mkdirp');
var version = require('../package').version;
var headers = { 'User-Agent': 'Chanarchive/' + version };
var _ = require('lodash');
var Emitter = require('events').EventEmitter;
var util = require('util');
/**
 * Downloader for the files attached to a single imageboard thread.
 *
 * May be called with or without `new`. Configuration problems are NOT thrown:
 * the constructor returns an `Error` object instead of an instance, so callers
 * should test the result with `instanceof Error`.
 *
 * @param {Object} options
 * @param {string} options.url - Thread URL; the board and thread id are cut out of it.
 * @param {Object} options.chan - Board definition. Every property is copied onto
 *   the instance. Read directly here: `alias`, `b` and `t` (indexes of the board
 *   and thread segments in the split URL), `useProxy`, `proxyPort`, `skipSaveJSON`.
 * @param {string} [options.folder='./'] - Prefix of the download folder.
 * @returns {ChanArchiver|Error}
 */
function ChanArchiver (options) {
  if (!(this instanceof ChanArchiver)) {
    return new ChanArchiver(options);
  }
  Emitter.call(this);

  if (!options || !options.chan) {
    return new Error('ChanArchiver not properly configured. Missing options.chan');
  }
  var chan = options.chan;
  _.extend(this, chan);
  if (!chan.alias) {
    return new Error('Unknown board type');
  }
  // Without this check a missing URL only surfaced as a TypeError further down.
  if (typeof options.url !== 'string' || !options.url) {
    return new Error('ChanArchiver not properly configured. Missing options.url');
  }

  // Split once on "/", "?", "&", "=" and "."; chan.b / chan.t index into the result.
  var urlParts = options.url.split(/\/|\?|&|=|\./g);
  var threadPart = urlParts[chan.t];
  if (typeof threadPart !== 'string') {
    return new Error('Unable to extract the thread id from options.url');
  }

  if (chan.useProxy) {
    var proxyPort = chan.proxyPort || 8088;
    // NOTE(review): the thread URL is appended unescaped - confirm the proxy expects that.
    this.proxyUrl = 'http://localhost:' + proxyPort + '/?url=' + options.url;
  }

  this.url = options.url;
  this.type = chan.alias;
  this._originalFilenames = false;
  var folderRoot = options.folder || './';
  this.saveFolder = folderRoot + chan.alias + '/';
  this.saveJSON = !chan.skipSaveJSON;
  this._extensions = null;

  this.queue = []; // files waiting to be downloaded
  this.fin = []; // urls of files that are already handled
  this.a = 0; // currently busy
  this.downloadTimeoutID = null;
  this.abort = false;
  this._concurrentThreads = 1;
  this._watchTimeOut = null;

  this.board = urlParts[chan.b];
  this.thread = threadPart.replace('thread-', ''); // krautchan fix
  this.name = this.type + '/' + this.board + '/' + this.thread;
}
util.inherits(ChanArchiver, Emitter);
/**
 * Record whether files should keep their original (posted) names.
 * The flag is only stored on `_originalFilenames` here; it is presumably read
 * by the board's post handler, which is not part of this file.
 *
 * @param {boolean} original
 * @returns {ChanArchiver} the instance, for chaining
 */
ChanArchiver.prototype.useOriginalFileNames = function (original) {
  var archiver = this;
  archiver._originalFilenames = original;
  return archiver;
};
/**
 * Set how many downloads are started in parallel after a thread was parsed.
 *
 * The value is used as the upper bound of the worker start loop in `parsed()`.
 * Zero, negative, non-numeric or infinite values would start no worker at all
 * (or never stop starting them), so they fall back to a single worker.
 * Fractions are rounded up, which matches the number of iterations the loop
 * performed for them before.
 *
 * @param {number|string} concurrentDownloads
 * @returns {ChanArchiver} the instance, for chaining
 */
ChanArchiver.prototype.setMaxThreads = function (concurrentDownloads) {
  var threads = Math.ceil(Number(concurrentDownloads));
  this._concurrentThreads = (isFinite(threads) && threads >= 1) ? threads : 1;
  return this;
};
/**
 * Enable watch mode: once all work is done the thread is fetched again after
 * a delay instead of ending.
 *
 * @param {number} [timeout] - Delay that is later handed to `setTimeout`.
 *   Any falsy value (including 0) selects the default of 10000.
 * @returns {ChanArchiver} the instance, for chaining
 */
ChanArchiver.prototype.setWatch = function (timeout) {
  if (timeout) {
    this._watchTimeOut = timeout;
  } else {
    this._watchTimeOut = 10000;
  }
  return this;
};
/**
 * Store a list of file extensions on `_extensions`.
 * How the list is applied is not visible in this file; presumably the board's
 * post handler uses it to filter files.
 *
 * Empty segments (e.g. from "jpg//png" or a trailing "/") are dropped instead
 * of ending up as `undefined` entries, and the instance is returned so this
 * setter chains like the others.
 *
 * @param {string} extensions - "/"-separated list, with or without leading dots,
 *   e.g. "jpg/.png/gif".
 * @returns {ChanArchiver} the instance, for chaining
 */
ChanArchiver.prototype.setExtensions = function (extensions) {
  this._extensions = extensions.split('/')
    .filter(function (extension) {
      return extension.length > 0;
    })
    .map(function (extension) {
      // Normalise every entry to the ".ext" form.
      return extension.charAt(0) === '.' ? extension : '.' + extension;
    });
  return this;
};
/**
 * Request the thread's JSON and hand the response to `parsePage`.
 *
 * The proxy URL is used when one was configured; otherwise the address is
 * assembled as baseUrl + board + del + thread + ".json".
 *
 * @returns {ChanArchiver} the instance, for chaining
 */
ChanArchiver.prototype.download = function () {
  var threadUrl = this.proxyUrl;
  if (!threadUrl) {
    threadUrl = this.baseUrl + this.board + this.del + this.thread + '.json';
  }

  // A fetch is starting, so no watch timer is pending any more.
  this.downloadTimeoutID = 0;

  request({ url: threadUrl, method: 'GET', headers: headers }, this.parsePage.bind(this));
  return this;
};
/**
 * Schedule the next fetch of the thread when watch mode is on.
 * Does nothing when no watch delay is set or a timer is already pending.
 *
 * @returns {ChanArchiver} the instance, for chaining
 */
ChanArchiver.prototype.watchDownload = function () {
  var watchDisabled = !this._watchTimeOut;
  var timerPending = Boolean(this.downloadTimeoutID);
  if (watchDisabled || timerPending) {
    return this;
  }
  this.downloadTimeoutID = setTimeout(this.download.bind(this), this._watchTimeOut);
  return this;
};
/**
 * Callback for the thread request: validates the response, creates the
 * download folder, optionally stores the raw thread as "!index.json" and passes
 * every post that carries a file to `imagePostHandler`, then calls `parsed()`.
 *
 * `imagePostHandler` is not defined in this file; presumably it comes from the
 * board definition that the constructor copies onto the instance.
 *
 * Failures are reported through the 'error' event instead of being thrown:
 *  - request error, non-200 status, missing or non-JSON content type:
 *    'Thread not found'
 *  - folder cannot be created: 'Error creating the folder'
 *  - body is not valid JSON: 'Error parsing the thread JSON'
 *  - index file cannot be written: 'Error saving the thread JSON'
 *
 * @param {?Error} error - Error reported by the request, if any.
 * @param {Object} response - Response object (statusCode, headers).
 * @param {string} body - Raw response body.
 */
ChanArchiver.prototype.parsePage = function (error, response, body) {
  var _this = this;
  // The header may be absent altogether, so never call indexOf on it blindly.
  var contentType = response && response.headers && response.headers['content-type'];
  var isJson = typeof contentType === 'string' && contentType.indexOf('application/json') === 0;

  if (error || !response || response.statusCode !== 200 || !isJson) {
    this.emit('error', new Error('Thread not found'));
    return;
  }

  var storageFolder = this.saveFolder + this.board + '/' + this.thread + '/';
  mkdirp(storageFolder, function (err) {
    if (err) {
      _this.emit('error', new Error('Error creating the folder'));
      return;
    }

    var jsonBody;
    try {
      jsonBody = JSON.parse(body);
    } catch (parseError) {
      // A throw inside this callback would otherwise be an uncaught exception.
      _this.emit('error', new Error('Error parsing the thread JSON'));
      return;
    }

    if (_this.saveJSON) {
      // Current Node versions require the callback argument.
      fs.writeFile(storageFolder + '!index.json', JSON.stringify(jsonBody, null, 4), function (writeError) {
        if (writeError) {
          _this.emit('error', new Error('Error saving the thread JSON'));
        }
      });
    }

    _.forEach(jsonBody && jsonBody.posts, function (post) {
      if (post.filename) {
        _this.imagePostHandler(post);
      } else if (post.multi) { // handle multifiles from 7chan
        _.each(post.multi, function (multiPost) {
          _this.imagePostHandler(multiPost.file);
        });
      }
    });
    _this.parsed();
  });
};
/**
 * Called once a thread page has been processed. Emits 'parse', then either
 * starts the download workers, re-arms the watch timer, or emits 'end'.
 */
ChanArchiver.prototype.parsed = function () {
  this.emit('parse');

  if (!this.queue.length) {
    // Nothing to download: keep watching, or finish.
    if (this._watchTimeOut) {
      this.watchDownload();
    } else {
      this.emit('end');
    }
    return;
  }

  // Start one worker per allowed concurrent download.
  var started = 0;
  while (started < this._concurrentThreads) {
    this.handleNext();
    started += 1;
  }
};
/**
 * Put a file on the download queue, unless its url is already listed in `fin`
 * (the urls that have been handled before).
 *
 * @param {string} url - Address of the file.
 * @param {string} fileName - Name to store the file under.
 */
ChanArchiver.prototype.addFile = function (url, fileName) {
  var alreadyHandled = this.fin.indexOf(url) !== -1;
  if (alreadyHandled) {
    return;
  }
  this.queue.push({
    url: url,
    fileName: fileName,
    size: 0,
    completed: 0,
    progress: 0
  });
};
/**
 * Take the next file off the queue and download it, unless it already exists
 * in the target folder. Does nothing when aborted or when the queue is empty.
 *
 * Events emitted for the file:
 *  - 'file:start'    (file)         response headers arrived, `size` is set
 *  - 'file:chunk'    (file, chunk)  a chunk was received
 *  - 'file:progress' (file)         `completed` / `progress` were updated
 *  - 'file:error'    (err, file)    download or write failed; the partial file
 *                                   is removed
 *  - 'file:end'      (file)         file is complete (`existed` tells whether
 *                                   it was already there)
 *
 * When a file is done the next queued file is started. When the queue is empty
 * and no download is busy any more, the watch timer is re-armed or 'end' is
 * emitted.
 *
 * Each file is settled exactly once: the request and the write stream share the
 * error handler, and without the guard two errors for the same file would
 * decrement the busy counter twice.
 */
ChanArchiver.prototype.handleNext = function () {
  var self = this;
  if (self.abort || !self.queue.length) {
    return;
  }

  var handleFile = self.queue.shift();
  var dest = self.saveFolder + self.board + '/' + self.thread + '/' + handleFile.fileName;
  var output = null; // write stream, once the download has started
  var settled = false; // true as soon as the file finished or failed

  // Release this worker slot and decide what happens next.
  function next() {
    self.a -= 1;
    if (self.queue.length) {
      self.handleNext();
    } else if (!self.a) {
      if (self._watchTimeOut) {
        self.watchDownload();
      } else {
        self.emit('end');
      }
    }
  }

  // RESPONSE (START)
  function onResponse(res) {
    // NaN when the header is missing; onData then leaves progress untouched.
    handleFile.size = parseInt(res.headers['content-length'], 10);
    self.emit('file:start', handleFile);
  }

  // ONDATA
  function onData(chunk) {
    handleFile.completed += chunk.length;
    if (handleFile.size) {
      handleFile.progress = handleFile.completed / handleFile.size;
    }
    self.emit('file:chunk', handleFile, chunk);
    self.emit('file:progress', handleFile);
  }

  // ERROR
  function onError(err) {
    if (settled) {
      return;
    }
    settled = true;
    self.emit('file:error', err, handleFile);
    // Close the target before removing it so the descriptor is not left open.
    if (output && typeof output.destroy === 'function') {
      output.destroy();
    }
    // Best effort: a failing cleanup must not mask the download error.
    fs.unlink(dest, function () {});
    next();
  }

  // FINISH
  function onFinish() {
    if (settled) {
      return;
    }
    settled = true;
    self.emit('file:end', handleFile);
    self.fin.push(handleFile.url);
    next();
  }

  self.a += 1;
  fs.exists(dest, function (exists) {
    handleFile.existed = exists;
    if (exists) {
      onFinish();
      return;
    }
    output = fs.createWriteStream(dest);
    request(handleFile.url)
      .on('response', onResponse)
      .on('data', onData)
      .on('error', onError)
      .pipe(output)
      .on('error', onError)
      .on('finish', onFinish);
  });
};
/**
 * Stop the archiver: drop all queued files, prevent new downloads from being
 * started and leave watch mode.
 *
 * When a watch timer is pending it is cancelled and 'end' is emitted right
 * away. The timer id is cleared afterwards so that calling `stop()` again does
 * not emit 'end' a second time. Without a pending timer no 'end' is emitted
 * here.
 */
ChanArchiver.prototype.stop = function () {
  this.queue.length = 0;
  this.abort = true;
  this._watchTimeOut = null;
  if (this.downloadTimeoutID) {
    clearTimeout(this.downloadTimeoutID);
    this.downloadTimeoutID = null;
    this.emit('end');
  }
};
module.exports = ChanArchiver;