bin/njtool-download
#!/usr/bin/env node
// This file is distributed under the MIT license.
// See LICENSE file in the project root for details.
'use strict';
const fs = require('fs');
const path = require('path');
const program = require('commander');
const streamToString = require('stream-to-string');
const Downloader = require('..').Downloader;
// Supplementary help text. It is written to stdout by the '--help' event
// handler registered on `program` below. It describes where downloaded PDF
// files are stored, the purpose of the "cursor" file, and the environment
// variables that the action handler falls back to when -u, -p or -o is
// not given on the command line.
const HELP = `
Description:
This command loads each journal page and downloads PDF files of articles
included in the journal page.
PDF files linked from a journal page are saved into a folder created in the
<outdir> folder with a name like "nature/<volume>/<issue>". Each PDF
file is saved with a name like "<index> <title>.pdf".
While this command is downloading articles in a journal, a special file
named "cursor" is created in the journal folder. The cursor file contains
the number of the index of the currently downloading article in order to be
able to restart to download from it after the command has terminated for
some error, or by Ctrl-C. The cursor file is removed when articles are
downloaded successfully.
Environment Variables:
NJTOOL_NATURE_USERNAME:
Username used for logging in to www.nature.com
NJTOOL_NATURE_PASSWORD:
Password used for logging in to www.nature.com
NJTOOL_NATURE_OUTDIR:
Path to an output directory where downloaded files will be stored
`;
/**
 * Converts the raw string of a numeric command-line option into a number.
 *
 * Option values given on the command line arrive as strings while the
 * defaults declared below are numbers; converting here means the action
 * handler always sees a number. A value that is not a finite number is
 * reported on stderr and terminates the process with exit code 1.
 *
 * @param {string} value - Raw option value taken from the command line.
 * @returns {number} The parsed number.
 */
const parseNumericOption = (value) => {
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) {
    console.error(`Invalid number: ${value}`);
    process.exit(1);
  }
  return parsed;
};

// Command-line definition and entry point.
//
// The action handler:
//   1. fills in username/password/outdir from the environment when the
//      corresponding option was not given,
//   2. checks that the output directory exists,
//   3. reads a JSON array of journals from stdin and drops the entries that
//      carry an `error` property,
//   4. hands the remaining journals to `Downloader#download()`.
program
  .description(
    'Download PDF files of articles')
  .option(
    '-u, --username <username>',
    'Username (default: $NJTOOL_NATURE_USERNAME)')
  .option(
    '-p, --password <password>',
    'Password (default: $NJTOOL_NATURE_PASSWORD)')
  .option(
    '-o, --outdir <path-to-dir>',
    'Path to the output dir (default: $NJTOOL_NATURE_OUTDIR)',
    (value) => path.resolve(process.cwd(), value))
  .option(
    '--retry <num>',
    'Retry <num> times if failed to download a PDF file',
    parseNumericOption,
    4)
  .option(
    '--retry-interval <sec>',
    'Retry interval time in seconds',
    parseNumericOption,
    5)
  .option(
    '--sleep <sec>',
    'Time in seconds to sleep between article downloads',
    parseNumericOption,
    0)
  .option(
    '--no-headless',
    'Run Chromium browser in the window mode for debugging')
  .option(
    '--no-sandbox',
    'Run Chromium browser without the sandbox')
  .on('--help', () => console.log(HELP))
  .action(async (options) => {
    // [option name, fallback environment variable, flag shown in the error]
    const requiredOptions = [
      ['username', 'NJTOOL_NATURE_USERNAME', '-u'],
      ['password', 'NJTOOL_NATURE_PASSWORD', '-p'],
      ['outdir', 'NJTOOL_NATURE_OUTDIR', '-o'],
    ];
    for (const [name, envName, flag] of requiredOptions) {
      if (options[name]) {
        continue;
      }
      const envValue = process.env[envName];
      if (envValue === undefined) {
        console.error(`${flag} is required`);
        process.exit(1);
      }
      options[name] = envValue;
    }

    // Reported like the other validation failures above rather than thrown:
    // an exception thrown here would only reject this callback's promise.
    if (!fs.existsSync(options.outdir)) {
      console.error(`No such directory: ${options.outdir}`);
      process.exit(1);
    }

    // `.parse()` below is not awaited, so nothing in this file observes the
    // promise returned by this async callback. Failures (unreadable stdin,
    // malformed JSON, a rejected download) are therefore caught here and
    // turned into an error message plus a non-zero exit status.
    try {
      const json = await streamToString(process.stdin);
      const journals = JSON.parse(json).filter((j) => j.error === undefined);
      const downloader = new Downloader(options, console);

      // Ctrl-C asks the downloader to abort; once that has been requested
      // three times, the next Ctrl-C terminates the process immediately.
      let abortCount = 0;
      process.on('SIGINT', () => {
        if (abortCount > 2) {
          process.exit(1);
        }
        downloader.abort();
        abortCount++;
      });

      return await downloader.download(journals);
    } catch (err) {
      console.error(err);
      process.exit(1);
    }
  })
  .parse(process.argv);