masnagam/njtool

View on GitHub
bin/njtool-download

Summary

Maintainability
Test Coverage
#!/usr/bin/env node

// This file is distributed under the MIT license.
// See LICENSE file in the project root for details.

'use strict';

const fs = require('fs');
const path = require('path');
const program = require('commander');
const streamToString = require('stream-to-string');
const Downloader = require('..').Downloader;

// Extended help text: describes what the command does, the layout of the
// output folder, the purpose of the "cursor" file and the environment
// variables that can stand in for command-line options.  It is printed by the
// '--help' event listener registered on `program` below.
const HELP = `
  Description:

    This command loads each journal page and downloads PDF files of articles
    included in the journal page.

    PDF files linked from a journal page are saved into a folder created in the
    <outdir> folder with a name like "nature/<volume>/<issue>". Each PDF
    file is saved with a name like "<index> <title>.pdf".

    While this command is downloading articles in a journal, a special file
    named "cursor" is created in the journal folder. The cursor file contains
    the number of the index of the currently downloading article in order to be
    able to restart to download from it after the command has terminated for
    some error, or by Ctrl-C. The cursor file is removed when articles are
    downloaded successfully.

  Environment Variables:

    NJTOOL_NATURE_USERNAME:
      Username used for logging in to www.nature.com

    NJTOOL_NATURE_PASSWORD:
      Password used for logging in to www.nature.com

    NJTOOL_NATURE_OUTDIR:
      Path to an output directory where downloaded files will be stored
`;

/**
 * Ensures `options[name]` has a value, falling back to an environment
 * variable when the option was not given on the command line.
 *
 * When neither the option nor the environment variable is available, prints
 * `<flag> is required` to stderr and terminates the process with status 1.
 *
 * @param {object} options - Parsed options object; updated in place because
 *     the same object is later handed to the Downloader.
 * @param {string} name - Property name of the option (e.g. 'username').
 * @param {string} envName - Name of the fallback environment variable.
 * @param {string} flag - Short flag shown in the error message (e.g. '-u').
 */
function applyEnvDefault(options, name, envName, flag) {
  if (options[name]) {
    return;
  }
  const fallback = process.env[envName];
  if (fallback === undefined) {
    console.error(`${flag} is required`);
    process.exit(1);
  }
  options[name] = fallback;
}

// Command-line definition.  The action reads a JSON array of journals from
// stdin and passes the ones without an `error` property to the Downloader.
// NOTE(review): --retry, --retry-interval and --sleep have numeric defaults,
// but values given on the command line are not passed through a parser here —
// confirm that Downloader copes with string values.
program
  .description(
    'Download PDF files of articles')
  .option(
    '-u, --username <username>',
    'Username (default: $NJTOOL_NATURE_USERNAME)')
  .option(
    '-p, --password <password>',
    'Password (default: $NJTOOL_NATURE_PASSWORD)')
  .option(
    '-o, --outdir <path-to-dir>',
    'Path to the output dir (default: $NJTOOL_NATURE_OUTDIR)',
    (value) => path.resolve(process.cwd(), value))
  .option(
    '--retry <num>',
    'Retry <num> times if failed to download a PDF file',
    4)
  .option(
    '--retry-interval <sec>',
    'Retry interval time in seconds',
    5)
  .option(
    '--sleep <sec>',
    'Time in seconds to sleep between article downloads',
    0)
  .option(
    '--no-headless',
    'Run Chromium browser in the window mode for debugging')
  .option(
    '--no-sandbox',
    'Run Chromium browser without the sandbox')
  .on('--help', () => console.log(HELP))
  .action(async (options) => {
    applyEnvDefault(options, 'username', 'NJTOOL_NATURE_USERNAME', '-u');
    applyEnvDefault(options, 'password', 'NJTOOL_NATURE_PASSWORD', '-p');
    applyEnvDefault(options, 'outdir', 'NJTOOL_NATURE_OUTDIR', '-o');

    // Report a missing output directory the same way as the other validation
    // failures.  Throwing here would only reject the promise returned by this
    // async action, which `.parse()` below does not wait for.
    if (!fs.existsSync(options.outdir)) {
      console.error(`No such directory: ${options.outdir}`);
      process.exit(1);
    }

    // The journal list arrives as JSON on stdin; entries that carry an
    // `error` property are skipped.
    const json = await streamToString(process.stdin);
    const journals = JSON.parse(json).filter((j) => j.error === undefined);
    const downloader = new Downloader(options, console);

    // Ctrl-C asks the downloader to abort.  Once that has been requested
    // three times, the next Ctrl-C terminates the process immediately.
    let abortCount = 0;
    process.on('SIGINT', () => {
      if (abortCount > 2) {
        process.exit(1);
      }
      downloader.abort();
      abortCount++;
    });

    return await downloader.download(journals);
  })
  .parse(process.argv);