// plugins/formats/write/base_writer.js
const async = require('async');
const { Context } = require('../../../core/context');
const { Plugin } = require('../../plugin');
/**
* BaseWriter is the base class for writing files into a package.
* It implements an asynchronous queue to write files one at a time
* in the order they were added. This allows it to work with async
* and stream-based Node packages like tar-stream, while also supporting
* serialized formats that must be written one at a time, in order (such
* as tar format).
*
* @param {string} name - The name of the subclass that derives from this
* class. That is, the name of your own implementation. This name is
* used for logging and debugging.
*
* @param {function} writeFn - The function that writes each file into
* the archive, directory, or serialized format you are creating. This
* function should take two params, (data, done), where data is any
* JavaScript object and done is a callback to be called when the file
* has finished being written.
*
* See {@link TarWriter} or {@link FileSystemWriter} for examples of
* the writeFn and how subclasses can be implemented.
*
*/
class BaseWriter extends Plugin {
    /**
     * Sets up the single-concurrency write queue, its drain and error
     * handlers, and the counters used to report progress.
     *
     * @param {string} name - The name of the concrete writer. Used in log
     * and error messages.
     *
     * @param {function} writeFn - Worker function handed to the queue. It
     * receives (data, done) and must call done when the write is complete.
     *
     * @throws {Error} If writeFn is not a function.
     */
    constructor(name, writeFn) {
        super();
        if (typeof writeFn !== 'function') {
            throw new Error(Context.y18n.__('Param %s must be a function', 'writeFn'));
        }
        /**
         * The name of the module. This is required for logging and
         * debugging purposes.
         *
         * @type {string}
         */
        this.name = name;
        /**
         * Asynchronous queue for writing files one at a time. The final
         * step of any subclass's add() function will be to push data into
         * this queue. See the add() implementations in {@link TarWriter}
         * and {@link FileSystemWriter} for examples.
         *
         * @type {async.queue}
         */
        this._queue = async.queue(writeFn, 1);

        // How often (ms) to re-check the counters after the queue drains,
        // and how many checks pass between "still writing" warnings.
        const pollMillis = 50;
        const ticksPerWarning = 50;

        // Handle of the polling timer started by the drain handler, or
        // null when no timer is active. Kept here so that both the drain
        // and the error handler can cancel it.
        let doneInterval = null;
        const stopPolling = () => {
            if (doneInterval !== null) {
                clearInterval(doneInterval);
                doneInterval = null;
            }
        };

        /**
         * @event BaseWriter#finish - This event fires after all files
         * have been written. It carries no payload.
         *
         * Queue may drain before writes actually complete, so we check the
         * total number of files written before firing the finish event.
         */
        // Arrow functions are used for every callback below so that `this`
        // is the writer; plain functions would read `this.name` from
        // whatever receiver the timer or the queue supplies.
        this._queue.drain(() => {
            // If the queue drains more than once, keep a single poller so
            // that 'finish' is not emitted once per drain.
            stopPolling();
            let intervalCount = 0;
            doneInterval = setInterval(() => {
                intervalCount += 1;
                if (intervalCount % ticksPerWarning === 0) {
                    Context.logger.warn(Context.y18n.__('%s is still writing final file to archive.', this.name));
                }
                // >= rather than ===, so an over-count in a subclass cannot
                // leave this timer polling forever.
                if (this.filesWritten >= this.filesAdded) {
                    stopPolling();
                    this.emit('finish');
                }
            }, pollMillis);
        });
        /**
         * @event BaseWriter#error - This event fires when a write
         * error occurs, or if the system cannot access the files to be
         * written. This immediately fires the finish event after passing
         * the error.
         *
         * @type {string}
         */
        this._queue.error((err) => {
            if (err) {
                this._queue.pause();  // stop processing
                this._queue.kill();   // empty the queue & remove drain fn
                stopPolling();        // a pending poller must not emit a second 'finish'
                // Fall back to the stringified value when the worker
                // reported something that has no message (e.g. a string).
                const detail = err.message || String(err);
                this.emit('error', `${this.name}: ${detail}`);
                this.emit('finish');
            }
        });
        /**
         * The total number of files added to the write queue.
         *
         * @type {number}
         */
        this.filesAdded = 0;
        /**
         * The total number of files that have been written.
         *
         * @type {number}
         */
        this.filesWritten = 0;
        /**
         * This is a map of directory stats. Key is relDestPath,
         * value is fs.Stats object. For example, key will be
         * "/data/subdir". Value will be the stats of the original
         * directory being bagged. This allows us to preserve directory
         * attributes such as uid, gid, mode, and mtime when creating
         * tar files.
         */
        this.directories = {};
    }

    /**
     * Returns a {@link PluginDefinition} object describing this plugin.
     *
     * @returns {PluginDefinition}
     */
    static description() {
        return {
            id: '8bac73e0-1aae-4afd-bfa3-327314befd2a',
            name: 'BaseWriter',
            description: 'BaseWriter for other format writers.',
            version: '1.0',
            readsFormats: [],
            writesFormats: [],
            implementsProtocols: [],
            talksToRepository: [],
            setsUp: []
        };
    }

    /**
     * Subclasses should implement their own init methods if they
     * require some initialization. Otherwise, they can omit this.
     * In the BaseWriter, init() is a noop.
     */
    init() {
    }

    /**
     * Returns the percent complete of the total write operations.
     * This will be a number between 0 and 100. E.g. 42.833.
     * Returns 0 when no files have been added yet, instead of the
     * NaN that 0 / 0 would produce.
     *
     * @returns {number}
     */
    percentComplete() {
        if (this.filesAdded === 0) {
            return 0;
        }
        return (this.filesWritten / this.filesAdded) * 100;
    }

    /**
     * Registers one more file with the writer. In this base class the
     * method only increments {@link BaseWriter#filesAdded}; both
     * parameters are accepted but not used here.
     *
     * The add() method of your derived class should call this before it
     * continues with your own custom processing. See the add()
     * implementations in {@link TarWriter} and {@link FileSystemWriter}
     * for examples.
     *
     * @param {BagItFile} bagItFile - The BagItFile object describing the
     * file to be added.
     *
     * @param {Array<crypto.Hash>} cryptoHashes - An array of Node.js
     * crypto.Hash objects that a subclass may use to calculate checksums
     * while writing. Defaults to an empty array.
     */
    add(bagItFile, cryptoHashes = []) {
        this.filesAdded += 1;
    }

    /**
     * Call this after a file is written, to keep an accurate
     * count of how many files have been written. See {@link
     * TarWriter} or {@link FileSystemWriter} for examples.
     */
    onFileWritten() {
        this.filesWritten += 1;
    }
}
// Export the class as a named property of this module.
module.exports.BaseWriter = BaseWriter;