core/job_params.js
const { AppSetting } = require('./app_setting');
const { BagItProfile } = require('../bagit/bagit_profile');
const { Context } = require('./context');
const fs = require('fs');
const { Job } = require('./job');
const { PackageOperation } = require('./package_operation');
const path = require('path');
const { TagDefinition } = require('../bagit/tag_definition');
const { UploadOperation } = require('./upload_operation');
const { Workflow } = require('./workflow');
/**
* The JobParams class provides a way of converting a simple set of
* parameters into a DART job. See the {@link JobParams#toJob} method
* for details on how this small data structure is converted to a Job.
*
* @param {object} opts
*
* @param {string} opts.workflowName - The name of the workflow to run. The
* {@link JobParams#toJob} method will create a {@link Job} that includes
* all of the components of the named Workflow, including packaging,
* validation, and upload to one or more targets. Be sure to name an
* existing {@link Workflow}, or you'll get an error when you call
* {@link JobParams#toJob}.
*
* @param {string} [opts.packageName] - The name of the package/file
* your job will create. This may be a tar or zip file. For example,
* "virginia.edu.Photos_2019-06-26.tar", "pdfs.zip", etc. This parameter
* is required if your job will be creating a package.
*
* @param {Array<string>} [opts.files] - An array of file paths. These are the
* files that the job will be packaging and/or uploading. These should be
* absolute paths.
*
* @param {Array<TagDefinition>} [opts.tags] - These are required only
* if your job will be creating a package in BagIt, OCFL, or another
* format that requires metadata in name-value pairs. These values will
* be written into the package. Per the BagIt specification, repeated
* tags (those with the same tagFile and tagName) will be written in the
* same order in which they appear in the opts.tags array.
*
* @example
*
* let opts = {
* workflowName: "Name of Workflow",
* packageName: "test.edu.my_files.tar",
* files: [
* "/path/to/first/directory",
* "/path/to/second/directory",
* "/path/to/some/document.pdf"
* ],
* tags: [
* {
* "tagFile": "bag-info.txt",
* "tagName": "Bag-Group-Identifier",
* "userValue": "Photos_2019"
* },
* {
* "tagFile": "aptrust-info.txt",
* "tagName": "Title",
* "userValue": "Photos from 2019"
* },
* {
* "tagFile": "aptrust-info.txt",
* "tagName": "Description",
* "userValue": "What I did with my summer vacation."
* },
* {
* "tagFile": "custom/legal-info.txt",
* "tagName": "License",
* "userValue": "https://creativecommons.org/publicdomain/zero/1.0/"
* }
* ]
* }
*
* let jobParams = new JobParams(opts);
*
* // Create a job and save it to the DART Jobs database.
* let job = jobParams.toJob();
* job.save();
*
* // Or just write the job directly to a JSON file.
* jobParams.toJobFile("/tmp/dart/job_file.json");
*/
class JobParams {
    /**
     * Creates a new JobParams object from a plain options object.
     *
     * @param {object} [opts] - Job parameters.
     * @param {string} [opts.workflowName] - Name of the workflow to use
     * as the template for the job.
     * @param {string} [opts.packageName] - Name of the package/file to
     * create.
     * @param {Array<string>} [opts.files] - Paths of the files to package
     * and/or upload.
     * @param {Array<object>} [opts.tags] - Tag values to merge into the
     * job's copy of the BagItProfile. Defaults to an empty array.
     */
    constructor(opts = {}) {
        /**
         * The name of the workflow. This workflow will provide the
         * template for the job.
         *
         * @type {string}
         */
        this.workflowName = opts.workflowName;
        /**
         * The name of the package or output file to create.
         *
         * @type {string}
         */
        this.packageName = opts.packageName;
        /**
         * A list of files to package and/or upload. These should be
         * absolute paths.
         *
         * @type {Array<string>}
         */
        this.files = opts.files;
        /**
         * A list of metadata tags to include in the package.
         *
         * @type {Array<TagDefinition>}
         */
        this.tags = opts.tags || [];
        /**
         * A copy of the {@link Workflow} object whose name matches
         * this.workflowName. This is private, for internal use only.
         *
         * @private
         * @type {Workflow}
         */
        this._workflowObj = null;
        /**
         * A copy of the {@link BagItProfile} object whose id matches
         * this._workflowObj.bagItProfileId. This is private, for internal
         * use only.
         *
         * @private
         * @type {BagItProfile}
         */
        this._bagItProfile = null;
        /**
         * A hash of validation errors for this JobParams object. Keys
         * are the names of invalid properties. Values are error messages
         * (strings) describing why the field is invalid.
         *
         * @type {object.<string, string>}
         */
        this.errors = {};
    }

    /**
     * Converts the JobParams to a {@link Job} object, which DART can then
     * run. This performs the following operations to create a job:
     *
     * 1. Loads the {@link Workflow} specified in this.workflowName.
     *
     * 2. Creates a {@link Job} object patterned after that Workflow.
     *
     * 3. Copies the packageName and files (if specified) into the new
     *    {@link Job} object.
     *
     * 4. If the {@link Workflow} includes a {@link BagItProfile}, this
     *    copies this.tags into a copy of that {@link BagItProfile}.
     *    Typically, most tag values in a DART {@link BagItProfile}, such
     *    as "Source-Organization", "Contact-Email", etc. are set to
     *    default values because they do not change from one bag to the
     *    next. More specific values, such as "Title", "Description", or
     *    "Internal-Sender-Identifier" do change from bag to bag. The
     *    {@link JobParams#tags} property allows you to overwrite
     *    bag-specific tag values without having to re-specify any
     *    default tag values. Any values you specify in {@link JobParams#tags}
     *    will overwrite tag values in the job-specific copy of the
     *    {@link BagItProfile}.
     *
     * @returns {Job|null} The new job, or null if the workflow or its
     * BagItProfile could not be loaded. In the null case, the reason is
     * recorded in this.errors.
     */
    toJob() {
        if (!this._loadWorkflow() || !this._loadBagItProfile()) {
            return null;
        }
        // Call job.validate before returning?
        return this._buildJob();
    }

    /**
     * Validates this JobParams object to ensure it can build a valid
     * job.
     *
     * THIS IS NOT YET IMPLEMENTED.
     *
     */
    validate() {
        // If packageName, then files are required.
        // If packageName, then packageFormat is required.
        // If packageFormat requires a plugin, then pluginId required.
    }

    /**
     * Merge tags from this.tags into the Job's copy of the BagItProfile.
     * Note that this.tags may include 1..N instances of a tag that is
     * defined 1 time in the BagItProfile. In those cases, this merge function
     * ensures that all values from this.tags will be copied, and that
     * any validation constraints on the original {@link TagDefinition} in
     * the {@link BagItProfile} will be copied to all instances of the tag.
     * Those constraints include the "required" attribute and the list of
     * valid values in the "values" attribute.
     *
     * Does nothing if the job has no BagItProfile.
     *
     * @param {Job} job - The job whose bagItProfile.tags will be updated
     * in place.
     */
    _mergeTags(job) {
        const profile = job.bagItProfile;
        if (!profile) {
            return;
        }
        for (const tags of Object.values(this._groupedTags())) {
            // Every tag in this group has the same tagFile and tagName,
            // so the first one identifies the whole group.
            const { tagFile, tagName } = tags[0];
            const profileTags = profile.tags.filter(
                (tag) => tag.tagFile === tagFile && tag.tagName === tagName
            );
            this._mergeTagSet(profile, tags, profileTags);
        }
    }

    /**
     * Merges one or more values from this.tags into job.bagItProfile.tags.
     * Note the following behaviors:
     *
     * 1. If tags contains one tag with tagFile "bag-info.txt" and
     *    tagName "My-Tag", and profileTags also has one tag with that
     *    tagFile and tagName, the userValue from the tags version will
     *    be copied into the userValue of the profileTags version.
     *
     * 2. If tags and profileTags each contain multiple copies of a tag
     *    with a given tagFile/tagName combination, userValues from
     *    tags are copied into the profileTags at the same position.
     *
     * 3. If tags contains more copies of a tag than are defined in
     *    profileTags, this method appends new tags to
     *    bagItProfile.tags, each with a userValue copied from tags.
     *
     * @param {BagItProfile} bagItProfile - The job.bagItProfile object
     * into which you want to merge tags. Tags in the job's copy of the
     * BagItProfile will have their userValue property updated with the
     * userValue from matching items in the tags list. Items in the tags
     * list that do not exist in the bagItProfile will be added.
     *
     * @param {Array<object>} tags - A list of tags from this JobParams
     * object that have the same tagFile and tagName attributes.
     *
     * @param {Array<TagDefinition>} profileTags - A list of tags from
     * the job.bagItProfile.tags list that have the same tagFile and
     * tagName properties as those in the tags list (second param).
     *
     */
    _mergeTagSet(bagItProfile, tags, profileTags) {
        // If the profile already defines this tag, its first definition
        // serves as the template for any extra instances we append. The
        // definition may carry validation info, such as whether a value
        // is required and which values are legal.
        const template = profileTags.length > 0 ? profileTags[0] : null;
        for (let i = 0; i < tags.length; i++) {
            const tag = tags[i];
            if (i < profileTags.length) {
                profileTags[i].userValue = tag.userValue;
                continue;
            }
            // We have more instances of the tag specified in our .tags
            // array than in the BagItProfile. This is allowed. Just
            // append the new values in order, per the BagIt spec (which
            // says order may be important).
            const newTag = new TagDefinition();
            if (template) {
                // Copy every property from the template, but keep the id
                // that the TagDefinition constructor gave the new tag.
                const ownId = newTag.id;
                Object.assign(newTag, template);
                newTag.id = ownId;
            } else {
                newTag.tagFile = tag.tagFile;
                newTag.tagName = tag.tagName;
            }
            newTag.userValue = tag.userValue;
            bagItProfile.tags.push(newTag);
        }
    }

    /**
     * Returns an object in which tags having the same
     * tagFile and tagName are grouped. The order of the tags within
     * each group is preserved. The key to each group is the string
     * "tagFile:tagName" and the value is a list of tags having that
     * tagFile and tagName.
     *
     * Returns an empty object if this.tags is null or undefined.
     *
     * @returns {object}
     */
    _groupedTags() {
        const groupedTags = {};
        // this.tags can be null if it was overwritten after construction,
        // e.g. by inflateFrom() with data containing "tags": null.
        for (const tag of this.tags || []) {
            const key = `${tag.tagFile}:${tag.tagName}`;
            if (!(key in groupedTags)) {
                groupedTags[key] = [];
            }
            groupedTags[key].push(tag);
        }
        return groupedTags;
    }

    /**
     * This does the same as {@link JobParams#toJob}, but instead of
     * returning the Job object, it writes it as JSON to the specified
     * file.
     *
     * If the job cannot be built (see this.errors), nothing is written,
     * so the target file never ends up containing the literal "null".
     *
     * @param {string} pathToFile - The path to the file where you want
     * to write a JSON description of this job.
     *
     * @returns {boolean} True if the job was built and written, false
     * if the job could not be built.
     */
    toJobFile(pathToFile) {
        const job = this.toJob();
        if (!job) {
            return false;
        }
        fs.writeFileSync(pathToFile, JSON.stringify(job), 'utf8');
        return true;
    }

    /**
     * Loads the Workflow from the DART Workflow database whose name
     * matches this.workflowName and stores it in this._workflowObj.
     * Returns true if it was able to load the {@link Workflow}.
     * Returns false if there is no such workflow in the database.
     *
     * If this returns false, the caller should check this.errors['workflow']
     * and should assume that no further operations will succeed.
     *
     * @returns {boolean}
     */
    _loadWorkflow() {
        this._workflowObj = Workflow.firstMatching('name', this.workflowName);
        if (!this._workflowObj) {
            this.errors['workflow'] = Context.y18n.__('Cannot find workflow %s', this.workflowName);
            return false;
        }
        return true;
    }

    /**
     * Sets this._bagItProfile to the {@link BagItProfile} object whose
     * id is stored in this._workflowObj.bagItProfileId, or does nothing if
     * no profile id is specified.
     *
     * If the workflow has not been loaded yet, this loads it first.
     * Returns false if the workflow cannot be loaded (see
     * this.errors['workflow']), or if a profile id was specified but no
     * matching BagItProfile could be found (see
     * this.errors['bagItProfile']). Returns true otherwise.
     *
     * If this returns false, the caller should assume that no further
     * operations on this object will succeed.
     *
     * @returns {boolean}
     */
    _loadBagItProfile() {
        // Stop here if the workflow is missing: without it there is no
        // bagItProfileId to read.
        if (!this._workflowObj && !this._loadWorkflow()) {
            return false;
        }
        const profileId = this._workflowObj.bagItProfileId;
        if (profileId) {
            this._bagItProfile = BagItProfile.find(profileId);
            if (!this._bagItProfile) {
                this.errors['bagItProfile'] = Context.y18n.__("Could not find BagItProfile with id %s", profileId);
                return false;
            }
        }
        return true;
    }

    /**
     * Builds a {@link Job} object based on the workflow and other params
     * specified in this JobParams object. Returns the object, but does
     * not save it to the DART Jobs database. It's up to the caller to do
     * that, if they so choose.
     *
     * The workflow (and BagItProfile, if any) must already be loaded.
     *
     * @returns {Job}
     */
    _buildJob() {
        // Note: No need to create job.validationOp. The JobRunner will
        // create that if the package format is BagIt and the package was
        // successfully created. See workers/job_runner.js#createPackage().
        const job = new Job();
        // NOTE(review): this reads packageName from the workflow object,
        // not from this.packageName. Confirm which one was intended.
        job.name = this._workflowObj.packageName || Job.genericName();
        job.bagItProfile = this._bagItProfile;
        job.workflowId = this._workflowObj.id;
        // Order matters: upload ops read the package op's output path.
        this._makePackageOp(job);
        this._makeUploadOps(job);
        this._mergeTags(job);
        return job;
    }

    /**
     * Creates the {@link PackageOperation} for the {@link Job} returned
     * by {@link JobParams#_buildJob}. Does nothing if this.packageName
     * is not set.
     *
     * @param {Job} job - The job that will receive the packageOp.
     */
    _makePackageOp(job) {
        if (!this.packageName) {
            return;
        }
        const packageOp = new PackageOperation(this.packageName, this._getOutputPath());
        packageOp.packageFormat = this._workflowObj.packageFormat;
        packageOp.pluginId = this._workflowObj.packagePluginId;
        packageOp.sourceFiles = this.files;
        job.packageOp = packageOp;
        this._setSerialization(job);
    }

    /**
     * This sets the bagItSerialization attribute of the job's
     * PackageOperation, if necessary. Because DART supports only tar
     * serialization (as of July, 2020), this only looks for the values
     * 'application/tar' or 'application/x-tar' in the profile's
     * acceptSerialization list. When tar serialization applies, this
     * also appends '.tar' to the package's outputPath if it does not
     * already end with that extension.
     *
     * Does nothing if the job has no packageOp or no bagItProfile. A
     * profile without an acceptSerialization list is treated as
     * accepting no serialization formats.
     *
     * This method will need to be revisited in the future as DART
     * supports more serialization formats.
     *
     * @param {Job} job - The job whose packageOp may be updated.
     */
    _setSerialization(job) {
        if (job.packageOp == null || job.bagItProfile == null) {
            return;
        }
        const profile = job.bagItProfile;
        const formats = profile.acceptSerialization || [];
        const serializationOK = (profile.serialization === 'required' ||
            profile.serialization === 'optional');
        const supportsTar = (formats.includes('application/tar') ||
            formats.includes('application/x-tar'));
        if (serializationOK && supportsTar) {
            job.packageOp.bagItSerialization = '.tar';
            if (!job.packageOp.outputPath.endsWith('.tar')) {
                job.packageOp.outputPath += '.tar';
            }
        }
    }

    /**
     * Creates the {@link UploadOperation}s for the {@link Job} returned
     * by {@link JobParams#_buildJob}. One operation is added to
     * job.uploadOps for each id in the workflow's storageServiceIds.
     *
     * @param {Job} job - The job that will receive the upload operations.
     */
    _makeUploadOps(job) {
        const storageServiceIds = this._workflowObj.storageServiceIds;
        if (!storageServiceIds || storageServiceIds.length === 0) {
            // No storage services specified, so no uploads to perform.
            return;
        }
        // If there is a packaging step, upload its result. Otherwise,
        // upload the files themselves.
        const hasPackage = job.packageOp && job.packageOp.outputPath;
        const files = hasPackage ? [job.packageOp.outputPath] : this.files;
        for (const ssid of storageServiceIds) {
            job.uploadOps.push(new UploadOperation(ssid, files));
        }
    }

    /**
     * Returns the output path of the package that will be built during
     * the Job's packaging stage. The path is this.packageName joined to
     * the value of the 'Bagging Directory' AppSetting.
     *
     * @returns {string|null} The output path, or null if this.packageName
     * is not set.
     */
    _getOutputPath() {
        if (!this.packageName) {
            return null;
        }
        const outputDir = AppSetting.firstMatching('name', 'Bagging Directory').value;
        return path.join(outputDir, this.packageName);
    }

    /**
     * This converts a generic hash/object into a JobParams
     * object. This is useful when loading objects from JSON.
     *
     * @param {object} data - An object you want to convert to a
     * JobParams. The object should include the same properties as
     * a JobParams object.
     *
     * @returns {JobParams}
     */
    static inflateFrom(data) {
        const params = new JobParams();
        Object.assign(params, data);
        return params;
    }
}
// Expose the JobParams class as a named property of this module's exports.
module.exports.JobParams = JobParams;