NatLibFi/marc-record-js

View on GitHub
src/schema.js

Summary

Maintainability
A
2 hrs
Test Coverage
const anythingPattern = /[\s\S]*/su;

// https://www.loc.gov/marc/specifications/specrecstruc.html
// tag. A three character string used to identify or label an associated variable field.
// The tag may consist of ASCII numeric characters (decimal integers 0-9) and/or ASCII alphabetic characters (uppercase or lowercase, but not both).

//  https://www.loc.gov/marc/specifications/specrecstruc.html
// control field. A variable field containing information useful or required for the processing of the record.
// Control fields are assigned tags beginning with two zeroes. Control fields with fixed length data elements are restricted to ASCII graphics.
// NOTE: Aleph uses also some other controlfields with non-numeric tags (FMT, LDR if its handled as a controlfield)

// ASCII - all printable/graphic: 32-126 (\x20 - \x7E)

const controlFieldTagPattern = /^(?:[0A-Z][0A-Z][0-9A-Z])|(?:[0a-z][0a-z][0-9a-z])$/u;
const controlFieldValuePattern = /^[\x20-\x7E]*$/u;

// https://www.loc.gov/marc/specifications/specrecstruc.html
// data field. A variable field containing bibliographic or other data. Data fields are assigned tags beginning with characters other than two zeroes.
// Data fields contain data in any MARC 21 character set unless a field-specific restriction applies.

const dataFieldTagPattern = /^(?:(?:(?:[1-9A-Z][0-9A-Z])|(?:[0-9A-Z][1-9A-Z]))[0-9A-Z])|(?:(?:(?:[1-9a-z][0-9a-z])|(?:[0-9a-z][1-9a-z]))[0-9a-z])$/u;

// https://www.loc.gov/marc/specifications/specrecstruc.html
// data element identifier: A one-character code used to identify individual data elements within a variable field.
// The data element may be any ASCII lowercase alphabetic, numeric, or graphic symbol except blank.
//
//      http://oeis.org/wiki/ASCII#ASCII_graphic.2Fnongraphic_characters
//      "Among the ninety-five ASCII printable characters, there are the ninety-four [visible] ASCII graphic characters
//      (of which the space is not) and the [invisible] ASCII nongraphic character, namely the space character."

// ASCII - all printable/graphic: 32-126 (\x20 - \x7E)
// ASCII - blank/space: 32 (\x20)
// ASCII - uppercase alphabetic: 65-90 (\x41 - \x5A)

// eslint-disable-next-line no-control-regex
const subfieldCodePattern = /^[\x21-\x40\x5B-\x7E]$/u;

// https://www.loc.gov/marc/specifications/specrecstruc.html:
// ... An indicator may be any ASCII lowercase alphabetic, numeric, or blank .

const indicatorPattern = /^[0-9a-z ]$/u;

// Option to not allow ASCII control characters in subfield values

// eslint-disable-next-line no-control-regex
const dataFieldValuePatternNoControlCharacters = /^[^\x00-\x1F\x7F]*$/u;
// Match anything - no restrictions
const dataFieldValuePattern = /.*/u;

// https://www.loc.gov/marc/specifications/specrecstruc.html
// ... MARC 21 sets the length of the length of field portion of the entry at four characters, thus a field may contain a maximum of 9999 octets.
// Note: We're limiting controlField value length and sibfieldValue length with this parameter, records can be too long before single field hitting
// this restriction
const maximumFieldLength = 9999;

// DEVELOP: Can we somehow check actual field length in addition to single field/subfield value
// 9999 includes indicators + subfield separators + subfield coded in datafields, these could be subtracted

// DEVELOP: Can we check the record length (maximum 99999 octets) ?
// https://www.loc.gov/marc/specifications/specrecstruc.html
// Record length (character positions 00-04), contains a five-character ASCII numeric string equal to the length of the entire record,
// including itself and the record terminator. The five-character numeric string is right justified and unused positions contain zeroes (zero fill).
// The maximum length of a record is 99999 octets.

// DEVELOP: We could add a checker for MARC21 hardcoded codes in leader

// https://www.loc.gov/marc/specifications/specrecstruc.html#leader
// ...
// * Indicator count (character position 10), contains one ASCII numeric character specifying the number of indicators
// occurring in each variable data field. In MARC 21 records, the indicator count is always 2.
// * Subfield code length (character position 11), contains one ASCII numeric character specifying the sum of the lengths
// of the delimiter and the data element identifier used in the record. In MARC 21 records,
// the subfield code length is always 2. The ANSI Z39.2 and ISO 2709 name for this data element is identifier length .
// * Entry map (character positions 20-23), contains four single digit ASCII numeric characters that specify the structure of the entries in the directory.
// ** Length of length-of-field (character position 20): specifies the length of that part of each directory entry; in MARC 21 records, it is always set to 4.
// ** Length of starting-character-position (character position 21): specifies the length of that part of each directory entry; in MARC 21 records, it is always set to 5.
// ** Length of implementation-defined (character position 22): specifies that part of each directory entry; in MARC 21 records, a directory entry does not contain an implementation-defined portion, therefore this position is always set to 0.
// ** Undefined (character position 23): this character position is undefined; it is always set to 0.


// Default setting for validationOptions:
// strict: false                  // All validationOptions below are set to true
//
// fields: true,                  // Do not allow record without fields
// subfields: true,               // Do not allow empty subfields
// subfieldValues: true,          // Do not allow subfields without value
// controlFieldValues: true       // Do not allow controlFields without value
// leader: false,                 // Do not allow record without leader, with empty leader or with leader with length != 24
// characters: false              // Do not allow erronous characters in tags, indicators and subfield codes
// noControlCharacters: false,    // Do not allow ASCII control characters in field/subfield values
// noAdditionalProperties: false  // Do not allow additional properties in fields


export default function ({strict = false, fields = true, subfields = true, subfieldValues = true, controlFieldValues = true, leader = false, characters = false, noControlCharacters = false, noAdditionalProperties = false}) {
  if (strict) {
    return schema({fields: true, subfields: true, subfieldValues: true, controlFieldValues: true, leader: true, characters: true, noControlCharacters: true, noAdditionalProperties: true});
  }
  return schema({fields, subfields, subfieldValues, controlFieldValues, leader, characters, noControlCharacters, noAdditionalProperties});
}

function schema({fields = true, subfields = true, subfieldValues = true, controlFieldValues = true, leader = false, characters = false, noControlCharacters = false, noAdditionalProperties = false}) {
  return {
    type: 'object',
    properties: {
      leader: {
        type: 'string',
        minLength: leader ? 24 : 0,
        maxLength: leader ? 24 : maximumFieldLength,
        pattern: characters ? controlFieldValuePattern : anythingPattern,
        maxOccurence: 1
      },
      fields: {
        type: 'array',
        minItems: fields ? 1 : 0,
        items: {
          anyOf: [
            {
              type: 'object',
              properties: {
                tag: {
                  type: 'string',
                  minLength: 3,
                  maxLength: 3,
                  pattern: characters ? controlFieldTagPattern : anythingPattern
                },
                value: {
                  type: 'string',
                  minLength: controlFieldValues ? 1 : 0,
                  maxLength: maximumFieldLength,
                  pattern: characters ? controlFieldValuePattern : anythingPattern
                },
                ind1: false,
                ind2: false,
                subfields: false
              },
              required: controlFieldValues ? ['tag', 'value'] : ['tag'],
              additionalProperties: !noAdditionalProperties
            },
            {
              type: 'object',
              properties: {
                tag: {
                  type: 'string',
                  minLength: 3,
                  maxLength: 3,
                  pattern: characters ? dataFieldTagPattern : anythingPattern
                },
                ind1: {
                  type: 'string',
                  minLength: 1,
                  maxLength: 1,
                  pattern: characters ? indicatorPattern : anythingPattern
                },
                ind2: {
                  type: 'string',
                  minLength: 1,
                  maxLength: 1,
                  pattern: characters ? indicatorPattern : anythingPattern
                },
                subfields: {
                  type: 'array',
                  minItems: subfields ? 1 : 0,
                  items: {
                    type: 'object',
                    properties: {
                      code: {
                        type: 'string',
                        minLength: 1,
                        maxLength: 1,
                        pattern: characters ? subfieldCodePattern : anythingPattern
                      },
                      value: {
                        type: 'string',
                        maxLength: maximumFieldLength,
                        minLength: subfieldValues ? 1 : 0,
                        pattern: noControlCharacters ? dataFieldValuePatternNoControlCharacters : dataFieldValuePattern
                      }
                    },
                    required: subfieldValues ? ['code', 'value'] : ['code'],
                    additionalProperties: !noAdditionalProperties
                  }
                },
                value: false,
                additionalProperties: !noAdditionalProperties
              },
              required: [
                'tag',
                'ind1',
                'ind2',
                'subfields'
              ],
              additionalProperties: !noAdditionalProperties
            }
          ]
        }
      }
    },
    required: leader ? ['leader', 'fields'] : ['fields']
  };
}