src/schema.js
// Permissive fallback used wherever character validation is switched off.
// The pattern is unanchored and [\s\S] covers every character, so it matches any string (the empty string included).
const anythingPattern = /[\s\S]*/su;
// https://www.loc.gov/marc/specifications/specrecstruc.html
// tag. A three character string used to identify or label an associated variable field.
// The tag may consist of ASCII numeric characters (decimal integers 0-9) and/or ASCII alphabetic characters (uppercase or lowercase, but not both).
// https://www.loc.gov/marc/specifications/specrecstruc.html
// control field. A variable field containing information useful or required for the processing of the record.
// Control fields are assigned tags beginning with two zeroes. Control fields with fixed length data elements are restricted to ASCII graphics.
// NOTE: Aleph uses also some other controlfields with non-numeric tags (FMT, LDR if its handled as a controlfield)
// ASCII - all printable/graphic: 32-126 (\x20 - \x7E)
// Tag: three characters, either all from the uppercase set or all from the lowercase set (never mixed).
// The two variants are wrapped in one group so that ^ and $ anchor BOTH of them; without the group
// the alternation splits the pattern into "^UPPER" or "lower$" and strings such as '0012' or 'x001' slip through.
const controlFieldTagPattern = /^(?:[0A-Z][0A-Z][0-9A-Z]|[0a-z][0a-z][0-9a-z])$/u;
// Value: zero or more printable ASCII characters (\x20 - \x7E).
const controlFieldValuePattern = /^[\x20-\x7E]*$/u;
// https://www.loc.gov/marc/specifications/specrecstruc.html
// data field. A variable field containing bibliographic or other data. Data fields are assigned tags beginning with characters other than two zeroes.
// Data fields contain data in any MARC 21 character set unless a field-specific restriction applies.
// Tag: three characters, all-uppercase/numeric or all-lowercase/numeric, that do not start with two zeroes.
// Each variant reads "(first char is not 0, OR second char is not 0) followed by any third char".
// Both variants live inside one group so that ^ and $ anchor the whole alternation; without the group
// the pattern means "^UPPER" or "lower$" and accepts strings such as '2455' or 'x245'.
const dataFieldTagPattern = /^(?:(?:[1-9A-Z][0-9A-Z]|[0-9A-Z][1-9A-Z])[0-9A-Z]|(?:[1-9a-z][0-9a-z]|[0-9a-z][1-9a-z])[0-9a-z])$/u;
// https://www.loc.gov/marc/specifications/specrecstruc.html
// data element identifier: A one-character code used to identify individual data elements within a variable field.
// The data element may be any ASCII lowercase alphabetic, numeric, or graphic symbol except blank.
//
// http://oeis.org/wiki/ASCII#ASCII_graphic.2Fnongraphic_characters
// "Among the ninety-five ASCII printable characters, there are the ninety-four [visible] ASCII graphic characters
// (of which the space is not) and the [invisible] ASCII nongraphic character, namely the space character."
// ASCII - all printable/graphic: 32-126 (\x20 - \x7E)
// ASCII - blank/space: 32 (\x20)
// ASCII - uppercase alphabetic: 65-90 (\x41 - \x5A)
// Exactly one character out of \x21-\x40 or \x5B-\x7E, i.e. printable ASCII
// minus the space (\x20) and minus the uppercase letters (\x41-\x5A).
// eslint-disable-next-line no-control-regex
const subfieldCodePattern = /^[\x21-\x40\x5B-\x7E]$/u;
// https://www.loc.gov/marc/specifications/specrecstruc.html:
// ... An indicator may be any ASCII lowercase alphabetic, numeric, or blank.
// The pattern accepts exactly one character: a digit, a lowercase ASCII letter or a space.
const indicatorPattern = /^[0-9a-z ]$/u;
// Option to not allow ASCII control characters in subfield values:
// the whole value must be free of \x00-\x1F and \x7F (DEL); an empty value still matches.
// eslint-disable-next-line no-control-regex
const dataFieldValuePatternNoControlCharacters = /^[^\x00-\x1F\x7F]*$/u;
// Match anything - no restrictions.
// The pattern is unanchored, so an (at least empty) match exists in every string, line breaks included.
const dataFieldValuePattern = /.*/u;
// https://www.loc.gov/marc/specifications/specrecstruc.html
// ... MARC 21 sets the length of the length of field portion of the entry at four characters, thus a field may contain a maximum of 9999 octets.
// Note: This parameter limits the length of a control field value and of a single subfield value
// (and of the leader when leader validation is switched off). A record can already be too long
// before any single value hits this restriction.
// NOTE(review): the limit is applied through maxLength, which presumably counts characters rather than octets - confirm.
const maximumFieldLength = 9999;
// DEVELOP: Can we somehow check actual field length in addition to single field/subfield value
// 9999 includes indicators + subfield separators + subfield codes in datafields, these could be subtracted
// DEVELOP: Can we check the record length (maximum 99999 octets) ?
// https://www.loc.gov/marc/specifications/specrecstruc.html
// Record length (character positions 00-04), contains a five-character ASCII numeric string equal to the length of the entire record,
// including itself and the record terminator. The five-character numeric string is right justified and unused positions contain zeroes (zero fill).
// The maximum length of a record is 99999 octets.
// DEVELOP: We could add a checker for MARC21 hardcoded codes in leader
// https://www.loc.gov/marc/specifications/specrecstruc.html#leader
// ...
// * Indicator count (character position 10), contains one ASCII numeric character specifying the number of indicators
// occurring in each variable data field. In MARC 21 records, the indicator count is always 2.
// * Subfield code length (character position 11), contains one ASCII numeric character specifying the sum of the lengths
// of the delimiter and the data element identifier used in the record. In MARC 21 records,
// the subfield code length is always 2. The ANSI Z39.2 and ISO 2709 name for this data element is identifier length .
// * Entry map (character positions 20-23), contains four single digit ASCII numeric characters that specify the structure of the entries in the directory.
// ** Length of length-of-field (character position 20): specifies the length of that part of each directory entry; in MARC 21 records, it is always set to 4.
// ** Length of starting-character-position (character position 21): specifies the length of that part of each directory entry; in MARC 21 records, it is always set to 5.
// ** Length of implementation-defined (character position 22): specifies that part of each directory entry; in MARC 21 records, a directory entry does not contain an implementation-defined portion, therefore this position is always set to 0.
// ** Undefined (character position 23): this character position is undefined; it is always set to 0.
// Default setting for validationOptions:
// strict: false, // If true, all validationOptions below are set to true
//
// fields: true, // Do not allow record without fields
// subfields: true, // Do not allow empty subfields
// subfieldValues: true, // Do not allow subfields without value
// controlFieldValues: true // Do not allow controlFields without value
// leader: false, // Do not allow record without leader, with empty leader or with leader with length != 24
// characters: false, // Do not allow erroneous characters in tags, indicators and subfield codes
// noControlCharacters: false, // Do not allow ASCII control characters in field/subfield values
// noAdditionalProperties: false // Do not allow additional properties in fields
/**
 * Builds the record schema for the given validation options.
 *
 * @param {Object} [validationOptions={}] - May be omitted entirely; every option then takes its default.
 * @param {boolean} [validationOptions.strict=false] - When true, every option below is forced to true and the individual values are ignored.
 * @param {boolean} [validationOptions.fields=true] - Require at least one field.
 * @param {boolean} [validationOptions.subfields=true] - Require at least one subfield in a data field.
 * @param {boolean} [validationOptions.subfieldValues=true] - Require a non-empty value in every subfield.
 * @param {boolean} [validationOptions.controlFieldValues=true] - Require a non-empty value in every control field.
 * @param {boolean} [validationOptions.leader=false] - Require a leader of exactly 24 characters.
 * @param {boolean} [validationOptions.characters=false] - Restrict the characters of tags, indicators, subfield codes, control field values and the leader.
 * @param {boolean} [validationOptions.noControlCharacters=false] - Reject ASCII control characters in subfield values.
 * @param {boolean} [validationOptions.noAdditionalProperties=false] - Reject properties the schema does not name in fields and subfields.
 * @returns {Object} The schema object produced by schema().
 */
export default function ({strict = false, fields = true, subfields = true, subfieldValues = true, controlFieldValues = true, leader = false, characters = false, noControlCharacters = false, noAdditionalProperties = false} = {}) {
  // The "= {}" default lets callers omit the options object instead of hitting a TypeError on destructuring.
  if (strict) {
    return schema({fields: true, subfields: true, subfieldValues: true, controlFieldValues: true, leader: true, characters: true, noControlCharacters: true, noAdditionalProperties: true});
  }

  return schema({fields, subfields, subfieldValues, controlFieldValues, leader, characters, noControlCharacters, noAdditionalProperties});
}
/**
 * Assembles the record schema object from already resolved validation options.
 *
 * The result describes an object with a `leader` string and a `fields` array. Each field has to match
 * one of two shapes: a control field (tag + value) or a data field (tag + ind1 + ind2 + subfields).
 *
 * @param {Object} [options={}]
 * @param {boolean} [options.fields=true] - Require at least one field.
 * @param {boolean} [options.subfields=true] - Require at least one subfield in a data field.
 * @param {boolean} [options.subfieldValues=true] - Require `value` in every subfield, with at least one character.
 * @param {boolean} [options.controlFieldValues=true] - Require `value` in every control field, with at least one character.
 * @param {boolean} [options.leader=false] - Require `leader` and fix its length to exactly 24.
 * @param {boolean} [options.characters=false] - Apply the tag, indicator, subfield code and control field value patterns (the leader is checked with the control field value pattern); otherwise anything is accepted.
 * @param {boolean} [options.noControlCharacters=false] - Reject ASCII control characters in subfield values.
 * @param {boolean} [options.noAdditionalProperties=false] - Reject properties not named by the schema in fields and subfields.
 * @returns {Object} The schema object.
 */
function schema({fields = true, subfields = true, subfieldValues = true, controlFieldValues = true, leader = false, characters = false, noControlCharacters = false, noAdditionalProperties = false} = {}) {
  return {
    type: 'object',
    properties: {
      leader: {
        type: 'string',
        minLength: leader ? 24 : 0,
        maxLength: leader ? 24 : maximumFieldLength,
        pattern: characters ? controlFieldValuePattern : anythingPattern,
        // NOTE(review): not a standard JSON Schema keyword; kept untouched in case the validator in use knows it - confirm
        maxOccurence: 1
      },
      fields: {
        type: 'array',
        minItems: fields ? 1 : 0,
        items: {
          anyOf: [
            // Control field: tag + value; indicators and subfields are forbidden
            {
              type: 'object',
              properties: {
                tag: {
                  type: 'string',
                  minLength: 3,
                  maxLength: 3,
                  pattern: characters ? controlFieldTagPattern : anythingPattern
                },
                value: {
                  type: 'string',
                  minLength: controlFieldValues ? 1 : 0,
                  maxLength: maximumFieldLength,
                  pattern: characters ? controlFieldValuePattern : anythingPattern
                },
                ind1: false,
                ind2: false,
                subfields: false
              },
              required: controlFieldValues ? ['tag', 'value'] : ['tag'],
              additionalProperties: !noAdditionalProperties
            },
            // Data field: tag + both indicators + subfields; a direct value is forbidden
            {
              type: 'object',
              properties: {
                tag: {
                  type: 'string',
                  minLength: 3,
                  maxLength: 3,
                  pattern: characters ? dataFieldTagPattern : anythingPattern
                },
                ind1: {
                  type: 'string',
                  minLength: 1,
                  maxLength: 1,
                  pattern: characters ? indicatorPattern : anythingPattern
                },
                ind2: {
                  type: 'string',
                  minLength: 1,
                  maxLength: 1,
                  pattern: characters ? indicatorPattern : anythingPattern
                },
                subfields: {
                  type: 'array',
                  minItems: subfields ? 1 : 0,
                  items: {
                    type: 'object',
                    properties: {
                      code: {
                        type: 'string',
                        minLength: 1,
                        maxLength: 1,
                        pattern: characters ? subfieldCodePattern : anythingPattern
                      },
                      value: {
                        type: 'string',
                        maxLength: maximumFieldLength,
                        minLength: subfieldValues ? 1 : 0,
                        pattern: noControlCharacters ? dataFieldValuePatternNoControlCharacters : dataFieldValuePattern
                      }
                    },
                    required: subfieldValues ? ['code', 'value'] : ['code'],
                    additionalProperties: !noAdditionalProperties
                  }
                },
                value: false
              },
              required: [
                'tag',
                'ind1',
                'ind2',
                'subfields'
              ],
              // The additional-properties policy belongs here, next to `properties`. A key of that name
              // inside `properties` would only describe a field property called "additionalProperties".
              additionalProperties: !noAdditionalProperties
            }
          ]
        }
      }
    },
    required: leader ? ['leader', 'fields'] : ['fields']
  };
}