NatLibFi/marc-record-validators-melinda

View on GitHub
src/punctuation/index.js

Summary

Maintainability
F
4 days
Test Coverage
import {MarcRecord} from '@natlibfi/marc-record';
import createDebug from 'debug';
import {autRules, bibRules} from './rules';

// Namespaced logger created with the `debug` package; used throughout this module to trace punctuation decisions.
const debug = createDebug('@natlibfi/marc-record-validator-melinda/punctuation:dev');

/**
 * Round-trips a field through a scratch MarcRecord and returns what the record hands back.
 * NOTE(review): this presumably yields a detached copy of the field - confirm against
 * MarcRecord#appendField.
 *
 * @param {Object} field - Field object; its `tag` is used to read the field back.
 * @returns {Object|undefined} First field the scratch record returns for that tag, or
 *   undefined when the lookup yields nothing.
 */
function cloneDeep(field) {
  const scratchRecord = new MarcRecord();
  scratchRecord.appendField(field);
  const fieldsWithSameTag = scratchRecord.get(field.tag);
  return fieldsWithSameTag?.[0];
}

export default function () {
  /**
   * Turns the raw rule rows for a record type into the rule objects used at runtime.
   *
   * Rows whose selector is the empty string are dropped. In the remaining rows every
   * `X` of the selector becomes a regex wildcard (`.`), and `$` characters plus
   * surrounding whitespace are stripped from `namePortion`.
   *
   * @param {string} recordType - Passed to getRules to pick the rule set.
   * @returns {Object[]} Rules with `selector` (RegExp), `namePortion`, `description`,
   *   `portion`, `preceedingPunctuation` and `exceptions`.
   */
  function readPunctuationRulesFromJSON(recordType) {
    const hasSelector = ({selector}) => selector !== '';
    const compileRow = ({selector, namePortion, description, portion, preceedingPunctuation, exceptions}) => ({
      selector: new RegExp(selector.replace(/X/ug, '.'), 'u'),
      namePortion: namePortion.replace(/\$/ug, '').trim(),
      description,
      portion,
      preceedingPunctuation,
      exceptions
    });

    return getRules(recordType).filter(hasSelector).map(compileRow);
  }

  /**
   * Picks the raw rule set for a record type.
   *
   * @param {string} recordType - Record type code (validate/fix pass `record.leader[6]`).
   * @returns {Object[]} `autRules` when the type is 'z', `bibRules` for anything else.
   */
  function getRules(recordType) {
    return recordType === 'z' ? autRules : bibRules;
  }

  /**
   * Renders a field as a single line for debug output.
   *
   * Fields with subfields are rendered as `TAG I1I2 ‡cValue‡cValue...`, with a missing
   * or empty indicator shown as a space. Any other field is rendered as the tag, four
   * spaces and the field's `value`.
   *
   * @param {Object} field - Field to render.
   * @returns {string} Printable representation of the field.
   */
  function fieldToString(field) {
    if (!field?.subfields) {
      return `${field.tag}    ${field.value}`;
    }

    const {tag, ind1, ind2, subfields} = field;
    const renderedSubfields = subfields.map(({code, value}) => `‡${code}${value}`).join('');
    return `${tag} ${ind1 || ' '}${ind2 || ' '} ${renderedSubfields}`;
  }

  /**
   * Creates a per-field validator for the given record type.
   *
   * The returned function punctuates a clone of the field and reports whether the
   * result still equals the original field.
   *
   * @param {string} [recordType='a'] - Record type code forwarded to punctuateField.
   * @returns {function(Object): boolean} Validator returning true when punctuateField
   *   yields nothing for the field or when the punctuated clone equals the original,
   *   false otherwise.
   */
  function validateField(recordType = 'a') {
    return element => {
      // Work on a clone so that validation never modifies the record's own field.
      const candidate = cloneDeep(element);
      debug(`Original field: ${JSON.stringify(element)}`);
      const result = punctuateField(candidate, recordType);
      debug(`Punctuation result: ${JSON.stringify(result)}`);

      if (!result) {
        debug('No punctuation result -> true');
        return true;
      }

      if (!MarcRecord.isEqual(result, element)) {
        return false;
      }

      debug(`Original field (element): ${JSON.stringify(element)}`);
      debug('Punctuation result equals original field');
      return true;
    };
  }

  /**
   * Adds punctuation to the subfields of a field, in place, following the rules whose
   * selector matches the field's tag.
   *
   * Subfields are walked in order. Each subfield is assigned a portion by getPortion:
   * - subfields without a rule (portion `false`) and `CF` / `NC` portions are skipped;
   * - `NT` counts as `N` until a `T` or `S` portion has been seen, and as `T` after that;
   * - when the portion stays the same, the subfield-specific punctuation is added to
   *   the previous subfield;
   * - when the portion changes to anything but `S`, the previous subfield gets the
   *   portion-ending punctuation.
   * For every record type except 'z' the last handled subfield also gets the
   * portion-ending punctuation.
   *
   * @param {Object} field - Field with `tag` and `subfields`; subfield values are mutated.
   * @param {string} recordType - Record type code used to select the rule set.
   * @returns {Object|undefined} The same (mutated) field, or undefined when no rule
   *   matches the field's tag.
   */
  function punctuateField(field, recordType) {
    const rules = readPunctuationRulesFromJSON(recordType);
    debug(`Handling field ${field.tag}`);
    debug(`Field contents: ${fieldToString(field)}`);
    const rulesForField = getRulesForField(field.tag, rules);
    if (rulesForField.length === 0) {
      debug(`No matching rules for field ${field.tag}`);
      return;
    }

    let currentPortion; // eslint-disable-line functional/no-let
    let preceedingField; // eslint-disable-line functional/no-let
    let inNamePortion = true; // eslint-disable-line functional/no-let

    debug(`Field subfields: ${field.subfields.map(sub => sub.code)}`);
    debug(`Field portions: ${field.subfields.map(sub => getPortion(sub, rulesForField))}`);

    field.subfields.forEach(subfield => {
      debug(`Handling subfield ${subfield.code}`);
      let portion = getPortion(subfield, rulesForField); // eslint-disable-line functional/no-let

      // No rule for this subfield code: leave it out of the punctuation logic.
      if (portion === false) {
        return;
      }

      if (portion === 'CF' || portion === 'NC') {
        return;
      }

      // Exact membership test. `portion.includes('T', 'S')` would be a substring search
      // for 'T' only (the second argument is a start position), which made 'NT' always
      // leave the name portion and never let 'S' do so.
      if (inNamePortion && ['T', 'S'].includes(portion)) { // eslint-disable-line functional/no-conditional-statements
        debug(`Portion changed to ${portion}. Not in name portion anymore`);
        inNamePortion = false;
      }

      // 'NT' is resolved from the position in the field: N while still in the name portion...
      if (inNamePortion && portion === 'NT') { // eslint-disable-line functional/no-conditional-statements
        portion = 'N';
      }

      // ...and T once the name portion has ended.
      if (!inNamePortion && portion === 'NT') { // eslint-disable-line functional/no-conditional-statements
        portion = 'T';
      }

      debug(`Current portion is ${portion}.`);

      if (currentPortion) {
        if (currentPortion === portion) { // eslint-disable-line functional/no-conditional-statements
          debug(`Current stayed as ${portion}. Adding punctuation for subfield.`);
          addSubfieldPunctuation(preceedingField, subfield, rulesForField);
        } else {
          debug(`Current portion changed to ${portion}.`);
          if (portion !== 'S') { // eslint-disable-line functional/no-conditional-statements
            debug('Adding punctuation for portion.');
            addNamePortionPunctuation(preceedingField);
          }
        }
      }

      currentPortion = portion;
      preceedingField = subfield;
    });

    // preceedingField stays undefined when every subfield was skipped above; there is
    // nothing to punctuate in that case.
    if (recordType !== 'z' && preceedingField !== undefined) { // eslint-disable-line functional/no-conditional-statements
      addNamePortionPunctuation(preceedingField);
    }

    debug(`After punctuation: ${fieldToString(field)}`);

    return field;
  }

  /**
   * Selects the rules that apply to a field tag.
   *
   * @param {string} tag - Field tag to test.
   * @param {Object[]} rules - Rules carrying a `selector` RegExp.
   * @returns {Object[]} Rules whose selector matches the tag, in their original order.
   */
  function getRulesForField(tag, rules) {
    const appliesToTag = ({selector}) => selector.test(tag);
    return rules.filter(appliesToTag);
  }

  /**
   * Looks up the portion a subfield belongs to.
   *
   * @param {Object} subfield - Subfield whose `code` is matched against `namePortion`.
   * @param {Object[]} rules - Rules for the field being handled.
   * @returns {string|false} Upper-cased `portion` of the first rule for the subfield
   *   code, or false when there is no such rule (or that rule has no portion).
   */
  function getPortion(subfield, rules) {
    debug(`Looking for namePortion for ${subfield.code}`);
    const firstMatch = rules.find(({namePortion}) => namePortion === subfield.code);
    const portion = firstMatch?.portion;

    return portion === undefined ? false : portion.toUpperCase();
  }

  /**
   * Ends a portion with a period: appends `.` to the subfield value unless the value
   * already ends in one of `? " ) ] . - ! ,`.
   *
   * @param {Object} preceedingSubfield - Subfield to punctuate; its `value` is mutated.
   * @returns {void}
   */
  function addNamePortionPunctuation(preceedingSubfield) {
    const {code, value} = preceedingSubfield;
    const alreadyPunctuated = (/[?")\].\-!,]$/u).test(value);
    if (alreadyPunctuated) {
      return;
    }

    const nextValue = `${value}.`;
    debug(`Updated subfield ${code} from '${value}' to '${nextValue}'`);
    preceedingSubfield.value = nextValue; // eslint-disable-line functional/immutable-data
  }

  /**
   * Adds, to the end of the preceding subfield, the punctuation that the rules require
   * in front of the current subfield.
   *
   * Exception handlers configured for the current subfield run first; as soon as one of
   * them reports a match nothing else is done. Otherwise the punctuation type decides:
   * - PERIOD: append `.` unless the value already ends in `.` or one of `? " ) ] - ! ,`;
   * - COMMA: append `,` unless the value ends in `,` or the current subfield starts
   *   with `[` or `(`;
   * - COND_COMMA: append `,` unless the value ends in `-` or `,`;
   * - SPACECOLON: make the value end in ` :` (adds it, or inserts the missing space
   *   before an existing trailing colon).
   * Any other type leaves the value untouched.
   *
   * @param {Object} preceedingSubfield - Subfield to punctuate; its `value` is mutated.
   * @param {Object} currentSubfield - Subfield that follows it.
   * @param {Object[]} rules - Rules for the field being handled.
   * @returns {void}
   */
  function addSubfieldPunctuation(preceedingSubfield, currentSubfield, rules) {
    const punctType = getPrecedingPunctuation(currentSubfield, rules);
    const exceptionsFunctions = getExceptions(currentSubfield, rules);

    if (exceptionsFunctions.some(applyException => applyException(preceedingSubfield))) {
      return;
    }

    // Both helpers read the live value, so checks made after an update see the new text.
    const valueMatches = pattern => pattern.test(preceedingSubfield.value);
    const setValue = nextValue => {
      debug(`Updated subfield ${preceedingSubfield.code} from '${preceedingSubfield.value}' to '${nextValue}'`);
      preceedingSubfield.value = nextValue; // eslint-disable-line functional/immutable-data
    };

    const endsInPunctuation = valueMatches(/[?")\]\-!,]$/u);
    debug(`addSubfieldPunctuation -- punctType: ${punctType} endsInPunctuation: ${endsInPunctuation}`);

    switch (punctType) {
    case 'PERIOD':
      if (!endsInPunctuation && !valueMatches(/\.$/u)) { // eslint-disable-line functional/no-conditional-statements
        setValue(`${preceedingSubfield.value}.`);
      }
      break;
    case 'COMMA':
      if (!valueMatches(/,$/u) && !(/^[[(]/u).test(currentSubfield.value)) { // eslint-disable-line functional/no-conditional-statements
        setValue(`${preceedingSubfield.value},`);
      }
      break;
    case 'COND_COMMA':
      if (!valueMatches(/[-,]$/u)) { // eslint-disable-line functional/no-conditional-statements
        setValue(`${preceedingSubfield.value},`);
      }
      break;
    case 'SPACECOLON':
      if (!valueMatches(/:$/u)) { // eslint-disable-line functional/no-conditional-statements
        setValue(`${preceedingSubfield.value} :`);
      }
      // A colon glued to the text gets the missing space inserted in front of it.
      if (valueMatches(/[^ ]:$/u)) { // eslint-disable-line functional/no-conditional-statements
        setValue(`${preceedingSubfield.value.slice(0, -1)} :`);
      }
      break;
    default:
      break;
    }

    debug('addSubfieldPunctuation -- end');
  }

  /**
   * Looks up which punctuation type must precede a subfield.
   *
   * @param {Object} subfield - Subfield whose `code` is matched against `namePortion`.
   * @param {Object[]} rules - Rules for the field being handled.
   * @returns {string} Upper-cased `preceedingPunctuation` of the first rule for the code.
   * @throws {Error} When no rule for the code provides a punctuation type.
   */
  function getPrecedingPunctuation(subfield, rules) {
    const firstMatch = rules.find(({namePortion}) => namePortion === subfield.code);
    const punct = firstMatch?.preceedingPunctuation;

    if (punct === undefined) {
      throw new Error(`Unknown subfield code ${subfield.code}`);
    }

    return punct.toUpperCase();
  }

  /**
   * Collects the exception handlers configured for a subfield.
   *
   * @param {Object} subfield - Subfield whose `code` is matched against `namePortion`.
   * @param {Object[]} rules - Rules for the field being handled.
   * @returns {Function[]} Handlers parsed from the `exceptions` text of the first rule
   *   for the subfield code.
   * @throws {Error} When no rule exists for the subfield code.
   */
  function getExceptions(subfield, rules) {
    const rulesForSubfield = rules.filter(({namePortion}) => namePortion === subfield.code);
    // Every matching rule is parsed, but only the first rule's handlers are used.
    const [handlers] = rulesForSubfield.map(({exceptions}) => parseExceptions(exceptions));

    if (handlers === undefined) {
      throw new Error(`Unknown subfield code ${subfield.code}`);
    }

    return handlers;
  }

  /**
   * Parses the free-text exception list of a rule into handler functions.
   *
   * Each line of the form `- <type> if preceded by <code>` yields one handler created
   * by ifPrecededByException, with the type trimmed and upper-cased and the code
   * stripped of `$` and surrounding whitespace. Lines of any other shape are ignored.
   *
   * @param {string} expectionsString - Newline-separated exception descriptions.
   * @returns {Function[]} Handlers in the order of their lines.
   */
  function parseExceptions(expectionsString) {
    const exceptionPattern = /- (.*) if preceded by (.*)/u; // eslint-disable-line prefer-named-capture-group

    return expectionsString.split('\n').flatMap(line => {
      const match = exceptionPattern.exec(line);
      if (!match) {
        return [];
      }

      const [, type, preceededCode] = match;
      const normalizedType = type.trim().toUpperCase();
      const normalizedCode = preceededCode.replace(/\$/ug, '').trim();
      return [ifPrecededByException(normalizedCode, normalizedType)];
    });
  }

  /**
   * Builds an exception handler that fires when the preceding subfield has a given code.
   *
   * When the handler fires it appends ` ;` (type SEMICOLON) or ` :` (type COLON) to the
   * subfield value, unless the value already ends in that character. Other types leave
   * the value untouched but still count as handled.
   *
   * @param {string} code - Subfield code the handler reacts to.
   * @param {string} type - Punctuation type to apply.
   * @returns {function(Object): boolean} Handler returning true when the subfield's
   *   code matched (after possibly mutating its `value`), false otherwise.
   */
  function ifPrecededByException(code, type) {
    const punctuationByType = new Map([
      ['SEMICOLON', {alreadyPresent: /;$/u, suffix: ' ;'}],
      ['COLON', {alreadyPresent: /:$/u, suffix: ' :'}]
    ]);

    return preceedingSubfield => {
      if (code !== preceedingSubfield.code) {
        return false;
      }

      debug(`Adding ${type} to ${preceedingSubfield.code}`);
      const punctuation = punctuationByType.get(type);
      if (punctuation && !punctuation.alreadyPresent.test(preceedingSubfield.value)) { // eslint-disable-line functional/no-conditional-statements
        const nextValue = `${preceedingSubfield.value}${punctuation.suffix}`;
        debug(`Updated subfield ${preceedingSubfield.code} from '${preceedingSubfield.value}' to '${nextValue}'`);
        preceedingSubfield.value = nextValue; // eslint-disable-line functional/immutable-data
      }

      return true;
    };
  }

  /**
   * Checks whether every field of the record is already punctuated as the rules require.
   *
   * @param {Object} record - Record with `leader` and `fields`; `leader[6]` is used as
   *   the record type.
   * @returns {{valid: boolean}} `valid` is true only when all fields pass validation.
   */
  function validate(record) {
    const isFieldValid = validateField(record.leader[6]);
    return {valid: record.fields.every(isFieldValid)};
  }

  /**
   * Punctuates every field of the record in place.
   *
   * @param {Object} record - Record with `leader` and `fields`; `leader[6]` is used as
   *   the record type. Field subfield values may be mutated.
   * @returns {boolean} Always true.
   */
  function fix(record) {
    const recordType = record.leader[6];
    // forEach rather than map: the call is made purely for its side effects.
    record.fields.forEach(field => {
      punctuateField(field, recordType);
    });
    return true;
  }

  return {
    description: 'Fixes punctuation of fields',
    validate,
    fix
  };
}