NatLibFi/marc-record-validators-melinda

View on GitHub
src/sortFields.js

Summary

Maintainability
C
1 day
Test Coverage
// Taken from project marc-record-js, file marcSortFields.js as this contains more and more Melinda-specific rules.

import clone from 'clone';
//import createDebugLogger from 'debug';
import {fieldHasSubfield, fieldToString} from './utils';
import {sortByTag, sortAlphabetically, fieldOrderComparator as globalFieldOrderComparator} from '@natlibfi/marc-record/dist/marcFieldSort';
import {isValidSubfield8} from './subfield8Utils';
import {isValidSubfield6, subfield6GetOccurrenceNumber} from './subfield6Utils';

//const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:sortFields');
//const debugData = debug.extend('data');
//const debugDev = debug.extend('dev');

const BIG_BAD_NUMBER = 999999999;
export default function () {

  return {
    description: 'Sort fields using both generic and Melinda specific rules',
    validate, fix
  };

  function fix(record) {
    const res = {message: [], fix: [], valid: true};

    record.fields.sort(fieldOrderComparator); // eslint-disable-line functional/immutable-data

    return res;
  }

  function validate(record) {
    const res = {message: []};

    const fields = record.fields.map(f => clone(f));
    fields.sort(fieldOrderComparator); // eslint-disable-line functional/immutable-data


    const relocatedFields = fields.filter((f, i) => fieldToString(f) !== fieldToString(record.fields[i]));

    if (relocatedFields.length > 0) { // eslint-disable-line functional/no-conditional-statements
      res.message.push(`${relocatedFields.length} field(s) in new places`); // eslint-disable-line functional/immutable-data
    }

    res.valid = !(res.message.length >= 1); // eslint-disable-line functional/immutable-data
    return res;
  }
}


export const relatorTermScore = { // Here bigger is better
  // NB! This is exportable as field internal $e sorting in marc-record-validators-js uses this.
  // NB! The more abstract, the earlier it appears.
  // Note that terms with same abstraction level might also have order preferences
  // We should 1) check the order of these, and 2) add translations (support Swedish at the very least)
  // work/teos > expression/ekspressio > manifestation/manifestaatio
  'säveltäjä': 100, 'composer': 100,
  'kirjoittaja': 99, 'author': 100,
  'sarjakuvantekijä': 99,
  'taiteilija': 98,
  'sanoittaja': 90,
  'käsikirjoittaja': 90,
  // expression:
  'toimittaja': 80, 'editor': 80,
  'sovittaja': 80, 'arranger': 80,
  'kuvittaja': 75,
  'editointi': 71, // for music, editor/toimittaja is another thing
  'kääntäjä': 70,
  'lukija': 61,
  // Manifestation level
  'esittäjä': 60,
  'johtaja': 50, // orkesterinjohtaja
  'kustantaja': 41,
  'julkaisija': 40

};

export function scoreRelatorTerm(value) { // sortRelatorTerms.js validator should call this on future version
  const normValue = value.replace(/[.,]+$/u, '');
  if (normValue in relatorTermScore) {
    return relatorTermScore[normValue];
  }
  return 0;
}

export function fieldOrderComparator(fieldA, fieldB) {

  //const sorterFunctions = [sortByTag, sortByIndexTerms, sortAlphabetically, sortByRelatorTerm, sortByOccurrenceNumber, preferFenniKeep, sortByFieldLinkAndSequenceNumber];

  const sorterFunctions = [sortByTag, sortByIndexTerms, sortAlphabetically, sortByRelatorTerm, sortByOccurrenceNumber, preferFenniKeep, sortByFieldLinkAndSequenceNumber];
  //const sorterFunctions = [sortByIndexTerms, sortByRelatorTerm, sortByOccurrenceNumber, preferFenniKeep, sortByFieldLinkAndSequenceNumber];

  return globalFieldOrderComparator(fieldA, fieldB, sorterFunctions);
}

function sortByIndexTerms(fieldA, fieldB) { // eslint-disable-line complexity, max-statements

  const indexTermFields = ['600', '610', '611', '630', '648', '650', '651', '652', '653', '654', '655', '656', '657', '658', '659', '662'];

  function scoreInd2(val) {
    const ind2Score = {'0': 0, '1': 1, '2': 2, '3': 3, '4': 8, '5': 5, '6': 6, '7': 7};

    if (val in ind2Score) {
      return ind2Score[val];
    }
    return 9;
  }

  // ATM this is not needed.
  // You may need this, if you change compare function order in sorterFunctions
  // istanbul ignore next
  if (fieldA.tag !== fieldB.tag) {
    return 0;
  }

  if (!indexTermFields.includes(fieldA.tag)) {
    return 0;
  }

  // Puts ind2=4 last
  if (scoreInd2(fieldA.ind2) > scoreInd2(fieldB.ind2)) {
    return 1;
  }
  if (scoreInd2(fieldA.ind2) < scoreInd2(fieldB.ind2)) {
    return -1;
  }

  function scoreDictionary(dictionary) {
    const dictionarySortIndex = {
      'yso/fin': 0,
      'yso/swe': 1,
      'yso/eng': 2,
      'slm/fin': 0.1,
      'slm/swe': 1.1,
      'kauno/fin': 2.1,
      'kauno/swe': 2.2,
      'kaunokki': 4,
      'bella': 5
    };

    if (dictionary in dictionarySortIndex) {
      return dictionarySortIndex[dictionary];
    }
    return BIG_BAD_NUMBER;
  }

  const dictionaryA = selectFirstValue(fieldA, '2');
  const dictionaryB = selectFirstValue(fieldB, '2');

  const dictScoreA = scoreDictionary(dictionaryA);
  const dictScoreB = scoreDictionary(dictionaryB);
  // Use priority order for listed dictionaries:
  if (dictScoreA > dictScoreB) {
    return 1;
  }
  if (dictScoreA < dictScoreB) {
    return -1;
  }
  // Unlisted dictionaries: sort $2 value alphabetically:
  //if (dictScoreA === BIG_BAD_NUMBER) {
  if (dictionaryA > dictionaryB) {
    return 1;
  }
  if (dictionaryA < dictionaryB) {
    return -1;
  }
  //}
  return 0;
}


function preferKeep(fieldA, fieldB, keepOwner = 'FENNI') {
  const hasKeepA = fieldHasSubfield(fieldA, '9', `${keepOwner}<KEEP>`);
  const hasKeepB = fieldHasSubfield(fieldB, '9', `${keepOwner}<KEEP>`);

  if (hasKeepA && !hasKeepB) {
    return -1;
  }
  if (!hasKeepA && hasKeepB) {
    return 1;
  }

  return 0;
}


function preferFenniKeep(fieldA, fieldB) {
  const fenniPreference = preferKeep(fieldA, fieldB, 'FENNI');
  if (fenniPreference !== 0) {
    return fenniPreference;
  }
  const violaPreference = preferKeep(fieldA, fieldB, 'VIOLA');
  if (violaPreference !== 0) {
    return violaPreference;
  }
  return preferKeep(fieldA, fieldB, 'FIKKA');
}

function sortByRelatorTerm(fieldA, fieldB) {
  //if (!['600', '610', '611', '630', '700', '710', '711', '730', '800', '810', '811', '830'].includes(fieldA.tag)) {
  if (!['700', '710', '711', '730'].includes(fieldA.tag)) {
    return 0;
  }

  function fieldGetMaxRelatorTermScore(field) {
    if (!field.subfields) {
      return -1;
    }
    // If field has $t, it's a teos-nimeke-auktoriteetti, and thus meaningless. These should follow all $t-less fields...
    if (fieldHasSubfield(field, 't')) {
      return -1;
    }
    const relatorSubfieldCode = ['611', '711', '811'].includes(field.tag) ? 'j' : 'e';
    const e = field.subfields.filter(sf => sf.code === relatorSubfieldCode);
    if (e.length === 0) { // No $e is still better than having a $t
      return 0;
    }
    const scores = e.map(sf => scoreRelatorTerm(sf.value));
    //debugDev(`RELATOR SCORE FOR '${fieldToString(field)}': ${scores.join(', ')}`);
    return Math.max(...scores);
  }

  const scoreA = fieldGetMaxRelatorTermScore(fieldA);
  const scoreB = fieldGetMaxRelatorTermScore(fieldB);

  if (scoreA < scoreB) {
    return 1;
  }
  if (scoreA > scoreB) {
    return -1;
  }
  return 0;
}


function fieldGetMinLinkAndSequenceNumber(field) {
  if (!field.subfields) {
    return BIG_BAD_NUMBER;
  }
  const relevantSubfields = field.subfields.filter(sf => isValidSubfield8(sf));
  // If val is something like "1.2\x" parseFloat() would give a syntax erro because of hex-like escape sequnce (at least on Chrome). Thus remove tail:
  const scores = relevantSubfields.map(sf => parseFloat(sf.value.replace(/\\.*$/u, '')));
  if (scores.length === 0) {
    return BIG_BAD_NUMBER;
  }
  return Math.min(...scores);
}

function sortByFieldLinkAndSequenceNumber(fieldA, fieldB) { // Sort by subfield $8 that is...
  const scoreA = fieldGetMinLinkAndSequenceNumber(fieldA);
  const scoreB = fieldGetMinLinkAndSequenceNumber(fieldB);
  //debugDev(` sf-8-A-score for '${fieldToString(fieldA)}: ${scoreA}`);
  //debugDev(` sf-8-B-score for '${fieldToString(fieldB)}: ${scoreB}`);
  if (scoreA === scoreB) {
    return 0;
  }
  if (scoreB === 0) {
    return 1;
  }
  if (scoreA === 0) {
    return -1;
  }
  if (scoreA > scoreB) { // smaller is better
    return 1;
  }
  return -1;
}


function sortByOccurrenceNumber(fieldA, fieldB) { // Sort by subfield $6

  function fieldGetOccurrenceNumber(field) { // should this function be exported? (based on validator sortRelatorFields.js)
    if (!field.subfields) {
      return 0;
    }
    const subfield6 = field.subfields.find(sf => isValidSubfield6(sf));
    if (subfield6 === undefined) {
      return 0;
    }
    return parseInt(subfield6GetOccurrenceNumber(subfield6), 10);
  }

  if (fieldA.tag !== '880') {
    return 0;
  }
  const scoreA = fieldGetOccurrenceNumber(fieldA);
  const scoreB = fieldGetOccurrenceNumber(fieldB);

  //debugDev(`A: '${fieldToString(fieldA)}: ${scoreA}`);
  //debugDev(`B: '${fieldToString(fieldB)}: ${scoreB}`);

  if (scoreA === scoreB) {
    return 0;
  }
  if (scoreB === 0) {
    return -1;
  }
  if (scoreA === 0) {
    return 1;
  }
  if (scoreA > scoreB) { // smaller is better
    return 1;
  }
  return -1;
}


function selectFirstValue(field, subcode) {
  return field.subfields
    .filter(subfield => subcode === subfield.code)
    .map(subfield => subfield.value)
    .slice(0, 1);
}