NatLibFi/marc-record-serializers

View on GitHub
src/iso2709.js

Summary

Maintainability
D
2 days
Test Coverage
/**
*
* @licstart  The following is the entire license notice for the JavaScript code in this file.
*
* Copyright 2014-2017 Pasi Tuominen
* Copyright 2018-2020 University Of Helsinki (The National Library Of Finland)
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* @licend  The above is the entire license notice
* for the JavaScript code in this file.
*
*/

import {EventEmitter} from 'events';
import {StringDecoder} from 'string_decoder';
import {MarcRecord} from '@natlibfi/marc-record';
//import createDebugLogger from 'debug';

//const debug = createDebugLogger('@natlibfi/marc-record-serializers:iso2709');
//const debugData = debug.extend('data');

/**
 * Wraps a stream of ISO 2709 data and re-emits it record by record.
 *
 * Incoming chunks are accumulated until a record terminator (\x1D) is seen;
 * the text before the terminator is handed to `from()` and the result is
 * emitted as a 'data' event. The source stream's 'end' and 'error' events are
 * forwarded unchanged. Text left in the buffer without a terminator when the
 * stream ends is not emitted.
 *
 * @param {Object} stream - Event source emitting 'data', 'end' and 'error'
 * @param {Object} [validationOptions={}] - Passed through to `from()`
 * @returns {EventEmitter} Emits 'data' (parsed record), 'error' and 'end'
 */
export function reader(stream, validationOptions = {}) {

  const emitter = new class extends EventEmitter { }();

  start();
  return emitter;

  function start() {
    // A single decoder is kept for the whole stream so that a multi-byte UTF-8
    // sequence split across two binary chunks is reassembled instead of being
    // decoded as replacement characters.
    const decoder = new StringDecoder('utf8');

    // eslint-disable-next-line functional/no-let
    let charbuffer = '';

    stream.on('end', () => {
      emitter.emit('end');
    });

    stream.on('error', error => {
      emitter.emit('error', error);
    });

    stream.on('data', data => {
      // Binary chunks go through the decoder; anything else (normally strings)
      // is appended as before.
      charbuffer += ArrayBuffer.isView(data) ? decoder.write(data) : data;

      // eslint-disable-next-line functional/no-let
      let pos = charbuffer.indexOf('\x1D');

      // eslint-disable-next-line functional/no-loop-statements
      while (pos !== -1) {
        const raw = charbuffer.slice(0, pos);
        charbuffer = charbuffer.slice(pos + 1);

        // Parsing failures (and exceptions thrown by 'data' listeners) are
        // reported as 'error' events; the remaining records are still processed.
        try {
          emitter.emit('data', from(raw, validationOptions));
        } catch (excp) {
          emitter.emit('error', excp);
        }

        pos = charbuffer.indexOf('\x1D');
      }
    });
  }
}


/**
 * Parses a single ISO 2709 record (without the trailing record terminator
 * \x1D) into a MarcRecord.
 *
 * The first 24 characters are taken as the leader. The directory follows and
 * runs up to the first field terminator (\x1E) at or after position 24; it is
 * read as consecutive 12-character entries (3-character tag, 4-digit field
 * length, 5-digit starting position). Everything after that terminator is
 * field data.
 *
 * Fields whose tag starts with '00' become control fields ({tag, value});
 * all other fields become data fields ({tag, ind1, ind2, subfields}).
 *
 * NOTE: data fields are sliced by UTF-8 byte offsets (via utf8Substr), while
 * control fields are sliced by character index. The two agree only as long as
 * no multi-byte character precedes or occurs inside a control field.
 *
 * @param {string} dataStr - Raw record text
 * @param {Object} [validationOptions={}] - Passed to the MarcRecord constructor
 * @returns {MarcRecord} The parsed record
 * @throws {Error} 'Invalid record' when no directory terminator is found
 */
// eslint-disable-next-line max-statements
export function from(dataStr, validationOptions = {}) {
  const LEADER_LENGTH = 24;
  const DIRECTORY_ENTRY_LENGTH = 12;

  const leader = dataStr.substring(0, LEADER_LENGTH);
  const record = {
    leader,
    fields: []
  };

  // Parse directory section
  const directory = parseDirectory(dataStr);
  const directoryEntries = parseDirectoryEntries(directory);

  // Field data starts right after the terminator that closes the directory
  const dataFieldStr = dataStr.substring(LEADER_LENGTH + directory.length + 1);

  // eslint-disable-next-line functional/no-loop-statements
  for (const entry of directoryEntries) {
    const tag = entry.substring(0, 3);
    const fieldLength = parseInt(entry.substring(3, 7), 10);
    const startCharPos = parseInt(entry.substring(7, 12), 10);

    // Control fields are recognised by a tag of the form 00X.
    // Note: this cannot handle control fields that have other tags
    //       (e.g. Aleph's FMT; non-numeric tags are not standard MARC21 anyway)
    // eslint-disable-next-line functional/no-conditional-statements
    if (tag.substring(0, 2) === '00') {
      // The field length includes the terminator, hence the -1
      // eslint-disable-next-line functional/immutable-data
      record.fields.push({
        tag,
        value: dataFieldStr.substring(startCharPos, startCharPos + fieldLength - 1)
      });
    } else {
      // eslint-disable-next-line functional/immutable-data
      record.fields.push(parseDatafield(tag, startCharPos, fieldLength));
    }
  }

  return new MarcRecord(record, validationOptions);

  // Builds a data field object from the bytes addressed by a directory entry.
  function parseDatafield(tag, startCharPos, fieldLength) {
    // NOTE: fieldLength and startCharPos are counted in UTF-8 bytes
    // eslint-disable-next-line functional/no-let
    let dataElementStr = utf8Substr(dataFieldStr, startCharPos, fieldLength);

    // If the subfield delimiter is not where it should be (right after the two
    // indicators), the slice is assumed to start one character late and the
    // preceding character is pulled in. Nothing is prepended when there is no
    // preceding character.
    // eslint-disable-next-line functional/no-conditional-statements
    if (dataElementStr[2] !== '\x1F') {
      const precedingChar = dataFieldStr[startCharPos - 1];
      // eslint-disable-next-line functional/no-conditional-statements
      if (precedingChar !== undefined) {
        dataElementStr = precedingChar + dataElementStr;
      }
    }

    return {
      tag,
      ind1: toIndicator(dataElementStr.charAt(0)),
      ind2: toIndicator(dataElementStr.charAt(1)),
      // Bypass indicators
      subfields: parseSubfields(dataElementStr.substring(2))
    };
  }

  // A subfield delimiter in an indicator position is read as a blank indicator.
  function toIndicator(char) {
    return char === '\x1F' ? ' ' : char;
  }

  // Splits the part of a data field that follows the indicators into
  // {code, value} pairs. '\x1F' begins a new subfield, '\x1E' ends the field.
  function parseSubfields(subfieldStr) {
    const subfields = [];
    const lastIndex = subfieldStr.length - 1;
    // eslint-disable-next-line functional/no-let
    let currElementStr = '';

    // eslint-disable-next-line functional/no-loop-statements, functional/no-let, no-plusplus
    for (let j = 0; j <= lastIndex; j++) {
      const char = subfieldStr.charAt(j);
      const endsSubfield = char === '\x1F' || char === '\x1E' || j === lastIndex;

      // eslint-disable-next-line functional/no-conditional-statements
      if (!endsSubfield) {
        currElementStr += char;
      // eslint-disable-next-line functional/no-conditional-statements
      } else if (currElementStr !== '') {
        // The final character may be real data when the terminator is missing,
        // so it is appended first and stripped again below if it is a control
        // character.
        // eslint-disable-next-line functional/no-conditional-statements
        if (j === lastIndex) { // eslint-disable-line max-depth
          currElementStr += char;
        }

        const code = currElementStr.charAt(0);
        // eslint-disable-next-line functional/no-let
        let value = currElementStr.substring(1);

        // Remove trailing control character
        const tail = value.charAt(value.length - 1);
        // eslint-disable-next-line functional/no-conditional-statements
        if (tail === '\x1F' || tail === '\x1E') { // eslint-disable-line max-depth
          value = value.substring(0, value.length - 1);
        }

        // eslint-disable-next-line functional/immutable-data
        subfields.push({code, value});
        currElementStr = '';
      }
    }

    return subfields;
  }

  // Returns the directory: the text from position 24 up to (but excluding)
  // the first '\x1E', which marks the end of the directory.
  function parseDirectory(recordStr) {
    const terminatorPos = recordStr.indexOf('\x1E', LEADER_LENGTH);

    if (terminatorPos === -1) {
      throw new Error('Invalid record');
    }

    return recordStr.substring(LEADER_LENGTH, terminatorPos);
  }

  // Returns an array of 12-character directory entries. A trailing partial
  // entry is ignored.
  function parseDirectoryEntries(directoryStr) {
    const entries = [];

    // eslint-disable-next-line functional/no-loop-statements, functional/no-let
    for (let pos = 0; directoryStr.length - pos >= DIRECTORY_ENTRY_LENGTH; pos += DIRECTORY_ENTRY_LENGTH) {
      // eslint-disable-next-line functional/immutable-data
      entries.push(directoryStr.substring(pos, pos + DIRECTORY_ENTRY_LENGTH));
    }

    return entries;
  }
}

/**
 * Serializes a record into an ISO 2709 string.
 *
 * Output layout: 24-character leader, directory (one 12-character entry per
 * field: tag, 4-digit field length, 5-digit starting position), a field
 * terminator (\x1E), the field data, and a record terminator (\x1D).
 * Leader positions 00-04 (record length) and 12-16 (base address of data) are
 * recalculated; the rest of the leader is copied unchanged.
 *
 * All lengths and positions are counted in UTF-8 bytes, for control fields
 * and data fields alike. An undefined control field or subfield value is
 * written as an empty value.
 *
 * @param {Object} record - Object providing `leader`, `getControlfields()`
 *   and `getDatafields()`
 * @returns {string} The serialized record
 */
export function to(record) {
  let directoryStr = ''; // eslint-disable-line functional/no-let
  let dataFieldStr = ''; // eslint-disable-line functional/no-let
  let charPos = 0; // eslint-disable-line functional/no-let

  record.getControlfields().forEach(field => {
    const value = field.value === undefined ? '' : field.value;
    // Value plus field terminator
    const fieldLength = lengthInUtf8Bytes(value) + 1;

    directoryStr += field.tag + addLeadingZeros(fieldLength, 4) + addLeadingZeros(charPos, 5);
    dataFieldStr += `${value}\x1E`;
    charPos += fieldLength;
  });

  record.getDatafields().forEach(field => {
    const {tag, ind1, ind2, subfields} = field;

    // Subfields are separated by \x1F; the last one carries the field terminator
    const subfieldData = subfields.map((subfield, i) => {
      const value = subfield.value === undefined ? '' : subfield.value;
      const terminator = i === subfields.length - 1 ? '\x1E' : '\x1F';
      return subfield.code + value + terminator;
    }).join('');

    // Two indicators and the first subfield delimiter add 3 bytes
    const fieldLength = lengthInUtf8Bytes(subfieldData) + 3;

    directoryStr += tag + addLeadingZeros(fieldLength, 4) + addLeadingZeros(charPos, 5);
    dataFieldStr += `${ind1 + ind2}\x1F${subfieldData}`;
    charPos += fieldLength;
  });

  const {leader} = record;
  const body = `${directoryStr}\x1E${dataFieldStr}\x1D`;

  // Recalculate record length (leader 00-04) and base address of data
  // (leader 12-16: leader + directory + directory terminator)
  const recordLength = addLeadingZeros(lengthInUtf8Bytes(leader + body), 5);
  const baseAddress = addLeadingZeros(24 + directoryStr.length + 1, 5);
  const newLeader = recordLength + leader.substring(5, 12) + baseAddress + leader.substring(17);

  return newLeader + body;

  // Left-pads the number with zeros up to the given width. Values that are
  // already wider are returned unpadded.
  function addLeadingZeros(numField, length) {
    const digits = String(numField);
    return digits.length >= length ? digits : '0'.repeat(length - digits.length) + digits;
  }

  // Returns the length of the input string in UTF-8 bytes. Iterates by code
  // point so that characters outside the BMP count as 4 bytes.
  function lengthInUtf8Bytes(str) {
    let byteCount = 0; // eslint-disable-line functional/no-let

    // eslint-disable-next-line functional/no-loop-statements
    for (const char of str) {
      const codePoint = char.codePointAt(0);

      // eslint-disable-next-line functional/no-conditional-statements
      if (codePoint <= 0x7F) {
        byteCount += 1;
      } else if (codePoint <= 0x7FF) {
        byteCount += 2;
      } else if (codePoint <= 0xFFFF) {
        byteCount += 3;
      } else {
        byteCount += 4;
      }
    }

    return byteCount;
  }
}

/**
 * Returns the part of `str` that occupies `lengthInBytes` UTF-8 bytes
 * starting at byte offset `startInBytes`.
 *
 * The requested range is not clamped: asking for bytes past the end of the
 * string, or for a range that cuts a multi-byte character in half, results
 * in an exception.
 *
 * @param {string} str - Source text
 * @param {number} startInBytes - Offset of the first byte to include
 * @param {number} lengthInBytes - Number of bytes to include
 * @returns {string} The decoded slice
 */
function utf8Substr(str, startInBytes, lengthInBytes) {
  const allBytes = stringToByteArray(str);
  const wantedBytes = [];

  // eslint-disable-next-line functional/no-loop-statements, functional/no-let, no-plusplus
  for (let offset = startInBytes; wantedBytes.length < lengthInBytes; offset++) {
    // eslint-disable-next-line functional/immutable-data
    wantedBytes.push(allBytes[offset]);
  }

  return decodeUtf8(wantedBytes);

  // Turns UTF-8 bytes back into a string by percent-encoding every non-ASCII
  // byte (and the percent sign itself) and letting decodeURIComponent do the
  // actual decoding.
  // From http://stackoverflow.com/questions/1240408/reading-bytes-from-a-javascript-string?lq=1
  function decodeUtf8(bytes) {
    const percentEncoded = bytes.map(byte => {
      if (byte <= 0x7F) {
        return byte === 0x25 ? '%25' : String.fromCharCode(byte);
      }

      return `%${byte.toString(16).toUpperCase()}`;
    }).join('');

    return decodeURIComponent(percentEncoded);
  }
}

/**
 * Converts a string to an array of its UTF-8 byte values.
 *
 * The string is walked by code point (not by UTF-16 unit), so characters
 * outside the Basic Multilingual Plane are encoded as their 4-byte sequence.
 * Non-ASCII characters are encoded through encodeURIComponent, which yields
 * the UTF-8 bytes as %XX escapes; a lone surrogate therefore still raises
 * URIError.
 *
 * Based on http://stackoverflow.com/questions/1240408/reading-bytes-from-a-javascript-string?lq=1
 *
 * @param {string} str - Text to encode
 * @returns {number[]} UTF-8 bytes (0-255)
 */
function stringToByteArray(str) {
  const byteArray = [];

  // eslint-disable-next-line functional/no-loop-statements
  for (const char of str) {
    const codePoint = char.codePointAt(0);

    // eslint-disable-next-line functional/no-conditional-statements
    if (codePoint <= 0x7F) {
      // eslint-disable-next-line functional/immutable-data
      byteArray.push(codePoint);
    // eslint-disable-next-line functional/no-conditional-statements
    } else {
      // '%C3%A4' -> ['C3', 'A4']
      const hexBytes = encodeURIComponent(char).substring(1).split('%');
      // eslint-disable-next-line functional/no-loop-statements
      for (const hex of hexBytes) {
        // eslint-disable-next-line functional/immutable-data
        byteArray.push(parseInt(hex, 16));
      }
    }
  }

  return byteArray;
}