NatLibFi/marc-record-converters

View on GitHub
lib/converters/iso2709.js

Summary

Maintainability
F
4 days
Test Coverage
/**
 *
 * @licstart  The following is the entire license notice for the JavaScript code in this file. 
 *
 * ISC License
 *  
 * Modified work: Copyright (c) 2015, 2017 University Of Helsinki (The National Library Of Finland)  
 * Original work: Copyright (c) 2015 Pasi Tuominen
 *  
 * Permission to use, copy, modify, and/or distribute this software for
 * any purpose with or without fee is hereby granted, provided that the
 * above copyright notice and this permission notice appear in all
 * copies.
 *  
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY
 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *  
 * @licend  The above is the entire license notice
 * for the JavaScript code in this file.
 *
 **/

/* istanbul ignore next: umd wrapper */
(function(root, factory) {

  'use strict';

  // An AMD loader, when present, takes precedence over CommonJS.
  if (typeof define === 'function' && define.amd) {
    define(['marc-record-js'], factory);
    return;
  }

  // CommonJS (e.g. Node.js): export the object returned by the factory.
  if (typeof module === 'object' && module.exports) {
    module.exports = factory(require('marc-record-js'));
  }

}(this, factory));


function factory(MarcRecord, xmldom)
{


  /**
   * Returns the directory section of a record: everything from position 24
   * (directly after the 24-character leader) up to, but not including, the
   * first field terminator '\x1E' found at or after that position.
   *
   * @param {string} data_str - a single ISO 2709 record
   * @returns {string} the directory, without its terminating '\x1E'
   * @throws {Error} 'Invalid record' when there is no '\x1E' at or after position 24
   */
  function parseDirectory(data_str)
  {

    var LEADER_LENGTH = 24,
        terminator_pos = data_str.indexOf('\x1E', LEADER_LENGTH);

    if (terminator_pos === -1) {
      throw new Error('Invalid record');
    }

    return data_str.substring(LEADER_LENGTH, terminator_pos);

  }

  /**
   * Cuts the directory string into consecutive 12-character entries.
   * A trailing remainder shorter than 12 characters is ignored.
   *
   * @param {string} directory_str - the directory section of a record
   * @returns {string[]} the 12-character entries, in order
   */
  function parseDirectoryEntries(directory_str)
  {

    var ENTRY_LENGTH = 12,
        entries = [],
        offset;

    for (offset = 0; offset + ENTRY_LENGTH <= directory_str.length; offset += ENTRY_LENGTH) {
      entries.push(directory_str.slice(offset, offset + ENTRY_LENGTH));
    }

    return entries;
  }
  
  /**
   * Strips leading '0' characters from a numeric field, always keeping at
   * least the final character (so '0000' becomes '0').
   *
   * @param {string} input - zero-padded field text
   * @returns {string} the text without its leading zeros
   */
  function trimNumericField(input)
  {
    var first_kept = 0,
        last_index = input.length - 1;

    // Advance past zeros, but never past the last character.
    while (first_kept < last_index && input.charAt(first_kept) === '0') {
      first_kept++;
    }

    return first_kept === 0 ? input : input.substring(first_kept);

  }

  /**
   * Returns the first three characters of a directory entry (the field tag).
   */
  function dirFieldTag(directory_entry)
  {
    var TAG_END = 3;

    return directory_entry.slice(0, TAG_END);
  }
  
  /**
   * Returns characters 3-6 of a directory entry (the zero-padded field length).
   */
  function dirFieldLength(directory_entry)
  {
    var LENGTH_START = 3,
        LENGTH_END = 7;

    return directory_entry.slice(LENGTH_START, LENGTH_END);
  }
  
  /**
   * Returns characters 7-11 of a directory entry (the zero-padded starting
   * position of the field).
   */
  function dirStartingCharacterPosition(directory_entry)
  {
    var POSITION_START = 7,
        POSITION_END = 12;

    return directory_entry.slice(POSITION_START, POSITION_END);
  }

  /**
   * Extracts a substring addressed in UTF-8 bytes instead of characters.
   * The string is converted to bytes with stringToByteArray, the requested
   * byte range is copied, and the copy is decoded with byteArrayToString.
   *
   * @param {string} str - source string
   * @param {number} start_in_bytes - index of the first byte to take
   * @param {number} length_in_bytes - number of bytes to take
   * @returns {string} the decoded byte range
   */
  function substrUTF8(str, start_in_bytes, length_in_bytes)
  {

    var all_bytes = stringToByteArray(str),
        wanted_bytes = [],
        cursor,
        taken;

    for (cursor = start_in_bytes, taken = 0; taken < length_in_bytes; taken++, cursor++) {
      wanted_bytes.push(all_bytes[cursor]);
    }

    return byteArrayToString(wanted_bytes);

  }
  
  /**
   * Converts a string to an array of its UTF-8 byte values.
   * Adapted from http://stackoverflow.com/questions/1240408/reading-bytes-from-a-javascript-string?lq=1
   *
   * Characters outside the Basic Multilingual Plane are stored as two UTF-16
   * code units (a surrogate pair). The pair is encoded as one character;
   * passing either half alone to encodeURIComponent would throw.
   *
   * @param {string} str - the string to encode
   * @returns {number[]} the UTF-8 bytes (each 0-255)
   * @throws {URIError} when str contains an unpaired surrogate
   */
  function stringToByteArray(str)
  {

    var byte_array = [],
        code_unit,
        next_unit,
        character,
        encoded_bytes,
        i,
        j;

    for (i = 0; i < str.length; i++) {

      code_unit = str.charCodeAt(i);

      if (code_unit <= 0x7F) {
        byte_array.push(code_unit);
        continue;
      }

      character = str.charAt(i);

      /**
       * High surrogate followed by a low surrogate: consume both code units.
       * charCodeAt returns NaN past the end, which fails the range check.
       */
      if (code_unit >= 0xD800 && code_unit <= 0xDBFF) {
        next_unit = str.charCodeAt(i + 1);
        if (next_unit >= 0xDC00 && next_unit <= 0xDFFF) {
          character += str.charAt(i + 1);
          i++;
        }
      }

      /**
       * '%C3%A4' -> ['C3', 'A4']
       */
      encoded_bytes = encodeURIComponent(character).slice(1).split('%');

      for (j = 0; j < encoded_bytes.length; j++) {
        byte_array.push(parseInt(encoded_bytes[j], 16));
      }

    }

    return byte_array;

  }

  /**
   * Decodes an array of UTF-8 byte values into a string.
   * Adapted from http://stackoverflow.com/questions/1240408/reading-bytes-from-a-javascript-string?lq=1
   *
   * The bytes are rewritten as a percent-encoded string and handed to
   * decodeURIComponent: values above 0x7F become '%XX', a literal '%' (0x25)
   * becomes '%25', and every other value is emitted as its character.
   *
   * @param {number[]} byte_array - UTF-8 byte values
   * @returns {string} the decoded string
   */
  function byteArrayToString(byte_array)
  {

    var PERCENT_SIGN = 0x25,
        escaped_parts = [],
        current,
        i;

    for (i = 0; i < byte_array.length; i++) {

      current = byte_array[i];

      if (current <= 0x7F) {
        escaped_parts.push(current === PERCENT_SIGN ? "%25" : String.fromCharCode(current));
      } else {
        escaped_parts.push("%" + current.toString(16).toUpperCase());
      }

    }

    return decodeURIComponent(escaped_parts.join(''));

  }


  /**
   * Left-pads the text form of num_field with '0' until it is `length`
   * characters long.
   *
   * When no padding is needed the argument is returned untouched, so a
   * number that is already wide enough comes back as a number, not a string.
   *
   * @param {number|string} num_field - value to pad
   * @param {number} length - minimum width of the result
   * @returns {number|string} the padded string, or num_field itself
   */
  function addLeadingZeros(num_field, length)
  {

    var padded = num_field.toString();

    if (!(padded.length < length)) {
      return num_field;
    }

    do {
      padded = "0" + padded;
    } while (padded.length < length);

    return padded;

  }

  /**
   * Returns the length of the input string in UTF-8 bytes.
   *
   * Starts from str.length (UTF-16 code units) and corrects it using the
   * percent-encoded form of the string:
   *  - every continuation byte (%80-%BF) adds one byte;
   *  - every 4-byte sequence (lead byte %F0-%F4) occupies two code units in
   *    str.length but has only three continuation bytes, so one is subtracted.
   * A literal '%' is encoded as '%25' and therefore never matches either pattern.
   *
   * @param {string} str - the string to measure
   * @returns {number} number of bytes str occupies when encoded as UTF-8
   * @throws {URIError} when str contains an unpaired surrogate
   */
  function lengthInUtf8Bytes(str)
  {

    var encoded = encodeURIComponent(str),
        continuation_bytes = encoded.match(/%[89ABab]/g),
        four_byte_leads = encoded.match(/%F[0-4]/g);

    return str.length +
      (continuation_bytes ? continuation_bytes.length : 0) -
      (four_byte_leads ? four_byte_leads.length : 0);

  }

  /**
   * Converts ISO 2709 data into MarcRecord objects.
   *
   * The input is split on the record terminator '\x1D'; whatever follows the
   * last terminator is discarded.
   *
   * @param {string} record_data - one or more ISO 2709 records, each ending in '\x1D'
   * @returns {MarcRecord|MarcRecord[]} a single MarcRecord when the input holds
   *   exactly one record, otherwise an array of MarcRecord objects
   * @throws {TypeError} when the input contains no '\x1D'-terminated record,
   *   because the empty list is reduced without an initial value
   */
  function convertFromISO2709(record_data)
  {

    /**
     * Splits the body of a data field (the text following the two indicators)
     * into subfield objects of the form {code, value}.
     */
    function parseSubfields(str)
    {

      var subfields = [],
          curr_element_str = '';

      str.split('').forEach(function(item, index, arr) {

        var is_last = index === arr.length - 1,
            code,
            last_char;

        /**
         * '\x1F' begins a new subfield, '\x1E' ends the field. Neither is
         * part of the subfield text; any other character is collected.
         */
        if (item !== '\x1F' && item !== '\x1E' && !is_last) {
          curr_element_str += item;
          return;
        }

        if (curr_element_str === '') {
          return;
        }

        /**
         * The final character is appended unconditionally so that a field
         * without a terminator keeps its last character; a terminator added
         * this way is stripped again below.
         */
        if (is_last) {
          curr_element_str += item;
        }

        /**
         * The first character is the subfield code, the rest is the value
         */
        code = curr_element_str.charAt(0);
        curr_element_str = curr_element_str.substring(1);

        /**
         * Remove a trailing control character
         */
        last_char = curr_element_str.charAt(curr_element_str.length - 1);
        if (last_char === '\x1F' || last_char === '\x1E') {
          curr_element_str = curr_element_str.substring(0, curr_element_str.length - 1);
        }

        subfields.push({
          code: code,
          value: curr_element_str
        });
        curr_element_str = '';

      });

      return subfields;

    }

    /**
     * Converts one record (without its '\x1D' terminator) into a MarcRecord
     */
    function convertRecord(record_str)
    {

      /**
       * Parse directory section
       */
      var directory_entries = parseDirectoryEntries(parseDirectory(record_str)),
          /**
           * The data fields start right after the first occurrence of '\x1E'
           */
          data_field_str = record_str.substring(record_str.indexOf('\x1E') + 1),
          record = {
            leader: record_str.substring(0, 24),
            fields: []
          };

      function processDirectoryEntry(entry)
      {

        /**
         * ind1 and ind2 are declared here, in the scope that assigns them,
         * so they do not leak as implicit globals (a ReferenceError in strict mode).
         */
        var data_element_str, ind1, ind2;
        var tag = dirFieldTag(entry),
            /**
             * NOTE: field_length is the number of UTF-8 bytes in the field
             */
            field_length = parseInt(trimNumericField(dirFieldLength(entry)), 10),
            start_char_pos = parseInt(trimNumericField(dirStartingCharacterPosition(entry)), 10);

        /**
         * Tags 00X are control fields: a plain value without indicators or subfields.
         * The field terminator is excluded (field_length - 1).
         * NOTE(review): this branch addresses the data by character offset, whereas
         * data fields below are addressed by UTF-8 byte offset - confirm this is
         * intended for records with non-ASCII text ahead of a control field.
         */
        if (tag.substring(0, 2) == '00') {

          record.fields.push({
            tag: tag,
            value: data_field_str.substring(start_char_pos, start_char_pos + field_length - 1)
          });
          return;

        }

        /**
         * Otherwise this is a data field
         */
        data_element_str = substrUTF8(data_field_str, start_char_pos, field_length);

        /**
         * NOTE(review): when the third character is not the subfield delimiter the
         * character preceding the field is prepended - presumably to compensate for
         * a directory offset that is one too high; confirm against real data.
         */
        if (data_element_str[2] !== '\x1F') {
          data_element_str = data_field_str[start_char_pos - 1] + data_element_str;
        }

        /**
         * Parse indicators; a '\x1F' found in an indicator position is replaced by a space
         */
        ind1 = data_element_str.charAt(0);
        ind1 = ind1 == '\x1F' ? ' ' : ind1;

        ind2 = data_element_str.charAt(1);
        ind2 = ind2 == '\x1F' ? ' ' : ind2;

        /**
         * Skip the two indicators and parse all subfields
         */
        record.fields.push({
          tag: tag,
          ind1: ind1,
          ind2: ind2,
          subfields: parseSubfields(data_element_str.substring(2))
        });

      }

      /**
       * Loop through directory entries to read the fields
       */
      directory_entries.forEach(processDirectoryEntry);
      return new MarcRecord(record);

    }

    /**
     * The last element will always be empty because records end in char 1D
     */
    return record_data.split(/\x1D/).slice(0, -1).map(convertRecord).reduce(function(product, item) {
      return Array.isArray(product) ? product.concat(item) : [product, item];
    });

  }


  /**
   * Serializes one record, or an array of records, to an ISO 2709 string.
   *
   * Each record must provide `leader`, `getControlfields()` and
   * `getDatafields()`. Record length (leader positions 0-4) and base address
   * of data (leader positions 12-16) are recalculated from the output.
   *
   * @param {Object|Object[]} record_data - a record or an array of records
   * @returns {string} the serialized record(s), each terminated by '\x1D'
   */
  function convertToISO2709(record_data)
  {

    function convertRecord(record)
    {

      var directory_str = '',
          datafield_str = '',
          leader = record.leader,
          char_pos = 0,
          body_str;

      /**
       * Control fields: value followed by the field terminator
       */
      record.getControlfields().forEach(function(field) {

        var is_empty = field.value === undefined || field.value === '';

        directory_str += field.tag;

        if (is_empty) {
          /**
           * Special case: an empty control field consists of the terminator only
           */
          directory_str += addLeadingZeros(1, 4);
          directory_str += addLeadingZeros(char_pos, 5);
          char_pos += 1;
          datafield_str += '\x1E';
          return;
        }

        /**
         * NOTE(review): the field length written to the directory counts
         * characters, while the position counter advances in UTF-8 bytes -
         * confirm whether non-ASCII control field values need to be supported.
         */
        directory_str += addLeadingZeros(field.value.length + 1, 4);
        directory_str += addLeadingZeros(char_pos, 5);
        char_pos += lengthInUtf8Bytes(field.value) + 1;
        datafield_str += field.value + '\x1E';

      });

      /**
       * Data fields: two indicators, then each subfield introduced by '\x1F',
       * and the field terminator '\x1E' after the last subfield
       */
      record.getDatafields().forEach(function(field) {

        var last_index = field.subfields.length - 1,
            subfields_str;

        directory_str += field.tag;

        datafield_str += field.ind1 + field.ind2 + '\x1F';

        /**
         * Every subfield is followed by the delimiter that opens the next
         * one, except the last, which is followed by the field terminator
         */
        subfields_str = field.subfields.map(function(subfield, index) {
          return subfield.code + subfield.value + (index === last_index ? '\x1E' : '\x1F');
        }).join('');

        datafield_str += subfields_str;

        /**
         * Field length in bytes; the extra 3 covers the two indicators and
         * the first subfield delimiter written above
         */
        directory_str += addLeadingZeros(stringToByteArray(subfields_str).length + 3, 4);

        /**
         * Starting position of this field, then advance past it
         */
        directory_str += addLeadingZeros(char_pos, 5);
        char_pos += lengthInUtf8Bytes(subfields_str) + 3;

      });

      /**
       * Everything after the leader: directory, its terminator, the fields
       * and the record terminator
       */
      body_str = directory_str + '\x1E' + datafield_str + '\x1D';

      /**
       * Write the total record length in bytes into leader positions 0-4
       */
      leader = addLeadingZeros(stringToByteArray(leader + body_str).length, 5) + leader.substring(5);

      /**
       * Write the base address of data (leader + directory + terminator)
       * into leader positions 12-16
       */
      leader = leader.substring(0, 12) +
        addLeadingZeros(24 + directory_str.length + 1, 5) +
        leader.substring(17);

      return leader + body_str;

    }

    var output;

    if (!Array.isArray(record_data)) {
      return convertRecord(record_data);
    }

    output = '';
    record_data.forEach(function(item) {
      output = output + convertRecord(item);
    });

    return output;

  }

  return {
    to: convertToISO2709,
    from: convertFromISO2709
  };

}