# lib/rets4r/client/parsers/metadata.rb
require 'delegate'
require 'nokogiri'
require 'rexml/document'
require 'yaml'
require 'rets4r/client/parsers/compact'
module RETS4R
class Client
# Provides a Hash-like representation of metadata.
# Currently only compact metadata is supported.
#
# String keys represent data that has come from the parsed metadata file.
# Symbol keys are used to indicate categories such as :lookup_types. All are pluralized
# except for :search_help, and all are snake_case.
#
# The following is the basic structure of a metadata object, which generally follows the
# RETS specification metadata structure, but with a few notable non-nested exceptions such as
# lookup_types.
#
# {:foreign_keys => {<fkey_id> => {...}},
# 'Comments' => ...,
# 'SystemID' => ...,
# 'SystemDescription' => ...
# <Resource Name> => {...,
# :lookup_types => {
# <Lookup Name> => {<Lookup Type Value> => {...}}},
#
# :objects => {<Object Type> => {...}},
# :classes => {<Class Name> => {...,
# :tables => {<System Name> => {...}}},
# :search_help => {<Search Help ID> => {...}},
# :lookups => {<Lookup Name> => {...}},
# :edit_masks => {<Edit Mask ID> => {...}}
#
# Update related metadata is currently NOT handled by the parser. The following metadata
# types ARE handled by the parser: System, Resource, Class, Table, Object, Lookup,
# LookupType, ForeignKeys, SearchHelp, and EditMask.
#
# To generate a metadata object, use one of the CompactDocument parse methods.
class Metadata < DelegateClass(Hash)
# The initial version of this would set the hash default_proc to create new
# hashes that would in turn create new hashes, which is quite clean, but also
# meant that you couldn't simply check to see if a given key was nil. Because this is
# meant to be a mostly transparent replacement of the REXML-based parser, I decided to
# manually create nested hashes as needed in case existing code relied on the
# existence of nils.
def initialize
  # Start from an empty plain Hash; it becomes the delegation target.
  super({})
end
## Helper access methods to ensure that nested hashes are created as needed.
# Returns the hash stored under +name+, storing a new empty hash there first
# when nothing truthy is present yet.
def resource(name)
  existing = self[name]
  return existing if existing

  self[name] = {}
end
# Returns the :classes hash of the given resource, creating it on first use.
def resource_classes(resource)
  entry = resource(resource)
  entry[:classes] ||= {}
end
# Returns the hash for one class of a resource, creating it on first use.
def resource_class(resource, klass)
  classes = resource_classes(resource)
  classes[klass] ||= {}
end
# Returns the :tables hash of a resource class, creating it on first use.
def class_tables(resource, klass)
  class_entry = resource_class(resource, klass)
  class_entry[:tables] ||= {}
end
# Returns the :objects hash of the given resource, creating it on first use.
def resource_objects(resource)
  entry = resource(resource)
  entry[:objects] ||= {}
end
# Returns the :lookups hash of the given resource, creating it on first use.
def resource_lookups(resource)
  entry = resource(resource)
  entry[:lookups] ||= {}
end
# Returns the hash of lookup type values for one lookup of a resource.
# Both the resource's :lookup_types hash and the per-lookup hash are created
# on first use.
def resource_lookup_types(resource, lookup)
  entry = resource(resource)
  types_by_lookup = (entry[:lookup_types] ||= {})
  types_by_lookup[lookup] ||= {}
end
# Returns the :search_help hash of the given resource, creating it on first use.
def search_help(resource)
  entry = resource(resource)
  entry[:search_help] ||= {}
end
# Returns the :edit_masks hash of the given resource, creating it on first use.
def edit_masks(resource)
  entry = resource(resource)
  entry[:edit_masks] ||= {}
end
# Returns the top-level :foreign_keys hash, creating it on first use.
def foreign_keys
  self[:foreign_keys] || (self[:foreign_keys] = {})
end
# Nokogiri SAX compact metadata parser
class CompactDocument < Nokogiri::XML::SAX::Document
# Separator between fields in COLUMNS and DATA content (a single tab).
# Frozen so the shared constant cannot be mutated in place.
DELIMITER = "\t".freeze
# Convenience entry point: builds a fresh document and has it parse +filename+.
def self.parse_file(filename)
  document = new
  document.parse_file(filename)
end
# Creates the Nokogiri SAX parser, handing it this document as its argument,
# and keeps it in @parser for later parse calls.
def initialize
  sax_parser = Nokogiri::XML::SAX::Parser.new(self)
  @parser = sax_parser
end
# Parses the file at +filename+ (default: 'metadata.xml') and returns whatever
# #parse returns for it.
#
# The file is opened with a block so the handle is closed as soon as parsing
# finishes or raises, instead of staying open until garbage collection.
def parse_file(filename = 'metadata.xml')
  File.open(filename) { |file| parse(file) }
end
# Resets the per-parse state (content buffer, element stack, metadata object),
# passes +content+ to the SAX parser and returns the metadata object.
def parse(content)
  @current_content = ''
  @stack = []
  @metadata = Metadata.new
  @parser.parse(content)
  @metadata
end
# SAX callback for an opening tag.
#
# DATA, COLUMNS and COMMENTS (compared case-insensitively) only clear the
# content buffer; COLUMNS additionally resets the column list. Any other tag
# is pushed on the stack as [upcased name, attribute hash].
def start_element(name, raw_attrs = [])
  attributes = Hash[*raw_attrs.flatten]
  tag = name.upcase

  if tag == 'COLUMNS'
    @current_content = ''
    @columns = []
  elsif %w[DATA COMMENTS].include?(tag)
    @current_content = ''
  else
    @stack << [tag, attributes]
  end
end
# SAX callback for a closing tag (tag names compared case-insensitively).
#
# DATA, COLUMNS and COMMENTS were never pushed on the stack, so they only
# trigger processing of the buffered content. SYSTEM was pushed like any other
# container tag, but it is also processed before being popped. Every other
# tag is simply popped.
def end_element(name)
  tag = name.upcase
  return process_content_as_data if tag == 'DATA'
  return process_content_as_columns if tag == 'COLUMNS'
  return process_content_as_comments if tag == 'COMMENTS'

  process_content_as_system if tag == 'SYSTEM'
  @stack.pop
end
# SAX callback for character data.
#
# Appends +content+ to the buffer only when the innermost stacked tag is one
# that receives_content? accepts. Text that arrives while the stack is empty
# (for instance when the document root is a tag that is never pushed) is
# ignored instead of raising NoMethodError on nil.
def characters(content)
  enclosing = @stack.last
  return if enclosing.nil?

  @current_content << content if receives_content?(enclosing[0])
end
private
# Tests +tag+ case-insensitively for a line that begins with METADATA or
# SYSTEM, optionally preceded by "X-". Returns the match offset, or nil when
# there is no match.
def receives_content?(tag)
  content_tag_pattern = /^(X-)?(METADATA|SYSTEM)/i
  tag =~ content_tag_pattern
end
# Splits the buffered content on DELIMITER and stores the pieces as the
# current column names.
def process_content_as_columns
  column_names = @current_content.split(DELIMITER)
  @columns = column_names
end
# Files one DATA row into the metadata structure according to the innermost
# stacked METADATA-* tag.
#
# The resource and class come from the row's own 'ResourceID'/'ClassName'
# columns when present (and are removed from the row), otherwise from the
# enclosing tag's 'Resource'/'Class' attributes. Resource and class rows are
# merged into their hash; all other recognised rows are stored under their
# identifying column, which is removed from the row first. Rows under any
# other tag are dropped.
def process_content_as_data
  row = hashify_current_content
  tag, attrs = @stack.last
  resource = row.delete('ResourceID') || attrs['Resource']
  klass = row.delete('ClassName') || attrs['Class']

  if tag == 'METADATA-RESOURCE'
    @metadata.resource(resource).merge!(row)
  elsif tag == 'METADATA-CLASS'
    @metadata.resource_class(resource, klass).merge!(row)
  elsif tag == 'METADATA-TABLE'
    key = row.delete('SystemName')
    @metadata.class_tables(resource, klass)[key] = row
  elsif tag == 'METADATA-OBJECT'
    key = row.delete('ObjectType')
    @metadata.resource_objects(resource)[key] = row
  elsif tag == 'METADATA-LOOKUP'
    key = row.delete('LookupName')
    @metadata.resource_lookups(resource)[key] = row
  elsif tag == 'METADATA-LOOKUP_TYPE'
    key = row.delete('Value')
    @metadata.resource_lookup_types(resource, attrs['Lookup'])[key] = row
  elsif tag == 'METADATA-FOREIGNKEYS'
    key = row.delete('ForeignKeyID')
    @metadata.foreign_keys[key] = row
  elsif tag == 'METADATA-SEARCH_HELP'
    key = row.delete('SearchHelpID')
    @metadata.search_help(resource)[key] = row
  elsif tag == 'METADATA-EDITMASK'
    key = row.delete('EditMaskID')
    @metadata.edit_masks(resource)[key] = row
  end
end
# Merges the attributes of the innermost stacked tag into the top level of
# the metadata. end_element calls this while closing SYSTEM, before the pop.
def process_content_as_system
  _tag, system_attrs = @stack.last
  @metadata.merge!(system_attrs)
end
# Stores the buffered content, stripped of surrounding whitespace, under the
# 'Comments' key of the metadata.
def process_content_as_comments
  comment_text = @current_content.strip
  @metadata['Comments'] = comment_text
end
# Builds a hash for the current DATA row by pairing each column name with the
# value at the same position in the DELIMITER-split content. Columns with an
# empty name are skipped; columns without a matching value map to nil.
#
# The new hash is given the metadata object's default_proc so that default
# values behave the same throughout the metadata.
def hashify_current_content
  values = @current_content.split(DELIMITER)
  row = Hash.new(&@metadata.default_proc)

  @columns.each_with_index do |column, index|
    next if column.empty?

    row[column] = values[index]
  end

  row
end
end
end
## Kept for compatibility with previous versions: MetadataParser is an alias
## for the Nokogiri-based Metadata::CompactDocument class.
MetadataParser = Metadata::CompactDocument
end
end