lib/rets4r/client.rb
# RETS4R Client
#
# Copyright (c) 2006 Scott Patterson <scott.patterson@digitalaun.com>
#
# This program is copyrighted free software by Scott Patterson. You can
# redistribute it and/or modify it under the same terms of Ruby's license;
# either the dual license version in 2003 (see the file RUBYS), or any later
# version.
#
# TODO: 1.0 Support (Adding this support should be fairly easy)
# TODO: 2.0 Support (Adding this support will be very difficult since it is a completely different methodology)
# TODO: Case-insensitive header
require 'cgi'
require 'digest/md5'
require 'logger'
require 'net/http'
require 'uri'
require 'webrick/httputils'
require 'rets4r/client/dataobject'
require 'rets4r/client/exceptions'
require 'rets4r/client/links'
require 'rets4r/client/parsers/response_parser'
require 'rets4r/client/requester'
require 'rets4r/client/metadata_request'
module RETS4R
class Client
COMPACT_FORMAT = 'COMPACT'
METHOD_GET = 'GET'
METHOD_POST = 'POST'
METHOD_HEAD = 'HEAD'
DEFAULT_METHOD = METHOD_GET
DEFAULT_RETRY = 2
SUPPORTED_RETS_VERSIONS = ['1.5', '1.7', '1.7.2']
CAPABILITY_LIST = [
'Action',
'ChangePassword',
'GetObject',
'Login',
'LoginComplete',
'Logout',
'Search',
'GetMetadata',
'Update'
]
attr_accessor :mimemap
attr_reader :format, :urls
# Constructor
#
# Requires the URL to the RETS server and takes an optional output format. The output format
# determines the type of data returned by the various RETS transaction methods.
def initialize(url, format = COMPACT_FORMAT)
@request_struct = RETS4R::Client::Requester.new
@format = format
@urls = RETS4R::Client::Links.from_login_url(url)
@request_method = DEFAULT_METHOD
@response_parser = RETS4R::Client::ResponseParser.new
self.mimemap = {
'image/jpeg' => 'jpg',
'image/gif' => 'gif'
}
if block_given?
yield self
end
end
# Assigns a block that will be called just before the request is sent.
# This block must accept three parameters:
# * self
# * Net::HTTP instance
# * Hash of headers
#
# The block's return value will be ignored. If you want to prevent the request
# to go through, raise an exception.
#
# == Example
#
# client = RETS4R::Client.new(...)
# # Make a new pre_request_block that calculates the RETS-UA-Authorization header.
# client.set_pre_request_block do |rets, http, headers|
# a1 = Digest::MD5.hexdigest([headers["User-Agent"], @password].join(":"))
# if headers.has_key?("Cookie") then
# cookie = headers["Cookie"].split(";").map(&:strip).select {|c| c =~ /rets-session-id/i}
# cookie = cookie ? cookie.split("=").last : ""
# else
# cookie = ""
# end
#
# parts = [a1, "", cookie, headers["RETS-Version"]]
# headers["RETS-UA-Authorization"] = "Digest " + Digest::MD5.hexdigest(parts.join(":"))
# end
def set_pre_request_block(&block)
@request_struct.pre_request_block = block
end
# Assigns a block that will be called just before the response is returned to the calling method.
# This block must accept three parameters:
# * self
# * Net::HTTP instance
# * Hash of headers
#
# The block's return value will be ignored.
def set_post_request_block(&block)
@request_struct.post_request_block = block
end
# So very much delegated to the request struct
def set_header(name, value)
@request_struct.set_header(name, value)
end
def get_header(name)
@request_struct.headers[name]
end
def user_agent=(name)
@request_struct.set_header('User-Agent', name)
end
def user_agent
@request_struct.user_agent
end
def rets_version=(version)
@request_struct.rets_version = version
end
def rets_version
@request_struct.rets_version
end
def request_method=(method)
@request_method = method
@request_struct.method = method
end
def request_method
@request_method
end
def logger=(logger)
@logger = logger
@request_struct.logger = logger
end
def logger
@logger
end
#### RETS Transaction Methods ####
#
# Most of these transaction methods mirror the RETS specification methods, so if you are
# unsure what they mean, you should check the RETS specification. The latest version can be
# found at http://www.rets.org
# Attempts to log into the server using the provided username and password.
#
# If called with a block, the results of the login action are yielded,
# and logout is called when the block returns. In that case, #login
# returns the block's value. If called without a block, returns the
# result.
#
# As specified in the RETS specification, the Action URL is called and
# the results made available in the #secondary_results accessor of the
# results object.
def login(username, password) #:yields: login_results
@request_struct.username = username
@request_struct.password = password
# We are required to set the Accept header to this by the RETS 1.5 specification.
set_header('Accept', '*/*')
response = request(@urls.login)
# Parse response to get other URLS
results = @response_parser.parse_key_value(response.body)
# TODO: fix test to like this
# results = ResponseDocument.safe_parse(response.body).validate!.parse_key_value
if (results.success?)
CAPABILITY_LIST.each do |capability|
next unless results.response[capability]
uri = URI.parse(results.response[capability])
if uri.absolute?
@urls[capability] = uri
else
base = @urls.login.clone
base.path = results.response[capability]
@urls[capability] = base
end
end
logger.debug("Capability URL List: #{@urls.inspect}") if logger
else
raise LoginError.new(response.message + "(#{results.reply_code}: #{results.reply_text})")
end
# Perform the mandatory get request on the action URL.
results.secondary_response = perform_action_url
# We only yield
if block_given?
begin
yield results
ensure
self.logout
end
else
results
end
end
# Logs out of the RETS server.
def logout()
# If no logout URL is provided, then we assume that logout is not necessary (not to
# mention impossible without a URL). We don't throw an exception, though, but we might
# want to if this becomes an issue in the future.
request(@urls.logout) if @urls.logout
end
# Requests Metadata from the server. An optional type and id can be specified to request
# subsets of the Metadata. Please see the RETS specification for more details on this.
# The format variable tells the server which format to return the Metadata in. Unless you
# need the raw metadata in a specified format, you really shouldn't specify the format.
#
# If called with a block, yields the results and returns the value of the block, or
# returns the metadata directly.
def get_metadata(type = 'METADATA-SYSTEM', id = '*')
xml = download_metadata(type, id)
result = @response_parser.parse_metadata(xml, @format)
# TODO: fix test to like this
# result = ResponseDocument.safe_parse(xml).validate!.to_rexml
if block_given?
yield result
else
result
end
end
def download_metadata(type, id)
req = MetadataRequest.new(@urls.metadata, type, id, @format, @request_struct)
req.request.body
end
# Performs a GetObject transaction on the server. For details on the arguments, please see
# the RETS specification on GetObject requests.
#
# This method either returns an Array of DataObject instances, or yields each DataObject
# as it is created. If a block is given, the number of objects yielded is returned.
#
# TODO: how much of this could we move over to WEBrick::HTTPRequest#parse?
def get_object(resource, type, id, location = false) #:yields: data_object
header = {
'Accept' => mimemap.keys.join(',')
}
data = {
'Resource' => resource,
'Type' => type,
'ID' => id,
'Location' => location ? '1' : '0'
}
response = request(@urls.objects, data, header)
results = block_given? ? 0 : []
if response['content-type'] && response['content-type'].include?('text/xml')
# This probably means that there was an error.
# Response parser will likely raise an exception.
# TODO: test this
rr = ResponseDocument.safe_parse(response.body).validate!.to_transaction
return rr
elsif response['content-type'] && response['content-type'].include?('multipart/parallel')
content_type = process_content_type(response['content-type'])
# TODO: log this
# puts "SPLIT ON #{content_type['boundary']}"
boundary = content_type['boundary']
if boundary =~ /\s*'([^']*)\s*/
boundary = $1
end
parts = response.body.split("\r\n--#{boundary}")
parts.shift # Get rid of the initial boundary
# TODO: log this
# puts "GOT PARTS #{parts.length}"
parts.each do |part|
(raw_header, raw_data) = part.split("\r\n\r\n")
# TODO: log this
# puts raw_data.nil?
next unless raw_data
data_header = process_header(raw_header)
data_object = DataObject.new(data_header, raw_data)
if block_given?
yield data_object
results += 1
else
results << data_object
end
end
else
info = {
'content-type' => response['content-type'], # Compatibility shim. Deprecated.
'Content-Type' => response['content-type'],
'Object-ID' => response['Object-ID'],
'Content-ID' => response['Content-ID']
}
if response['Transfer-Encoding'].to_s.downcase == "chunked" || response['Content-Length'].to_i > 100 then
data_object = DataObject.new(info, response.body)
if block_given?
yield data_object
results += 1
else
results << data_object
end
end
end
results
end
# Peforms a RETS search transaction. Again, please see the RETS specification for details
# on what these parameters mean. The options parameter takes a hash of options that will
# added to the search statement.
def search(search_type, klass, query, options = false)
header = {}
# Required Data
data = {
'SearchType' => search_type,
'Class' => klass,
'Query' => query,
'QueryType' => 'DMQL2',
'Format' => format,
'Count' => '0'
}
# Options
#--
# We might want to switch this to merge!, but I've kept it like this for now because it
# explicitly casts each value as a string prior to performing the search, so we find out now
# if can't force a value into the string context. I suppose it doesn't really matter when
# that happens, though...
#++
options.each { |k,v| data[k] = v.to_s } if options
response = request(@urls.search, data, header)
# TODO: make parser configurable
results = RETS4R::Client::CompactNokogiriParser.new(response.body)
if block_given?
results.each {|result| yield result}
else
return results.to_a
end
end
def count(search_type, klass, query, options = false)
header = {}
# Required Data
data = {
'SearchType' => search_type,
'Class' => klass,
'Query' => query,
'QueryType' => 'DMQL2',
'Format' => format,
'Count' => '2'
}
options.each { |k,v| data[k] = v.to_s } if options
response = request(@urls.search, data, header)
# TODO: fix test to like this
# ResponseDocument.safe_parse(xml).validate!.parse_count
@response_parser.parse_count(response.body)
end
private
# XXX: This is crap. It does not properly handle quotes.
def process_content_type(text)
content = {}
field_start = text.index(';')
content['content-type'] = text[0 ... field_start].strip
parts = text.split(';')
parts.each do |part|
(name, value) = part.gsub(/\"/, '').split('=')
content[name.strip] = value ? value.strip : value
end
content
end
# Processes the HTTP header
#--
#++
def process_header(raw)
# this util gives us arrays of values. We are only set up to handle one header value.
WEBrick::HTTPUtils.parse_header(raw.strip).map.inject({}) do |h,(k,v)|
h[k]=v.first; h
end
end
# This is the primary transaction method, which the other public methods make use of.
# Given a url for the transaction (endpoint) it makes a request to the RETS server.
#
#--
# This needs to be better documented, but for now please see the public transaction methods
# for how to make use of this method.
#++
def request(url, data = {}, header = {}, method = @request_method, retry_auth = DEFAULT_RETRY)
@request_struct.request(url, data, header, method, retry_auth)
end
# If an action URL is present in the URL capability list, it calls that action URL and returns the
# raw result. Throws a generic RETSException if it is unable to follow the URL.
def perform_action_url
begin
if @urls.has_key?('Action')
return request(@urls.action, {}, {}, METHOD_GET)
end
rescue
raise RETSException.new("Unable to follow action URL: '#{$!}'.")
end
end
# Provides a proxy class to allow for net/http to log its debug to the logger.
class HTTPDebugLogger
def initialize(logger)
@logger = logger
end
def <<(data)
@logger.debug(data)
end
end
end
end