eventmachine/eventmachine

View on GitHub
lib/em/protocols/httpclient.rb

Summary

Maintainability
D
2 days
Test Coverage
#--
#
# Author:: Francis Cianfrocca (gmail: blackhedd)
# Homepage::  http://rubyeventmachine.com
# Date:: 16 July 2006
#
# See EventMachine and EventMachine::Connection for documentation and
# usage examples.
#
#----------------------------------------------------------------------------
#
# Copyright (C) 2006-07 by Francis Cianfrocca. All Rights Reserved.
# Gmail: blackhedd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of either: 1) the GNU General Public License
# as published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version; or 2) Ruby's License.
#
# See the file COPYING for complete licensing information.
#
#---------------------------------------------------------------------------
#
#


module EventMachine
  module Protocols

    #
    # @example
    #
    #   EventMachine.run {
    #     http = EventMachine::Protocols::HttpClient.request(
    #       :host => server,
    #       :port => 80,
    #       :request => "/index.html",
    #       :query_string => "parm1=value1&parm2=value2"
    #     )
    #     http.callback {|response|
    #       puts response[:status]
    #       puts response[:headers]
    #       puts response[:content]
    #     }
    #   }
    #--
    # @todo Add streaming so we can support enormous POSTs. Current max is 20meg.
    #   Timeout for connections that run too long or hang somewhere in the middle.
    #   Persistent connections (HTTP/1.1), may need a associated delegate object.
    #   DNS: Some way to cache DNS lookups for hostnames we connect to. Ruby's
    #   DNS lookups are unbelievably slow.
    #   HEAD requests.
    #   Convenience methods for requests. get, post, url, etc.
    #   SSL.
    #   Handle status codes like 304, 100, etc.
    #   Refactor this code so that protocol errors all get handled one way (an exception?),
    #   instead of sprinkling set_deferred_status :failed calls everywhere.
    #
    # @deprecated Please use [EM-HTTP-Request](https://github.com/igrigorik/em-http-request) instead.
    #
    class HttpClient < Connection
      include EventMachine::Deferrable

      MaxPostContentLength = 20 * 1024 * 1024

      def initialize
        warn "HttpClient is deprecated and will be removed. EM-Http-Request should be used instead."
        @connected = false
      end

      # @param args [Hash] The request arguments
      # @option args [String] :host The host IP/DNS name
      # @option args [Integer] :port The port to connect too
      # @option args [String] :verb The request type [GET | POST | DELETE | PUT]
      # @option args [String] :request The request path
      # @option args [Hash] :basic_auth The basic auth credentials (:username and :password)
      # @option args [String] :content The request content
      # @option args [String] :contenttype The content type (e.g. text/plain)
      # @option args [String] :query_string The query string
      # @option args [String] :host_header The host header to set
      # @option args [String] :cookie Cookies to set
      def self.request( args = {} )
        args[:port] ||= 80
        EventMachine.connect( args[:host], args[:port], self ) {|c|
          # According to the docs, we will get here AFTER post_init is called.
          c.instance_eval {@args = args}
        }
      end

      def post_init
        @start_time = Time.now
        @data = ""
        @read_state = :base
      end

      # We send the request when we get a connection.
      # AND, we set an instance variable to indicate we passed through here.
      # That allows #unbind to know whether there was a successful connection.
      # NB: This naive technique won't work when we have to support multiple
      # requests on a single connection.
      def connection_completed
        @connected = true
        send_request @args
      end

      def send_request args
        args[:verb] ||= args[:method] # Support :method as an alternative to :verb.
        args[:verb] ||= :get # IS THIS A GOOD IDEA, to default to GET if nothing was specified?

        verb = args[:verb].to_s.upcase
        unless ["GET", "POST", "PUT", "DELETE", "HEAD"].include?(verb)
          set_deferred_status :failed, {:status => 0} # TODO, not signalling the error type
          return # NOTE THE EARLY RETURN, we're not sending any data.
        end

        request = args[:request] || "/"
        unless request[0,1] == "/"
          request = "/" + request
        end

        qs = args[:query_string] || ""
        if qs.length > 0 and qs[0,1] != '?'
          qs = "?" + qs
        end

        version = args[:version] || "1.1"

        # Allow an override for the host header if it's not the connect-string.
        host = args[:host_header] || args[:host] || "_"
        # For now, ALWAYS tuck in the port string, although we may want to omit it if it's the default.
        port = args[:port].to_i != 80 ? ":#{args[:port]}" : ""

        # POST items.
        postcontenttype = args[:contenttype] || "application/octet-stream"
        postcontent = args[:content] || ""
        raise "oversized content in HTTP POST" if postcontent.length > MaxPostContentLength

        # ESSENTIAL for the request's line-endings to be CRLF, not LF. Some servers misbehave otherwise.
        # TODO: We ASSUME the caller wants to send a 1.1 request. May not be a good assumption.
        req = [
          "#{verb} #{request}#{qs} HTTP/#{version}",
          "Host: #{host}#{port}",
          "User-agent: Ruby EventMachine",
        ]

          if verb == "POST" || verb == "PUT"
            req << "Content-type: #{postcontenttype}"
            req << "Content-length: #{postcontent.length}"
          end

          # TODO, this cookie handler assumes it's getting a single, semicolon-delimited string.
          # Eventually we will want to deal intelligently with arrays and hashes.
          if args[:cookie]
            req << "Cookie: #{args[:cookie]}"
          end

          # Allow custom HTTP headers, e.g. SOAPAction
          args[:custom_headers].each do |k,v|
            req << "#{k}: #{v}"
          end if args[:custom_headers]

          # Basic-auth stanza contributed by Matt Murphy.
          if args[:basic_auth]
            basic_auth_string = ["#{args[:basic_auth][:username]}:#{args[:basic_auth][:password]}"].pack('m').strip.gsub(/\n/,'')
            req << "Authorization: Basic #{basic_auth_string}"
          end

          req << ""
          reqstring = req.map {|l| "#{l}\r\n"}.join
          send_data reqstring

          if verb == "POST" || verb == "PUT"
            send_data postcontent
          end
      end


      def receive_data data
        while data and data.length > 0
          case @read_state
          when :base
            # Perform any per-request initialization here and don't consume any data.
            @data = ""
            @headers = []
            @content_length = nil # not zero
            @content = ""
            @status = nil
            @chunked = false
            @chunk_length = nil
            @read_state = :header
            @connection_close = nil
          when :header
            ary = data.split( /\r?\n/m, 2 )
            if ary.length == 2
              data = ary.last
              if ary.first == ""
                if (@content_length and @content_length > 0) || @chunked || @connection_close
                  @read_state = :content
                else
                  dispatch_response
                  @read_state = :base
                end
              else
                @headers << ary.first
                if @headers.length == 1
                  parse_response_line
                elsif ary.first =~ /\Acontent-length:\s*/i
                  # Only take the FIRST content-length header that appears,
                  # which we can distinguish because @content_length is nil.
                  # TODO, it's actually a fatal error if there is more than one
                  # content-length header, because the caller is presumptively
                  # a bad guy. (There is an exploit that depends on multiple
                  # content-length headers.)
                  @content_length ||= $'.to_i
                elsif ary.first =~ /\Aconnection:\s*close/i
                  @connection_close = true
                elsif ary.first =~ /\Atransfer-encoding:\s*chunked/i
                  @chunked = true
                end
              end
            else
              @data << data
              data = ""
            end
          when :content
            if @chunked && @chunk_length
              bytes_needed = @chunk_length - @chunk_read
              new_data = data[0, bytes_needed]
              @chunk_read += new_data.length
              @content += new_data
              data = data[bytes_needed..-1] || ""
              if @chunk_length == @chunk_read && data[0,2] == "\r\n"
                @chunk_length = nil
                data = data[2..-1]
              end
            elsif @chunked
              if (m = data.match(/\A(\S*)\r\n/m))
                data = data[m[0].length..-1]
                @chunk_length = m[1].to_i(16)
                @chunk_read = 0
                if @chunk_length == 0
                  dispatch_response
                  @read_state = :base
                end
              end
            elsif @content_length
              # If there was no content-length header, we have to wait until the connection
              # closes. Everything we get until that point is content.
              # TODO: Must impose a content-size limit, and also must implement chunking.
              # Also, must support either temporary files for large content, or calling
              # a content-consumer block supplied by the user.
              bytes_needed = @content_length - @content.length
              @content += data[0, bytes_needed]
              data = data[bytes_needed..-1] || ""
              if @content_length == @content.length
                dispatch_response
                @read_state = :base
              end
            else
              @content << data
              data = ""
            end
          end
        end
      end


      # We get called here when we have received an HTTP response line.
      # It's an opportunity to throw an exception or trigger other exceptional
      # handling.
      def parse_response_line
        if @headers.first =~ /\AHTTP\/1\.[01] ([\d]{3})/
          @status = $1.to_i
        else
          set_deferred_status :failed, {
            :status => 0 # crappy way of signifying an unrecognized response. TODO, find a better way to do this.
          }
          close_connection
        end
      end
      private :parse_response_line

      def dispatch_response
        @read_state = :base
        set_deferred_status :succeeded, {
          :content => @content,
          :headers => @headers,
          :status => @status
        }
        # TODO, we close the connection for now, but this is wrong for persistent clients.
        close_connection
      end

      def unbind
        if !@connected
          set_deferred_status :failed, {:status => 0} # YECCCCH. Find a better way to signal no-connect/network error.
        elsif (@read_state == :content and @content_length == nil)
          dispatch_response
        end
      end
    end
  end
end