rapid7/metasploit-framework

View on GitHub
lib/msf/core/exploit/pdf_parse.rb

Summary

Maintainability
C
1 day
Test Coverage
# -*- coding: binary -*-
###
#
# This module provides methods for parsing and interacting
# with the PDF format.
#
###

module Msf
module Exploit::PDF_Parse

  # Sets up the mixin: forwards to the including class' initializer and
  # registers the single FILENAME option under this mixin's name.
  #
  # info - Hash of module information, handed on unchanged through `super`.
  def initialize(info = {})
    super

    filename_option = OptString.new('FILENAME', [ true, 'The file name.', 'some.pdf'])
    register_options([filename_option], Msf::Exploit::PDF_Parse)
  end

  # Reads the PDF named by the INFILENAME datastore entry in binary mode.
  #
  # NOTE(review): this mixin registers FILENAME, not INFILENAME - the
  # including module presumably registers INFILENAME itself; confirm.
  #
  # Returns the file contents as a String.
  def read_pdf()
    path = "#{datastore['INFILENAME']}"
    File.binread(path)
  end

  # Cuts one xref/trailer section out of the document.
  #
  # offset - Integer position in stream where the section starts.
  # stream - String holding the PDF contents.
  #
  # Returns the String running from offset through the first ">>" found at
  # or after offset, plus the single character that follows it (the range
  # is inclusive). Raises NoMethodError when no ">>" is found.
  def xref_trailer_parse(offset, stream)
    dict_close = stream.index(/>>/, offset)
    last = dict_close + 2
    stream[offset..last]
  end

  # Pulls selected entries out of the text of a trailer section.
  #
  # xref_trailer - String containing the trailer dictionary text.
  #
  # Returns a Hash with String keys. A key is present only when its
  # pattern matched; the value is the captured text (a String):
  #   "Size", "Prev", "XRefStm" - a run of digits
  #   "Root", "Info"            - "<digits> <single digit>"
  #   "ID"                      - the bracketed array, brackets included
  def trailer_parse(xref_trailer)
    # Order matters only for the insertion order of the resulting Hash.
    fields = [
      ['Size',    /Size (\d+)/m],
      ["Root",    /Root (\d+ \d)/m],
      ["Info",    /Info (\d+ \d)/m],
      ["ID",      /ID(\[.+\])/m],
      ["Prev",    /Prev (\d+)/m],
      ["XRefStm", /XRefStm (\d+)/m]
    ]

    fields.each_with_object({}) do |(key, pattern), trailer|
      hit = xref_trailer.match(pattern)
      trailer[key] = hit[1] if hit
    end
  end

  # Looks up the byte offset recorded for an indirect object in a classic
  # cross-reference table.
  #
  # xref_trailer - String holding an "xref" table followed by the
  #                "trailer" keyword.
  # obj_name     - String containing "<object number> <generation>",
  #                for example "12 0".
  #
  # The table is walked subsection by subsection. Every entry is assumed
  # to occupy exactly 20 bytes, so the wanted entry is reached by
  # arithmetic rather than by scanning.
  #
  # Returns the Integer read from the start of the object's entry, or nil
  # when:
  #   * obj_name does not contain an object/generation pair,
  #   * the generation is not 0 (such entries are not resolved),
  #   * xref_trailer has no "trailer" keyword,
  #   * no subsection covers the object, or
  #   * the computed entry position lies past the end of the table.
  def object_locate(xref_trailer,obj_name)
    name = obj_name.to_s.match(/(\d+) (\d+)/)
    return nil unless name

    obj = name[1].to_i
    gen = name[2].to_i
    return nil unless gen == 0

    table = xref_trailer.to_s
    trailer_at = table.index(/trailer/)
    # Without a "trailer" keyword there is no table to walk.
    return nil unless trailer_at

    # Exclusive range: a keyword at position 0 yields an empty table
    # instead of wrapping around to the whole string.
    xrefs = table[0...trailer_at]
    entry_size = 20

    header = xrefs.match(/xref\r?\n?(\d+) (\d+)\r?\n?/m)
    while header
      first_obj = header[1].to_i
      count = header[2].to_i
      offset = header.end(0)

      if obj >= first_obj && obj <= first_obj + count - 1
        entry_at = offset + (obj - first_obj) * entry_size
        # 12 characters are read; to_i stops at the first non-digit.
        entry = xrefs[entry_at..entry_at + 11]
        # A header that promises more entries than the table holds must
        # not be reported as offset 0.
        return nil if entry.nil? || entry.empty?
        return entry.to_i
      end

      # Skip this subsection's entries and look for the next header.
      offset += count * entry_size
      break unless offset < xrefs.length
      header = xrefs.match(/(\d+) (\d+)\r?\n?/m, offset)
    end

    nil
  end

  # Returns the raw text of an indirect object.
  #
  # xref_trailers - Array of xref/trailer sections; they are tried in
  #                 order and the first one for which object_locate
  #                 returns an offset wins.
  # obj_name      - String "<object number> <generation>".
  # stream        - String holding the PDF contents.
  #
  # Returns the String from the object's offset through "endobj" plus the
  # one character after it (the range is inclusive), or nil when no
  # section knows the object or when no "endobj" follows the offset.
  def parse_object(xref_trailers,obj_name,stream)
    offset = nil
    xref_trailers.each do |xrefs|
      offset = object_locate(xrefs,obj_name)
      break if offset
    end
    return nil unless offset

    terminator = stream.index(/endobj/,offset)
    # An unterminated object (or an offset past the end of the stream)
    # is reported as "not found" rather than crashing on a nil match.
    return nil unless terminator

    object_end = terminator + "endobj".length
    stream[offset..object_end]
  end

  # Builds the text of a cross-reference subsection by scanning the stream
  # for "<digits> <digit> obj" headers.
  #
  # stream  - String holding the PDF contents.
  # offset  - Integer position at which scanning starts.
  # num_obj - "1" (or 1) to record only the first header found,
  #           "*" to record every header from offset to the end.
  #           Any other value records nothing.
  #
  # Returns a String: a "<first object number> <entry count>\r\n" line
  # followed by one "<10-digit position> 00000 n\r\n" line per header.
  # When nothing was recorded the result is " 0\r\n".
  def xref_create(stream,offset,num_obj)
    obj_header = /(\d+) \d obj/
    positions = []
    first_obj = String.new()
    mode = num_obj.to_s

    if mode == "1"
      found_at = stream.index(obj_header,offset)
      if found_at
        first_obj = "#{Regexp.last_match(1)}"
        positions << found_at.to_s.rjust(10, "0")
      end
    elsif mode == "*"
      cursor = offset
      while cursor < stream.length && (found_at = stream.index(obj_header,cursor))
        hit = Regexp.last_match
        # Only the first header's object number goes into the heading line.
        first_obj = "#{hit[1]}" if first_obj.empty?
        positions << found_at.to_s.rjust(10, "0")
        cursor = hit.end(0)
      end
    end

    table = String.new()
    table << "#{first_obj} #{positions.length}\r\n"
    positions.each { |position| table << "#{position} 00000 n\r\n" }
    table
  end

  # Walks the document's chain of xref/trailer sections, newest first.
  #
  # stream - String holding the PDF contents.
  #
  # Returns a four element Array:
  #   xref_trailers - Array of section Strings, starting with the one at
  #                   the last "startxref" value and then following each
  #                   trailer's "Prev" entry.
  #   trailers      - Array of parsed trailer Hashes. The first trailer is
  #                   always included; later ones only while they carry a
  #                   "Prev" entry not already seen.
  #   startxrefs    - Array of the digit Strings found after each
  #                   "startxref" keyword, in file order.
  #   root_obj      - the "Root" value of the first trailer.
  #
  # Raises KeyError when the first trailer has no "Root" entry.
  def parse_pdf(stream)
    startxrefs = []
    cursor = 0
    total = stream.length
    while cursor < total
      break unless stream.index(/startxref\r?\n?/m,cursor)
      cursor = Regexp.last_match.end(0)
      # Take the next run of digits after the keyword; an empty String is
      # recorded when there is none.
      stream.index(/\d+/,cursor)
      startxrefs << "#{Regexp.last_match}"
    end

    newest_section = xref_trailer_parse(startxrefs.last.to_i,stream)
    current = trailer_parse(newest_section)
    xref_trailers = [newest_section]
    trailers = [current]

    root_obj = trailers.first.fetch("Root")

    while current["Prev"]
      older_section = xref_trailer_parse(current.fetch("Prev").to_i,stream)
      xref_trailers << older_section

      current = trailer_parse(older_section)
      # A "Prev" that was already followed would loop forever; dropping it
      # ends the walk after this section.
      already_followed = trailers.any? { |known| known.fetch("Prev") == current["Prev"] }
      current.delete("Prev") if already_followed
      trailers << current if current.key?("Prev")
    end

    [xref_trailers, trailers, startxrefs, root_obj]
  end

end
end