lib/msf/core/exploit/pdf_parse.rb
# -*- coding: binary -*-
###
#
# This module provides methods for parsing and interacting
# with the PDF format.
#
###
module Msf
module Exploit::PDF_Parse
# Sets up the mixin: defers to the including class's initializer and then
# registers the FILENAME string option on behalf of this module.
#
# @param info [Hash] module information, forwarded untouched to +super+
def initialize(info = {})
  super

  # Option spec array is passed through as-is; presumably it follows the
  # framework's [required, description, default] layout -- confirm against OptString.
  filename_option = OptString.new('FILENAME', [ true, 'The file name.', 'some.pdf'])
  register_options([filename_option], Msf::Exploit::PDF_Parse)
end
# Reads the whole file named by the INFILENAME datastore option in binary mode.
#
# NOTE(review): this mixin itself only registers FILENAME; INFILENAME is
# presumably registered by the including module -- confirm.
#
# @return [String] the raw file contents
def read_pdf
  input_path = datastore['INFILENAME'].to_s
  File.binread(input_path)
end
# Cuts one cross-reference/trailer section out of the raw PDF data.
#
# The slice starts at +offset+ and runs through the first ">>" found at or
# after it, plus the single character that follows the ">>".
#
# @param offset [Integer] position in +stream+ where the section begins
# @param stream [String] raw PDF contents
# @return [String] the extracted section
# @raise [NoMethodError] when no ">>" exists at or after +offset+
def xref_trailer_parse(offset, stream)
  closing_at = stream.index(/>>/, offset)
  # +2 with an inclusive range keeps ">>" and one trailing character.
  stream[offset..(closing_at + 2)]
end
# Pulls the recognised trailer entries out of an xref/trailer section.
#
# Each entry is located with its own pattern; entries that are absent are
# simply left out of the result. Values are returned as the captured text
# (e.g. "1 0" for Root, "1234" for Prev), not converted to integers.
#
# @param xref_trailer [String] section text, as produced by xref_trailer_parse
# @return [Hash{String=>String}] subset of Size, Root, Info, ID, Prev, XRefStm
def trailer_parse(xref_trailer)
  entry_patterns = [
    ['Size',    /Size (\d+)/m],
    ['Root',    /Root (\d+ \d)/m],
    ['Info',    /Info (\d+ \d)/m],
    ['ID',      /ID(\[.+\])/m],
    ['Prev',    /Prev (\d+)/m],
    ['XRefStm', /XRefStm (\d+)/m]
  ]

  entry_patterns.each_with_object({}) do |(key, pattern), trailer|
    found = xref_trailer.match(pattern)
    trailer[key] = found[1] if found
  end
end
# Looks up the byte offset recorded for an object in a classic xref table.
#
# The table is walked subsection by subsection. Every subsection starts with
# a "<first object> <count>" header followed by fixed-width 20-byte entries,
# so the wanted entry can be addressed by arithmetic once its subsection is
# found. Only generation-0 references are resolved.
#
# @param xref_trailer [String] an "xref ... trailer ..." section
# @param obj_name [String] object reference as "<number> <generation>", e.g. "12 0"
# @return [Integer, nil] the offset stored in the matching entry, or nil when
#   the reference is malformed, has a non-zero generation, the section has no
#   "trailer" keyword (e.g. a cross-reference stream object), or the object
#   is not listed
def object_locate(xref_trailer, obj_name)
  reference = obj_name.to_s.match(/(\d+) (\d+)/)
  return nil unless reference

  wanted = reference[1].to_i
  return nil unless reference[2].to_i.zero?

  trailer_at = xref_trailer.index(/trailer/)
  # Without the keyword there is no table to walk; report "not found"
  # instead of failing on nil arithmetic.
  return nil unless trailer_at

  # Exclusive range: everything before "trailer" (empty when it is at index 0).
  xrefs = xref_trailer[0...trailer_at]
  entry_size = 20
  header = xrefs.match(/xref\r?\n?(\d+) (\d+)\r?\n?/m)
  cursor = 0

  while header && cursor < xrefs.length
    first = header[1].to_i
    count = header[2].to_i
    cursor = header.end(0)

    if wanted >= first && wanted < first + count
      entry_at = cursor + (wanted - first) * entry_size
      # 12 characters cover the 10-digit offset; to_i stops at the space after it.
      return xrefs[entry_at, 12].to_i
    end

    # Skip this subsection's entries and read the next subsection header.
    cursor += count * entry_size
    header = xrefs.match(/(\d+) (\d+)\r?\n?/m, cursor)
  end

  nil
end
# Extracts the raw text of an indirect object from the PDF data.
#
# Each xref section is asked in turn (via object_locate) for the object's
# offset; the first section that knows the object wins. The object body is
# then cut from that offset through the next "endobj".
#
# @param xref_trailers [Array<String>] xref/trailer sections to search, in order
# @param obj_name [String] object reference as "<number> <generation>"
# @param stream [String] raw PDF contents
# @return [String, nil] the object text, or nil when no section lists the
#   object or no "endobj" follows its offset (e.g. truncated data)
def parse_object(xref_trailers, obj_name, stream)
  offset = nil
  xref_trailers.each do |xrefs|
    offset = object_locate(xrefs, obj_name)
    break if offset
  end
  return nil unless offset

  endobj_at = stream.index(/endobj/, offset)
  # An unterminated object is reported as "not found" rather than raising.
  return nil unless endobj_at

  # The inclusive range also keeps the one character after "endobj";
  # this matches the slice this method has always returned.
  stream[offset..(endobj_at + 'endobj'.length)]
end
# Builds xref subsection text for objects found in +stream+.
#
# Object headers ("<number> <digit> obj") are searched from +offset+ on.
# When +num_obj+ is 1 (or "1") only the first header is used; when it is "*"
# every header up to the end of the data is used. Any other value yields a
# subsection with no entries. The subsection header is made of the number of
# the first object found and the entry count; each entry is the header's
# position zero-padded to 10 digits followed by " 00000 n".
#
# @param stream [String] raw PDF contents
# @param offset [Integer] position at which the search starts
# @param num_obj [Integer, String] 1 / "1" for a single object, "*" for all
# @return [String] subsection text with CRLF line endings
def xref_create(stream, offset, num_obj)
  object_header = /(\d+) \d obj/
  entries = []
  first_object = String.new
  mode = num_obj.to_s

  if mode == "1"
    position = stream.index(object_header, offset)
    if position
      entries << position.to_s.rjust(10, "0")
      first_object = "#{Regexp.last_match(1)}"
    end
  elsif mode == "*"
    total = stream.length
    cursor = offset
    while cursor < total
      position = stream.index(object_header, cursor)
      break if position.nil?

      entries << position.to_s.rjust(10, "0")
      # Resume right after this header so the next search finds the next object.
      cursor = Regexp.last_match.end(0)
      first_object = "#{Regexp.last_match(1)}" if first_object.empty?
    end
  end

  table = String.new
  table << "#{first_object} #{entries.length}\r\n"
  entries.each { |entry| table << "#{entry} 00000 n\r\n" }
  table
end
# Walks the cross-reference chain of a PDF, newest section first.
#
# All "startxref" values are collected, the section addressed by the last
# one is parsed, and its trailer's Prev entries are then followed. A Prev
# value that was already seen on a collected trailer is dropped to stop
# cycles. A followed trailer is added to the trailer list only while it
# still carries a Prev entry; its raw section is always recorded.
#
# @param stream [String] raw PDF contents
# @return [Array] four values: the raw xref/trailer sections (Array<String>),
#   the collected trailer hashes (Array<Hash>), every startxref value found
#   (Array<String>), and the Root reference of the newest trailer (String)
# @raise [KeyError] when the newest trailer has no Root entry
def parse_pdf(stream)
  startxrefs = []
  cursor = 0
  total = stream.length
  while cursor < total
    marker = stream.match(/startxref\r?\n?/m, cursor)
    break unless marker

    cursor = marker.end(0)
    # First run of digits after the keyword; empty string when there is none.
    startxrefs << stream.match(/\d+/, cursor).to_s
  end

  section = xref_trailer_parse(startxrefs.last.to_i, stream)
  xref_trailers = [section]
  trailer = trailer_parse(section)
  trailers = [trailer]
  root_obj = trailer.fetch("Root")

  while trailer["Prev"]
    section = xref_trailer_parse(trailer.fetch("Prev").to_i, stream)
    xref_trailers << section
    trailer = trailer_parse(section)

    # A Prev already recorded on an earlier trailer means the chain loops back.
    seen_prevs = trailers.map { |earlier| earlier.fetch("Prev") }
    trailer.delete("Prev") if seen_prevs.include?(trailer["Prev"])

    trailers << trailer if trailer.key?("Prev")
  end

  [xref_trailers, trailers, startxrefs, root_obj]
end
end
end