dmendel/bindata

View on GitHub
examples/gzip.rb

Summary

Maintainability
A
0 mins
Test Coverage
require 'bindata'
require 'zlib'

# An example of a reader / writer for the GZIP file format as per rfc1952.
# See notes at the end of this file for implementation discussions.
# BinData description of a single-member GZIP file (rfc1952): a fixed header,
# optional header fields selected by flag bits, the raw deflate stream and a
# trailing footer holding the CRC32 and size of the uncompressed data.
#
# Use #data= to populate the compressed payload and footer from a plain
# string; the remaining fields can be assigned directly (file_name, comment,
# mtime, ...).
class Gzip < BinData::Record
  # Binary representation of a ruby Time object.
  # Stored as an unsigned 32 bit little endian integer (result of Time#to_i).
  class Mtime < BinData::Primitive
    uint32le :time

    # Stores +val+ (anything responding to #to_i, e.g. a Time) as an integer.
    def set(val)
      self.time = val.to_i
    end

    # Returns the stored integer as a Time object.
    def get
      Time.at(time)
    end
  end

  # Known compression methods
  DEFLATE = 8

  endian :little

  # Magic number identifying a gzip file, followed by the compression method.
  uint16  :ident,      asserted_value: 0x8b1f
  uint8   :compression_method, initial_value: DEFLATE

  # Flag bits.  The f* flags are derived from the optional fields they guard,
  # so assigning e.g. a comment automatically sets fcomment.
  bit3    :freserved,  asserted_value: 0
  bit1    :fcomment,   value: -> { comment.length > 0 ? 1 : 0 }
  bit1    :ffile_name, value: -> { file_name.length > 0 ? 1 : 0 }
  bit1    :fextra,     value: -> { extra.len > 0 ? 1 : 0 }
  bit1    :fcrc16,     value: 0  # see note at end of file
  bit1    :ftext

  mtime   :mtime
  uint8   :extra_flags
  uint8   :os,         initial_value: 255   # unknown OS

  # The following fields are optional depending on the bits in flags

  struct  :extra,      onlyif: -> { fextra.nonzero? } do
    # len is derived from data so the two can never disagree when writing
    # (and so that assigning extra.data is enough to set the fextra flag).
    uint16 :len,  value: -> { data.length }
    string :data, read_length: :len
  end
  stringz :file_name,  onlyif: -> { ffile_name.nonzero? }
  stringz :comment,    onlyif: -> { fcomment.nonzero? }
  uint16  :crc16,      onlyif: -> { fcrc16.nonzero? }

  # The length of compressed data must be calculated from the current file offset
  count_bytes_remaining :bytes_remaining
  string :compressed_data, read_length: -> { bytes_remaining - footer.num_bytes }

  # Trailer: checksum and size of the *uncompressed* data.
  struct :footer do
    uint32 :crc32
    uint32 :uncompressed_size
  end

  # Compresses +data+ (a String) and fills in the payload and footer fields.
  #
  # The footer size is the number of bytes (not characters) of the input,
  # modulo 2**32, as required for the ISIZE field of rfc1952.
  def data=(data)
    # Zlib.deflate includes a header + footer which we must discard
    # (2 byte zlib header at the front, 4 byte adler32 checksum at the end).
    self.compressed_data = Zlib::Deflate.deflate(data)[2..-5]
    self.footer.crc32 = Zlib.crc32(data)
    self.footer.uncompressed_size = data.bytesize & 0xffff_ffff
  end
end

# Demo: write a small gzip file with the Gzip record, read it back, print its
# details in the layout of `gzip -l -v`, then run the real gzip tool on the
# same file for comparison.
if __FILE__ == $PROGRAM_NAME
  # Write a gzip file.
  print "Creating a gzip file ... "
  g = Gzip.new
  g.data = "the cat sat on the mat"
  g.file_name = "poetry"
  g.mtime = Time.now
  g.comment = "A stunning piece of prose"
  # Binary mode: gzip is a binary format, so avoid any newline translation
  # or encoding conversion by the IO layer.
  File.open("poetry.gz", "wb") do |io|
    g.write(io)
  end
  puts "done."
  puts

  # Read the created gzip file.
  print "Reading newly created gzip file ... "
  g = Gzip.new
  File.open("poetry.gz", "rb") do |io|
    g.read(io)
  end
  puts "done."
  puts

  puts "Printing gzip file details in the format of gzip -l -v"

  # compression ratio
  ratio = 100.0 * (g.footer.uncompressed_size - g.compressed_data.size) /
            g.footer.uncompressed_size

  comp_meth = (g.compression_method == Gzip::DEFLATE) ? "defla" : ""

  # Output using the same format as gzip -l -v
  puts "method  crc     date  time           compressed        " +
       "uncompressed  ratio uncompressed_name"
  puts "%5s %08x %6s %5s %19s %19s %5.1f%% %s" % [comp_meth,
                                                  g.footer.crc32,
                                                  g.mtime.strftime('%b %d'),
                                                  g.mtime.strftime('%H:%M'),
                                                  g.num_bytes,
                                                  g.footer.uncompressed_size,
                                                  ratio,
                                                  g.file_name]
  # comment? reports whether the optional comment field is present
  puts "Comment: #{g.comment}" if g.comment?
  puts

  puts "Executing gzip -l -v"
  puts `gzip -l -v poetry.gz`
end

# Notes:
#
# Mtime: A convenience wrapper that allows a ruby Time object to be used
# instead of manually dealing with the raw form (seconds since 1 Jan 1970).
#
# rfc1952 specifies an optional crc16 field.  The gzip command line client
# uses this field for multi-part gzip.  Hence this example ignores it: the
# fcrc16 flag is always written as 0.

# We are cheating and using the Zlib library for compression.  We can't use
# this library for decompression as zlib requires an adler32 checksum while
# gzip uses crc32.