minad/mimemagic

View on GitHub
lib/mimemagic/tables.rb

Summary

Maintainability
C
1 day
Test Coverage
# -*- coding: binary -*-
# frozen_string_literal: true
# Generated from script/freedesktop.org.xml
require 'nokogiri'
require 'mimemagic/path'

class MimeMagic
  # Lookup tables populated in place by parse_database (hence not frozen).
  EXTENSIONS = {} # lower-cased file extension (no dot) => MIME type
  TYPES = {}      # MIME type => [extensions, parent types, comment]
  MAGIC = []      # [MIME type, matches] pairs, in the order built by parse_database

  # Converts a numeric literal from the database into an Integer.
  #
  # A leading "0x"/"0X" selects base 16, any other leading "0" selects
  # base 8, and everything else is read as decimal.
  #
  # @param s [String] the textual number
  # @return [Integer]
  def self.str2int(s)
    return s.to_i(16) if s.start_with?('0x', '0X')
    return s.to_i(8) if s.start_with?('0')

    s.to_i(10)
  end

  # Recursively converts the child elements of +parent+ into nested match
  # arrays.
  #
  # Each child is read through +['mask']+, +['type']+, +['value']+,
  # +['offset']+ and +elements+. Children carrying a +mask+ attribute are
  # skipped entirely (including their own children).
  #
  # @param parent [#elements] node whose child elements describe matches
  # @return [Array<Array>] entries of the form +[offset, value]+ or
  #   +[offset, value, children]+, where +offset+ is an Integer or a Range
  def self.get_matches(parent)
    found = []
    parent.elements.each do |match|
      next if match['mask']

      # "a:b" describes a range of start offsets, a lone number a fixed one.
      bounds = match['offset'].split(':').map(&:to_i)
      offset = bounds.size == 2 ? (bounds[0]..bounds[1]) : bounds[0]

      value = decode_match_value(match['type'], match['value'])
      children = get_matches(match)
      found << (children.empty? ? [offset, value] : [offset, value, children])
    end
    found
  end

  # Turns the textual +value+ of a match into the byte string to look for.
  # Unknown types are passed through unchanged.
  def self.decode_match_value(type, value)
    case type
    when 'string'
      # This *one* pattern, in the entirety of fd.o's mime types, blows up
      # the parser because of the escape character \c, so it is passed
      # through verbatim.
      return value if value == '\chapter'

      # NOTE(review): eval runs on text taken from the database. The regex
      # limits the evaluated fragment to a hex escape, up to four digits or a
      # single character, so only a string literal is ever built; escapes
      # Ruby itself rejects (like \c above) will presumably raise SyntaxError.
      value.gsub(/\\(x[\dA-Fa-f]{1,2}|0\d{1,3}|\d{1,3}|.)/) do
        eval("\"\\#{Regexp.last_match(1)}\"")
      end
    when 'big16'
      n = str2int(value)
      (n >> 8).chr + (n & 0xFF).chr
    when 'big32'
      n = str2int(value)
      ((n >> 24) & 0xFF).chr + ((n >> 16) & 0xFF).chr + ((n >> 8) & 0xFF).chr + (n & 0xFF).chr
    when 'little16', 'host16' # host order is treated as little endian
      n = str2int(value)
      (n & 0xFF).chr + (n >> 8).chr
    when 'little32', 'host32' # host order is treated as little endian
      n = str2int(value)
      (n & 0xFF).chr + ((n >> 8) & 0xFF).chr + ((n >> 16) & 0xFF).chr + ((n >> 24) & 0xFF).chr
    when 'byte'
      str2int(value).chr
    else
      value
    end
  end
  private_class_method :decode_match_value

  # Opens the MIME database located at MimeMagic::DATABASE_PATH.
  #
  # @return [File] an open handle; the caller is responsible for closing it
  def self.open_mime_database
    File.open(MimeMagic::DATABASE_PATH)
  end

  # Parses the MIME database and fills EXTENSIONS, TYPES and MAGIC.
  #
  # Entries are added to the existing tables; nothing is cleared first.
  #
  # @return [Array] the magic list that was iterated to fill MAGIC
  def self.parse_database
    file = open_mime_database
    begin
      doc = Nokogiri::XML(file)
    ensure
      # The document is fully built at this point; release the handle
      # instead of leaking it.
      file.close if file.respond_to?(:close)
    end

    extensions = {}
    types = {}
    magics = []
    (doc/'mime-info/mime-type').each do |mime|
      comments = (mime/'comment').map { |comment| [comment['xml:lang'], comment.inner_text] }.to_h
      type = mime['type']
      subclass = (mime/'sub-class-of').map { |parent| parent['type'] }
      # Only simple "*.ext" globs are turned into extensions.
      exts = (mime/'glob').map { |glob|
        glob['pattern'] =~ /^\*\.([^\[\]]+)$/ ? Regexp.last_match(1).downcase : nil
      }.compact
      (mime/'magic').each do |magic|
        magics << [magic['priority'].to_i, type, get_matches(magic)]
      end
      next if exts.empty?

      # The first type that claims an extension keeps it.
      exts.each { |ext| extensions[ext] = type unless extensions.key?(ext) }
      # comments[nil] is the comment without an xml:lang attribute.
      types[type] = [exts, subclass, comments[nil]]
    end

    # Highest priority first, ties broken by type name.
    magics = magics.sort { |a, b| [-a[0], a[1]] <=> [-b[0], b[1]] }

    common_types = [
      "image/jpeg",                                                              # .jpg
      "image/png",                                                               # .png
      "image/gif",                                                               # .gif
      "image/tiff",                                                              # .tiff
      "image/bmp",                                                               # .bmp
      "image/vnd.adobe.photoshop",                                               # .psd
      "image/webp",                                                              # .webp
      "image/svg+xml",                                                           # .svg

      "video/x-msvideo",                                                         # .avi
      "video/x-ms-wmv",                                                          # .wmv
      "video/mp4",                                                               # .mp4, .m4v
      "video/quicktime",                                                         # .mov
      "video/mpeg",                                                              # .mpeg
      "video/ogg",                                                               # .ogv
      "video/webm",                                                              # .webm
      "video/x-matroska",                                                        # .mkv
      "video/x-flv",                                                             # .flv

      "audio/mpeg",                                                              # .mp3
      "audio/x-wav",                                                             # .wav
      "audio/aac",                                                               # .aac
      "audio/flac",                                                              # .flac
      "audio/mp4",                                                               # .m4a
      "audio/ogg",                                                               # .ogg

      "application/pdf",                                                         # .pdf
      "application/msword",                                                      # .doc
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document", # .docx
      "application/vnd.ms-powerpoint",                                           # .pps
      "application/vnd.openxmlformats-officedocument.presentationml.slideshow",  # .ppsx
      "application/vnd.ms-excel",                                                # .xls
      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",       # .xlsx
    ]

    # Move the first magic entry of every common type to the front of the
    # list; uniq then drops the copy left at its sorted position.
    common_magics = common_types.map do |common_type|
      magics.find { |_, magic_type, _| magic_type == common_type }
    end
    magics = (common_magics.compact + magics).uniq

    # Insert in sorted key order so the tables iterate deterministically.
    extensions.keys.sort.each do |key|
      EXTENSIONS[key] = extensions[key]
    end
    types.keys.sort.each do |key|
      exts, parents, comment = types[key]
      TYPES[key] = [exts, parents.sort, comment]
    end
    magics.each do |_priority, type, matches|
      MAGIC << [type, matches]
    end
  end
end