Commit 749a7e59 by Daniel Mendler

initial commit

parents
lib/mimemagic.rb
lib/mimemagic_tables.rb
test/test_mimemagic.rb
script/freedesktop.org.xml
script/generate-mime.rb
Rakefile
README
MimeMagic is a library that detects the MIME type of a file from its extension or from its content.
Usage
=====
require 'mimemagic'
MimeMagic.by_extension('html').text?
MimeMagic.by_extension('.html').child_of? 'text/plain'
MimeMagic.by_magic(File.open('test.html'))
etc...
Authors
=======
Daniel Mendler
\ No newline at end of file
require 'hoe'

# Put lib/ at the front of the load path so that the require below picks up
# the in-tree mimemagic, whose VERSION constant is passed to Hoe.
$LOAD_PATH.unshift 'lib'
require 'mimemagic'

# Declare the gem (name, version, developer, summary) through Hoe.
Hoe.new('mimemagic', MimeMagic::VERSION) do |spec|
  spec.developer('Daniel Mendler', 'mail@daniel-mendler.de')
  spec.summary = 'Mime detection by extension or content'
end
require 'mimemagic_tables'
require 'stringio'
# Mime type detection
#
# Wraps a mime type string and offers lookups by file extension and by
# content ("magic") analysis. The lookup tables EXTENSIONS, TYPES and MAGIC
# are not defined in this class body; they must already be defined on
# MimeMagic when these methods are called.
#
# Table shapes as used by the code below:
#   EXTENSIONS  { extension => type }
#   TYPES       { type => [extensions, parents, ...] }
#   MAGIC       [[type, matches], ...] where each match is
#               [offset, value] or [offset, value, children]
class MimeMagic
  VERSION = '0.1'

  attr_reader :type, :mediatype, :subtype

  # Mime type by type string
  #
  # The string is split on '/'; the first part becomes +mediatype+ and the
  # second part +subtype+ (nil when absent).
  def initialize(type)
    @type = type
    @mediatype, @subtype = @type.split('/')
  end

  # Add custom mime type. You have to
  # specify the type, a string list of file extensions,
  # a string list of parent mime types and optionally
  # magic match tuples and/or a detector block.
  #
  # The block (or nil when none is given) is stored as the third element of
  # the TYPES entry. MAGIC is only extended when at least one magic tuple is
  # passed; new entries go to the front so they are tried first.
  def self.add(type, extensions, parents, *magics, &block)
    TYPES[type] = [extensions, parents, block]
    extensions.each { |ext| EXTENSIONS[ext] = type }
    MAGIC.unshift [type, magics] unless magics.empty?
  end

  # Returns true if type is a text format
  def text?
    child_of? 'text/plain'
  end

  # Returns true if type is child of parent type
  # (a type counts as a child of itself)
  def child_of?(parent)
    child?(type, parent)
  end

  # Get string list of file extensions
  # (empty list for types missing from TYPES)
  def extensions
    TYPES.key?(type) ? TYPES[type][0] : []
  end

  # Lookup mime type by file extension
  #
  # The extension is downcased and may carry a leading dot.
  # Returns a MimeMagic instance or nil if the extension is unknown.
  def self.by_extension(ext)
    ext = ext.downcase
    mime = EXTENSIONS[ext] || (ext[0..0] == '.' && EXTENSIONS[ext[1..-1]])
    mime ? new(mime) : nil
  end

  # Lookup mime type by magic content analysis
  # That could be slow
  #
  # +content+ is used directly when it responds to :seek; anything else is
  # converted with to_s and wrapped in a StringIO.
  # Returns a MimeMagic instance for the first MAGIC entry that matches,
  # or nil if none does.
  def self.by_magic(content)
    io = content.respond_to?(:seek) ? content : StringIO.new(content.to_s, 'rb')
    mime = MAGIC.find { |_type, matches| magic_match(io, matches) }
    mime ? new(mime[0]) : nil
  end

  # Return type as string
  def to_s
    type
  end

  # Allow comparison with string
  def ==(x)
    type == x.to_s
  end

  private

  # Walks the parent lists in TYPES recursively, starting at +child+.
  def child?(child, parent)
    return true if child == parent
    TYPES.key?(child) ? TYPES[child][1].any? { |p| child?(p, parent) } : false
  end

  # Returns true if any of the alternatives in +matches+ applies to +io+.
  #
  # An alternative matches when its value is found at the given offset (or
  # starts anywhere inside the offset range) and, if it has children, at
  # least one child alternative matches as well.
  #
  # NOTE: `private` above only affects instance methods, so this class
  # method stays publicly callable, exactly as before.
  def self.magic_match(io, matches)
    matches.any? do |offset, value, children|
      if offset.is_a?(Range)
        io.seek(offset.begin)
        # read returns nil at EOF; treat that as "no match" for this
        # alternative so the remaining alternatives are still tried.
        window = io.read(offset.end - offset.begin + value.length)
        match = !window.nil? && window.include?(value)
      else
        io.seek(offset)
        match = value == io.read(value.length)
      end
      match && (!children || magic_match(io, children))
    end
  rescue
    # Best effort: an IO that fails to seek/read simply does not match.
    false
  end
end
# -*- encoding: utf-8 -*-

# Gem specification for mimemagic 0.1.
Gem::Specification.new do |s|
  s.name = %q{mimemagic}
  s.version = "0.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Daniel Mendler"]
  s.date = %q{2009-05-09}
  s.email = ["mail@daniel-mendler.de"]
  s.files = ["lib/mimemagic.rb", "lib/mimemagic_tables.rb", "test/test_mimemagic.rb", "script/freedesktop.org.xml", "script/generate-mime.rb", "Rakefile"]
  # has_rdoc= is a legacy setter; only call it where RubyGems still has it.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  # NOTE(review): the commit's file list shows "README" without ".txt" - confirm
  # which name the --main option should point at.
  s.rdoc_options = ["--main", "README.txt"]
  s.require_paths = ["lib"]
  # rubyforge_project= is a legacy setter; guarded like has_rdoc= above.
  s.rubyforge_project = %q{mimemagic} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Mime detection by extension or content}
  s.test_files = ["test/test_mimemagic.rb"]

  if s.respond_to? :specification_version
    s.specification_version = 2

    # Gem::RubyGemsVersion is absent from current RubyGems; prefer
    # Gem::VERSION and fall back to the old constant only when needed.
    rubygems_version = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    # Development dependencies are only used from RubyGems 1.2.0 on.
    if Gem::Version.new(rubygems_version) >= Gem::Version.new('1.2.0')
      s.add_development_dependency(%q<hoe>, [">= 1.8.3"])
    else
      s.add_dependency(%q<hoe>, [">= 1.8.3"])
    end
  else
    s.add_dependency(%q<hoe>, [">= 1.8.3"])
  end
end
This source diff could not be displayed because it is too large. You can view the blob instead.
#!/usr/bin/ruby
require 'rexml/document'
# Convert a numeric string to an Integer, choosing the base from its prefix:
# "0x"/"0X" selects base 16, any other leading "0" selects base 8, and
# everything else is read as base 10.
def str2int(s)
  head = s[0..1].downcase
  base =
    if head == '0x'
      16
    elsif head[0..0] == '0'
      8
    else
      10
    end
  s.to_i(base)
end
# Turn the <match> children of +parent+ into nested match tuples.
#
# Every tuple is [offset, value] or, when the match has nested matches,
# [offset, value, children]. +offset+ is an Integer, or the Range a..b for
# an "a:b" offset attribute. +value+ is the string to look for, built from
# the 'value' attribute according to the 'type' attribute. Matches that
# carry a 'mask' attribute are dropped, along with anything nested in them.
def get_matches(parent)
  tuples = parent.get_elements('match').map do |node|
    attrs = node.attributes
    # Masked matches are not supported; they become nil and are compacted away.
    next if attrs['mask']

    kind = attrs['type']
    value = attrs['value']
    bounds = attrs['offset'].split(':').map { |part| part.to_i }
    offset = bounds.size == 2 ? bounds[0]..bounds[1] : bounds[0]

    case kind
    when 'string'
      # Expand backslash escapes (\xNN, octal, single character) in place.
      # NOTE(review): this evals text taken from the XML file; only run the
      # generator on a trusted mime database.
      value.gsub!(/\\(x[\dA-Fa-f]{1,2}|0\d{1,3}|\d{1,3}|.)/) { eval("\"\\#{$1}\"") }
    when 'big16'
      number = str2int(value)
      value = (number >> 8).chr + (number & 0xFF).chr
    when 'big32'
      number = str2int(value)
      value = ((number >> 24) & 0xFF).chr + ((number >> 16) & 0xFF).chr +
              ((number >> 8) & 0xFF).chr + (number & 0xFF).chr
    when 'little16', 'host16' # host order is treated as little endian
      number = str2int(value)
      value = (number & 0xFF).chr + (number >> 8).chr
    when 'little32', 'host32' # host order is treated as little endian
      number = str2int(value)
      value = (number & 0xFF).chr + ((number >> 8) & 0xFF).chr +
              ((number >> 16) & 0xFF).chr + ((number >> 24) & 0xFF).chr
    when 'byte'
      value = str2int(value).chr
    end

    nested = get_matches(node)
    if nested.empty?
      [offset, value]
    else
      [offset, value, nested]
    end
  end
  tuples.compact
end
# Path of the mime database to read; the first command line argument
# overrides the default location.
FILE = ARGV[0] || '/usr/share/mime/packages/freedesktop.org.xml'

# Parse inside the File.open block so the file handle is closed again once
# the document has been built.
doc = File.open(FILE) { |file| REXML::Document.new(file) }

extensions = {} # extension => type (first type seen wins)
types = {}      # type => [extensions, parent types]
magics = []     # [priority, type, matches]

doc.each_element('mime-info/mime-type') do |mime|
  type = mime.attributes['type']
  subclass = mime.get_elements('sub-class-of').map { |x| x.attributes['type'] }
  # Only plain "*.ext" globs are turned into extensions; patterns containing
  # brackets are skipped.
  exts = mime.get_elements('glob').map { |x| x.attributes['pattern'] =~ /^\*\.([^\[\]]+)$/ ? $1.downcase : nil }.compact
  mime.get_elements('magic').each do |magic|
    priority = magic.attributes['priority'].to_i
    matches = get_matches(magic)
    magics << [priority, type, matches]
  end
  # Types without any usable extension get no TYPES entry.
  unless exts.empty?
    exts.each do |x|
      extensions[x] = type unless extensions.include?(x)
    end
    types[type] = [exts, subclass]
  end
end

# Highest priority first; the priority itself is dropped from the output.
magics = magics.sort { |a, b| b[0] <=> a[0] }.map { |x| [x[1], x[2]] }

# Emit the tables as Ruby source that reopens class MimeMagic.
puts "# Generated from #{FILE}"
puts "class MimeMagic"
puts " private"
puts " EXTENSIONS = {"
extensions.keys.sort.each do |key|
  puts " '#{key}' => '#{extensions[key]}',"
end
puts " }"
puts " TYPES = {"
types.keys.sort.each do |key|
  exts = types[key][0].sort.inspect
  parents = types[key][1].sort.inspect
  puts " '#{key}' => [#{exts}, #{parents}],"
end
puts " }"
puts " MAGIC = ["
magics.each do |type, matches|
  puts " ['#{type}', #{matches.inspect}],"
end
puts " ]"
puts "end"
require 'mimemagic'
# Unit tests for MimeMagic: type relations, extension lookup and magic
# (content based) detection.
class TC_MimeMagic < Test::Unit::TestCase
  def test_text?
    assert MimeMagic.new('text/plain').text?
    assert MimeMagic.new('text/html').text?
    assert !MimeMagic.new('application/octet-stream').text?
    assert !MimeMagic.new('image/png').text?
  end

  def test_child_of?
    assert MimeMagic.new('text/html').child_of?('text/plain')
    assert MimeMagic.new('text/x-java').child_of?('text/plain')
  end

  def test_extensions
    assert_equal %w(htm html), MimeMagic.new('text/html').extensions
  end

  def test_by_extension
    assert_equal 'text/html', MimeMagic.by_extension('html').to_s
    assert_equal 'application/x-ruby', MimeMagic.by_extension('rb').to_s
    assert_nil MimeMagic.by_extension('crazy')
    assert_nil MimeMagic.by_extension('')
  end

  # Uses fixed system paths, so these files must exist on the machine that
  # runs the tests. The block form of File.open closes each handle again.
  def test_by_magic
    File.open('/bin/ls') do |file|
      assert_equal 'application/x-executable', MimeMagic.by_magic(file).to_s
    end
    File.open('/lib/libc.so.6') do |file|
      assert_equal 'application/x-sharedlib', MimeMagic.by_magic(file).to_s
    end
  end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment