Commit 7cd73b35 by Daniel Mendler Committed by GitHub

Merge pull request #40 from janko-m/performance-improvements

Improve performance
parents d12011fd 1b587b93
require 'mimemagic/tables'
require 'mimemagic/version'
require 'stringio'
# Mime type detection
class MimeMagic
attr_reader :type, :mediatype, :subtype
......@@ -110,41 +112,28 @@ class MimeMagic
if io.respond_to?(:seek) && io.respond_to?(:read)
io.binmode
io.set_encoding(Encoding::BINARY) if io.respond_to?(:set_encoding)
MAGIC.send(method) { |type, matches| magic_match_io(io, matches) }
buffer = "".force_encoding(Encoding::BINARY)
MAGIC.send(method) { |type, matches| magic_match_io(io, matches, buffer) }
else
str = io.respond_to?(:read) ? io.read : io.to_s
str = str.force_encoding(Encoding::BINARY) if str.respond_to?(:force_encoding)
MAGIC.send(method) { |type, matches| magic_match_str(str, matches) }
magic_match(StringIO.new(str), method)
end
end
# Tests whether at least one entry of +matches+ is found in +io+.
#
# NOTE(review): this listing looks like a rendered diff whose removed and added
# lines are interleaved without +/- markers, so the two-argument and the
# three-argument (+buffer+) signatures both appear below, as does
# +magic_match_str+ -- confirm against the repository before editing.
#
# Each entry of +matches+ is destructured as [offset, value, children]:
# * Range offset - seek to offset.begin, read
#   (offset.end - offset.begin + value.bytesize) bytes and test whether the
#   data read includes +value+.
# * any other offset - seek to +offset+, read value.bytesize bytes and compare
#   them with +value+ for equality.
# An entry with children only counts as a match when at least one of its
# children also matches (checked by recursing with the same +io+).
#
# +buffer+ is handed to io.read as its second argument on every read and is
# passed along unchanged to the recursive calls.
def self.magic_match_io(io, matches)
def self.magic_match_io(io, matches, buffer)
matches.any? do |offset, value, children|
match =
if Range === offset
io.seek(offset.begin)
x = io.read(offset.end - offset.begin + value.bytesize)
x = io.read(offset.end - offset.begin + value.bytesize, buffer)
# x is nil when read returns nothing, hence the guard before include?
x && x.include?(value)
else
io.seek(offset)
io.read(value.bytesize) == value
end
match && (!children || magic_match_io(io, children))
end
end
# String-based counterpart of magic_match_io: same entry format, but slices
# +str+ directly instead of seeking and reading an IO.
def self.magic_match_str(str, matches)
matches.any? do |offset, value, children|
match =
if Range === offset
x = str[offset.begin, offset.end - offset.begin + value.bytesize]
x && x.include?(value)
else
str[offset, value.bytesize] == value
io.read(value.bytesize, buffer) == value
end
match && (!children || magic_match_str(str, children))
match && (!children || magic_match_io(io, children, buffer))
end
end
private_class_method :magic_match, :magic_match_io, :magic_match_str
private_class_method :magic_match, :magic_match_io
end
......@@ -1498,6 +1498,30 @@ class MimeMagic
# @private
# :nodoc:
MAGIC = [
['image/jpeg', [[0, "\377\330\377"], [0, "\377\330"]]],
['image/png', [[0, "\211PNG"]]],
['image/gif', [[0, 'GIF8']]],
['image/tiff', [[0, "MM\000*"], [0, "II*\000"]]],
['image/bmp', [[0, 'BM', [[14, "\f"], [14, '@'], [14, '(']]]]],
['image/vnd.adobe.photoshop', []],
['image/webp', [[0, 'RIFF', [[8, 'WEBP']]]]],
['image/svg+xml', [[0..256, '<!DOCTYPE svg'], [0..256, '<svg']]],
['video/x-msvideo', [[0, 'RIFF', [[8, 'AVI ']]], [0, 'AVF0', [[8, 'AVI ']]]]],
['video/mp4', [[4, 'ftypisom'], [4, 'ftypmp42'], [4, 'ftypMSNV'], [4, 'ftypM4V '], [4, 'ftypf4v ']]],
['video/quicktime', [[12, 'mdat'], [4, 'mdat'], [4, 'moov'], [4, 'ftypqt']]],
['video/mpeg', [[0, "G?\377\020"], [0, "\000\000\001\263"], [0, "\000\000\001\272"]]],
['video/ogg', [[0, 'OggS']]],
['video/webm', [[0, "\032E\337\243", [[5..65, "B\202", [[8..75, 'webm']]]]]]],
['video/x-flv', [[0, 'FLV']]],
['audio/mpeg', [[0, "\377\373"], [0, 'ID3']]],
['audio/x-wav', [[8, 'WAVE'], [8, 'WAV ']]],
['audio/aac', [[0, 'ADIF']]],
['audio/flac', [[0, 'fLaC']]],
['audio/mp4', [[4, 'ftypM4A']]],
['audio/ogg', [[0, 'OggS']]],
['application/pdf', [[0..1024, '%PDF-']]],
['application/msword', [[0, "1\276\000\000"], [0, 'PO^Q`'], [0, "\3767\000#"], [0, "\333\245-\000\000\000"], [2112, 'MSWordDoc'], [2108, 'MSWordDoc'], [2112, 'Microsoft Word document data'], [546, 'bjbj'], [546, 'jbjb']]],
['application/vnd.ms-excel', [[2080, 'Microsoft Excel 5.0 Worksheet']]],
['application/vnd.stardivision.writer', [[2089, 'StarWriter']]],
['application/x-docbook+xml', [[0, '<?xml', [[0..100, '-//OASIS//DTD DocBook XML'], [0..100, '-//KDE//DTD DocBook XML']]]]],
['image/x-eps', [[0, '%!', [[15, 'EPS']]], [0, "\004%!", [[16, 'EPS']]], [0, "\305\320\323\306"]]],
......@@ -1515,7 +1539,6 @@ class MimeMagic
['audio/x-opus+ogg', [[0, 'OggS', [[28, 'OpusHead']]]]],
['audio/x-speex+ogg', [[0, 'OggS', [[28, 'Speex ']]]]],
['audio/x-vorbis+ogg', [[0, 'OggS', [[28, "\001vorbis"]]]]],
['image/svg+xml', [[0..256, '<!DOCTYPE svg'], [0..256, '<svg']]],
['image/x-kodak-kdc', [[242, 'EASTMAN KODAK COMPANY']]],
['image/x-niff', [[0, 'IIN1']]],
['text/x-qml', [[0..256, 'import Qt ']]],
......@@ -1525,7 +1548,6 @@ class MimeMagic
['application/rss+xml', [[0..256, '<rss '], [0..256, '<RSS ']]],
['application/vnd.apple.mpegurl', [[0, '#EXTM3U', [[0..128, '#EXT-X-TARGETDURATION'], [0..128, '#EXT-X-STREAM-INF']]]]],
['text/x-opml+xml', [[0..256, '<opml ']]],
['application/msword', [[0, "1\276\000\000"], [0, 'PO^Q`'], [0, "\3767\000#"], [0, "\333\245-\000\000\000"], [2112, 'MSWordDoc'], [2108, 'MSWordDoc'], [2112, 'Microsoft Word document data'], [546, 'bjbj'], [546, 'jbjb']]],
['application/vnd.ms-cab-compressed', [[0, "MSCF\000\000\000\000"]]],
['application/vnd.ms-wpl', [[0..256, '<?wpl']]],
['application/x-7z-compressed', [[0, "7z\274\257'\034"]]],
......@@ -1576,7 +1598,6 @@ class MimeMagic
['application/mxf', [[0..256, "\006\016+4\002\005\001\001\r\001\002\001\001\002"]]],
['application/ogg', [[0, 'OggS']]],
['application/owl+xml', [[0..256, '<Ontology']]],
['application/pdf', [[0..1024, '%PDF-']]],
['application/pgp-encrypted', [[0, '-----BEGIN PGP MESSAGE-----']]],
['application/pgp-keys', [[0, '-----BEGIN PGP PUBLIC KEY BLOCK-----'], [0, '-----BEGIN PGP PRIVATE KEY BLOCK-----'], [0, "\225\001"], [0, "\225\000"], [0, "\231\000"], [0, "\231\001"]]],
['application/pgp-signature', [[0, '-----BEGIN PGP SIGNATURE-----']]],
......@@ -1591,7 +1612,6 @@ class MimeMagic
['application/vnd.lotus-wordpro', [[0, 'WordPro']]],
['application/vnd.ms-access', [[0, "\000\001\000\000Standard Jet DB"]]],
['application/vnd.ms-asf', [[0, "0&\262u"], [0, '[Reference]']]],
['application/vnd.ms-excel', [[2080, 'Microsoft Excel 5.0 Worksheet']]],
['application/vnd.ms-tnef', [[0, "x\237>\""]]],
['application/vnd.oasis.opendocument.chart', [[0, "PK\003\004", [[30, 'mimetype', [[38, 'application/vnd.oasis.opendocument.chart']]]]]]],
['application/vnd.oasis.opendocument.chart-template', [[0, "PK\003\004", [[30, 'mimetype', [[38, 'application/vnd.oasis.opendocument.chart-template']]]]]]],
......@@ -1731,14 +1751,9 @@ class MimeMagic
['application/xspf+xml', [[0..64, "<playlist version=\"1"], [0..64, "<playlist version='1"]]],
['audio/AMR', [[0, "#!AMR\n"], [0, "#!AMR_MC1.0\n"]]],
['audio/AMR-WB', [[0, "#!AMR-WB\n"], [0, "#!AMR-WB_MC1.0\n"]]],
['audio/aac', [[0, 'ADIF']]],
['audio/ac3', [[0, "\vw"]]],
['audio/annodex', [[0, 'OggS', [[28, "fishead\000", [[56..512, "CMML\000\000\000\000"]]]]]]],
['audio/flac', [[0, 'fLaC']]],
['audio/midi', [[0, 'MThd']]],
['audio/mp4', [[4, 'ftypM4A']]],
['audio/mpeg', [[0, "\377\373"], [0, 'ID3']]],
['audio/ogg', [[0, 'OggS']]],
['audio/prs.sid', [[0, 'PSID']]],
['audio/vnd.dts', [[0, "\177\376\200\001"], [0, "\200\001\177\376"], [0, "\037\377\350\000"], [0, "\350\000\037\377"]]],
['audio/x-adpcm', [[0, '.snd', [[12, "\000\000\000\027"]]], [0, ".sd\000", [[12, "\001\000\000\000"], [12, "\002\000\000\000"], [12, "\003\000\000\000"], [12, "\004\000\000\000"], [12, "\005\000\000\000"], [12, "\006\000\000\000"], [12, "\a\000\000\000"], [12, "\027\000\000\000"]]]]],
......@@ -1758,29 +1773,21 @@ class MimeMagic
['audio/x-speex', [[0, 'Speex']]],
['audio/x-stm', [[20, "!Scream!\032"], [20, "!SCREAM!\032"], [20, "BMOD2STM\032"]]],
['audio/x-tta', [[0, 'TTA1']]],
['audio/x-wav', [[8, 'WAVE'], [8, 'WAV ']]],
['audio/x-wavpack', [[0, 'wvpk']]],
['audio/x-wavpack-correction', [[0, 'wvpk']]],
['audio/x-xi', [[0, 'Extended Instrument:']]],
['audio/x-xm', [[0, 'Extended Module:']]],
['audio/x-xmf', [[0, 'XMF_'], [0, "XMF_2.00\000\000\000\002"]]],
['image/bmp', [[0, 'BM', [[14, "\f"], [14, '@'], [14, '(']]]]],
['image/dpx', [[0, 'SDPX']]],
['image/fits', [[0, 'SIMPLE =']]],
['image/gif', [[0, 'GIF8']]],
['image/jp2', [[0, "\377O\377Q\000"], [3, "\fjP "], [20, 'jp2']]],
['image/jpeg', [[0, "\377\330\377"], [0, "\377\330"]]],
['image/openraster', [[0, "PK\003\004", [[30, 'mimetype', [[38, 'image/openraster']]]]]]],
['image/png', [[0, "\211PNG"]]],
['image/tiff', [[0, "MM\000*"], [0, "II*\000"]]],
['image/vnd.adobe.photoshop', []],
['image/vnd.djvu', [[0, 'AT&TFORM', [[12, 'DJVU']]], [0, 'FORM', [[8, 'DJVU']]]]],
['image/vnd.djvu+multipage', [[0, 'AT&TFORM', [[12, 'DJVM']]], [0, 'FORM', [[8, 'DJVM']]]]],
['image/vnd.dxf', [[0..64, "\nHEADER\n"], [0..64, "\r\nHEADER\r\n"]]],
['image/vnd.microsoft.icon', [[0, "\000\000\001\000", [[5, "\000"]]]]],
['image/vnd.ms-modi', [[0, "EP*\000"]]],
['image/vnd.zbrush.pcx', [[0, "\n", [[1, "\000"], [1, "\002"], [1, "\003"], [1, "\005"]]]]],
['image/webp', [[0, 'RIFF', [[8, 'WEBP']]]]],
['image/x-applix-graphics', [[0, '*BEGIN', [[7, 'GRAPHICS']]]]],
['image/x-canon-crw', [[0, "II\032\000\000\000HEAPCCDR"]]],
['image/x-dds', [[0, 'DDS']]],
......@@ -1795,8 +1802,8 @@ class MimeMagic
['image/x-olympus-orf', [[0, "IIRO\b\000\000\000"]]],
['image/x-panasonic-raw', [[0, "IIU\000\b\000\000\000"]]],
['image/x-panasonic-raw2', [[0, "IIU\000\030\000\000\000"]]],
['image/x-pict', [[10, "\000\021", [[12, "\002\377", [[14, "\f\000", [[16, "\377\376"]]]]]]]]],
['image/x-pict', [[522, "\000\021", [[524, "\002\377", [[526, "\f\000", [[528, "\377\376"]]]]]]]]],
['image/x-pict', [[10, "\000\021", [[12, "\002\377", [[14, "\f\000", [[16, "\377\376"]]]]]]]]],
['image/x-portable-bitmap', [[0, 'P1', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]], [0, 'P4', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]]]],
['image/x-portable-graymap', [[0, 'P2', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]], [0, 'P5', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]]]],
['image/x-portable-pixmap', [[0, 'P3', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]], [0, 'P6', [[2, "\n"], [2, ' '], [2, "\t"], [2, "\r"]]]]],
......@@ -1835,10 +1842,10 @@ class MimeMagic
['text/x-makefile', [[0, '#!/usr/bin/make'], [0, '#! /usr/bin/make']]],
['text/x-matlab', [[0, 'function']]],
['text/x-microdvd', [[0, '{1}'], [0, '{0}'], [0..6, '}{']]],
['text/x-modelica', [[0, 'function']]],
['text/x-modelica', [[0, 'class']]],
['text/x-modelica', [[0, 'model']]],
['text/x-modelica', [[0, 'record']]],
['text/x-modelica', [[0, 'model']]],
['text/x-modelica', [[0, 'function']]],
['text/x-mpsub', [[0..256, 'FORMAT=']]],
['text/x-mrml', [[0, '<mrml ']]],
['text/x-ms-regedit', [[0, 'REGEDIT'], [0, 'Windows Registry Editor Version 5.00'], [0, "\377\376W\000i\000n\000d\000o\000w\000s\000 \000R\000e\000g\000i\000s\000t\000r\000y\000 \000E\000d\000i\000t\000o\000r\000"]]],
......@@ -1856,16 +1863,9 @@ class MimeMagic
['video/annodex', [[0, 'OggS', [[28, "fishead\000", [[56..512, "CMML\000\000\000\000"]]]]]]],
['video/dv', []],
['video/mp2t', []],
['video/mp4', [[4, 'ftypisom'], [4, 'ftypmp42'], [4, 'ftypMSNV'], [4, 'ftypM4V '], [4, 'ftypf4v ']]],
['video/mpeg', [[0, "G?\377\020"], [0, "\000\000\001\263"], [0, "\000\000\001\272"]]],
['video/ogg', [[0, 'OggS']]],
['video/quicktime', [[12, 'mdat'], [4, 'mdat'], [4, 'moov'], [4, 'ftypqt']]],
['video/vnd.mpegurl', [[0, '#EXTM4U']]],
['video/webm', [[0, "\032E\337\243", [[5..65, "B\202", [[8..75, 'webm']]]]]]],
['video/x-flic', [[0, "\021\257"], [0, "\022\257"]]],
['video/x-flv', [[0, 'FLV']]],
['video/x-mng', [[0, "\212MNG\r\n\032\n"]]],
['video/x-msvideo', [[0, 'RIFF', [[8, 'AVI ']]], [0, 'AVF0', [[8, 'AVI ']]]]],
['video/x-nsv', [[0, 'NSVf']]],
['video/x-sgi-movie', [[0, 'MOVI']]],
['x-epoc/x-sisx-app', [[0, "z\032 \020"]]],
......
......@@ -90,6 +90,48 @@ end
magics = magics.sort {|a,b| [-a[0],a[1]] <=> [-b[0],b[1]] }
# Media types treated as common. The trailing comment on each entry names a
# typical file extension for that type.
common_types = [
"image/jpeg", # .jpg
"image/png", # .png
"image/gif", # .gif
"image/tiff", # .tiff
"image/bmp", # .bmp
"image/vnd.adobe.photoshop", # .psd
"image/webp", # .webp
"image/svg+xml", # .svg
"video/x-msvideo", # .avi
"video/x-ms-wmv", # .wmv
"video/mp4", # .mp4, .m4v
"video/quicktime", # .mov
"video/mpeg", # .mpeg
"video/ogg", # .ogv
"video/webm", # .webm
"video/x-matroska", # .mkv
"video/x-flv", # .flv
"audio/mpeg", # .mp3
"audio/x-wav", # .wav
"audio/aac", # .aac
"audio/flac", # .flac
"audio/mp4", # .m4a
"audio/ogg", # .ogg
"application/pdf", # .pdf
"application/msword", # .doc
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", # .docx
"application/vnd.ms-powerpoint", # .ppt, .pps
"application/vnd.openxmlformats-officedocument.presentationml.slideshow", # .ppsx
"application/vnd.ms-excel", # .xls
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", # .xlsx
]
# For each common type, pick the first entry of +magics+ whose second element
# is that type. +find+ yields nil when no entry exists for a type; those nils
# are dropped by +compact+ below.
common_magics = common_types.map do |common_type|
magics.find { |_, type, _| type == common_type }
end
# Put the common entries at the front of the list. +uniq+ keeps the first
# occurrence, so each hoisted entry disappears from its original position.
magics = (common_magics.compact + magics).uniq
puts "# -*- coding: binary -*-"
puts "# Generated from #{FILE}"
puts "class MimeMagic"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment