Commit 6b264e8a by Mark Wunsch

Merge pull request #22 from mavenlink/faster_encode

Faster encode technique
parents 81517a21 8493fc91
...@@ -15,3 +15,4 @@ spec/reports ...@@ -15,3 +15,4 @@ spec/reports
test/tmp test/tmp
test/version_tmp test/version_tmp
tmp tmp
.idea
...@@ -3,4 +3,8 @@ ...@@ -3,4 +3,8 @@
require 'rumoji' require 'rumoji'
Rumoji.decode_io(STDIN, STDOUT) if ARGV.first == 'encode'
Rumoji.encode_io(STDIN, STDOUT)
else
Rumoji.decode_io(STDIN, STDOUT)
end
...@@ -8,8 +8,7 @@ module Rumoji ...@@ -8,8 +8,7 @@ module Rumoji
# Transform emoji into its cheat-sheet code # Transform emoji into its cheat-sheet code
def encode(str) def encode(str)
io = StringIO.new(str) str.gsub(Emoji::ALL_REGEXP) { |match| (emoji = Emoji.find_by_string(match)) && emoji.code || match }
encode_io(io).string
end end
# Transform a cheat-sheet code into an Emoji # Transform a cheat-sheet code into an Emoji
...@@ -18,39 +17,9 @@ module Rumoji ...@@ -18,39 +17,9 @@ module Rumoji
end end
def encode_io(readable, writeable=StringIO.new("")) def encode_io(readable, writeable=StringIO.new(""))
previous_codepoints = [] readable.each_line do |line|
writeable.write encode(line)
readable.each_codepoint do |codepoint|
possible_emoji = Emoji.select_by_codepoint(codepoint)
sequence = if possible_emoji.empty?
# If this codepoint is not part of an emoji
# just write it back out, along with any previously stored codepoints.
dump_codepoints_to_string(previous_codepoints << codepoint)
elsif !possible_emoji.any?(&:multiple?)
# If this codepoint is part of an emoji, but not one
# that contains multiple codepoints, write out the
# first possiblity's cheat code and the previously stored codepoints
dump_codepoints_to_string(previous_codepoints) << possible_emoji.first.code
else
# This codepoint is a part of an emoji with multiple
# codepoints, so push it onto the stack.
previous_codepoints.push(codepoint)
# Check and see if this completes the emoji, otherwise,
# write out an empty string.
if found = possible_emoji.find {|e| e.codepoints.eql?(previous_codepoints) }
previous_codepoints.clear and found.code
else
""
end
end
writeable.write sequence
end end
# Write any remaining codepoints.
writeable.write dump_codepoints_to_string(previous_codepoints) if previous_codepoints.any?
writeable writeable
end end
...@@ -60,13 +29,4 @@ module Rumoji ...@@ -60,13 +29,4 @@ module Rumoji
end end
writeable writeable
end end
private
def dump_codepoints_to_string(codepoints)
codepoints.pack("U*").tap do
codepoints.clear
end
end
end end
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
module Rumoji module Rumoji
class Emoji class Emoji
attr_reader :name attr_reader :name, :string
def initialize(string, symbols, name = nil) def initialize(string, symbols, name = nil)
@string = string
@codepoints = string.codepoints @codepoints = string.codepoints
@cheat_codes = [symbols].flatten @cheat_codes = [symbols].flatten
@name = name || @cheat_codes.first.to_s.upcase.gsub("_", " ") @name = name || @cheat_codes.first.to_s.upcase.gsub("_", " ")
...@@ -23,7 +24,7 @@ module Rumoji ...@@ -23,7 +24,7 @@ module Rumoji
end end
def to_s def to_s
codepoints.pack("U*") @string
end end
def hash def hash
...@@ -31,7 +32,7 @@ module Rumoji ...@@ -31,7 +32,7 @@ module Rumoji
end end
def hex def hex
@codepoints.map{|point| point.to_s(16).upcase }.join("-") @hex ||= @codepoints.map{|point| point.to_s(16).upcase }.join("-")
end end
def codepoints def codepoints
...@@ -50,21 +51,18 @@ module Rumoji ...@@ -50,21 +51,18 @@ module Rumoji
ALL = PEOPLE | NATURE | OBJECTS | PLACES | SYMBOLS ALL = PEOPLE | NATURE | OBJECTS | PLACES | SYMBOLS
ALL_REGEXP = Regexp.new(ALL.map(&:string).join('|'))
def self.find(symbol) def self.find(symbol)
ALL.find {|emoji| emoji.include? symbol } ALL.find {|emoji| emoji.include? symbol }
end end
def self.find_by_string(string) STRING_LOOKUP = ALL.each.with_object({}) do |emoji, lookup|
ALL.find {|emoji| emoji.to_s == string } lookup[emoji.string] = emoji
end
def self.select_by_codepoint(codepoint)
ALL.select {|emoji| emoji.codepoints.include? codepoint }
end end
def self.find_by_codepoint(codepoint) def self.find_by_string(string)
ALL.find {|emoji| emoji.hex == codepoint.to_s(16).upcase } STRING_LOOKUP[string]
end end
end end
end end
...@@ -75,7 +75,7 @@ module Rumoji ...@@ -75,7 +75,7 @@ module Rumoji
self.new("\u{1f564}", [:clock930]), self.new("\u{1f564}", [:clock930]),
self.new("\u{3297}" , [:congratulations]), self.new("\u{3297}" , [:congratulations]),
self.new("\u{1f192}", [:cool]), self.new("\u{1f192}", [:cool]),
self.new("\u{00a9}" , [:copyright]), # self.new("\u{00a9}" , [:copyright]),
self.new("\u{27b0}" , [:curly_loop]), self.new("\u{27b0}" , [:curly_loop]),
self.new("\u{1f4b1}", [:currency_exchange]), self.new("\u{1f4b1}", [:currency_exchange]),
self.new("\u{1f6c3}", [:customs]), self.new("\u{1f6c3}", [:customs]),
...@@ -143,7 +143,7 @@ module Rumoji ...@@ -143,7 +143,7 @@ module Rumoji
self.new("\u{1f518}", [:radio_button]), self.new("\u{1f518}", [:radio_button]),
self.new("\u{267b}" , [:recycle]), self.new("\u{267b}" , [:recycle]),
self.new("\u{1f534}", [:red_circle]), self.new("\u{1f534}", [:red_circle]),
self.new("\u{00ae}" , [:registered]), # self.new("\u{00ae}" , [:registered]),
self.new("\u{1f501}", [:repeat]), self.new("\u{1f501}", [:repeat]),
self.new("\u{1f502}", [:repeat_one]), self.new("\u{1f502}", [:repeat_one]),
self.new("\u{1f6bb}", [:restroom]), self.new("\u{1f6bb}", [:restroom]),
......
...@@ -69,12 +69,5 @@ describe Rumoji::Emoji do ...@@ -69,12 +69,5 @@ describe Rumoji::Emoji do
it "finds an emoji from a string" do it "finds an emoji from a string" do
subject.find_by_string(smile_str).symbol.must_equal smile_sym subject.find_by_string(smile_str).symbol.must_equal smile_sym
end end
it "finds an emoji from a codepoint" do
smile_str.codepoints.map do |point|
subject.find_by_codepoint(point).symbol.must_equal smile_sym
end
end
end end
end end
...@@ -18,6 +18,44 @@ describe Rumoji do ...@@ -18,6 +18,44 @@ describe Rumoji do
Rumoji.encode(@smile).must_equal ":smile:" Rumoji.encode(@smile).must_equal ":smile:"
Rumoji.encode("#{@smile}").must_equal ":smile:" Rumoji.encode("#{@smile}").must_equal ":smile:"
end end
it "keeps codepoints that match the beginnings of multi-codepoint emoji" do
text = "i like #hashtags and 1direction they are the #1 band. end with 9"
Rumoji.encode(text).must_equal text
end
describe "with multiple codepoints" do
it "transforms a stream" do
Rumoji.encode("#{@zero}").must_equal ":zero:"
Rumoji.encode("#{@us}").must_equal ":us:"
end
it "transforms a stream of many emoji" do
num = ":one: :two: :three: :four: :five: :six: :seven: :eight: :nine: :zero: :hash:"
emoji = "1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 0️⃣ #️⃣"
Rumoji.encode(emoji).must_equal num
end
it "does not encode double digits" do
num = ":zero: :one: :two: :three: :four: :five: :six: :seven: :eight: :nine: :hash:"
double_digits = "00 11 22 33 44 55 66 77 88 99 ##"
Rumoji.encode(double_digits).wont_equal num
end
describe "with leading and trailing characters" do
it "is able to pull multipoint emoji out of a sequence" do
string = "An example of a multipoint emoji is the #{@us} flag."
Rumoji.encode(string).must_equal "An example of a multipoint emoji is the :us: flag."
end
end
describe "with trailing emoji" do
it "writes characters that are in a multipoint emoji followed by an emoji" do
string = "I would like 0#{@poop}"
Rumoji.encode(string).must_equal "I would like 0:poop:"
end
end
end
end end
describe "#decode" do describe "#decode" do
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment