Commit 7248588c by Andrew Cantino

memoize, build lookup tables, and use regexp for speed

parent dc61de23
...@@ -8,8 +8,7 @@ module Rumoji ...@@ -8,8 +8,7 @@ module Rumoji
# Transform emoji into its cheat-sheet code # Transform emoji into its cheat-sheet code
def encode(str) def encode(str)
io = StringIO.new(str) str.gsub(Emoji::ALL_REGEXP) { |match| (emoji = Emoji.find_by_string(match)) && emoji.code || match }
encode_io(io).string
end end
# Transform a cheat-sheet code into an Emoji # Transform a cheat-sheet code into an Emoji
......
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
module Rumoji module Rumoji
class Emoji class Emoji
attr_reader :name attr_reader :name, :string
def initialize(string, symbols, name = nil) def initialize(string, symbols, name = nil)
@string = string
@codepoints = string.codepoints @codepoints = string.codepoints
@cheat_codes = [symbols].flatten @cheat_codes = [symbols].flatten
@name = name || @cheat_codes.first.to_s.upcase.gsub("_", " ") @name = name || @cheat_codes.first.to_s.upcase.gsub("_", " ")
...@@ -23,7 +24,7 @@ module Rumoji ...@@ -23,7 +24,7 @@ module Rumoji
end end
def to_s def to_s
codepoints.pack("U*") @string
end end
def hash def hash
...@@ -31,7 +32,7 @@ module Rumoji ...@@ -31,7 +32,7 @@ module Rumoji
end end
def hex def hex
@codepoints.map{|point| point.to_s(16).upcase }.join("-") @hex ||= @codepoints.map{|point| point.to_s(16).upcase }.join("-")
end end
def codepoints def codepoints
...@@ -50,16 +51,29 @@ module Rumoji ...@@ -50,16 +51,29 @@ module Rumoji
ALL = PEOPLE | NATURE | OBJECTS | PLACES | SYMBOLS ALL = PEOPLE | NATURE | OBJECTS | PLACES | SYMBOLS
ALL_REGEXP = Regexp.new(ALL.map(&:string).join('|'))
def self.find(symbol) def self.find(symbol)
ALL.find {|emoji| emoji.include? symbol } ALL.find {|emoji| emoji.include? symbol }
end end
STRING_LOOKUP = ALL.each.with_object({}) do |emoji, lookup|
lookup[emoji.string] = emoji
end
def self.find_by_string(string) def self.find_by_string(string)
ALL.find {|emoji| emoji.to_s == string } STRING_LOOKUP[string]
end
CODEPOINT_LOOKUP = ALL.each.with_object({}) do |emoji, lookup|
emoji.codepoints.each do |codepoint|
lookup[codepoint] ||= []
lookup[codepoint] << emoji unless lookup[codepoint].include?(emoji)
end
end end
def self.select_by_codepoint(codepoint) def self.select_by_codepoint(codepoint)
ALL.select {|emoji| emoji.codepoints.include? codepoint } CODEPOINT_LOOKUP[codepoint] || []
end end
def self.find_by_codepoint(codepoint) def self.find_by_codepoint(codepoint)
......
...@@ -18,6 +18,44 @@ describe Rumoji do ...@@ -18,6 +18,44 @@ describe Rumoji do
Rumoji.encode(@smile).must_equal ":smile:" Rumoji.encode(@smile).must_equal ":smile:"
Rumoji.encode("#{@smile}").must_equal ":smile:" Rumoji.encode("#{@smile}").must_equal ":smile:"
end end
it "keeps codepoints that match the beginnings of multi-codepoint emoji" do
text = "i like #hashtags and 1direction they are the #1 band. end with 9"
Rumoji.encode(text).must_equal text
end
describe "with multiple codepoints" do
it "transforms a stream" do
Rumoji.encode("#{@zero}").must_equal ":zero:"
Rumoji.encode("#{@us}").must_equal ":us:"
end
it "transforms a stream of many emoji" do
num = ":one: :two: :three: :four: :five: :six: :seven: :eight: :nine: :zero: :hash:"
emoji = "1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 0️⃣ #️⃣"
Rumoji.encode(emoji).must_equal num
end
it "does not encode double digits" do
num = ":zero: :one: :two: :three: :four: :five: :six: :seven: :eight: :nine: :hash:"
double_digits = "00 11 22 33 44 55 66 77 88 99 ##"
Rumoji.encode(double_digits).wont_equal num
end
describe "with leading and trailing characters" do
it "is able to pull multipoint emoji out of a sequence" do
string = "An example of a multipoint emoji is the #{@us} flag."
Rumoji.encode(string).must_equal "An example of a multipoint emoji is the :us: flag."
end
end
describe "with trailing emoji" do
it "writes characters that are in a multipoint emoji followed by an emoji" do
string = "I would like 0#{@poop}"
Rumoji.encode(string).must_equal "I would like 0:poop:"
end
end
end
end end
describe "#decode" do describe "#decode" do
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment