Commit 8df02271 by Mark Wunsch

Support lookup for emoji with multiple codepoints

In addition, this code rewrites encode to just defer to encode_io. This
is an attempt to fix #2, though I believe the lookahead code can and
should be refined.
parent 2bf99099
...@@ -8,11 +8,8 @@ module Rumoji ...@@ -8,11 +8,8 @@ module Rumoji
# Transform emoji into its cheat-sheet code # Transform emoji into its cheat-sheet code
def encode(str) def encode(str)
remapped_codepoints = str.codepoints.flat_map do |codepoint| io = StringIO.new(str)
emoji = Emoji.find_by_codepoint(codepoint) encode_io(io).string
emoji ? emoji.code.codepoints.entries : codepoint
end
remapped_codepoints.pack("U*")
end end
# Transform a cheat-sheet code into an Emoji # Transform a cheat-sheet code into an Emoji
...@@ -21,10 +18,17 @@ module Rumoji ...@@ -21,10 +18,17 @@ module Rumoji
end end
def encode_io(readable, writeable=StringIO.new("")) def encode_io(readable, writeable=StringIO.new(""))
readable.each_codepoint do |codepoint| codepoints = readable.each_codepoint.entries
emoji = Emoji.find_by_codepoint(codepoint) codepoints.each_with_index do |codepoint, index|
emoji_or_character = emoji ? emoji.code : [codepoint].pack("U") possible_emoji = Emoji.select_by_codepoint(codepoint)
writeable.write emoji_or_character sequence = if possible_emoji.empty?
[codepoint].pack("U")
elsif possible_emoji.size.eql?(1)
possible_emoji.first.code
else
find_multipoint_emoji_in_set(possible_emoji, codepoints[index + 1])
end
writeable.write sequence
end end
writeable writeable
end end
...@@ -36,4 +40,13 @@ module Rumoji ...@@ -36,4 +40,13 @@ module Rumoji
writeable writeable
end end
private
# Chooses, among several candidate emoji that share the current codepoint,
# the one whose codepoint list also contains the codepoint that follows it.
#
# possible_emoji_set - Enumerable of emoji objects responding to #codepoints
#                      (list of Integer codepoints) and #code.
# next_codepoint     - Integer codepoint following the current one, or nil
#                      when the current codepoint is the last one available.
#
# Returns the matching emoji's code, or "" when there is no following
# codepoint or no candidate contains it. The result is always a String, so
# callers can write it out without a nil check.
def find_multipoint_emoji_in_set(possible_emoji_set, next_codepoint)
  # Without a lookahead codepoint nothing can be disambiguated; emit nothing
  # rather than falling through and returning nil.
  return "" unless next_codepoint

  match = possible_emoji_set.find { |emoji| emoji.codepoints.include?(next_codepoint) }
  match ? match.code : ""
end
end end
...@@ -23,7 +23,7 @@ module Rumoji ...@@ -23,7 +23,7 @@ module Rumoji
end end
def to_s def to_s
@codepoints.to_a.pack("U*") codepoints.pack("U*")
end end
def hash def hash
...@@ -34,6 +34,10 @@ module Rumoji ...@@ -34,6 +34,10 @@ module Rumoji
@codepoints.map{|point| point.to_s(16).upcase }.join("-") @codepoints.map{|point| point.to_s(16).upcase }.join("-")
end end
# Returns the result of calling #entries on the @codepoints instance
# variable, i.e. the emoji's codepoints collected into an Array.
# NOTE(review): presumably @codepoints is an Enumerable of Integer
# codepoints set by the constructor -- confirm against #initialize.
def codepoints
@codepoints.entries
end
autoload :PEOPLE, 'rumoji/emoji/people' autoload :PEOPLE, 'rumoji/emoji/people'
autoload :NATURE, 'rumoji/emoji/nature' autoload :NATURE, 'rumoji/emoji/nature'
autoload :OBJECTS, 'rumoji/emoji/objects' autoload :OBJECTS, 'rumoji/emoji/objects'
...@@ -50,6 +54,10 @@ module Rumoji ...@@ -50,6 +54,10 @@ module Rumoji
ALL.find {|emoji| emoji.to_s == string } ALL.find {|emoji| emoji.to_s == string }
end end
# Collects every emoji in ALL whose codepoint list contains +codepoint+.
#
# codepoint - the codepoint to look for (compared via #include? against
#             each emoji's #codepoints).
#
# Returns the matching emoji, in the order they appear in ALL; the result
# is empty when no emoji contains the codepoint.
def self.select_by_codepoint(codepoint)
  ALL.select do |candidate|
    candidate.codepoints.include?(codepoint)
  end
end
def self.find_by_codepoint(codepoint) def self.find_by_codepoint(codepoint)
ALL.find {|emoji| emoji.hex == codepoint.to_s(16).upcase } ALL.find {|emoji| emoji.hex == codepoint.to_s(16).upcase }
end end
......
...@@ -98,16 +98,16 @@ module Rumoji ...@@ -98,16 +98,16 @@ module Rumoji
self.new("\u{26a0}" , [:warning]), self.new("\u{26a0}" , [:warning]),
self.new("\u{1f492}", [:wedding]), self.new("\u{1f492}", [:wedding]),
# Regional Indicator Symbols # Regional Indicator Symbols
self.new("\u{1f1ef}\u{1f1f5}", [:jp], "REGIONAL INDICATOR SYMBOL LETTERS JP"), self.new("\u{1f1ef 1f1f5}", [:jp], "REGIONAL INDICATOR SYMBOL LETTERS JP"),
self.new("\u{1f1f0}\u{1f1f7}", [:kr], "REGIONAL INDICATOR SYMBOL LETTERS KR"), self.new("\u{1f1f0 1f1f7}", [:kr], "REGIONAL INDICATOR SYMBOL LETTERS KR"),
self.new("\u{1f1e8}\u{1f1f3}", [:cn], "REGIONAL INDICATOR SYMBOL LETTERS CN"), self.new("\u{1f1e8 1f1f3}", [:cn], "REGIONAL INDICATOR SYMBOL LETTERS CN"),
self.new("\u{1f1fa}\u{1f1f8}", [:us], "REGIONAL INDICATOR SYMBOL LETTERS US"), self.new("\u{1f1fa 1f1f8}", [:us], "REGIONAL INDICATOR SYMBOL LETTERS US"),
self.new("\u{1f1eb}\u{1f1f7}", [:fr], "REGIONAL INDICATOR SYMBOL LETTERS FR"), self.new("\u{1f1eb 1f1f7}", [:fr], "REGIONAL INDICATOR SYMBOL LETTERS FR"),
self.new("\u{1f1ea}\u{1f1f8}", [:es], "REGIONAL INDICATOR SYMBOL LETTERS ES"), self.new("\u{1f1ea 1f1f8}", [:es], "REGIONAL INDICATOR SYMBOL LETTERS ES"),
self.new("\u{1f1ee}\u{1f1f9}", [:it], "REGIONAL INDICATOR SYMBOL LETTERS IT"), self.new("\u{1f1ee 1f1f9}", [:it], "REGIONAL INDICATOR SYMBOL LETTERS IT"),
self.new("\u{1f1f7}\u{1f1fa}", [:ru], "REGIONAL INDICATOR SYMBOL LETTERS RU"), self.new("\u{1f1f7 1f1fa}", [:ru], "REGIONAL INDICATOR SYMBOL LETTERS RU"),
self.new("\u{1f1ec}\u{1f1e7}", [:gb, :uk], "REGIONAL INDICATOR SYMBOL LETTERS GB"), self.new("\u{1f1ec 1f1e7}", [:gb, :uk], "REGIONAL INDICATOR SYMBOL LETTERS GB"),
self.new("\u{1f1e9}\u{1f1ea}", [:de], "REGIONAL INDICATOR SYMBOL LETTERS DE"), self.new("\u{1f1e9 1f1ea}", [:de], "REGIONAL INDICATOR SYMBOL LETTERS DE"),
] ]
end end
end end
...@@ -81,7 +81,7 @@ module Rumoji ...@@ -81,7 +81,7 @@ module Rumoji
self.new("\u{1f6c3}", [:customs]), self.new("\u{1f6c3}", [:customs]),
self.new("\u{1f4a0}", [:diamond_shape_with_a_dot_inside]), self.new("\u{1f4a0}", [:diamond_shape_with_a_dot_inside]),
self.new("\u{1f6af}", [:do_not_litter]), self.new("\u{1f6af}", [:do_not_litter]),
self.new("\u{0038}" , [:eight]), self.new("\u{0038 20e3}" , [:eight]),
self.new("\u{2734}" , [:eight_pointed_black_star]), self.new("\u{2734}" , [:eight_pointed_black_star]),
self.new("\u{2733}" , [:eight_spoked_asterisk]), self.new("\u{2733}" , [:eight_spoked_asterisk]),
self.new("\u{1f51a}", [:end]), self.new("\u{1f51a}", [:end]),
...@@ -90,7 +90,7 @@ module Rumoji ...@@ -90,7 +90,7 @@ module Rumoji
self.new("\u{0034}" , [:four]), self.new("\u{0034}" , [:four]),
self.new("\u{1f193}", [:free]), self.new("\u{1f193}", [:free]),
self.new("\u{264a}" , [:gemini]), self.new("\u{264a}" , [:gemini]),
self.new("\u{0023}" , [:hash]), self.new("\u{0023 20e3}" , [:hash]),
self.new("\u{1f49f}", [:heart_decoration]), self.new("\u{1f49f}", [:heart_decoration]),
self.new("\u{2714}" , [:heavy_check_mark]), self.new("\u{2714}" , [:heavy_check_mark]),
self.new("\u{2797}" , [:heavy_division_sign]), self.new("\u{2797}" , [:heavy_division_sign]),
...@@ -120,7 +120,7 @@ module Rumoji ...@@ -120,7 +120,7 @@ module Rumoji
self.new("\u{274e}" , [:negative_squared_cross_mark]), self.new("\u{274e}" , [:negative_squared_cross_mark]),
self.new("\u{1f195}", [:new]), self.new("\u{1f195}", [:new]),
self.new("\u{1f196}", [:ng]), self.new("\u{1f196}", [:ng]),
self.new("\u{0039}" , [:nine]), self.new("\u{0039 20e3}" , [:nine]),
self.new("\u{1f6b3}", [:no_bicycles]), self.new("\u{1f6b3}", [:no_bicycles]),
self.new("\u{26d4}" , [:no_entry]), self.new("\u{26d4}" , [:no_entry]),
self.new("\u{1f6ab}", [:no_entry_sign]), self.new("\u{1f6ab}", [:no_entry_sign]),
...@@ -132,7 +132,7 @@ module Rumoji ...@@ -132,7 +132,7 @@ module Rumoji
self.new("\u{1f17e}", [:o2]), self.new("\u{1f17e}", [:o2]),
self.new("\u{1f197}", [:ok]), self.new("\u{1f197}", [:ok]),
self.new("\u{1f51b}", [:on]), self.new("\u{1f51b}", [:on]),
self.new("\u{0031}" , [:one]), self.new("\u{0031 20e3}" , [:one]),
self.new("\u{26ce}" , [:ophiuchus]), self.new("\u{26ce}" , [:ophiuchus]),
self.new("\u{1f17f}", [:parking]), self.new("\u{1f17f}", [:parking]),
self.new("\u{303d}" , [:part_alternation_mark]), self.new("\u{303d}" , [:part_alternation_mark]),
...@@ -152,9 +152,9 @@ module Rumoji ...@@ -152,9 +152,9 @@ module Rumoji
self.new("\u{2650}" , [:sagittarius]), self.new("\u{2650}" , [:sagittarius]),
self.new("\u{264f}" , [:scorpius]), self.new("\u{264f}" , [:scorpius]),
self.new("\u{3299}" , [:secret]), self.new("\u{3299}" , [:secret]),
self.new("\u{0037}" , [:seven]), self.new("\u{0037 20e3}" , [:seven]),
self.new("\u{1f4f6}", [:signal_strength]), self.new("\u{1f4f6}", [:signal_strength]),
self.new("\u{0036}" , [:six]), self.new("\u{0036 20e3}" , [:six]),
self.new("\u{1f52f}", [:six_pointed_star]), self.new("\u{1f52f}", [:six_pointed_star]),
self.new("\u{1f539}", [:small_blue_diamond]), self.new("\u{1f539}", [:small_blue_diamond]),
self.new("\u{1f538}", [:small_orange_diamond]), self.new("\u{1f538}", [:small_orange_diamond]),
...@@ -164,12 +164,12 @@ module Rumoji ...@@ -164,12 +164,12 @@ module Rumoji
self.new("\u{1f198}", [:sos]), self.new("\u{1f198}", [:sos]),
self.new("\u{1f523}", [:symbols]), self.new("\u{1f523}", [:symbols]),
self.new("\u{2649}" , [:taurus]), self.new("\u{2649}" , [:taurus]),
self.new("\u{0033}" , [:three]), self.new("\u{0033 20e3}" , [:three]),
self.new("\u{2122}" , [:tm]), self.new("\u{2122}" , [:tm]),
self.new("\u{1f51d}", [:top]), self.new("\u{1f51d}", [:top]),
self.new("\u{1f531}", [:trident]), self.new("\u{1f531}", [:trident]),
self.new("\u{1f500}", [:twisted_rightwards_arrows]), self.new("\u{1f500}", [:twisted_rightwards_arrows]),
self.new("\u{0032}" , [:two]), self.new("\u{0032 20e3}" , [:two]),
self.new("\u{1f239}", [:u5272]), self.new("\u{1f239}", [:u5272]),
self.new("\u{1f234}", [:u5408]), self.new("\u{1f234}", [:u5408]),
self.new("\u{1f23a}", [:u55b6]), self.new("\u{1f23a}", [:u55b6]),
...@@ -195,7 +195,7 @@ module Rumoji ...@@ -195,7 +195,7 @@ module Rumoji
self.new("\u{1f533}", [:white_square_button]), self.new("\u{1f533}", [:white_square_button]),
self.new("\u{1f6ba}", [:womens]), self.new("\u{1f6ba}", [:womens]),
self.new("\u{274c}" , [:x]), self.new("\u{274c}" , [:x]),
self.new("\u{0030}" , [:zero]) self.new("\u{0030 20e3}" , [:zero])
] ]
end end
end end
...@@ -7,6 +7,8 @@ describe Rumoji do ...@@ -7,6 +7,8 @@ describe Rumoji do
before do before do
@poop = "💩" @poop = "💩"
@smile = "😄" @smile = "😄"
@zero = "0⃣"
@us = "🇺🇸"
end end
describe "#encode" do describe "#encode" do
...@@ -27,6 +29,23 @@ describe Rumoji do ...@@ -27,6 +29,23 @@ describe Rumoji do
io = StringIO.new("#{@smile}") io = StringIO.new("#{@smile}")
Rumoji.encode_io(io).string.must_equal ":smile:" Rumoji.encode_io(io).string.must_equal ":smile:"
end end
# Specs for encode_io on emoji that are written with more than one
# codepoint (@zero and @us are set up in the before block).
describe "with multiple codepoints" do
it "transforms a stream" do
io1 = StringIO.new("#{@zero}")
io2 = StringIO.new("#{@us}")
# Each multi-codepoint emoji must collapse to exactly one cheat-sheet code.
Rumoji.encode_io(io1).string.must_equal ":zero:"
Rumoji.encode_io(io2).string.must_equal ":us:"
end
# The emoji is embedded in ordinary text, which must pass through unchanged.
describe "with leading and trailing characters" do
it "is able to pull multipoint emoji out of a sequence" do
io = StringIO.new("An example of a multipoint emoji is the #{@us} flag.")
Rumoji.encode_io(io).string.must_equal "An example of a multipoint emoji is the :us: flag."
end
end
end
end end
describe "#decode_io" do describe "#decode_io" do
...@@ -34,5 +53,12 @@ describe Rumoji do ...@@ -34,5 +53,12 @@ describe Rumoji do
io = StringIO.new(":poop:") io = StringIO.new(":poop:")
Rumoji.decode_io(io).string.must_equal @poop Rumoji.decode_io(io).string.must_equal @poop
end end
# Spec for decode_io producing an emoji that is written with more than one
# codepoint (@zero is set up in the before block).
describe "with multiple codepoints" do
it "decodes a stream" do
io = StringIO.new(":zero:")
# The cheat-sheet code must expand to the full multi-codepoint string.
Rumoji.decode_io(io).string.must_equal @zero
end
end
end end
end end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment