Commit 6c535b1d by Jason Axelson

Properly detect multi-character emoji

Previously, emoji like 👨‍👨‍👦 (:man-man-boy:) were encoded as
:man::man::boy: (👨👨👦) even though there is a specific emoji entry for
:man-man-boy:. The solution is to change how the regexp is constructed.
Instead of leaving the emoji that go into the regexp unordered, we sort
them in reverse alphabetical order to ensure that any emoji that
includes another (such as :man-man-boy: including :man:) gets
sorted BEFORE the emoji that it includes.

We accomplish this by using a `SortedSet` and implementing `<=>` on `Emoji`
parent 3abd7063
...@@ -47,6 +47,14 @@ module Rumoji ...@@ -47,6 +47,14 @@ module Rumoji
codepoints.size > 1 codepoints.size > 1
end end
# Sort by reverse alphabetical order of +symbol+ so that longer, more complex
# emoji are matched in the regex before their simpler components,
# e.g. :man-man-boy: needs to come before :man: in the regex or else
# :man-man-boy: will never be matched.
#
# Returns the result of comparing the two symbols with the operands swapped,
# or nil when +other+ does not expose a +symbol+ (the conventional <=> answer
# for incomparable objects, instead of raising NoMethodError).
def <=>(other)
  return nil unless other.respond_to?(:symbol)

  # Operands are swapped on purpose: that is what reverses the ordering.
  other.symbol <=> symbol
end
autoload :PEOPLE, 'rumoji/emoji/people' autoload :PEOPLE, 'rumoji/emoji/people'
autoload :NATURE, 'rumoji/emoji/nature' autoload :NATURE, 'rumoji/emoji/nature'
autoload :OBJECTS, 'rumoji/emoji/objects' autoload :OBJECTS, 'rumoji/emoji/objects'
......
...@@ -5,7 +5,7 @@ require 'set' ...@@ -5,7 +5,7 @@ require 'set'
module Rumoji module Rumoji
class Emoji class Emoji
NATURE = Set[ NATURE = SortedSet[
self.new("\u{2600}" , [:sunny], "BLACK SUN WITH RAYS"), self.new("\u{2600}" , [:sunny], "BLACK SUN WITH RAYS"),
self.new("\u{2614}" , [:umbrella], "UMBRELLA WITH RAIN DROPS"), self.new("\u{2614}" , [:umbrella], "UMBRELLA WITH RAIN DROPS"),
self.new("\u{2601}" , [:cloud]), self.new("\u{2601}" , [:cloud]),
......
...@@ -5,7 +5,7 @@ require 'set' ...@@ -5,7 +5,7 @@ require 'set'
module Rumoji module Rumoji
class Emoji class Emoji
OBJECTS = Set[ OBJECTS = SortedSet[
self.new("\u{1F38D}", [:bamboo], "PINE DECORATION"), # "Japanese new year's door decoration self.new("\u{1F38D}", [:bamboo], "PINE DECORATION"), # "Japanese new year's door decoration
self.new("\u{1F49D}", [:gift_heart], "HEART WITH RIBBON"), self.new("\u{1F49D}", [:gift_heart], "HEART WITH RIBBON"),
self.new("\u{1F38E}", [:dolls], "JAPANESE DOLLS"), # "Japanese Hinamatsuri or girls' doll festival" self.new("\u{1F38E}", [:dolls], "JAPANESE DOLLS"), # "Japanese Hinamatsuri or girls' doll festival"
......
...@@ -5,7 +5,7 @@ require 'set' ...@@ -5,7 +5,7 @@ require 'set'
module Rumoji module Rumoji
class Emoji class Emoji
PEOPLE = Set[ PEOPLE = SortedSet[
self.new("\u{1F604}", [:smile, :simple_smile], "SMILING FACE WITH OPEN MOUTH AND SMILING EYES"), self.new("\u{1F604}", [:smile, :simple_smile], "SMILING FACE WITH OPEN MOUTH AND SMILING EYES"),
self.new("\u{1F606}", [:laughing], "SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES"), self.new("\u{1F606}", [:laughing], "SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES"),
self.new("\u{1F60A}", [:blush], "SMILING FACE WITH SMILING EYES"), self.new("\u{1F60A}", [:blush], "SMILING FACE WITH SMILING EYES"),
......
...@@ -5,7 +5,7 @@ require 'set' ...@@ -5,7 +5,7 @@ require 'set'
module Rumoji module Rumoji
class Emoji class Emoji
PLACES = Set[ PLACES = SortedSet[
self.new("\u{1f6a1}", [:aerial_tramway]), self.new("\u{1f6a1}", [:aerial_tramway]),
self.new("\u{2708}" , [:airplane]), self.new("\u{2708}" , [:airplane]),
self.new("\u{1f691}", [:ambulance]), self.new("\u{1f691}", [:ambulance]),
......
...@@ -5,7 +5,7 @@ require 'set' ...@@ -5,7 +5,7 @@ require 'set'
module Rumoji module Rumoji
class Emoji class Emoji
SYMBOLS = Set[ SYMBOLS = SortedSet[
self.new("\u{1f4af}", [:"100"]), self.new("\u{1f4af}", [:"100"]),
self.new("\u{1f522}", [:"1234"]), self.new("\u{1f522}", [:"1234"]),
self.new("\u{1f170}", [:a]), self.new("\u{1f170}", [:a]),
......
...@@ -10,6 +10,7 @@ describe Rumoji do ...@@ -10,6 +10,7 @@ describe Rumoji do
@zero = "0️⃣" @zero = "0️⃣"
@us = "🇺🇸" @us = "🇺🇸"
@non_potable_water = "🚱" @non_potable_water = "🚱"
@man_man_boy_boy = "👨‍👨‍👦‍👦"
end end
describe "#encode" do describe "#encode" do
...@@ -24,6 +25,12 @@ describe Rumoji do ...@@ -24,6 +25,12 @@ describe Rumoji do
Rumoji.encode(text).must_equal text Rumoji.encode(text).must_equal text
end end
it "encodes man_man_boy_boy" do
  # The joined family sequence must map to its own dedicated alias rather
  # than to the concatenated aliases of its individual members.
  encoded = Rumoji.encode("#{@man_man_boy_boy}")
  encoded.must_equal ":man-man-boy-boy:"
  encoded.wont_equal ":man::man::boy::boy:"
end
describe "with multiple codepoints" do describe "with multiple codepoints" do
it "transforms a stream" do it "transforms a stream" do
Rumoji.encode("#{@zero}").must_equal ":zero:" Rumoji.encode("#{@zero}").must_equal ":zero:"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment