Commit 6c535b1d by Jason Axelson

Properly detect multi-character emoji

Previously emoji like 👨‍👨‍👦 (:man-man-boy:) were decoded as
:man::man::boy: (👨👨👦) even though there is a specific emoji entry for
:man-man-boy:. The solution is to change how the regexp is constructed.
Instead of leaving the emoji that go into the regexp unordered, we sort
them in reverse alphabetical order to ensure that any emoji that
includes another (such as :man-man-boy: including :man:) gets
sorted BEFORE the emoji that it includes.

We accomplish this by using a `SortedSet` and implementing `<=>` on `Emoji`.
parent 3abd7063
......@@ -47,6 +47,14 @@ module Rumoji
codepoints.size > 1
end
# Orders emoji in reverse alphabetical order of their symbol so that longer,
# more complex emoji are matched in the regex before their simpler components,
# e.g. :man-man-boy: needs to come before :man: in the regex or else
# :man-man-boy: will never be matched.
#
# Returns the result of comparing +other.symbol+ with this emoji's +symbol+
# (operands deliberately swapped to reverse the order), or nil when +other+
# does not expose a +symbol+ -- the conventional <=> answer for operands
# that cannot be compared.
def <=>(other)
  return unless other.respond_to?(:symbol)

  other.symbol <=> symbol
end
autoload :PEOPLE, 'rumoji/emoji/people'
autoload :NATURE, 'rumoji/emoji/nature'
autoload :OBJECTS, 'rumoji/emoji/objects'
......
......@@ -5,7 +5,7 @@ require 'set'
module Rumoji
class Emoji
NATURE = Set[
NATURE = SortedSet[
self.new("\u{2600}" , [:sunny], "BLACK SUN WITH RAYS"),
self.new("\u{2614}" , [:umbrella], "UMBRELLA WITH RAIN DROPS"),
self.new("\u{2601}" , [:cloud]),
......
......@@ -5,7 +5,7 @@ require 'set'
module Rumoji
class Emoji
OBJECTS = Set[
OBJECTS = SortedSet[
self.new("\u{1F38D}", [:bamboo], "PINE DECORATION"), # "Japanese new year's door decoration
self.new("\u{1F49D}", [:gift_heart], "HEART WITH RIBBON"),
self.new("\u{1F38E}", [:dolls], "JAPANESE DOLLS"), # "Japanese Hinamatsuri or girls' doll festival"
......
......@@ -5,7 +5,7 @@ require 'set'
module Rumoji
class Emoji
PEOPLE = Set[
PEOPLE = SortedSet[
self.new("\u{1F604}", [:smile, :simple_smile], "SMILING FACE WITH OPEN MOUTH AND SMILING EYES"),
self.new("\u{1F606}", [:laughing], "SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES"),
self.new("\u{1F60A}", [:blush], "SMILING FACE WITH SMILING EYES"),
......
......@@ -5,7 +5,7 @@ require 'set'
module Rumoji
class Emoji
PLACES = Set[
PLACES = SortedSet[
self.new("\u{1f6a1}", [:aerial_tramway]),
self.new("\u{2708}" , [:airplane]),
self.new("\u{1f691}", [:ambulance]),
......
......@@ -5,7 +5,7 @@ require 'set'
module Rumoji
class Emoji
SYMBOLS = Set[
SYMBOLS = SortedSet[
self.new("\u{1f4af}", [:"100"]),
self.new("\u{1f522}", [:"1234"]),
self.new("\u{1f170}", [:a]),
......
......@@ -10,6 +10,7 @@ describe Rumoji do
@zero = "0️⃣"
@us = "πŸ‡ΊπŸ‡Έ"
@non_potable_water = "🚱"
@man_man_boy_boy = "πŸ‘¨β€πŸ‘¨β€πŸ‘¦β€πŸ‘¦"
end
describe "#encode" do
......@@ -24,6 +25,12 @@ describe Rumoji do
Rumoji.encode(text).must_equal text
end
# The multi-character emoji must encode to its own dedicated name, not to the
# concatenated names of the individual emoji it is composed of.
it "encodes man_man_boy_boy" do
  encoded = Rumoji.encode("#{@man_man_boy_boy}")

  encoded.must_equal ":man-man-boy-boy:"
  encoded.wont_equal ":man::man::boy::boy:"
end
describe "with multiple codepoints" do
it "transforms a stream" do
Rumoji.encode("#{@zero}").must_equal ":zero:"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment