Commit a20768b9 by Mark Wunsch

Update to codepoint traversal to better handle multi-codepoint emoji.

This is a fix for #7.
In addition to the updated iterator, I've adjusted the numeric emoji to
use accurate Unicode codepoints.
parent a6896619
......@@ -18,37 +18,40 @@ module Rumoji
end
# Streams +readable+ codepoint by codepoint, replacing every emoji found in
# the Emoji table with its cheat code and copying everything else through
# unchanged.
#
# readable  - any object responding to #each_codepoint (String, StringIO, ...).
# writeable - any object responding to #write; defaults to a fresh StringIO.
#
# Returns +writeable+.
#
# NOTE(review): a multi-codepoint emoji is only recognised when the buffered
# codepoints equal its full sequence, so a stray leading codepoint directly
# in front of one (e.g. "0" followed by the keycap-zero sequence) prevents
# the match and the whole run is written back raw.
def encode_io(readable, writeable=StringIO.new(""))
  # Codepoints seen so far that may be the start of a multi-codepoint emoji.
  previous_codepoints = []

  readable.each_codepoint do |codepoint|
    possible_emoji = Emoji.select_by_codepoint(codepoint)

    sequence = if possible_emoji.empty?
      # This codepoint is not part of any emoji: write it back out,
      # preceded by any previously buffered codepoints.
      pending = sequence_from_codepoints(previous_codepoints, codepoint)
      previous_codepoints.clear
      pending
    elsif possible_emoji.none?(&:multiple?)
      # This codepoint belongs only to single-codepoint emoji, so it cannot
      # complete a buffered sequence. Flush the buffer first to keep the
      # output in input order, then write the first possibility's cheat code.
      pending = sequence_from_codepoints(previous_codepoints)
      previous_codepoints.clear
      "#{pending}#{possible_emoji.first.code}"
    else
      # This codepoint is part of an emoji with multiple codepoints,
      # so push it onto the stack.
      previous_codepoints.push(codepoint)

      # If the stack now spells out a complete emoji, emit its cheat code;
      # otherwise write nothing until more codepoints arrive.
      found = possible_emoji.find { |e| e.codepoints.eql?(previous_codepoints) }
      if found
        previous_codepoints.clear
        found.code
      else
        ""
      end
    end

    writeable.write sequence
  end

  # Write any remaining codepoints that never completed an emoji.
  writeable.write sequence_from_codepoints(previous_codepoints) if previous_codepoints.any?
  writeable
end
......@@ -64,7 +67,7 @@ private
# Builds a UTF-8 string from the given codepoints.
#
# codepoints - Integers, possibly nested in Arrays and possibly containing
#              nil; nesting is flattened and nils are dropped.
#
# Returns a String ("" when no codepoints remain).
def sequence_from_codepoints(*codepoints)
  compacted = codepoints.flatten.compact
  # "U*" packs every element as a UTF-8 character, so no per-element
  # directive string needs to be built.
  compacted.pack("U*")
end
end
......@@ -59,11 +59,12 @@ module Rumoji
end
# Returns every emoji in ALL whose codepoint sequence contains +codepoint+
# at any position, not only as its first codepoint.
#
# codepoint - Integer codepoint to look for.
#
# Returns an Array of emoji (empty when nothing matches).
def self.select_by_codepoint(codepoint)
  ALL.select { |emoji| emoji.codepoints.include?(codepoint) }
end
# Looks up the first emoji in ALL whose #hex equals the upper-case
# hexadecimal rendering of +codepoint+.
#
# Returns the matching emoji, or nil when there is none.
def self.find_by_codepoint(codepoint)
  ALL.find do |candidate|
    hex_digits = codepoint.to_s(16).upcase
    candidate.hex == hex_digits
  end
end
end
end
......@@ -81,16 +81,16 @@ module Rumoji
self.new("\u{1f6c3}", [:customs]),
self.new("\u{1f4a0}", [:diamond_shape_with_a_dot_inside]),
self.new("\u{1f6af}", [:do_not_litter]),
self.new("\u{0038 20e3}" , [:eight]),
self.new("\u{0038 fe0f 20e3}" , [:eight]),
self.new("\u{2734}" , [:eight_pointed_black_star]),
self.new("\u{2733}" , [:eight_spoked_asterisk]),
self.new("\u{1f51a}", [:end]),
self.new("\u{23e9}" , [:fast_forward]),
self.new("\u{0035 20e3}" , [:five]),
self.new("\u{0034 20e3}" , [:four]),
self.new("\u{0035 fe0f 20e3}" , [:five]),
self.new("\u{0034 fe0f 20e3}" , [:four]),
self.new("\u{1f193}", [:free]),
self.new("\u{264a}" , [:gemini]),
self.new("\u{0023 20e3}" , [:hash]),
self.new("\u{0023 fe0f 20e3}" , [:hash]),
self.new("\u{1f49f}", [:heart_decoration]),
self.new("\u{2714}" , [:heavy_check_mark]),
self.new("\u{2797}" , [:heavy_division_sign]),
......@@ -120,7 +120,7 @@ module Rumoji
self.new("\u{274e}" , [:negative_squared_cross_mark]),
self.new("\u{1f195}", [:new]),
self.new("\u{1f196}", [:ng]),
self.new("\u{0039 20e3}" , [:nine]),
self.new("\u{0039 fe0f 20e3}" , [:nine]),
self.new("\u{1f6b3}", [:no_bicycles]),
self.new("\u{26d4}" , [:no_entry]),
self.new("\u{1f6ab}", [:no_entry_sign]),
......@@ -132,7 +132,7 @@ module Rumoji
self.new("\u{1f17e}", [:o2]),
self.new("\u{1f197}", [:ok]),
self.new("\u{1f51b}", [:on]),
self.new("\u{0031 20e3}" , [:one]),
self.new("\u{0031 fe0f 20e3}" , [:one]),
self.new("\u{26ce}" , [:ophiuchus]),
self.new("\u{1f17f}", [:parking]),
self.new("\u{303d}" , [:part_alternation_mark]),
......@@ -152,9 +152,9 @@ module Rumoji
self.new("\u{2650}" , [:sagittarius]),
self.new("\u{264f}" , [:scorpius]),
self.new("\u{3299}" , [:secret]),
self.new("\u{0037 20e3}" , [:seven]),
self.new("\u{0037 fe0f 20e3}" , [:seven]),
self.new("\u{1f4f6}", [:signal_strength]),
self.new("\u{0036 20e3}" , [:six]),
self.new("\u{0036 fe0f 20e3}" , [:six]),
self.new("\u{1f52f}", [:six_pointed_star]),
self.new("\u{1f539}", [:small_blue_diamond]),
self.new("\u{1f538}", [:small_orange_diamond]),
......@@ -164,12 +164,12 @@ module Rumoji
self.new("\u{1f198}", [:sos]),
self.new("\u{1f523}", [:symbols]),
self.new("\u{2649}" , [:taurus]),
self.new("\u{0033 20e3}" , [:three]),
self.new("\u{0033 fe0f 20e3}" , [:three]),
self.new("\u{2122}" , [:tm]),
self.new("\u{1f51d}", [:top]),
self.new("\u{1f531}", [:trident]),
self.new("\u{1f500}", [:twisted_rightwards_arrows]),
self.new("\u{0032 20e3}" , [:two]),
self.new("\u{0032 fe0f 20e3}" , [:two]),
self.new("\u{1f239}", [:u5272]),
self.new("\u{1f234}", [:u5408]),
self.new("\u{1f23a}", [:u55b6]),
......@@ -195,7 +195,7 @@ module Rumoji
self.new("\u{1f533}", [:white_square_button]),
self.new("\u{1f6ba}", [:womens]),
self.new("\u{274c}" , [:x]),
self.new("\u{0030 20e3}" , [:zero])
self.new("\u{0030 fe0f 20e3}" , [:zero])
]
end
end
......@@ -7,7 +7,7 @@ describe Rumoji do
before do
@poop = "💩"
@smile = "😄"
@zero = "0⃣"
@zero = "0⃣"
@us = "🇺🇸"
@non_potable_water = "🚱"
end
......@@ -51,11 +51,17 @@ describe Rumoji do
end
it "transforms a stream of many emoji" do
num = ":one::two::three::four::five::six::seven::eight::nine::hash:"
emoji = StringIO.new("1⃣2⃣3⃣4⃣5⃣6⃣7⃣8⃣9⃣#⃣")
num = ":one: :two: :three: :four: :five: :six: :seven: :eight: :nine: :zero: :hash:"
emoji = StringIO.new"1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 0️⃣ #️⃣"
Rumoji.encode_io(emoji).string.must_equal num
end
it "does not encode double digits" do
  # Repeated plain digits / hash signs contain no keycap codepoints, so
  # none of them may be turned into a cheat code.
  num = ":zero: :one: :two: :three: :four: :five: :six: :seven: :eight: :nine: :hash:"
  plain = "00 11 22 33 44 55 66 77 88 99 ##"
  encoded = Rumoji.encode_io(StringIO.new(plain)).string
  encoded.wont_equal num
  # Stronger check: the text must pass through untouched, not merely differ
  # from the cheat-code string.
  encoded.must_equal plain
end
describe "with leading and trailing characters" do
it "is able to pull multipoint emoji out of a sequence" do
io = StringIO.new("An example of a multipoint emoji is the #{@us} flag.")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment