Commit 6b264e8a by Mark Wunsch

Merge pull request #22 from mavenlink/faster_encode

Faster encode technique
parents 81517a21 8493fc91
......@@ -15,3 +15,4 @@ spec/reports
test/tmp
test/version_tmp
tmp
.idea
......@@ -3,4 +3,8 @@
require 'rumoji'
Rumoji.decode_io(STDIN, STDOUT)
if ARGV.first == 'encode'
Rumoji.encode_io(STDIN, STDOUT)
else
Rumoji.decode_io(STDIN, STDOUT)
end
......@@ -8,8 +8,7 @@ module Rumoji
# Transform emoji into its cheat-sheet code
def encode(str)
io = StringIO.new(str)
encode_io(io).string
str.gsub(Emoji::ALL_REGEXP) { |match| (emoji = Emoji.find_by_string(match)) && emoji.code || match }
end
# Transform a cheat-sheet code into an Emoji
......@@ -18,39 +17,9 @@ module Rumoji
end
def encode_io(readable, writeable=StringIO.new(""))
previous_codepoints = []
readable.each_codepoint do |codepoint|
possible_emoji = Emoji.select_by_codepoint(codepoint)
sequence = if possible_emoji.empty?
# If this codepoint is not part of an emoji
# just write it back out, along with any previously stored codepoints.
dump_codepoints_to_string(previous_codepoints << codepoint)
elsif !possible_emoji.any?(&:multiple?)
# If this codepoint is part of an emoji, but not one
# that contains multiple codepoints, write out the
# first possibility's cheat code and the previously stored codepoints
dump_codepoints_to_string(previous_codepoints) << possible_emoji.first.code
else
# This codepoint is a part of an emoji with multiple
# codepoints, so push it onto the stack.
previous_codepoints.push(codepoint)
# Check and see if this completes the emoji, otherwise,
# write out an empty string.
if found = possible_emoji.find {|e| e.codepoints.eql?(previous_codepoints) }
previous_codepoints.clear and found.code
else
""
end
end
writeable.write sequence
readable.each_line do |line|
writeable.write encode(line)
end
# Write any remaining codepoints.
writeable.write dump_codepoints_to_string(previous_codepoints) if previous_codepoints.any?
writeable
end
......@@ -60,13 +29,4 @@ module Rumoji
end
writeable
end
private
def dump_codepoints_to_string(codepoints)
codepoints.pack("U*").tap do
codepoints.clear
end
end
end
......@@ -2,9 +2,10 @@
module Rumoji
class Emoji
attr_reader :name
attr_reader :name, :string
def initialize(string, symbols, name = nil)
@string = string
@codepoints = string.codepoints
@cheat_codes = [symbols].flatten
@name = name || @cheat_codes.first.to_s.upcase.gsub("_", " ")
......@@ -23,7 +24,7 @@ module Rumoji
end
def to_s
codepoints.pack("U*")
@string
end
def hash
......@@ -31,7 +32,7 @@ module Rumoji
end
def hex
@codepoints.map{|point| point.to_s(16).upcase }.join("-")
@hex ||= @codepoints.map{|point| point.to_s(16).upcase }.join("-")
end
def codepoints
......@@ -50,21 +51,18 @@ module Rumoji
ALL = PEOPLE | NATURE | OBJECTS | PLACES | SYMBOLS
ALL_REGEXP = Regexp.new(ALL.map(&:string).join('|'))
def self.find(symbol)
ALL.find {|emoji| emoji.include? symbol }
end
def self.find_by_string(string)
ALL.find {|emoji| emoji.to_s == string }
end
def self.select_by_codepoint(codepoint)
ALL.select {|emoji| emoji.codepoints.include? codepoint }
STRING_LOOKUP = ALL.each.with_object({}) do |emoji, lookup|
lookup[emoji.string] = emoji
end
def self.find_by_codepoint(codepoint)
ALL.find {|emoji| emoji.hex == codepoint.to_s(16).upcase }
def self.find_by_string(string)
STRING_LOOKUP[string]
end
end
end
......@@ -75,7 +75,7 @@ module Rumoji
self.new("\u{1f564}", [:clock930]),
self.new("\u{3297}" , [:congratulations]),
self.new("\u{1f192}", [:cool]),
self.new("\u{00a9}" , [:copyright]),
# self.new("\u{00a9}" , [:copyright]),
self.new("\u{27b0}" , [:curly_loop]),
self.new("\u{1f4b1}", [:currency_exchange]),
self.new("\u{1f6c3}", [:customs]),
......@@ -143,7 +143,7 @@ module Rumoji
self.new("\u{1f518}", [:radio_button]),
self.new("\u{267b}" , [:recycle]),
self.new("\u{1f534}", [:red_circle]),
self.new("\u{00ae}" , [:registered]),
# self.new("\u{00ae}" , [:registered]),
self.new("\u{1f501}", [:repeat]),
self.new("\u{1f502}", [:repeat_one]),
self.new("\u{1f6bb}", [:restroom]),
......
......@@ -69,12 +69,5 @@ describe Rumoji::Emoji do
it "finds an emoji from a string" do
subject.find_by_string(smile_str).symbol.must_equal smile_sym
end
it "finds an emoji from a codepoint" do
smile_str.codepoints.map do |point|
subject.find_by_codepoint(point).symbol.must_equal smile_sym
end
end
end
end
......@@ -18,6 +18,44 @@ describe Rumoji do
Rumoji.encode(@smile).must_equal ":smile:"
Rumoji.encode("#{@smile}").must_equal ":smile:"
end
it "keeps codepoints that match the beginnings of multi-codepoint emoji" do
text = "i like #hashtags and 1direction they are the #1 band. end with 9"
Rumoji.encode(text).must_equal text
end
describe "with multiple codepoints" do
it "transforms a stream" do
Rumoji.encode("#{@zero}").must_equal ":zero:"
Rumoji.encode("#{@us}").must_equal ":us:"
end
it "transforms a stream of many emoji" do
num = ":one: :two: :three: :four: :five: :six: :seven: :eight: :nine: :zero: :hash:"
emoji = "1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 0️⃣ #️⃣"
Rumoji.encode(emoji).must_equal num
end
it "does not encode double digits" do
num = ":zero: :one: :two: :three: :four: :five: :six: :seven: :eight: :nine: :hash:"
double_digits = "00 11 22 33 44 55 66 77 88 99 ##"
Rumoji.encode(double_digits).wont_equal num
end
describe "with leading and trailing characters" do
it "is able to pull multipoint emoji out of a sequence" do
string = "An example of a multipoint emoji is the #{@us} flag."
Rumoji.encode(string).must_equal "An example of a multipoint emoji is the :us: flag."
end
end
describe "with trailing emoji" do
it "writes characters that are in a multipoint emoji followed by an emoji" do
string = "I would like 0#{@poop}"
Rumoji.encode(string).must_equal "I would like 0:poop:"
end
end
end
end
describe "#decode" do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment