Skip to content

Instantly share code, notes, and snippets.

@kronos
Created February 2, 2010 23:26
Show Gist options
  • Save kronos/293163 to your computer and use it in GitHub Desktop.
Save kronos/293163 to your computer and use it in GitHub Desktop.
From 70bbac4dbbb7eca14b73557d9ea32fb8f6df58e5 Mon Sep 17 00:00:00 2001
From: Ivan Samsonov <hronya@gmail.com>
Date: Wed, 3 Feb 2010 02:23:51 +0300
Subject: [PATCH] Speed up Array#pack
Implement 'w' directive for String#unpack
Speed up String#rpartition
---
kernel/common/pack.rb | 425 ++++++++++++++--------------
kernel/common/string.rb | 39 ++-
spec/tags/ruby/core/string/unpack_tags.txt | 2 -
3 files changed, 240 insertions(+), 226 deletions(-)
delete mode 100644 spec/tags/ruby/core/string/unpack_tags.txt
diff --git a/kernel/common/pack.rb b/kernel/common/pack.rb
index e87fd09..1102856 100644
--- a/kernel/common/pack.rb
+++ b/kernel/common/pack.rb
@@ -7,7 +7,7 @@ class Array::Packer
BASE_64_B2A[63] = '/'
POINTER_SIZE = Rubinius::L64 ? 8 : 4
- def initialize(array,schema)
+ def initialize(array, schema)
@source = array
@schema = schema
@index = 0
@@ -19,78 +19,106 @@ class Array::Packer
@ptr ||= FFI::MemoryPointer.new(16) # enough for all uses
end
- def parse(schema)
- # The schema is an array of arrays like [["A", "6"], ["u", "*"],
- # ["X", ""]]. It represents the parsed form of "A6u*X". Remove
- # strings in the schema between # and \n
- schema = schema.gsub(/#.*/, '')
- schema.scan(/([^\s\d!_\*])([\d!_\*]*)/)
- end
-
def dispatch
- parse(@schema).each do |kind, t|
- t = nil if t.empty?
+ schema = @schema.gsub(/#.*/, '')
+ i = 0
+ length = schema.length
+ native_size = false
- case kind # TODO: switch kind to ints
- when 'X'
- size = (t || 1).to_i
- raise ArgumentError, "you're backing up too far" if size > @result.size
- @result.shorten!(size) if size > 0
- when 'x'
- size = (t || 1).to_i
- @result << "\000" * size
- when 'a', 'A', 'Z'
- ascii_string(kind, t)
- when 'b', 'B'
- bit_string(kind, t)
- when 'c', 'C'
- character(kind, t)
- when 'M'
+ while i < length do
+ kind = schema[i]
+ i += 1
+
+ next if kind.isspace
+
+ if schema[i] == ?_ or schema[i] == ?!
+ if kind == ?s or kind == ?S or kind == ?i or kind == ?I or kind == ?l or kind == ?L
+ i += 1
+ native_size = true
+ else
+ raise ArgumentError, "#{schema[i]} allowed only after types sSiIlL"
+ end
+ end
+
+ if i == length
+ len = 1
+ elsif schema[i].isdigit
+ len = 0
+ begin
+ len = len * 10 + schema[i] - ?0
+ i += 1
+ end while i < length and schema[i].isdigit
+ elsif schema[i] == ?*
+ len = case kind
+ when ?@, ?X, ?x, ?u
+ 0
+ when ?P, ?M, ?m
+ 1
+ when ?u, ?m, ?H, ?h, ?B, ?b, ?A, ?a, ?Z
+ nil # size depends on item
+ else
+ @source.size - @index
+ end
+ i += 1
+ else
+ len = 1
+ end
+ case kind
+ when ?X
+ raise ArgumentError, "you're backing up too far" if len > @result.size
+ @result.shorten!(len) if len > 0
+ when ?x
+ @result << "\000" * len
+ when ?a, ?A, ?Z
+ ascii_string(kind, len)
+ when ?b, ?B
+ bit_string(kind, len)
+ when ?c, ?C
+ character(kind, len)
+ when ?M
# for some reason MRI responds to to_s here
item = Type.coerce_to(fetch_item(), String, :to_s)
- line_length = 72
- if t && t =~ /^\d/ && t.to_i >= 3
- line_length = t.to_i
+ if len >= 3
+ line_length = len
+ else
+ line_length = 72
end
line_length += 1 # bug compatibility with MRI
@result << item.scan(/.{1,#{line_length}}/m).map { |line|
line.gsub(/[^ -<>-~\t\n]/) { |m| "=%02X" % m[0] } + "=\n"
}.join
- when 'm'
- @result << encode(kind, t, :base64).join.sub(/(A{1,2})\n\Z/) { "#{'=' * $1.size}\n" }
- when 'w'
- ber_compress(kind, t)
- when 'u'
- @result << encode(kind, t, :uuencode).join.gsub(/ /, '`')
- when 'd', 'D', 'e', 'E', 'f', 'F', 'g', 'G'
- decimal(kind, t)
- when 'i', 'I', 's', 'S', 'l', 'L', 'v', 'V'
- integer(kind, t)
- when 'N'
- net_long(t)
- when 'n'
- net_short(t)
- when 'p', 'P'
- pointer(kind, t)
- when 'Q', 'q'
- integer64(kind, t)
- when 'H', 'h'
- hex_string(kind, t)
- when 'U'
- utf_string(kind, t)
- when '@'
- pos = if(t.nil?)
- raise ArgumentError
- else
- t.to_i
- end
- if(pos < @result.size)
- @result = @result[0...pos]
- elsif(pos > @result.size)
+ when ?m
+ @result << encode(kind, len, :base64).join.sub(/(A{1,2})\n\Z/) { "#{'=' * $1.size}\n" }
+ when ?w
+ ber_compress(kind, len)
+ when ?u
+ @result << encode(kind, len, :uuencode).join.gsub(/ /, '`')
+ when ?d, ?D, ?e, ?E, ?f, ?F, ?g, ?G
+ decimal(kind, len)
+ when ?i, ?I, ?s, ?S, ?l, ?L, ?v, ?V
+ integer(kind, len, native_size)
+ native_size = false
+ when ?N
+ net_long(len)
+ when ?n
+ net_short(len)
+ when ?p, ?P
+ pointer(kind, len)
+ when ?Q, ?q
+ integer64(kind, len)
+ when ?H, ?h
+ hex_string(kind, len)
+ when ?U
+ utf_string(kind, len)
+ when ?@
+ pos = len
+ if pos < @result.size
+ @result = @result.substring(0, pos)
+ elsif pos > @result.size
@result << "\x00" * (pos - @result.size)
end
- when '%'
+ when ?%
raise ArgumentError, "#{kind} not implemented"
end
end
@@ -110,63 +138,49 @@ class Array::Packer
return item
end
- def parse_tail(t, kind, remaining = @source.size - @index)
- if(t != nil && (t.include?('_') || t.include?('!')))
- unless 'sSiIlL'.include?(kind)
- raise ArgumentError, "#{t} allowed only after types sSiIlL"
- end
- t = t.delete("_!")
- end
-
- case t
- when nil
- 1
- when '*'
- remaining
- else
- m = t.match(/(\d+)/)
- m ? m[0].to_i : 1
- end
- end
-
# A, a, Z
- def ascii_string(kind, t)
+ def ascii_string(kind, size)
item = fetch_item()
# MRI nil compatibilty for string functions
- item = "" if item.nil?
+ item = "" unless item
- item = Type.coerce_to(item, String, :to_str)
- size = parse_tail(t, kind, item.size + (kind == "Z" ? 1 : 0))
+ item = StringValue(item)
+ size = item.size + (kind == ?Z ? 1 : 0) unless size
padsize = size - item.size
- filler = kind == "A" ? " " : "\0"
+ filler = kind == ?A ? " " : "\0"
- @result << item.split(//).first(size).join
+ @result << item.substring(0, size)
@result << filler * padsize if padsize > 0
end
# B, b
- def bit_string(kind, t)
+ def bit_string(kind, size)
item = fetch_item()
# MRI nil compatibilty for string functions
- item = "" if item.nil?
+ item = "" unless item
- item = Type.coerce_to(item, String, :to_str)
+ item = StringValue(item)
byte = 0
- lsb = (kind == "b")
- size = parse_tail(t, kind, item.size)
+ lsb = (kind == ?b)
+ size = item.size unless size
- bits = item.split(//).map { |c| c[0] & 01 }
- min = [size, item.size].min
+ min = size < item.size ? size : item.size
- bits.first(min).each_with_index do |bit, i| # TODO: this can be cleaner
- i &= 07
+ if min > 0
+ i = 0
+ item.each_byte do |bit|
+ bit &= 01
+ j = i & 07
+ byte |= bit << (lsb ? j : 07 - j)
- byte |= bit << (lsb ? i : 07 - i)
+ if j == 07
+ @result << byte.chr
+ byte = 0
+ end
- if i == 07
- @result << byte.chr
- byte = 0
+ i += 1
+ break if i >= min
end
end
@@ -180,48 +194,48 @@ class Array::Packer
end
# C, c
- def character(kind, t)
- parse_tail(t, kind).times do
+ def character(kind, size)
+ size.times do
@result << (Type.coerce_to(fetch_item(), Integer, :to_int) & 0xff).chr
end
end
# P, p
- def pointer(kind, t)
- count = parse_tail(t, kind, kind == 'p' ? @source.size - @index : 1)
- raise ArgumentError, "too few array elements" if
- @index + count > @source.length && kind == 'p'
-
- count.times do
+ def pointer(kind, size)
+ size.times do
item = fetch_item()
- if(item == nil)
- @result << "\x00"*POINTER_SIZE
+ unless item
+ @result << "\x00" * POINTER_SIZE
else
- item = StringValue item
+ item = StringValue(item)
raise ArgumentError, "not implemented"
end
end
end
# H, h
- def hex_string(kind, t)
- item = Type.coerce_to(fetch_item(), String, :to_str)
+ def hex_string(kind, size)
+ item = StringValue(fetch_item())
# MRI nil compatibilty for string functions
- item = "" if item.nil?
-
- size = parse_tail(t, kind, item.length)
- str = item[0, size].scan(/..?/)
+ item = "" unless item
- numbers = if kind == "h"
- str.map { |b| b.reverse.hex }
- else
- str.map { |b| b. hex }
- end
+ size = item.length unless size
- if kind == 'H' && !numbers.empty? && numbers.last < 16
- numbers[-1] *= 16
+ str = item.substring(0, size)
+ tail = nil
+ if kind == ?h
+ 0.step(str.length-2, 2) do |i|
+ @result << "#{item[i+1].chr}#{item[i].chr}".hex.chr
+ end
+ else
+ 0.step(str.length-2, 2) do |i|
+ @result << item.substring(i, 2).hex.chr
+ end
end
+ tail = item.substring(str.length-1, 1).hex.chr if str.length % 2 == 1
+
+ @result << (kind == ?H ? (tail[0]*16).chr : tail) if tail
diff = size - item.length
@@ -234,57 +248,38 @@ class Array::Packer
left = diff / 2
end
- left.times do
- numbers << 0
- end
+ @result << "\000" * left
end
-
- @result << numbers.map { |n| n.chr }.join
end
- def decimal(kind, t)
- size = parse_tail(t, kind)
-
+ def decimal(kind, size)
want_double = case kind
- when 'E', 'D', 'd', 'G' then true
- when 'e', 'F', 'f', 'g' then false
+ when ?E, ?D, ?d, ?G then true
+ when ?e, ?F, ?f, ?g then false
end
little_endian = case kind
- when 'e', 'E' then true
- when 'g', 'G' then false
+ when ?e, ?E then true
+ when ?g, ?G then false
else endian?(:little)
end
size.times do
- item = fetch_item()
- item = FloatValue item
+ item = FloatValue(fetch_item())
bytes = item.to_packed(want_double)
bytes.reverse! if little_endian ^ endian?(:little)
@result << bytes
end
end
- QUAD_MAX = 2 ** 64
- MAX = 2 ** Rubinius::WORDSIZE
+ QUAD_MAX = 1 << 64
+ MAX = 1 << Rubinius::WORDSIZE
# Q, q
- def integer64(kind, t)
- size = parse_tail(t, kind)
-
- bytes = 8
-
- little_endian = true
-
- if @index + size > @source.length
- raise ArgumentError, "too few array elements"
- end
-
+ def integer64(kind, size)
size.times do |i|
val = Type.coerce_to(fetch_item, Integer, :to_int)
- max_wordsize = 64
-
if val.abs >= QUAD_MAX
raise RangeError, "bignum too big to convert into 'unsigned long'"
end
@@ -295,14 +290,8 @@ class Array::Packer
end
# N
- def net_long(t)
- size = parse_tail(t, 'N')
-
- if @index + size > @source.length
- raise ArgumentError, "too few array elements"
- end
-
- size.times do |i|
+ def net_long(size)
+ size.times do
val = Type.coerce_to(fetch_item, Integer, :to_int)
# These ranges checks are so stupid. They don't even check the actual
@@ -318,14 +307,8 @@ class Array::Packer
end
# n
- def net_short(t)
- size = parse_tail(t, 'n')
-
- if @index + size > @source.length
- raise ArgumentError, "too few array elements"
- end
-
- size.times do |i|
+ def net_short(size)
+ size.times do
val = Type.coerce_to(fetch_item, Integer, :to_int)
# These ranges checks are so stupid. They don't even check the actual
@@ -341,58 +324,50 @@ class Array::Packer
end
# i, s, l, I, S, L, V, v
- def integer(kind, t)
- size = parse_tail(t, kind)
-
- if(t && (t.include?('_') || t.include?('!')))
+ def integer(kind, size, native_size)
+ if native_size
# Native sizes
bytes = case kind
- when 'L', 'l' then Rubinius::SIZEOF_LONG
- when 'I', 'i' then Rubinius::SIZEOF_INT
- when 'S', 's' then Rubinius::SIZEOF_SHORT
+ when ?L, ?l then Rubinius::SIZEOF_LONG
+ when ?I, ?i then Rubinius::SIZEOF_INT
+ when ?S, ?s then Rubinius::SIZEOF_SHORT
end
else
bytes = case kind
- when 'L', 'l' then 4
- when 'I', 'i' then 4
- when 'S', 's' then 2
- when 'V' then 4
- when 'v' then 2
+ when ?L, ?l, ?I, ?i, ?V then 4
+ when ?S, ?s, ?v then 2
end
end
- unsigned = (kind =~ /I|S|L/)
+ unsigned = kind == ?I or kind == ?S or kind == ?L
little_endian = case kind
- when 'V', 'v' then true
+ when ?V, ?v then true
else endian?(:little)
end
- raise ArgumentError, "too few array elements" if
- @index + size > @source.length
-
- size.times do |i|
+ size.times do
item = Type.coerce_to(fetch_item(), Integer, :to_int)
if item.abs >= MAX
raise RangeError, "bignum too big to convert into 'unsigned long'"
end
- @result << if little_endian
- item += 2 ** (8 * bytes) if item < 0
- (0...bytes).map { |b| ((item >> (b * 8)) & 0xFF).chr }
- else # ugly
- (0...bytes).map {n=(item & 0xFF).chr;item >>= 8; n}.reverse
- end.join
+ if little_endian
+ item += 1 << (8 * bytes) if item < 0
+ bytes.times { |b| @result << ((item >> (b * 8)) & 0xFF).chr }
+ else # ugly
+ @result << (0...bytes).map {n=(item & 0xFF).chr;item >>= 8; n}.reverse.join
+ end
end
end
# w
- def ber_compress(kind, t)
- parse_tail(t, kind).times do
- chars = ''
+ def ber_compress(kind, size)
+ size.times do
item = Type.coerce_to(fetch_item(), Integer, :to_int)
raise ArgumentError, "can't compress negative numbers" if item < 0
+ chars = ''
chars << (item & 0x7f)
while (item >>= 7) > 0 do
chars << ((item & 0x7f) | 0x80)
@@ -402,12 +377,12 @@ class Array::Packer
end
# u, m
- def encode(kind, t, type = :base64)
- item = Type.coerce_to(fetch_item(), String, :to_str)
+ def encode(kind, size, type = :base64)
+ item = StringValue(fetch_item())
line_length = 45
- if t && t =~ /^\d/ && t.to_i >= 3
- line_length = t.to_i
+ if size >= 3
+ line_length = size
line_length -= line_length % 3
end
@@ -431,7 +406,7 @@ class Array::Packer
l = "#{encoded.join}\n"
if(type == :uuencode)
- (line.size + ?\s).chr+l
+ (line.size + ?\s).chr + l
else
l
end
@@ -439,38 +414,36 @@ class Array::Packer
end
# U
- def utf_string(kind, t)
- parse_tail(t, kind).times do
+ def utf_string(kind, size)
+ size.times do
item = Type.coerce_to(fetch_item(), Integer, :to_i)
raise RangeError, "pack(U): value out of range" if item < 0
bytes = 0
- f = [2 ** 7, 2 ** 11, 2 ** 16, 2 ** 21, 2 ** 26, 2 ** 31].find { |n|
+
+ f = [1 << 7, 1 << 11, 1 << 16, 1 << 21, 1 << 26, 1 << 31].find do |n|
bytes += 1
item < n
- }
+ end
- raise RangeError, "pack(U): value out of range" if f.nil?
+ raise RangeError, "pack(U): value out of range" unless f
if bytes == 1
@result << item
- bytes = 0
- end
-
- i = bytes
+ else
+ i = bytes
+ buf = []
- buf = []
- if i > 0
(i-1).times do
- buf.unshift((item | 0x80) & 0xBF)
+ buf.unshift(((item | 0x80) & 0xBF).chr)
item >>= 6
end
# catch the highest bits - the mask depends on the byte count
- buf.unshift(item | ((0x3F00 >> bytes)) & 0xFC)
- end
+ buf.unshift((item | ((0x3F00 >> bytes)) & 0xFC).chr)
- @result << buf.map { |n| n.chr }.join
+ @result << buf.join
+ end
end
end
end
@@ -632,6 +605,26 @@ class String::Unpacker
return i
end
+ def ber_decompress(i, count, elements)
+ count = @length - i if count == '*'
+
+ count.times do
+ n = 0
+ while i < @length do
+ item = @source[i]
+ i += 1
+ n <<= 7
+ n |= item & 0x7f
+ if item & 0x80 == 0
+ elements << n
+ break
+ end
+ end
+ end
+
+ return i
+ end
+
# Like int, but always does 4 bytes.
def long(i, count, elements, signed)
# TODO honor _.
@@ -922,6 +915,8 @@ class String::Unpacker
i = base64(i, count, elements)
when 'U'
i = utf8(i, count, elements)
+ when 'w'
+ i = ber_decompress(i, count, elements)
when 'X'
count = length - i if count == '*'
diff --git a/kernel/common/string.rb b/kernel/common/string.rb
index 89205b6..c811f0e 100644
--- a/kernel/common/string.rb
+++ b/kernel/common/string.rb
@@ -1280,17 +1280,38 @@ class String
return [self, "", ""]
end
+ # call-seq:
+ #
+ # str.rpartition(sep) => [head, sep, tail]
+ # str.rpartition(regexp) => [head, match, tail]
+ #
+ # Searches _sep_ or pattern (_regexp_) in the string from the end
+ # of the string, and returns the part before it, the match, and the part
+ # after it.
+ # If it is not found, returns two empty strings and _str_.
+ #
+ # "hello".rpartition("l") #=> ["hel", "l", "o"]
+ # "hello".rpartition("x") #=> ["", "", "hello"]
+ # "hello".rpartition(/.l/) #=> ["he", "ll", "o"]
+ #
def rpartition(pattern)
- pattern = Type.coerce_to(pattern, String, :to_str) unless pattern.is_a? Regexp
- i = rindex(pattern)
- return ["", "", self] unless i
-
- if pattern.is_a? Regexp
- match = Regexp.last_match
- [match.pre_match, match[0], match.post_match]
+ if pattern.kind_of? Regexp
+ if m = pattern.search_region(self, 0, size, false)
+ [m.pre_match, m[0], m.post_match]
+ end
else
- last = i+pattern.length
- [self[0...i], self[i...last], self[last...length]]
+ pattern = StringValue(pattern)
+ if i = rindex(pattern)
+ post_start = i + pattern.length
+ post_len = size - post_start
+
+ return [self.substring(0, i),
+ pattern.dup,
+ self.substring(post_start, post_len)]
+ end
+
+ # Nothing worked out, this is the default.
+ return ["", "", self]
end
end
diff --git a/spec/tags/ruby/core/string/unpack_tags.txt b/spec/tags/ruby/core/string/unpack_tags.txt
deleted file mode 100644
index 37de522..0000000
--- a/spec/tags/ruby/core/string/unpack_tags.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-fails:String#unpack with 'w' directive produces a BER-compressed integer
-fails:String#unpack with 'w' directive produces a BER-compressed integer
--
1.6.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment