Created
November 27, 2010 03:34
-
-
Save matthewd/717534 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Rubinius
  # Compiles sprintf-style format strings into bytecode.
  #
  # Each Sprinter instance gets a :call method attached to it (see
  # Builder#build) whose positional arguments are the values to format.
  class Sprinter
    class << self
      # Returns a Sprinter for +format+.
      #
      # Tainted format strings are always compiled afresh and are never
      # stored in the cache; any other format is compiled once and reused.
      def get(format)
        @cache ||= {}

        return new(format) if format.tainted?

        @cache[format] ||= new(format)
      end
    end

    # Compiles +format+ and attaches the generated method to this instance.
    def initialize(format)
      Builder.new(self, format).build
    end

    # Walks a format string and emits the bytecode that renders it.
    class Builder
      # code::   the object the compiled method is attached to
      # format:: the format string to compile
      def initialize(code, format)
        @code, @format = code, format

        # Flip to true while debugging to dump the generated bytecode from
        # #build.
        @verbose = false

        @g = ::Rubinius::Generator.new
        @g.name = :call
        @g.file = :sprintf
        @g.set_line 1
      end

      # Parses the format, records the argument layout on the generator,
      # packages the result as a CompiledMethod and attaches it to @code
      # under the generator's name.
      def build
        self.parse

        @g.required_args = @arg_count
        @g.total_args = @arg_count + 1

        # We won't use it, but we accept a splat; our semantics require
        # that we ignore any excess arguments provided.
        @g.splat_index = @arg_count
        @g.local_count = @arg_count + 1

        if @index_mode == :absolute
          @g.local_names = (0...@arg_count).map { |i| :"#{i + 1}$" } + [:splat]
        else
          @g.local_names = Array.new(@arg_count) + [:splat]
        end

        @g.ret
        @g.close
        @g.encode

        cm = @g.package(::Rubinius::CompiledMethod)

        # Careful with this: CM::Instruction#to_s uses String#%
        if @verbose
          puts
          puts @format.inspect
          puts cm.decode
          puts
        end

        ss = ::Rubinius::StaticScope.new Object
        ::Rubinius.attach_method @g.name, cm, ss, @code
      end

      # Emits the optimised "-" send, looking the literal up only once.
      def meta_op_minus
        @op_minus ||= @g.find_literal(:-)
        @g.meta_send_op_minus @op_minus
      end

      # Emits "0 - value" for the value on top of the stack.
      def invert
        @g.meta_push_0
        @g.swap
        meta_op_minus
      end

      # Emits "value < 0" for the value on top of the stack.
      def is_negative
        @g.meta_push_0
        @g.meta_send_op_lt @g.find_literal(:<)
      end

      # Emits a one-argument send of +direction+ (:ljust or :rjust).
      #
      # When the width may be negative and the direction is not already
      # :ljust, a runtime check is emitted first: a negative width is
      # negated and :ljust is sent instead.
      def justify(direction, may_be_negative=true)
        if may_be_negative && direction != :ljust
          width_done = @g.new_label

          @g.dup
          is_negative
          if_false do
            @g.send direction, 1
            @g.goto width_done
          end

          invert
          @g.send :ljust, 1

          width_done.set!
        else
          @g.send direction, 1
        end
      end

      # Numeric base for each (down-cased) integer conversion.
      RADIX = { 'd' => 10, 'i' => 10, 'u' => 10, 'x' => 16, 'o' => 8, 'b' => 2 }.freeze

      # Prefix prepended by the '#' flag for each integer conversion.
      PREFIX = { 'o' => '0', 'x' => '0x', 'X' => '0X', 'b' => '0B'.downcase, 'B' => '0B' }.freeze

      # Returns the zero-based local slot for the next argument.
      #
      # With +specified+ (the digits of an "N$" reference) the format is
      # switched to absolute mode; without it, to relative mode. Mixing the
      # two modes, or using index 0, raises ArgumentError.
      def next_index(specified=nil)
        if specified
          specified = specified.to_i
          raise ArgumentError, "invalid positional index" if specified == 0
          raise ArgumentError, "unnumbered mixed with numbered" if @index_mode == :relative
          @index_mode = :absolute
          @arg_count = specified if specified > @arg_count
          specified - 1
        else
          raise ArgumentError, "unnumbered mixed with numbered" if @index_mode == :absolute
          @index_mode = :relative
          (@arg_count += 1) - 1
        end
      end

      # Tokenizer for format strings. Capture groups, in order:
      # 1 plain text, 2 whole directive, 3 and 5 flags, 4 and 12 value
      # reference, 6-8 width (full, reference, static), 9-11 precision
      # (full, reference, static), 13 conversion character, 14 literal
      # percent sign, 15 stray percent sign (malformed).
      #
      # A static width may not start with 0 because a leading 0 is the
      # zero-pad flag; a static precision may (as in "%.0f").
      RE = /
        ([^%]+|%(?:[\n\0]|\z)) # 1
        |
        %
        ( # 2
          ([0# +-]*) # 3
          (?:([0-9]+)\$)? # 4
          ([0# +-]*) # 5
          (?:
            (\*(?:([0-9]+)\$)?|([1-9][0-9]*))? # 6 7 8
            (?:\.(\*(?:([0-9]+)\$)?|([0-9]+))?)? # 9 10 11
          )
          (?:([0-9]+)\$)? # 12
          ([BbcdEefGgiopsuXx]) # 13
        )
        |
        (%)(?:%|[-+0-9# *.$]+\$[0-9.]*\z) # 14
        |
        (%) # 15
      /x

      # Emits +str+ as a literal and appends it to the output string.
      def append_literal(str)
        @g.push_unique_literal str
        append_str false
      end

      # Appends the string on top of the stack to the output being built,
      # or, for the first piece, duplicates it to become the output (tainting
      # it when the format string itself was tainted).
      #
      # With +taint+ true, an extra value is expected on the stack and the
      # output is tainted at runtime if that value reports tainted?.
      # NOTE(review): the exact stack layout is inferred from the swap and
      # move_down sequence; confirm against the Generator before relying on it.
      def append_str(taint)
        if @has_content
          if taint
            @g.swap
            @g.move_down 2
          end
          @g.swap
          @g.string_append
        else
          @g.string_dup
          @has_content = true
          if @pre_tainted
            @g.send_vcall :taint
          end
        end

        if taint
          @g.swap
          @g.send_vcall :tainted?
          if_true do
            @g.send_vcall :taint
          end
        end
      end

      # One conversion directive: its flags, width, precision and value slot.
      class Atom
        # b::           the owning Builder
        # g::           the Builder's generator
        # format_code:: the conversion character
        # flags::       all flag characters of the directive, concatenated
        def initialize(b, g, format_code, flags)
          @b, @g = b, g
          @format_code, @flags = format_code, flags

          @f_alt = flags.index(?#)
          @f_zero = flags.index(?0)
          @f_plus = flags.index(?+)
          @f_ljust = flags.index(?-)
          @f_space = flags.index(?\s)

          @just_dir = @f_ljust ? :ljust : :rjust

          @prefix = PREFIX[@format_code] if @f_alt

          # Characters that will be put in front of the digits (radix prefix
          # and sign); zero padding has to leave room for them.
          @full_leader_size = @prefix ? @prefix.size : 0
          @full_leader_size += 1 if @f_plus || @f_space
        end

        # Emits code appending the string on top of the stack to a copy of
        # the radix prefix, if the '#' flag selected one.
        def prepend_prefix
          if @prefix
            @g.push_literal @prefix
            @g.string_dup
            @g.string_append
          end
        end

        # Allocates the argument slot holding the value to format.
        def set_value(ref)
          @field_index = @b.next_index(ref)
        end

        # Records the width: +static+ digits, or an argument slot when the
        # directive used "*" (+full+ present without +static+).
        def set_width(full, ref, static)
          @width_static = static && static.to_i
          if full && !static
            @width_index = @b.next_index(ref)
          end

          @has_width = @width_static || @width_index
        end

        # Records the precision, in the same way as #set_width. "%#g"
        # without an explicit precision gets a static precision of 4.
        def set_precision(full, ref, static)
          @prec_static = static && static.to_i
          if full && !static
            @prec_index = @b.next_index(ref)
          end

          if @format_code == 'g' && @f_alt && !full
            @prec_static = 4
          end

          @has_precision = @prec_static || @prec_index
        end

        # Emits a push of the argument being formatted.
        def push_value
          @g.push_local @field_index
        end

        # Emits a push of the width.
        #
        # With +adjust+ the size of the prefix and sign is subtracted. When a
        # block is given it is yielded to first and must leave a boolean on
        # the stack; if that boolean is true at runtime, one more is
        # subtracted from the width.
        #
        # Raises ArgumentError at compile time for a static width that is not
        # a Fixnum, and RuntimeError if no width was set.
        def push_width(adjust=true)
          yield if block_given?

          if @width_static
            raise ArgumentError, "width too big" unless @width_static.class == Fixnum

            if adjust && @full_leader_size > 0
              @g.push(@width_static - @full_leader_size)
            else
              @g.push @width_static
            end

            if block_given?
              @g.swap
              @b.if_true do
                @g.meta_push_1
                @b.meta_op_minus
              end
            end
          elsif @width_index
            @g.push_local @width_index

            @b.force_type :Fixnum, :Integer do
              # If we had to do a conversion, we check we ended up
              # with a Fixnum
              @g.dup
              @b.push_Fixnum
              @g.swap
              @g.kind_of
              @b.if_false do
                @b.raise_ArgumentError "width too big"
              end
            end

            n = adjust ? @full_leader_size : 0

            if block_given?
              adjusted = @g.new_label

              @g.swap
              @b.if_true do
                @g.push(n + 1)
                @b.meta_op_minus
                if n > 0
                  @g.goto adjusted
                end
              end

              if n > 0
                @g.push n
                @b.meta_op_minus
                adjusted.set!
              end
            elsif n > 0
              @g.push n
              @b.meta_op_minus
            end
          else
            raise "push without a width"
          end
        end

        # Emits a push of the precision. The optional block follows the same
        # protocol as in #push_width.
        #
        # Raises ArgumentError at compile time for a static precision that is
        # not a Fixnum, and RuntimeError if no precision was set.
        def push_precision
          yield if block_given?

          if @prec_static
            raise ArgumentError, "precision too big" unless @prec_static.class == Fixnum
            @g.push @prec_static
          elsif @prec_index
            @g.push_local @prec_index

            @b.force_type :Fixnum, :Integer do
              # If we had to do a conversion, we check we ended up
              # with a Fixnum
              @g.dup
              @b.push_Fixnum
              @g.swap
              @g.kind_of
              @b.if_false do
                @b.raise_ArgumentError "precision too big"
              end
            end
          else
            raise "push without a precision"
          end

          if block_given?
            @g.swap
            @b.if_true do
              @g.meta_push_1
              @b.meta_op_minus
            end
          end
        end

        # Emits code leaving the format string for a float conversion on the
        # stack. It is a single literal when width and precision are both
        # static; otherwise it is assembled at runtime from its parts.
        def push_format_string
          float_format_code = @format_code
          float_format_code = 'f' if @format_code == 'g' && @f_alt

          leader = "%#{@flags}"

          if !@width_index && !@prec_index
            leader << @width_static.to_s if @width_static
            leader << ".#{@prec_static}" if @prec_static
            @g.push_literal "#{leader}#{float_format_code}"
          else
            # Pieces are pushed last-to-first; the leader goes on last and
            # the others are appended to it.
            format_parts = 1

            if @prec_static
              @g.push_literal ".#{@prec_static}#{float_format_code}"
            else
              @g.push_literal(float_format_code)

              if @prec_index
                push_precision
                @g.send_vcall :to_s
                format_parts += 1

                if @width_index
                  @g.push_literal "."
                  format_parts += 1
                end
              end
            end

            if @width_static
              leader << @width_static.to_s
            elsif @width_index
              push_width
              @g.send_vcall :to_s
              format_parts += 1
            end

            leader << "." if @prec_index && !@width_index

            @g.push_literal leader
            @g.string_dup

            format_parts.times do
              @g.string_append
            end
          end
        end

        # The sign character shown for non-negative numbers: '+', ' ' or ''.
        def positive_sign
          if @f_plus
            '+'
          elsif @f_space
            ' '
          else
            ''
          end
        end

        # Emits justification to the requested width, if there is one. Only a
        # width taken from an argument gets the runtime negative-width check.
        def justify_width(adjust=true)
          if @has_width
            push_width adjust
            @b.justify @just_dir, @width_static.nil?
          end
        end

        # Whether #zero_pad would emit any padding.
        def zero_pad?
          @has_precision || (@has_width && @f_zero)
        end

        # Emits an rjust with +pad+ to the precision or, failing that, to the
        # width when the '0' flag is set. +readjust+ is handed on to
        # #push_precision or #push_width.
        def zero_pad(pad="0", &readjust)
          if @has_precision
            push_precision(&readjust)
            @g.push_literal pad
            @g.send :rjust, 2
          elsif @has_width && @f_zero
            push_width(true, &readjust)
            @g.push_literal pad
            @g.send :rjust, 2
          end
        end

        # Truthy when a width was given.
        def width?
          @has_width
        end

        # Truthy when a precision was given (a static precision of 0 counts).
        def precision?
          @has_precision
        end
      end

      # Emits a cached constant lookup of Kernel.
      def push_Kernel
        @lit_Kernel ||= @g.add_literal(:Kernel)
        @slot_Kernel ||= @g.add_literal(nil)
        @g.push_const_fast @lit_Kernel, @slot_Kernel
      end

      # Emits a cached constant lookup of Fixnum.
      def push_Fixnum
        @lit_Fixnum ||= @g.add_literal(:Fixnum)
        @slot_Fixnum ||= @g.add_literal(nil)
        @g.push_const_fast @lit_Fixnum, @slot_Fixnum
      end

      # Emits code that raises ArgumentError with +msg+ at runtime.
      def raise_ArgumentError(msg)
        @lit_ArgumentError ||= @g.add_literal(:ArgumentError)
        @slot_ArgumentError ||= @g.add_literal(nil)
        @lit_new ||= @g.add_literal(:new)

        @g.push_const_fast @lit_ArgumentError, @slot_ArgumentError
        @g.push_unique_literal msg
        @g.send_stack @lit_new, 1
        @g.raise_exc
      end

      # Emits a kind_of check of the top of the stack against +klass+; when
      # it fails, the value is replaced with Kernel.<method>(value). The
      # block, if any, emits further code on that conversion path only.
      def force_type(klass, method=klass)
        @g.dup
        @g.push_const klass
        @g.swap
        @g.kind_of
        if_false do
          push_Kernel
          @g.swap
          @g.send method, 1
          yield if block_given?
        end
      end

      # Emits the block's code so that it is skipped when the value on top
      # of the stack is false.
      def if_true
        l = @g.new_label
        @g.gif l
        yield
        l.set!
      end

      # Emits the block's code so that it is skipped when the value on top
      # of the stack is true.
      def if_false
        l = @g.new_label
        @g.git l
        yield
        l.set!
      end

      # Tokenizes @format with RE and emits the code for each piece, then
      # (unless absolute argument references were used) a runtime check that
      # raises ArgumentError when $DEBUG is set and excess arguments were
      # passed.
      #
      # Raises ArgumentError for malformed format strings.
      def parse
        @arg_count = 0
        @index_mode = nil

        @pre_tainted = @format.tainted?

        pos = 0
        while match = RE.match_start(@format, pos)
          pos = match.end(0)

          _,
          plain_string,
          whole_format,
          flags_a,
          field_ref_a,
          flags_b,
          width_full, width_ref, width_static,
          prec_full, prec_ref, prec_static,
          field_ref_b,
          format_code,
          literal_char,
          invalid_format = *match

          if plain_string
            append_literal plain_string
          elsif literal_char
            append_literal literal_char
          elsif invalid_format || (field_ref_a && field_ref_b)
            raise ArgumentError, "malformed format string"
          else
            field_ref = field_ref_a || field_ref_b
            flags = "#{flags_a}#{flags_b}"

            # Slots are allocated in this order: width, precision, value.
            atom = Atom.new(self, @g, format_code, flags)
            atom.set_width width_full, width_ref, width_static
            atom.set_precision prec_full, prec_ref, prec_static
            atom.set_value field_ref

            case format_code
            when 's', 'p', 'c'
              emit_string_like atom, format_code
            when 'e', 'E', 'f', 'g', 'G'
              emit_float atom
            when 'd', 'i', 'u', 'B', 'b', 'o', 'X', 'x'
              emit_integer atom, format_code, flags
            else
              raise ArgumentError, "bad format character: #{format_code}"
            end
          end
        end

        unless @has_content
          append_literal ''
        end

        if @index_mode != :absolute
          no_exception = @g.new_label

          # If we've used relative arguments, and $DEBUG is true, we
          # throw an exception if passed more arguments than we need.

          # Check this first; it's much faster, and generally false
          @g.passed_arg @arg_count
          @g.gif no_exception

          Rubinius::AST::GlobalVariableAccess.new(0, :$DEBUG).bytecode(@g)
          @g.gif no_exception

          raise_ArgumentError "too many arguments for format string"

          no_exception.set!
        end
      end

      private

      # Emits the code for the 's', 'p' and 'c' conversions.
      def emit_string_like(atom, format_code)
        atom.push_value

        case format_code
        when 's'
          unless @pre_tainted
            @g.dup
          end
          force_type :String
        when 'c'
          unless @pre_tainted
            @g.dup
          end
          force_type :Fixnum, :Integer

          chr_range_ok = @g.new_label

          # Values outside 0..255 are reduced modulo 256 before chr.
          @g.dup
          @g.push 256
          @g.meta_send_op_lt @g.find_literal(:<)
          if_true do
            @g.dup
            @g.meta_push_neg_1
            @g.meta_send_op_gt @g.find_literal(:>)
            @g.git chr_range_ok
          end

          @g.push 256
          @g.send :%, 1

          chr_range_ok.set!
          @g.send_vcall :chr
        when 'p'
          @g.send_vcall :inspect
          unless @pre_tainted
            @g.dup
          end
        end

        atom.justify_width

        if atom.precision?
          @g.meta_push_0
          atom.push_precision
          @g.send :[], 2
        end

        append_str !@pre_tainted
      end

      # Emits the code for the 'e', 'E', 'f', 'g' and 'G' conversions.
      # Finite values go through to_s_formatted; infinities and NaN are
      # rendered as literals and justified to the width.
      def emit_float(atom)
        atom.push_value
        force_type :Float

        format_done = @g.new_label

        @g.dup
        @g.send_vcall :finite?
        if_true do
          atom.push_format_string
          @g.send :to_s_formatted, 1, true
          @g.goto format_done
        end

        formatted_non_finite = @g.new_label

        @g.dup
        @g.send_vcall :nan?
        if_false do
          is_negative
          if_false do
            @g.push_literal "#{atom.positive_sign}Inf"
            @g.goto formatted_non_finite
          end
          @g.push_literal '-Inf'
          @g.goto formatted_non_finite
        end

        @g.pop
        @g.push_literal 'NaN'

        formatted_non_finite.set!
        atom.justify_width false

        format_done.set!

        append_str false
      end

      # Emits to_s on the top of the stack, passing +radix+ unless it is 10.
      def emit_to_s(radix)
        if radix == 10
          @g.send_vcall :to_s
        else
          @g.push radix
          @g.send :to_s, 1
        end
      end

      # Emits the code for the 'd', 'i', 'u', 'B', 'b', 'o', 'X' and 'x'
      # conversions.
      def emit_integer(atom, format_code, flags)
        zero = flags.index(?0)
        plus = flags.index(?+)
        space = flags.index(?\s)

        radix = RADIX[format_code.downcase]

        atom.push_value

        # Bignum is obviously also perfectly acceptable. But we
        # just address the most common case by avoiding the call
        # if we've been given a Fixnum. The call is enough
        # overhead to bother, but not something to panic about.
        force_type :Fixnum, :Integer

        if plus || space || (radix == 10 && format_code != 'u')
          @g.dup

          # stash away whether it's negative
          is_negative
          @g.dup
          @g.move_down 2

          if_true do
            # but treat it as positive for now
            invert
          end

          emit_to_s radix
        else
          have_formatted = @g.new_label

          @g.dup
          is_negative
          if_false do
            emit_to_s radix
            @g.goto have_formatted
          end

          if format_code == 'u'
            # Now we need to find how many bits we need to
            # represent the number, starting with a native int,
            # then incrementing by 32 each round.

            more_bits_loop = @g.new_label
            got_enough_bits = @g.new_label

            # Push a positive version of the number ($N)
            @g.dup
            invert

            # Push the baseline ($B), starting from a native int:
            # 2**32 or 2**64, as appropriate
            @g.meta_push_1
            l_native = @g.find_literal(2.size * 8)
            @g.push_literal_at l_native
            @g.send :<<, 1

            # Switch to $N
            @g.swap
            # For the first time, because it's what we've used
            # above, we'll shift it by our native int size
            @g.push_literal_at l_native

            more_bits_loop.set!
            # Throw out the bits from $N that $B can offset
            @g.send :>>, 1

            # Check whether $N == 0
            @g.dup
            @g.meta_push_0
            @g.meta_send_op_equal @g.find_literal(:==)
            @g.git got_enough_bits

            # Switch to $B
            @g.swap
            l_32 = @g.find_literal(32)
            @g.push_literal_at l_32
            # Add 32 bits
            @g.send :<<, 1

            # Switch to $N
            @g.swap
            # We'll throw out 32 bits this time
            @g.push_literal_at l_32
            @g.goto more_bits_loop

            got_enough_bits.set!
            # Pop the spare copy of $N, which is 0
            @g.pop

            # Now we're left with $B; we can now use it, by adding
            # it to the (negative) number still on the stack from
            # earlier.

            # $B is a Bignum; no point using meta_send_op_plus.
            @g.send :+, 1
            @g.send_vcall :to_s

            padding = "."
          else
            # (num + radix ** num.to_s(radix).size).to_s(radix)
            @g.push radix
            @g.dup_many 2
            @g.send :to_s, 1
            @g.send_vcall :size
            @g.send :**, 1
            @g.meta_send_op_plus @g.find_literal(:+)
            @g.push radix
            @g.send :to_s, 1

            padding = (radix - 1).to_s(radix)
          end

          if atom.zero_pad?
            atom.zero_pad padding
          elsif !atom.precision? && !zero
            @g.push_literal ".."
            @g.string_dup
            @g.string_append
          end

          have_formatted.set!
        end

        # 'B' also returns an uppercase string, but there, the
        # only alpha character is in the prefix -- and that's
        # already uppercase
        if format_code == 'X'
          @g.send_vcall :upcase
        end

        if !(plus || space) && (radix == 10 && format_code != 'u')
          atom.zero_pad do
            # If it decides to do any padding, zero_pad will yield
            # before it modifies the stack, and we must ensure the
            # top of the stack is a boolean indicating whether to
            # subtract one from the requested width (for a minus
            # sign to be prepended below), followed by the string-
            # in-progress.

            @g.swap
            @g.dup
            @g.move_down 2
          end
        else
          atom.zero_pad
        end

        atom.prepend_prefix

        if plus || space
          append_sign = @g.new_label

          @g.swap
          if_true do
            @g.push_literal '-'
            @g.goto append_sign
          end

          @g.push_literal atom.positive_sign

          append_sign.set!
          @g.string_dup
          @g.string_append
        elsif radix == 10 && format_code != 'u'
          @g.swap
          if_true do
            @g.push_literal '-'
            @g.string_dup
            @g.string_append
          end
        end

        if atom.precision? || !zero
          atom.justify_width false
        end

        append_str false
      end
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment