Skip to content

Instantly share code, notes, and snippets.

@mudge
Last active December 16, 2022 12:10
Show Gist options
  • Save mudge/1f958ec848b6c31e287baa7a60a88064 to your computer and use it in GitHub Desktop.
Save mudge/1f958ec848b6c31e287baa7a60a88064 to your computer and use it in GitHub Desktop.
A Ruby regular expression to parse data URIs based on RFC 2397.
require 'base64'
# Parses a +data:+ URI as described by RFC 2397 and exposes its media type
# and payload.
#
# @example
#   uri = DataUri.new('data:text/plain;charset=UTF-8;base64,aGVsbG8=')
#   uri.mediatype    # => "text/plain;charset=UTF-8"
#   uri.base64?      # => true
#   uri.decoded_data # => "hello"
class DataUri
  # Pattern for a complete data URI:
  #
  #   "data:" [ mediatype ] [ ";base64" ] "," data
  #
  # The whole input must match (+\A+ / +\z+). The media type pieces are built
  # from character classes that exclude the ";" and "," delimiters, so they
  # can never swallow part of the payload. The +m+ flag lets the payload
  # contain line breaks.
  REGEXP = %r{
    \A
    data:
    (?<mediatype>
      (?<mimetype> [^;,/]+ / [^;,]+ )?
      (?<parameters> (?: ; [^;,=]+ = [^;,]+ )* )
    )?
    (?<extension>;base64)?
    ,
    (?<data>.*)
    \z
  }mx.freeze

  # @return [String] the URI exactly as it was given to {#initialize}
  # @return [MatchData] the result of matching {REGEXP} against the URI
  attr_reader :uri, :match

  # @param uri [String] the data URI to parse
  # @raise [ArgumentError] if +uri+ does not match {REGEXP}
  def initialize(uri)
    match = REGEXP.match(uri)
    raise ArgumentError, 'invalid data URI' unless match

    @uri = uri
    @match = match
  end

  # The media type of the URI: the MIME type immediately followed by any
  # +;attribute=value+ parameters.
  #
  # A missing MIME type is reported as +text/plain+. When both the MIME type
  # and the parameters are missing, +;charset=US-ASCII+ is appended as well.
  #
  # @return [String]
  def mediatype
    "#{mimetype}#{parameters}"
  end

  # The payload of the URI.
  #
  # Base64 payloads are decoded with +Base64.decode64+. Any other payload is
  # returned verbatim; percent-escapes are NOT decoded here.
  #
  # @return [String]
  def decoded_data
    return data unless base64?

    Base64.decode64(data)
  end

  # @return [Boolean] whether the URI carries the +;base64+ extension
  def base64?
    !String(extension).empty?
  end

  private

  # MIME type from the URI, or the +text/plain+ default when absent.
  def mimetype
    return 'text/plain' if String(match[:mimetype]).empty?

    match[:mimetype]
  end

  # Parameters from the URI. The default charset is supplied only when the
  # URI names neither a MIME type nor any parameters.
  def parameters
    return ';charset=US-ASCII' if String(match[:mimetype]).empty? && String(match[:parameters]).empty?

    match[:parameters]
  end

  # The literal +;base64+ marker, or nil when the URI does not have one.
  def extension
    match[:extension]
  end

  # Everything after the first "," that separates header from payload.
  def data
    match[:data]
  end
end
# Behavioural specification for DataUri, grouped by public method.
RSpec.describe DataUri do
  it 'raises an Argument Error if not given a valid data URI' do
    expect { described_class.new('not a data URI') }.to raise_error(ArgumentError)
  end

  describe '#mediatype' do
    it 'returns the mediatype of the URI' do
      parsed = described_class.new('data:text/plain;charset=iso-8859-7,hello')

      expect(parsed.mediatype).to eq('text/plain;charset=iso-8859-7')
    end

    # Neither a MIME type nor parameters: both defaults apply.
    it 'returns the default mediatype if none is given' do
      parsed = described_class.new('data:,hello')

      expect(parsed.mediatype).to eq('text/plain;charset=US-ASCII')
    end

    # Parameters without a MIME type: only the MIME type is defaulted.
    it 'returns the default mediatype if only a charset is given' do
      parsed = described_class.new('data:;charset=UTF-8,hello')

      expect(parsed.mediatype).to eq('text/plain;charset=UTF-8')
    end

    it 'does not include the default parameters if a MIME type is given' do
      parsed = described_class.new('data:text/csv,hello')

      expect(parsed.mediatype).to eq('text/csv')
    end
  end

  describe '#base64?' do
    it 'returns true if the base64 extension is present' do
      parsed = described_class.new('data:;base64,R0lGODdh')

      expect(parsed).to be_base64
    end

    it 'returns false if there is no base64 extension' do
      parsed = described_class.new('data:,R0lGODdh')

      expect(parsed).not_to be_base64
    end
  end

  describe '#decoded_data' do
    it 'returns the data if it is not base64 encoded' do
      parsed = described_class.new('data:,hello')

      expect(parsed.decoded_data).to eq('hello')
    end

    it 'decodes data if it is base64 encoded' do
      parsed = described_class.new('data:;base64,aGVsbG8=')

      expect(parsed.decoded_data).to eq('hello')
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment