Skip to content

Instantly share code, notes, and snippets.

@khornberg
Created November 25, 2014 22:57
Show Gist options
  • Save khornberg/a0e6d13c5559ea458ddd to your computer and use it in GitHub Desktop.
Save khornberg/a0e6d13c5559ea458ddd to your computer and use it in GitHub Desktop.
Attempt to read gzip files in logstash

This attempt has thus far failed to allow Logstash to read gzipped files. It is based on the line codec.

I think this is because of the way Logstash buffers the input stream. It seems to require a delimiter that will not be present in the gzipped files.

The spec file gzip_spec.rb passes without issue.

The trouble starts when I run `bin/logstash agent -f gzip.conf --debug`. Either I get no output, or I get `Error: Unexpected end of ZLIB input stream` on larger files.

One can reproduce the no-output case by creating a file with `echo "hello world" | gzip > file.gz` and running Logstash with gzip.conf. One can reproduce the `Unexpected end of ZLIB input stream` error by creating a gzipped file of the Logstash README.md.

# Pipeline used to reproduce the gzip codec problem: one gzipped file in,
# decoded events printed to stdout.
input {
file {
# Gzipped test file to read.
path => "/home/kyle/projects/logstash/file.gz"
# Read the file from its beginning instead of only picking up newly
# appended data (per the Logstash file input documentation).
start_position => beginning
# Decode the file contents with the "gzip" codec under test.
codec => "gzip"
}
}
output {
stdout {
# Print each event with the rubydebug codec so its fields can be inspected.
codec => rubydebug
}
}
# encoding: utf-8
require "logstash/codecs/base"
require "logstash/util/charset"
require "stringio"
require "zlib"
# Gzipped line-oriented text data.
#
# Decoding behavior: Only whole line events will be emitted.
#
# Encoding behavior: Each event will be emitted as is.
class LogStash::Codecs::Gzip < LogStash::Codecs::Base
  config_name "gzip"

  milestone 1

  # Set the desired text format for encoding.
  config :format, :validate => :string

  # The character encoding used in this input. Examples include "UTF-8"
  # and "cp1252"
  #
  # This setting is useful if your log files are in Latin-1 (aka cp1252)
  # or in another character set other than UTF-8.
  #
  # This only affects "plain" format logs since json is UTF-8 already.
  config :charset, :validate => ::Encoding.name_list, :default => "UTF-8"

  # Builds the charset converter that #decode applies to every line,
  # using the configured :charset, and hands it this codec's logger.
  def register
    @converter = LogStash::Util::Charset.new(@charset)
    @converter.logger = @logger
  end

  # Decompresses a gzip payload and yields one event per decompressed line.
  #
  # data - String containing gzip-compressed bytes. The whole string is
  #        handed to Zlib::GzipReader, so it must hold a complete gzip
  #        stream; a partial chunk makes zlib raise.
  #
  # Yields a LogStash::Event whose "message" is the charset-converted line,
  # with its trailing line terminator left in place.
  #
  # Nil or empty input yields nothing instead of raising a gzip header error.
  def decode(data)
    return if data.nil? || data.empty?

    gz = Zlib::GzipReader.new(StringIO.new(data))
    begin
      gz.each_line do |line|
        yield LogStash::Event.new("message" => @converter.convert(line))
      end
    ensure
      # Release the zlib stream even if the consumer's block or zlib raises.
      gz.close
    end
  end # def decode

  # Passes the textual form of +data+ to the registered on_event callback.
  #
  # data - a LogStash::Event or any other object. An event is rendered
  #        with the configured :format via sprintf when :format is set;
  #        everything else (including events without a format) is
  #        rendered with to_s. The payload is emitted as text, not gzipped.
  def encode(data)
    if data.is_a?(LogStash::Event) && @format
      @on_event.call(data.sprintf(@format))
    else
      @on_event.call(data.to_s)
    end
  end # def encode
end # class LogStash::Codecs::Gzip
# encoding: utf-8
require "logstash/codecs/gzip"
require "logstash/event"
# Specs for the gzip codec: #encode must emit the event's text (optionally
# rendered through :format) and #decode must turn a gzip payload into one
# event per decompressed line.
describe LogStash::Codecs::Gzip do
  subject { LogStash::Codecs::Gzip.new }

  context "#encode" do
    let(:event) { LogStash::Event.new({"message" => "hello world", "host" => "test"}) }

    it "should return a default date formatted line" do
      expect(subject).to receive(:on_event).once.and_call_original
      subject.on_event do |d|
        insist { d } == event.to_s
      end
      subject.encode(event)
    end

    it "should respect the supplied format" do
      format = "%{host}"
      subject.format = format
      expect(subject).to receive(:on_event).once.and_call_original
      subject.on_event do |d|
        insist { d } == event.sprintf(format)
      end
      subject.encode(event)
    end
  end

  context "#decode" do
    it "should return an event from a gzipped ascii string" do
      decoded = false
      # gzip of "hello world\n", written as hex
      text = ["1f8b080041ab74540003cb48cdc9c95728cf2fca49e102002d3b08af0c000000"].pack('H*')
      subject.decode(text) do |e|
        decoded = true
        insist { e.is_a?(LogStash::Event) }
        insist { e["message"] } == "hello world\n"
      end
      # Guard against a vacuous pass when the codec yields nothing.
      insist { decoded } == true
    end

    it "should return an event from a valid gzipped utf-8 string" do
      decoded = false
      # gzip of "München\n", written as hex
      text = ["1f8b080021ab74540003f33dbc272f3923358f0b00598f619d09000000"].pack('H*')
      subject.decode(text) do |e|
        decoded = true
        insist { e.is_a?(LogStash::Event) }
        insist { e["message"] } == "München\n"
      end
      # Guard against a vacuous pass when the codec yields nothing.
      insist { decoded } == true
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment