Skip to content

Instantly share code, notes, and snippets.

@mjwillson
Last active December 29, 2015 16:09
Show Gist options
  • Save mjwillson/7695728 to your computer and use it in GitHub Desktop.
Save mjwillson/7695728 to your computer and use it in GitHub Desktop.
ann -- ultra-basic console-based multiclass text annotation tool
#!/usr/bin/env ruby
require 'optparse'
# Switch values collected while parsing. The handlers below set the keys
# :append, :prompt, :save_progress and :progress_file.
OPTIONS = {}

# Option parser for the tool. Printing it yields the full usage/help text.
PARSER = OptionParser.new do |parser|
  parser.banner = "Usage: #{$0} [OPTIONS] INPUT_FILE [HOTKEY OUTPUT_FILE]..."

  # The heredoc body and terminator stay at column 0: a plain <<END keeps
  # leading whitespace, so indenting them would alter the help text.
  overview = <<END
#{$0} -- ultra-basic console-based multiclass text annotation tool
Give it an input file with one example per line, and a series of hotkeys and
output files to be written to, e.g.
ann examples.txt y yes-file.txt n no-file.txt
For each line in the input it will issue a prompt, e.g.
Annotation (y/n):
at the console. If you press y the example is written to yes-file.txt,
if n then no-file.txt. (hotkeys are case-insensitive). Supports saving
and resuming progress with -s.
END
  parser.separator(overview)
  parser.separator("Options:")

  parser.on("-a", "--append", "Append to output files (default is to write)") do |flag|
    OPTIONS[:append] = flag
  end

  parser.on("-p", "--prompt PROMPT",
            "Overwrite the default prompt, which is",
            "'Annotation? (h/o/t/k/e/y/s)'") do |text|
    OPTIONS[:prompt] = text
  end

  # Both progress switches imply --append.
  save_progress_help = [
    "Saves/resumes progress to/from a file <INPUT_FILE>.ann-pos",
    "containing the byte offset within INPUT_FILE of the",
    "next line to be annotated. Requires input is a file.",
    "You can then Ctrl-C to stop annotating and your position",
    "will be saved. Implies --append."
  ]
  parser.on("-s", "--save-progress", *save_progress_help) do |_|
    OPTIONS[:save_progress] = true
    OPTIONS[:append] = true
  end

  progress_file_help = [
    "Like --save-progress but overrides the default progress",
    "filename. Useful if annotating same dataset for different tasks."
  ]
  parser.on("--progress-file PROGRESS_FILENAME", *progress_file_help) do |filename|
    OPTIONS[:save_progress] = true
    OPTIONS[:progress_file] = filename
    OPTIONS[:append] = true
  end

  parser.on_tail("-h", "--help", "This info") do
    puts parser
    exit
  end
end

# Strip the recognised switches out of ARGV, leaving the positional arguments.
PARSER.parse!(ARGV)
# Reports a command-line usage error and terminates the script.
#
# When +message+ is given it is printed to standard error followed by a
# blank line. The usage text rendered from PARSER is then printed to
# standard error and the process exits with status 1.
def bad_args(message = nil)
  STDERR.puts(message, "") if message
  STDERR.puts(PARSER)
  exit(1)
end
# Positional arguments: one INPUT_FILE followed by one or more
# HOTKEY OUTPUT_FILE pairs, i.e. an odd count of at least three.
if ARGV.size < 3 || ARGV.size.even?
  bad_args("Need an INPUT-FILE followed by pairs of HOTKEY OUTPUT-FILE arguments")
end

# The file of examples to annotate, one per line.
INPUT_FILE = File.open(ARGV.shift, "r")

# Path of the file holding the resume offset, or nil when progress is not
# being saved.
PROGRESS_FILENAME = if OPTIONS[:save_progress]
  bad_args("--save-progress requires input is a file") unless File.file?(INPUT_FILE)
  OPTIONS[:progress_file] || "#{INPUT_FILE.path}.ann-pos"
end

# Resume from the saved offset if a progress file is present.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
if PROGRESS_FILENAME && File.exist?(PROGRESS_FILENAME)
  INPUT_FILE.seek(File.read(PROGRESS_FILENAME).to_i)
end

WRITE_MODE = OPTIONS[:append] ? 'a' : 'w'

# Run every check that can reject the invocation BEFORE opening any output
# file: in 'w' mode opening truncates, and a rejected command line should not
# destroy existing annotations.
hotkey_pairs = ARGV.each_slice(2).to_a
unless hotkey_pairs.all? { |hotkey, _filename| hotkey.length == 1 }
  bad_args("Hotkeys must be single characters")
end
bad_args("STDIN must be a terminal") unless STDIN.tty?

# Lowercased hotkey => open output file, plus the hotkeys in command-line
# order (used to build the default prompt).
HOTKEY_TO_FILE = {}
HOTKEYS = []
hotkey_pairs.each do |hotkey, filename|
  key = hotkey.downcase
  HOTKEY_TO_FILE[key] = File.open(filename, WRITE_MODE)
  HOTKEYS << key
end
# Reads a single character from STDIN with the terminal switched, via stty,
# to raw, non-echoing, non-canonical mode (signal keys left enabled).
#
# This is a bit of a kludge and won't work on Windows: it shells out to
# stty. Curses.getch and Curses.cbreak were tried first, without success.
#
# Returns the character read, as a one-character String.
def get_char
  saved_settings = %x(stty -g)
  %x(stty raw -echo -icanon isig)
  STDIN.getc.chr
ensure
  # Put the terminal back the way it was, even if the read was interrupted.
  %x(stty #{saved_settings})
end
# Prompt printed before each keypress: the --prompt text when given,
# otherwise a default listing the hotkeys. Built in one expression so neither
# the constant nor the user-supplied option string is mutated afterwards.
prompt_text = OPTIONS[:prompt] || "Annotation? (#{HOTKEYS.join('/')})"
PROMPT = "#{prompt_text}: "

# Main loop: show each remaining input line, wait for a recognised hotkey,
# write the line to that hotkey's output file and record the new position.
INPUT_FILE.each_line do |line|
  puts
  puts line
  puts

  char = nil
  loop do
    print PROMPT
    STDOUT.flush
    # required to get character-at-a-time unbuffered input:
    char = get_char
    # Hotkeys are stored lowercased, so fold the keypress the same way; this
    # is what makes them case-insensitive as the help text promises. Only
    # well-formed input is folded, since String#downcase may raise on an
    # invalid byte sequence.
    char = char.downcase if char.valid_encoding?
    puts
    STDOUT.flush
    break if HOTKEY_TO_FILE.key?(char)
  end

  output = HOTKEY_TO_FILE[char]
  output.write(line)
  # Push the annotation out of Ruby's buffer before recording progress, so
  # the saved offset is not written ahead of the line it accounts for.
  output.flush

  if PROGRESS_FILENAME
    File.open(PROGRESS_FILENAME, 'w') { |f| f.write(INPUT_FILE.pos.to_s) }
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment