Skip to content

Instantly share code, notes, and snippets.

@rlankenau
Last active December 20, 2015 09:09
Show Gist options
  • Save rlankenau/6105437 to your computer and use it in GitHub Desktop.
Save rlankenau/6105437 to your computer and use it in GitHub Desktop.
Code to split RetroSheet records by game
/**
 * Decides whether a line begins a new record.
 *
 * A line qualifies when it contains a comma and the text "id" occurs
 * somewhere before that first comma, i.e. inside the first field.
 *
 * @param t the line to inspect
 * @return true if "id" is found ahead of the first comma, false otherwise
 *         (including when the line has no comma or no "id" at all)
 */
private boolean isStartLine(Text t)
{
    /* Offset of the comma that closes the first field; -1 means no comma */
    final int firstComma = t.find(",");
    if (firstComma == -1) {
        return false;
    }
    /* Offset of the first "id" in the line; -1 means it never occurs */
    final int idOffset = t.find("id");
    return idOffset != -1 && idOffset < firstComma;
}
/**
 * Advances to the next record and stores it in {@code key} / {@code value}.
 *
 * A record is a run of consecutive input lines: it begins with the start
 * line held over from the previous call (or, when nothing was held over,
 * with the next line of input) and extends up to, but not including, the
 * next line for which {@code isStartLine} returns true. That line is kept
 * in {@code deferred_line} so it can open the following record.
 *
 * On success {@code key} holds the value of {@code pos} at entry and
 * {@code value} holds the record's lines, each followed by "\n".
 *
 * @return true if a record was produced; false if no input remained, in
 *         which case {@code key} and {@code value} are set to null
 * @throws IOException if reading from the underlying input fails
 */
public boolean nextKeyValue() throws IOException
{
    Text line = new Text();
    Text newline = new Text("\n");
    if (key == null) {
        key = new LongWritable();
    }
    key.set(pos);
    if (value == null) {
        value = new Text();
    }
    value.clear();

    if (deferred_line != null) {
        /* We held onto a start line on the last invocation; it opens this record. */
        value.append(deferred_line.getBytes(), 0, deferred_line.getLength());
        value.append(newline.getBytes(), 0, newline.getLength());
        /* NOTE(review): getLength() presumably excludes the line terminator that
           readLine consumed, so pos may lag by the terminator size for each
           deferred line -- confirm and track the consumed byte count if exact
           offsets matter. */
        this.pos += deferred_line.getLength();
        deferred_line = null;
    } else {
        /* Nothing held over: the record opens with the next line of input. */
        int firstSize = in.readLine(line, maxLineLength);
        if (firstSize == 0) {
            /* If we got 0 bytes, we're at EOF with no record to return. */
            key = null;
            value = null;
            return false;
        }
        value.append(line.getBytes(), 0, line.getLength());
        value.append(newline.getBytes(), 0, newline.getLength());
        this.pos += firstSize;
    }

    /* The record now has its first line. Keep appending lines until the next
       start line or EOF. Entering this loop directly after a deferred line
       means a record whose start line is the last line of input is still
       returned, and a start line that immediately follows another start line
       begins its own record instead of being merged into this one. */
    while (true) {
        line.clear();
        int newSize = in.readLine(line, maxLineLength);
        if (isStartLine(line)) {
            /* Save this line for the next record; pos is advanced for it on
               the next call, so key then points at this line's offset. */
            this.deferred_line = line;
            return true;
        } else if (newSize == 0) {
            /* At EOF: return what has been accumulated. */
            return true;
        } else {
            value.append(line.getBytes(), 0, line.getLength());
            value.append(newline.getBytes(), 0, newline.getLength());
            this.pos += newSize;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment