Skip to content

Instantly share code, notes, and snippets.

@John-Colvin
Last active May 26, 2017 14:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save John-Colvin/980b11f2b7a7e23faf8dfb44bd9f1242 to your computer and use it in GitHub Desktop.
Save John-Colvin/980b11f2b7a7e23faf8dfb44bd9f1242 to your computer and use it in GitHub Desktop.
tsv with iopipe
import iopipe.textpipe;
import iopipe.bufpipe;
import iopipe.stream;
import iopipe.buffer;
/**
 * Entry point: aggregates a tab-separated file by key and prints the key
 * whose summed value is largest.
 *
 * Params:
 *   args = program name followed by: file name, key field index and value
 *          field index (both indices are zero-based column positions).
 *
 * Returns: 0 on success, 1 when arguments are missing or a field index is
 * not a valid non-negative integer.
 */
int main(string[] args)
{
    import std.conv : ConvException, to;
    import std.stdio : stderr, writeln;

    if (args.length < 4)
    {
        // Usage errors belong on stderr so they never mix with real output
        // when the tool is used in a pipeline.
        stderr.writefln("synopsis: %s filename keyfield valuefield", args[0]);
        return 1;
    }

    string filename = args[1];
    size_t keyFieldIndex;
    size_t valueFieldIndex;
    try
    {
        keyFieldIndex = args[2].to!size_t;
        valueFieldIndex = args[3].to!size_t;
    }
    catch (ConvException e)
    {
        // Report a bad index as a usage problem instead of letting the
        // exception escape main as an unexplained trace.
        stderr.writefln("invalid field index: %s", e.msg);
        stderr.writefln("synopsis: %s filename keyfield valuefield", args[0]);
        return 1;
    }

    auto sumByKey = runWithEncoding!makeCounts(filename, keyFieldIndex, valueFieldIndex);

    if (sumByKey.length == 0)
        writeln("No entries");
    else
    {
        import std.algorithm : maxElement;

        // Pick the entry with the largest accumulated sum.
        auto maxEntry = sumByKey.byKeyValue.maxElement!"a.value";
        writeln("max_key: ", maxEntry.key, " sum: ", maxEntry.value);
    }
    return 0;
}
/**
 * Opens `file` as a buffered device, inspects the start of its window for a
 * byte-order mark, and forwards the device plus `args` to `process`
 * instantiated with the detected `UTFType`. An unknown encoding is treated as
 * UTF-8.
 *
 * A helper roughly like this arguably belongs in iopipe itself; users should
 * not have to write it.
 *
 * Params:
 *   process = template taking a `UTFType` as its compile-time argument and
 *             called as `process!(type)(dev, args)`.
 *   file    = whatever `openDev` accepts.
 *   args    = extra arguments passed through to `process`.
 *
 * Returns: the result of the selected `process` instantiation.
 */
auto ref runWithEncoding(alias process, FileT, Args...)(FileT file, auto ref Args args)
{
    auto source = openDev(file).bufd;

    // Ask for 4 elements up front before sniffing the BOM
    // (presumably the longest BOM is 4 bytes — confirm against iopipe).
    source.ensureElems(4);

    const encoding = source.window.detectBOM;

    if (encoding == UTFType.Unknown || encoding == UTFType.UTF8)
        return process!(UTFType.UTF8)(source, args);
    if (encoding == UTFType.UTF16LE)
        return process!(UTFType.UTF16LE)(source, args);
    if (encoding == UTFType.UTF16BE)
        return process!(UTFType.UTF16BE)(source, args);
    if (encoding == UTFType.UTF32LE)
        return process!(UTFType.UTF32LE)(source, args);
    if (encoding == UTFType.UTF32BE)
        return process!(UTFType.UTF32BE)(source, args);

    // Any other value is not expected here.
    assert(0);
}
/**
 * Reads `dev` line by line as tab-separated text and, for every distinct
 * value of the key column, sums the integers found in the value column.
 *
 * Params:
 *   utfType         = encoding used to decode `dev` (compile-time argument).
 *   dev             = source handed to `decodeText`.
 *   keyFieldIndex   = zero-based index of the column used as the key.
 *   valueFieldIndex = zero-based index of the column parsed as `int`.
 *
 * Returns: an `int[string]` mapping each key to the sum of its values.
 * Lines that have fewer columns than the larger of the two indices requires
 * are skipped.
 *
 * Throws: `std.conv.ConvException` when a selected value column does not
 * parse as an `int`.
 */
auto makeCounts(UTFType utfType, Dev)(Dev dev, size_t keyFieldIndex, size_t valueFieldIndex)
{
    import std.algorithm : max, splitter;
    import std.conv : to;
    import std.range : take;
    import std.string : chomp;
    import std.traits : ForeachType;

    // Fields beyond the larger of the two indices are never needed.
    immutable size_t maxFieldIndex = max(keyFieldIndex, valueFieldIndex);
    enum delim = "\t";

    auto lines = dev.decodeText!utfType.byLine.asInputRange;
    alias CharT = ForeachType!(typeof(lines.front()));
    int[immutable(CharT)[]] sumByKey;

    foreach (line; lines)
    {
        // NOTE(review): byLine windows appear to keep the trailing "\n" /
        // "\r\n" — confirm against iopipe. Strip it so the last column of a
        // line can still be used as the key or parsed as the value; chomp is
        // a no-op when no terminator is present.
        auto splitting = line.chomp.splitter(delim);
        typeof(splitting.front) key;
        int value;
        size_t fieldIndex = 0;

        // Walk only as many fields as required to reach both columns.
        foreach (str; splitting.take(maxFieldIndex + 1))
        {
            if (fieldIndex == keyFieldIndex)
                key = str;
            if (fieldIndex == valueFieldIndex)
                value = str.to!int;
            ++fieldIndex;
        }

        // Only lines that actually contained both columns contribute.
        if (fieldIndex == maxFieldIndex + 1)
        {
            if (auto p = key in sumByKey)
                *p += value;
            else
                // key is a slice of the current line, so store an immutable
                // copy rather than the slice itself.
                sumByKey[key.idup] = value;
        }
    }

    // Normalise the key type to string regardless of the decoded char width.
    static if (is(CharT == char))
        return sumByKey;
    else
        return sumByKey.to!(int[string]);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment