Skip to content

Instantly share code, notes, and snippets.

@sinfu
Created October 10, 2010 19:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sinfu/619496 to your computer and use it in GitHub Desktop.
Save sinfu/619496 to your computer and use it in GitHub Desktop.
import std.array;
import std.stdio;
alias immutable(ubyte)[] bstring;
void main()
{
auto s = cast(bstring) "abc\rxyz\n\n012\r\n789\r\r...";
for (NewLineMachine match; !s.empty; )
{
auto newline = bisect!match(s);
s = newline.after;
foreach (ubyte u; newline.before)
{
writef("%02x ", u);
}
writeln();
}
}
/*
* 「改行コードの次の1バイト」を受理する
*
* 改行コード:
* LF = 0A
* FF = 0C
* CR = 0D
* CRLF = 0D 0A
* NEL = C2 85
* LS = E2 80 A8
* PS = E2 80 A9
*/
private struct NewLineMachine
{
/+pure+/ nothrow @safe:
bool opCall(char u)
{
immutable next = transition[state_][toChar[u]];
if (next == State.accept)
{
state_ = State.init; // ε-transition
return true;
}
else
{
state_ = next;
return false;
}
}
private:
State state_;
invariant()
{
assert(state_ != State.accept);
}
private static:
enum State : ubyte
{
init,
pastCR, // ready for CR or CRLF
pastC2, // ready for NEL
pastE2,
pastE280, // ready for LS or PS
ready,
accept
}
// Choose significant UTF-8 code units to minimize the size of
// our state transition table.
enum Char : ubyte
{
other,
LF, FF, CR,
x80, x85, xA8, xA9, xC2, xE2,
}
static immutable Char[char.max + 1] toChar =
[ '\n' : Char. LF, '\f' : Char. FF, '\r' : Char. CR,
'\xC2': Char.xC2, '\x85': Char.x85, '\xE2': Char.xE2,
'\x80': Char.x80, '\xA8': Char.xA8, '\xA9': Char.xA9,
];
// State transition table
static immutable State[ Char.max + 1]
[State.max ] transition =
[ State.init: [ Char. LF: State. ready, // LF
Char. FF: State. ready, // FF
Char. CR: State. pastCR,
Char. xC2: State. pastC2,
Char. xE2: State. pastE2 ],
State.pastCR: [ Char. LF: State. ready, // CRLF
Char. FF: State. accept,
Char. CR: State. accept,
Char. x80: State. accept,
Char. x85: State. accept,
Char. xA8: State. accept,
Char. xA9: State. accept,
Char. xC2: State. accept,
Char. xE2: State. accept,
Char.other: State. accept ],
State.pastC2: [ Char. x85: State. ready ], // NEL = C2 85
State.pastE2: [ Char. x80: State.pastE280 ],
State.pastE280: [ Char. xA8: State. ready, // LS = E2 80 A8
Char. xA9: State. ready ], // PS = E2 80 A9
State.ready: [ Char. LF: State. accept,
Char. FF: State. accept,
Char. CR: State. accept,
Char. x80: State. accept,
Char. x85: State. accept,
Char. xA8: State. accept,
Char. xA9: State. accept,
Char. xC2: State. accept,
Char. xE2: State. accept,
Char.other: State. accept ]
];
}
//----------------------------------------------------------------------------//
import std.functional;
import std.range;
import std.typecons;
/**
* Bisect.
*/
Tuple!(Take!R, "before", R, "after") bisect(alias pred, R)(R r)
if (isForwardRange!R)
{
// Bisect r into (p q) at the first element satisfying pred.
R p = r.save;
R q = r.save;
size_t n;
for (; !q.empty; q.popFront())
{
if (unaryFun!pred(q.front))
break;
++n;
}
return typeof(return)(take(p, n), q);
}
unittest
{
int[] r = [1,2,3,4,5];
auto mid = bisect!"a == 3"(r);
assert(mid.before == [1,2]);
assert(mid.after == [3,4,5]);
auto beg = bisect!"a == 1"(r);
assert(beg.before == []);
assert(beg.after == [1,2,3,4,5]);
auto end = bisect!"a == 5"(r);
assert(end.before == [1,2,3,4]);
assert(end.after == [5]);
auto none = bisect!"a == 0"(r);
assert(none.before == [1,2,3,4,5]);
assert(none.after == []);
}
unittest
{
int[] e;
assert(bisect!"true"(e).before.empty);
assert(bisect!"true"(e).after.empty);
assert(bisect!"false"(e).before.empty);
assert(bisect!"false"(e).after.empty);
auto bis = bisect!"a == 2"(retro([1,2,3]));
static assert(is(typeof(bis[0]) == Take!(Retro!(int[]))));
static assert(is(typeof(bis[1]) == Retro!(int[]) ));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment