Created
October 10, 2010 19:31
-
-
Save sinfu/619496 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std.array; | |
import std.stdio; | |
alias immutable(ubyte)[] bstring; | |
void main() | |
{ | |
auto s = cast(bstring) "abc\rxyz\n\n012\r\n789\r\r..."; | |
for (NewLineMachine match; !s.empty; ) | |
{ | |
auto newline = bisect!match(s); | |
s = newline.after; | |
foreach (ubyte u; newline.before) | |
{ | |
writef("%02x ", u); | |
} | |
writeln(); | |
} | |
} | |
/* | |
* 「改行コードの次の1バイト」を受理する | |
* | |
* 改行コード: | |
* LF = 0A | |
* FF = 0C | |
* CR = 0D | |
* CRLF = 0D 0A | |
* NEL = C2 85 | |
* LS = E2 80 A8 | |
* PS = E2 80 A9 | |
*/ | |
private struct NewLineMachine | |
{ | |
/+pure+/ nothrow @safe: | |
bool opCall(char u) | |
{ | |
immutable next = transition[state_][toChar[u]]; | |
if (next == State.accept) | |
{ | |
state_ = State.init; // ε-transition | |
return true; | |
} | |
else | |
{ | |
state_ = next; | |
return false; | |
} | |
} | |
private: | |
State state_; | |
invariant() | |
{ | |
assert(state_ != State.accept); | |
} | |
private static: | |
enum State : ubyte | |
{ | |
init, | |
pastCR, // ready for CR or CRLF | |
pastC2, // ready for NEL | |
pastE2, | |
pastE280, // ready for LS or PS | |
ready, | |
accept | |
} | |
// Choose significant UTF-8 code units to minimize the size of | |
// our state transition table. | |
enum Char : ubyte | |
{ | |
other, | |
LF, FF, CR, | |
x80, x85, xA8, xA9, xC2, xE2, | |
} | |
static immutable Char[char.max + 1] toChar = | |
[ '\n' : Char. LF, '\f' : Char. FF, '\r' : Char. CR, | |
'\xC2': Char.xC2, '\x85': Char.x85, '\xE2': Char.xE2, | |
'\x80': Char.x80, '\xA8': Char.xA8, '\xA9': Char.xA9, | |
]; | |
// State transition table | |
static immutable State[ Char.max + 1] | |
[State.max ] transition = | |
[ State.init: [ Char. LF: State. ready, // LF | |
Char. FF: State. ready, // FF | |
Char. CR: State. pastCR, | |
Char. xC2: State. pastC2, | |
Char. xE2: State. pastE2 ], | |
State.pastCR: [ Char. LF: State. ready, // CRLF | |
Char. FF: State. accept, | |
Char. CR: State. accept, | |
Char. x80: State. accept, | |
Char. x85: State. accept, | |
Char. xA8: State. accept, | |
Char. xA9: State. accept, | |
Char. xC2: State. accept, | |
Char. xE2: State. accept, | |
Char.other: State. accept ], | |
State.pastC2: [ Char. x85: State. ready ], // NEL = C2 85 | |
State.pastE2: [ Char. x80: State.pastE280 ], | |
State.pastE280: [ Char. xA8: State. ready, // LS = E2 80 A8 | |
Char. xA9: State. ready ], // PS = E2 80 A9 | |
State.ready: [ Char. LF: State. accept, | |
Char. FF: State. accept, | |
Char. CR: State. accept, | |
Char. x80: State. accept, | |
Char. x85: State. accept, | |
Char. xA8: State. accept, | |
Char. xA9: State. accept, | |
Char. xC2: State. accept, | |
Char. xE2: State. accept, | |
Char.other: State. accept ] | |
]; | |
} | |
//----------------------------------------------------------------------------// | |
import std.functional; | |
import std.range; | |
import std.typecons; | |
/** | |
* Bisect. | |
*/ | |
Tuple!(Take!R, "before", R, "after") bisect(alias pred, R)(R r) | |
if (isForwardRange!R) | |
{ | |
// Bisect r into (p q) at the first element satisfying pred. | |
R p = r.save; | |
R q = r.save; | |
size_t n; | |
for (; !q.empty; q.popFront()) | |
{ | |
if (unaryFun!pred(q.front)) | |
break; | |
++n; | |
} | |
return typeof(return)(take(p, n), q); | |
} | |
unittest | |
{ | |
int[] r = [1,2,3,4,5]; | |
auto mid = bisect!"a == 3"(r); | |
assert(mid.before == [1,2]); | |
assert(mid.after == [3,4,5]); | |
auto beg = bisect!"a == 1"(r); | |
assert(beg.before == []); | |
assert(beg.after == [1,2,3,4,5]); | |
auto end = bisect!"a == 5"(r); | |
assert(end.before == [1,2,3,4]); | |
assert(end.after == [5]); | |
auto none = bisect!"a == 0"(r); | |
assert(none.before == [1,2,3,4,5]); | |
assert(none.after == []); | |
} | |
unittest | |
{ | |
int[] e; | |
assert(bisect!"true"(e).before.empty); | |
assert(bisect!"true"(e).after.empty); | |
assert(bisect!"false"(e).before.empty); | |
assert(bisect!"false"(e).after.empty); | |
auto bis = bisect!"a == 2"(retro([1,2,3])); | |
static assert(is(typeof(bis[0]) == Take!(Retro!(int[])))); | |
static assert(is(typeof(bis[1]) == Retro!(int[]) )); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment