Skip to content

Instantly share code, notes, and snippets.

@washort
Created June 3, 2012 03:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save washort/2861703 to your computer and use it in GitHub Desktop.
Save washort/2861703 to your computer and use it in GitHub Desktop.
Twine
from unittest import TestCase
from terml.twine import SourceSpan, Twine
class SourceSpanTests(TestCase):
    """
    Tests for the SourceSpan value type.
    """

    def test_creation(self):
        """
        A span compares by value, iterates over its fields in order,
        and exposes each field by name.
        """
        fields = ("http://example.org/t", True, 1, 0, 1, 9)
        uri, oneToOne, startLine, startCol, endLine, endCol = fields
        span = SourceSpan(*fields)
        self.assertEqual(span, SourceSpan(*fields))
        self.assertEqual(list(span), list(fields))
        self.assertEqual(span.uri, uri)
        self.assertEqual(span.isOneToOne, oneToOne)
        self.assertEqual(span.startLine, startLine)
        self.assertEqual(span.startCol, startCol)
        self.assertEqual(span.endLine, endLine)
        self.assertEqual(span.endCol, endCol)

    def test_oneToOne(self):
        """
        notOneToOne() clears only the one-to-one flag, and a one-to-one
        span that crosses a line boundary is rejected.
        """
        span = SourceSpan("http://example.org/t", True, 1, 0, 1, 9)
        blob = span.notOneToOne()
        self.assertEqual(list(blob),
                         ["http://example.org/t", False, 1, 0, 1, 9])
        # Lines 1 through 2 cannot be one-to-one.
        multiLine = ("http://example.org/t", True, 1, 0, 2, 7)
        self.assertRaises(ValueError, SourceSpan, *multiLine)
class TwineTests(TestCase):
    """
    Tests for Twine: text that carries SourceSpan information through
    slicing, splitting, joining and replacement.

    Column numbers in spans are inclusive at both ends, so a twine of
    N characters starting at column C covers columns C through C+N-1.
    """

    def test_creation(self):
        """
        A Twine has no span unless one is supplied.
        """
        ss = SourceSpan("http://example.org/t", True, 1, 0, 1, 9)
        t = Twine(u"foo baz")
        self.assertEqual(t.span, None)
        t = Twine(u"foo baz", ss)
        self.assertEqual(t.span, ss)

    def test_asFrom(self):
        """
        asFrom() assigns spans line by line; only single-line results
        are one-to-one.
        """
        t = Twine(u"foo baz").asFrom("test.txt")
        self.assertEqual(t.span, SourceSpan("test.txt", True, 1, 0, 1, 6))
        self.assertEqual(Twine(u"abc\ndef").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 2))
        self.assertEqual(Twine(u"abc\ndef").asFrom("test.txt", 3, 10).span,
                         SourceSpan("test.txt", False, 3, 10, 4, 2))
        self.assertEqual(Twine(u"abcdef").asFrom("test.txt").span,
                         SourceSpan("test.txt", True, 1, 0, 1, 5))
        self.assertEqual(Twine(u"abcdef\n").asFrom("test.txt").span,
                         SourceSpan("test.txt", True, 1, 0, 1, 6))
        self.assertEqual(Twine(u"abcdef\nghijkl").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 5))
        self.assertEqual(
            Twine(u"abcdef\nghijkl").asFrom("test.txt")[:6].span,
            SourceSpan("test.txt", True, 1, 0, 1, 5))
        self.assertEqual(Twine(u"").asFrom("test.txt").span, None)
        self.assertEqual(Twine(u"\n").asFrom("test.txt").span,
                         SourceSpan("test.txt", True, 1, 0, 1, 0))
        self.assertEqual(Twine(u"\n\n").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 0))
        self.assertEqual(Twine(u"abcdef\n\n").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 0))
        self.assertEqual(Twine(u"abcdef\ng\n").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 1))
        self.assertEqual(Twine(u"abcdef\ng").asFrom("test.txt").span,
                         SourceSpan("test.txt", False, 1, 0, 2, 0))

    def test_slice(self):
        """
        Slices and single-character indexing carry a matching span.
        """
        t = Twine(u"abc\ndef\n\nghij\n\n").asFrom("foo:bar")
        self.assertEqual(t[:3].span,
                         SourceSpan("foo:bar", True, 1, 0, 1, 2))
        self.assertEqual(t[2:6].span,
                         SourceSpan("foo:bar", False, 1, 2, 2, 1))
        self.assertEqual(t[2].span,
                         SourceSpan("foo:bar", True, 1, 2, 1, 2))

    def test_split(self):
        """
        split() yields pieces with per-line spans; empty pieces have none.
        """
        t = Twine(u"abc\ndef\n\nghij\n\n").asFrom("foo:bar")
        self.assertEqual([x.span for x in t.split('\n')],
                         [SourceSpan("foo:bar", True, 1, 0, 1, 2),
                          SourceSpan("foo:bar", True, 2, 0, 2, 2),
                          None,
                          SourceSpan("foo:bar", True, 4, 0, 4, 3),
                          None,
                          None])

    def test_rsplit(self):
        """
        rsplit() yields the same spans as split() for a full split.
        """
        t = Twine(u"abc\ndef\n\nghij\n\n").asFrom("foo:bar")
        self.assertEqual([x.span for x in t.rsplit('\n')],
                         [SourceSpan("foo:bar", True, 1, 0, 1, 2),
                          SourceSpan("foo:bar", True, 2, 0, 2, 2),
                          None,
                          SourceSpan("foo:bar", True, 4, 0, 4, 3),
                          None,
                          None])

    def test_concat(self):
        """
        Concatenating adjacent one-to-one twines gives a one-to-one
        cover and keeps both operands as parts.
        """
        t1 = Twine(u"foo ", SourceSpan("foo:bar", True, 1, 0, 1, 3))
        # u"foo " occupies columns 0-3 inclusive, so the text that
        # follows it starts at column 4 and its three characters end at 6.
        t2 = Twine(u"baz", SourceSpan("foo:bar", True, 1, 4, 1, 6))
        self.assertEqual((t1 + t2).span,
                         SourceSpan("foo:bar", True, 1, 0, 1, 6))
        # `parts` is a tuple, and a tuple never compares equal to a list.
        self.assertEqual((t1 + t2).parts,
                         (t1, t2))

    def test_eq(self):
        """
        Twines compare equal to twines and plain strings with the same
        text.
        """
        t1 = Twine(u"foo ", SourceSpan("foo:bar", True, 1, 0, 1, 3))
        self.assertEqual(t1,
                         Twine(u"foo ",
                               SourceSpan("foo:bar", True, 1, 0, 1, 3)))
        self.assertEqual(t1, u"foo ")

    def test_join(self):
        """
        join() interleaves the separator and records which output
        ranges map back to which source spans.
        """
        ts = Twine(u'one two three', SourceSpan("foo:bar", True, 1, 0, 1, 3))
        words = ts.split(u' ')
        t = Twine(u', ').join(words)
        self.assertEqual(t.span, None)
        self.assertEqual(t.parts,
                         (u'one', u', ', u'two', u', ', u'three'))
        self.assertEqual(
            t.sourceMap,
            (((0, 3), SourceSpan("foo:bar", True, 1, 0, 1, 2)),
             ((5, 8), SourceSpan("foo:bar", True, 1, 4, 1, 6)),
             ((10, 15), SourceSpan("foo:bar", True, 1, 8, 1, 12))))

    def test_replace(self):
        """
        replace() keeps one-to-one spans for untouched text and a
        non-one-to-one span for the substituted text.
        """
        t = Twine(u'one two three').asFrom("foo:bar")
        t2 = t.replace(u'two', u'eleventy')
        # The replaced twine no longer maps character-for-character onto
        # its source; the original `t` is still one-to-one.
        self.assertEqual(t2.span, SourceSpan("foo:bar", False, 1, 0, 1, 12))
        self.assertEqual(t2.parts, (u'one ', u'eleventy', u' three'))
        # Every entry is an ((outStart, outEnd), span) pair.
        self.assertEqual(
            t2.sourceMap,
            (((0, 4), SourceSpan("foo:bar", True, 1, 0, 1, 3)),
             ((4, 12), SourceSpan("foo:bar", False, 1, 4, 1, 6)),
             ((12, 18), SourceSpan("foo:bar", True, 1, 7, 1, 12))))

    # TODO: add test_format and test_mod once Twine supports format()
    # and the % operator.
from collections import namedtuple
# Plain tuple layout of a span; SourceSpan adds validation and helpers.
_SourceSpan = namedtuple("SourceSpan",
                         "uri isOneToOne startLine startCol endLine endCol")


class SourceSpan(_SourceSpan):
    """
    Information about the original location of a span of text.
    Twines use this to remember where they came from.

    uri: Name of document this text came from.
    isOneToOne: Whether each character in that Twine maps to the
                corresponding source character position.
    startLine, endLine: Line numbers for the beginning and end of the
                        span. Line numbers start at 1.
    startCol, endCol: Column numbers for the beginning and end of the
                      span. Column numbers start at 0.

    A one-to-one span must start and end on the same line; constructing
    one that does not raises ValueError.
    """

    # Keep instances as small, attribute-less tuples: without this the
    # subclass would grow a per-instance __dict__.
    __slots__ = ()

    def __new__(cls, *args, **kwargs):
        """
        Build the span from positional or keyword fields and validate it.

        Raises ValueError if `isOneToOne` is true while `startLine` and
        `endLine` differ.
        """
        ss = _SourceSpan.__new__(cls, *args, **kwargs)
        if ss.isOneToOne and ss.startLine != ss.endLine:
            raise ValueError("one-to-one spans must be on a line")
        return ss

    def notOneToOne(self):
        """
        Return a copy of this span with `isOneToOne` set to False.
        """
        return SourceSpan(self.uri, False, self.startLine, self.startCol,
                          self.endLine, self.endCol)

    def __repr__(self):
        """
        Render as `<uri#:kind::startLine:startCol:endLine:endCol>`, where
        kind is "span" for one-to-one spans and "blob" otherwise.
        """
        return "<%s#:%s::%s>" % (self.uri,
                                 "span" if self.isOneToOne else "blob",
                                 ':'.join(str(x) for x in self[2:]))
def spanCover(a, b):
    """
    Create a new SourceSpan that covers spans `a` and `b`.

    Returns None if either span is None or the spans come from
    different URIs. If both spans are one-to-one and `b` starts on the
    column right after `a` ends (on the same line), the result is a
    one-to-one span from the start of `a` to the end of `b`. Otherwise
    the result is a non-one-to-one span running from the earlier start
    point to the later end point.
    """
    if a is None or b is None or a.uri != b.uri:
        return None
    if (a.isOneToOne and b.isOneToOne
            and a.endLine == b.startLine
            and a.endCol + 1 == b.startCol):
        # These spans are adjacent: end columns are inclusive, so `b`
        # must begin exactly one column past the end of `a`.
        return SourceSpan(a.uri, True,
                          a.startLine, a.startCol,
                          b.endLine, b.endCol)
    # (line, col) pairs order lexicographically, which is exactly
    # "earlier line first, then earlier column on the same line".
    startLine, startCol = min((a.startLine, a.startCol),
                              (b.startLine, b.startCol))
    endLine, endCol = max((a.endLine, a.endCol),
                          (b.endLine, b.endCol))
    return SourceSpan(a.uri, False, startLine, startCol, endLine, endCol)
try:
    unicode
except NameError:
    # Python 3 has no separate `unicode` type; `str` is the text type.
    unicode = str


class Twine(unicode):
    """
    A text string that remembers where it came from.

    A Twine behaves like an ordinary unicode string and additionally
    carries an optional SourceSpan (see the `span` property) describing
    where its text was taken from.
    """

    def __new__(cls, input, span=None):
        """
        Create the string value from `input`; `span` is stored by
        __init__.
        """
        return unicode.__new__(cls, input)

    def __init__(self, input, span=None):
        self._span = span

    @classmethod
    def fromParts(cls, parts):
        """
        Return a Twine that contains, in sequence, all the Twines in
        the iterable `parts`.

        No parts give an empty Twine, a single part is returned as-is,
        and several parts are wrapped in a CompositeTwine.
        """
        # Materialise once so any iterable (not just a sequence) works.
        parts = tuple(parts)
        if not parts:
            return Twine(u"")
        if len(parts) == 1:
            return parts[0]
        return CompositeTwine(parts)

    def asFrom(self, sourceURI, startLine=1, startCol=0):
        """
        Return a Twine with source span info from the given URI and
        (optionally) start position.

        The text is cut into one part per line, each keeping its
        trailing newline and each given a one-to-one span on its own
        line. `startCol` applies to the first line only; later lines
        start at column 0.
        """
        parts = []
        s = unicode(self)
        ln = len(s)
        start = 0
        while start < ln:
            end = s.find('\n', start)
            if end == -1:
                # Last line has no newline: it ends at the last character.
                end = ln - 1
            endCol = startCol + end - start
            ss = SourceSpan(sourceURI, True,
                            startLine, startCol, startLine, endCol)
            parts.append(Twine(s[start:end + 1], ss))
            startLine += 1
            startCol = 0
            start = end + 1
        return Twine.fromParts(parts)

    @property
    def span(self):
        """
        The SourceSpan this text came from, or None if unknown.
        """
        return self._span

    def __getslice__(self, i, j):
        # Python 2 routes simple `t[i:j]` slices here; funnel them
        # through __getitem__ so spans are tracked.
        return self.__getitem__(slice(i, j))

    def __getitem__(self, idxOrSlice):
        """
        Return a new Twine sliced out of this one, with a matching
        SourceSpan.

        Accepts an integer index (negative values count from the end)
        or a slice with any non-zero step. An empty selection returns a
        span-less empty Twine; selecting the whole string with step 1
        returns this Twine itself.

        Raises IndexError if an integer index is out of range.
        """
        length = len(self)
        if isinstance(idxOrSlice, slice):
            start, stop, step = idxOrSlice.indices(length)
        else:
            start = idxOrSlice
            if start < 0:
                start += length
            if not 0 <= start < length:
                raise IndexError("Twine index out of range")
            stop = start + 1
            step = 1
        # The range is empty when stop is not strictly ahead of start in
        # the direction of travel.
        if (stop - start) * step <= 0:
            return Twine(u"")
        if step == 1 and start == 0 and stop == length:
            return self
        return self._slice(start, stop, step)

    def _slice(self, start, stop, step):
        """
        This twine is atomic, so a simple slice and updated SourceSpan
        will do.

        `start`, `stop` and `step` are normalised values as produced by
        slice.indices(). A one-to-one span is narrowed to the columns
        the slice ranges over and stays one-to-one only when `step` is
        1; any other span is passed through unchanged.
        """
        # With a negative step, slice.indices() reports "run through
        # index 0" as stop == -1, which must not be read as "last index".
        s = unicode.__getitem__(
            self, slice(start, stop if stop >= 0 else None, step))
        span = self._span
        if span and span.isOneToOne:
            if step > 0:
                first, last = start, stop - 1
            else:
                first, last = stop + 1, start
            span = SourceSpan(span.uri, step == 1,
                              span.startLine,
                              span.startCol + first,
                              span.endLine,
                              span.startCol + last)
        return Twine(s, span)
class CompositeTwine(Twine):
    """
    A Twine made of a sequence of other Twines.

    Its text is the concatenation of its parts, and its span is the
    cover of the parts' spans.
    """

    def __new__(cls, parts):
        """
        Create the composite from `parts`, a sequence of Twines.

        `parts` is read here and again in __init__, so it must be a
        re-iterable sequence (list or tuple), not a one-shot iterator.
        """
        # The string value must be the concatenated text of the parts;
        # converting the sequence itself would store its repr instead.
        return Twine.__new__(cls, u"".join(parts))

    def __init__(self, parts):
        self._parts = tuple(parts)

    @property
    def parts(self):
        """
        The tuple of Twines this composite was built from.
        """
        return self._parts

    @property
    def span(self):
        """
        The span covering every part, or None if there are no parts or
        the parts cannot be covered by a single span (see spanCover).
        """
        if not self._parts:
            return None
        ss = self._parts[0].span
        for part in self._parts[1:]:
            if not ss:
                # Once the cover is lost it cannot be recovered.
                return None
            ss = spanCover(ss, part.span)
        return ss

    def __len__(self):
        return sum(len(p) for p in self._parts)

    def _getPartAt(self, pos):
        """
        Find the part that `pos` is an index into. For instance, if
        self._parts is ['abc', 'def', 'ghi'], 2 is an index into part
        0, and 4 is an index into part 1.

        Returns a two-item list: the part's index and the offset of
        `pos` within that part. Raises IndexError if `pos` lies at or
        beyond the total length.
        """
        search = 0
        for i, p in enumerate(self._parts):
            if pos < search + len(p):
                return [i, pos - search]
            search += len(p)
        raise IndexError("%s bigger than %s" % (pos, search))

    def _slice(self, start, stop, step):
        """
        Build a slice by extracting the relevant parts from this
        twine, slicing them if necessary, and returning a new
        Twine made from them.

        `start`, `stop` and `step` are normalised values as produced by
        slice.indices(); `stop` may equal the total length. Parts that
        are selected whole with step 1 are reused unchanged.

        For a negative step the result is a single atomic Twine holding
        the selected text, with the non-one-to-one span covering the
        source range the slice runs over.
        """
        if step < 0:
            # stop == -1 means "run through index 0", not "last index".
            text = u"".join(self._parts)[
                start:(stop if stop >= 0 else None):step]
            covered = self._slice(stop + 1, start + 1, 1).span
            if covered is not None:
                covered = covered.notOneToOne()
            return Twine(text, covered)

        pieces = []
        offset = 0      # index in the whole twine of the part's first char
        wanted = start  # index in the whole twine of the next char to take
        for part in self._parts:
            if wanted >= stop:
                break
            size = len(part)
            if wanted < offset + size:
                lo = wanted - offset
                hi = min(stop - offset, size)
                if step == 1 and lo == 0 and hi == size:
                    pieces.append(part)
                else:
                    pieces.append(part._slice(lo, hi, step))
                # Skip ahead to the first selected index past this piece,
                # carrying the step phase into the following parts.
                wanted += step * ((hi - lo + step - 1) // step)
            offset += size
        return Twine.fromParts(tuple(pieces))

    def __repr__(self):
        return repr(u''.join(self._parts))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment