Skip to content

Instantly share code, notes, and snippets.

@Dutcho
Created February 17, 2023 15:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dutcho/6be62b7c0e473f4ccbabb95687921a58 to your computer and use it in GitHub Desktop.
Save Dutcho/6be62b7c0e473f4ccbabb95687921a58 to your computer and use it in GitHub Desktop.
Minimal proof of concept for atomic and iterable string types
"""Minimal proof of concept for atomic and iterable string types, 17 Feb 2023.
Illustration for a Python Ideas discussion; see
https://discuss.python.org/t/an-interesting-pytype-experiment-and-a-possible-extension-to-strings/23749/51
"""
from collections.abc import Container, Sequence, Sized
from typing import Final, overload, Self, TYPE_CHECKING
class Char:
"""Hypothetical char type.
>>> Char(65), Char('B'), int(Char('C'))
('A', 'B', 67)
"""
if TYPE_CHECKING:
@staticmethod
def _typing_checks() -> None: # because mypy doesn't check doctests in docstrings
print(Char(65), Char('B'), int(Char('C')))
def __init__(self, codepoint: int | str) -> None:
self.codepoint: Final = codepoint if isinstance(codepoint, int) else ord(codepoint)
def __eq__(self, other: object) -> bool:
return isinstance(other, Char) and self.codepoint == other.codepoint
def __int__(self) -> int:
return self.codepoint
def __str__(self) -> str:
return chr(self.codepoint)
def __repr__(self) -> str:
return repr(str(self))
class AtomicText(Sized, Container[Char]):
    """Atomic non-iterable string (named 'text' to differentiate).

    The text is held as an immutable tuple of ``Char`` objects.  It supports
    ``len()`` and ``in`` (for a single ``Char`` only), but deliberately offers
    neither indexing nor iteration.
    Various equivalents of all string methods were omitted for brevity.

    >>> a = AtomicText('hello')
    >>> a, len(a), Char('e') in a
    ('hello', 5, True)
    >>> a[0] # below runtime error and mypy error: Value of type "AtomicText" is not indexable [index]
    Traceback (most recent call last):
    TypeError: 'AtomicText' object is not subscriptable
    """

    if TYPE_CHECKING:
        @staticmethod
        def _typing_checks() -> None:  # because mypy doesn't check doctests in docstrings
            text = AtomicText('hello')
            print(text, len(text), Char('e') in text)
            # Deliberately invalid: mirrors the doctest so mypy reports the [index] error.
            print(text[0])

    def __init__(self, plain_old_string: str = '', /) -> None:
        """Split *plain_old_string* into one ``Char`` per character."""
        codepoints = map(ord, plain_old_string)
        self.chars: Final[tuple[Char, ...]] = tuple(map(Char, codepoints))

    def __len__(self) -> int:
        """Return the number of characters."""
        return len(self.chars)

    def __contains__(self, ch: object, /) -> bool:  # semantics differ from str.__contains__
        """Return True if *ch* is a ``Char`` present in this text.

        Anything that is not a ``Char`` (including a plain ``str``) is
        reported as absent.
        """
        if not isinstance(ch, Char):
            return False
        return ch in self.chars

    def __str__(self) -> str:
        """Return the text as a plain old string."""
        return ''.join(map(str, self.chars))

    def __repr__(self) -> str:
        """Return the same repr the equivalent plain old string would have."""
        as_plain_string = str(self)
        return repr(as_plain_string)
class IterableText(AtomicText, Sequence[Char]):
    """Iterable version of Text, which closely mimics plain old string.

    Adds ``__getitem__`` on top of ``AtomicText``; the ``Sequence`` mixin then
    supplies iteration, ``reversed()``, ``index()`` and ``count()``.
    Indexing with an integer returns a ``Char``; indexing with a slice returns
    a new instance of the same class.

    >>> for t in IterableText('hello'), 'hello': # IterableText is indistinguishable from plain old string
    ...     t, t[0], t[-4:], *t
    ('hello', 'h', 'ello', 'h', 'e', 'l', 'l', 'o')
    ('hello', 'h', 'ello', 'h', 'e', 'l', 'l', 'o')

    Index objects that only implement ``__index__`` work as they do for str:

    >>> class One:
    ...     def __index__(self):
    ...         return 1
    >>> IterableText('hello')[One()], 'hello'[One()]
    ('e', 'e')
    """

    if TYPE_CHECKING:
        @staticmethod
        def _typing_checks() -> None:  # because mypy doesn't check doctests in docstrings
            for t in IterableText('hello'), 'hello':
                print(t, t[0], t[-4:], *t)

    @overload
    def __getitem__(self, index: int, /) -> Char:
        ...

    @overload
    def __getitem__(self, sl: slice, /) -> Self:
        ...

    def __getitem__(self, item: int | slice, /) -> Char | Self:
        """Return the ``Char`` at an index, or a new text for a slice.

        Raises:
            IndexError: if an integer index is out of range.
            TypeError: if *item* is neither a slice nor usable as an index.
        """
        # Test for slice rather than int: anything else is handed to the tuple,
        # which accepts any object implementing __index__ and raises the usual
        # TypeError/IndexError otherwise.
        if isinstance(item, slice):
            cls = type(self)  # keep the subclass type for the sliced result
            return cls(''.join(map(str, self.chars[item])))
        return self.chars[item]
if __name__ == '__main__':
    # Executed as a script: run every doctest embedded in this module's docstrings.
    import doctest

    doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment