Last active
April 17, 2017 07:14
-
-
Save njsmith/7af696f98272f21294ebd0a7dbf268d4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A modest proposal for making libraries like urllib3 async-friendly. | |
# Step 1: we need some way to abstract over different network APIs. We
# encapsulate that into a class providing the basic networking operations we | |
# need. Two unusual but crucial points: | |
# - these methods are all semantically blocking | |
# - they're all async-colored (but wait until the end before judging this) | |
import abc | |
class AbstractNetworkIO(abc.ABC):
    """Interface for the basic networking operations a backend must provide.

    Every method is declared ``async``. Sockets are passed around as opaque
    objects: they are produced by ``create_connected_socket`` and handed
    back to ``sendall`` / ``recv`` on the same backend.
    """

    @abc.abstractmethod
    async def create_connected_socket(self, host, port):
        """Connect to ``host:port`` and return an opaque socket object."""

    @abc.abstractmethod
    async def sendall(self, sock, data):
        """Send all of ``data`` over the opaque socket object ``sock``."""

    @abc.abstractmethod
    async def recv(self, sock, bufsize):
        """Receive data from ``sock``; ``bufsize`` is the receive size."""
# Here are some examples of what concrete implementations might look like.
# Note that while the methods on the BlockingNetworkIO class are
# async-colored, they never actually yield:
import socket | |
class BlockingNetworkIO(AbstractNetworkIO):
    """NetworkIO backend built on the stdlib's blocking ``socket`` module.

    The methods are declared ``async`` to match AbstractNetworkIO, but none
    of them contains an ``await``, so they never actually suspend.
    """

    def __init__(self, timeout):
        """Remember ``timeout`` (seconds, or None) for sockets created later."""
        self.timeout = timeout

    async def create_connected_socket(self, host, port):
        """Return a stdlib socket connected to ``host:port``.

        The timeout is handed to ``socket.create_connection`` so that it
        applies to the connection attempt itself as well as to later
        operations on the returned socket, instead of only being set after
        the connect has already completed.
        """
        return socket.create_connection((host, port), timeout=self.timeout)

    async def sendall(self, sock, data):
        """Send all of ``data`` on ``sock`` using ``sock.sendall``."""
        return sock.sendall(data)

    async def recv(self, sock, bufsize):
        """Return the result of ``sock.recv(bufsize)``."""
        return sock.recv(bufsize)
# The trio backend is optional: TrioNetworkIO is only defined when trio can
# be imported.
try:
    import trio
except ImportError:
    pass
else:
    class TrioNetworkIO(AbstractNetworkIO):
        # NetworkIO backend that delegates to trio socket objects; unlike
        # the blocking backend, these methods really do await.
        # NOTE(review): resolve_remote_address/sendall are the trio socket
        # methods this sketch was written against -- confirm they exist in
        # the trio version actually installed.

        async def create_connected_socket(self, host, port):
            # Create a trio socket, resolve host/port to an address, then
            # connect to it and hand the socket back.
            sock = trio.socket.socket()
            addr = await sock.resolve_remote_address(host, port)
            await sock.connect(addr)
            return sock

        async def sendall(self, sock, data):
            # Forward to the socket's own awaitable sendall.
            return await sock.sendall(data)

        async def recv(self, sock, bufsize):
            # Forward to the socket's own awaitable recv.
            return await sock.recv(bufsize)
# Step 2: implement the core logic on top of these backends. Compared to a | |
# naive blocking implementation, there are two changes: | |
# | |
# - the outermost API functions need to accept a NetworkIO instance, and | |
# thread it down the call stack. | |
# | |
# - many functions become async-colored, because they transitively call one of | |
# the NetworkIO methods. However, the actual *logic* is identical to what it | |
# would be in a naive blocking implementation: the only difference is the | |
# addition of some "async" and "await" keywords at appropriate places. | |
# I'm a little too lazy to write an actual http client here so you get an echo | |
# client instead | |
async def abstract_echo_client(nio, host, port):
    """Send a greeting to an echo server and print whatever comes back.

    All network operations go through ``nio`` (an object providing the
    ``create_connected_socket`` / ``sendall`` / ``recv`` coroutine methods),
    and the socket is treated as an opaque handle. Calling methods on the
    socket directly would bypass the backend and break with backends whose
    sockets are not awaitable (e.g. stdlib blocking sockets).

    Reads until as many bytes as were sent have been received, or until the
    peer closes the connection (``recv`` returns an empty value), then
    prints the received bytes. Returns None.
    """
    sock = await nio.create_connected_socket(host, port)
    HELLO = b"hello!"
    await nio.sendall(sock, HELLO)
    received = bytearray()
    while len(received) < len(HELLO):
        data = await nio.recv(sock, 4096)
        if not data:
            # Empty read: the peer closed the connection early.
            break
        received += data
    print("got", received)
# Step 3: write little shim layers so that end-users don't have to be exposed | |
# to the NetworkIO objects, and to paper over any little differences in idiom | |
# between different backends. This is the main public API that users interact | |
# with. | |
# | |
# For the blocking backend, we want to erase the async coloration, which is | |
# really easy because we know that the coroutine stack never actually yields, | |
# so we can use a trivial coroutine runner. We also want to implement the | |
# weird timeout parameter that is commonly expected for code using stdlib | |
# blocking sockets. | |
# | |
# For trio, the idiomatic thing is not to take a timeout parameter, but | |
# instead inherit it from ambient context, so it's pretty trivial. | |
# A trivial coroutine runner, for coroutines that are secretly synchronous. | |
def run_unyielding_coro(fn, *args, **kwargs):
    """Run a coroutine function that is expected never to suspend.

    Calls ``fn(*args, **kwargs)`` to obtain a coroutine and steps it once.
    If it runs to completion, its return value is returned.

    Raises:
        RuntimeError: if the coroutine suspends instead of finishing. The
            coroutine is closed first so that its pending ``finally``
            blocks / context managers run instead of being left suspended.
    """
    coro = fn(*args, **kwargs)
    try:
        coro.send(None)
    except StopIteration as exc:
        # Normal completion: the return value travels on StopIteration.
        return exc.value
    # The coroutine is suspended at a yield point; unwind it before failing.
    coro.close()
    raise RuntimeError("coroutine yielded? can't happen")
def blocking_echo_client(host, port, timeout=None):
    """Synchronous public entry point for the echo client.

    Builds a BlockingNetworkIO backend with the given ``timeout`` and drives
    abstract_echo_client to completion with run_unyielding_coro, returning
    whatever that returns.
    """
    return run_unyielding_coro(
        abstract_echo_client, BlockingNetworkIO(timeout), host, port
    )
async def trio_echo_client(host, port):
    """Trio public entry point for the echo client.

    Takes no timeout parameter; it simply awaits abstract_echo_client with
    a fresh TrioNetworkIO backend and returns its result.
    """
    backend = TrioNetworkIO()
    outcome = await abstract_echo_client(backend, host, port)
    return outcome
# Okay! This is a general pattern for writing protocol libraries that do I/O, | |
# but support switchable backends, on Python 3.5+. Now the $1e6 question: what | |
# about Python 2? Dropping support for python 2 is, alas, probably not in the | |
# cards quite yet. | |
# | |
# Well... remember how we pointed out that the only difference between the | |
# code we wrote, and the code we would have written if we were only targeting | |
# blocking socket I/O, was that our code has some extra "async" and "await" | |
# keywords? | |
# Step 4: as part of our release process, run a simple script over the code to | |
# delete all instances of the "async" and "await" keywords, and package that | |
# up as the python 2 release (!!). This can be done reliably using the | |
# tokenize module. (Or lib2to3 I suppose, if you want to really pull out the | |
# big guns. But tokenize should be simpler and still do the job.)
# There are a few wrinkles. | |
# | |
# Wrinkle 1: Having re-colored abstract_echo_client and everything below it as | |
# synchronous, we need to get rid of the coroutine code inside the blocking | |
# shim layer. This can be done by adding a few lines to the definition of | |
# run_unyielding_coro; everything else stays the same: | |
async def probefn():
    return None


# Calling probefn() gives a coroutine object while the "async" keyword is
# still present, and plain None once the keywords have been stripped.
_probe_result = probefn()
# True iff this code still has its async coloring (i.e. it has NOT been
# recolored as synchronous by the keyword-stripping script).
HAVE_ASYNC = (_probe_result is not None)
if HAVE_ASYNC:
    # Close the probe coroutine so it is not reported as "never awaited".
    _probe_result.close()
del _probe_result
def run_unyielding_coro(fn, *args, **kwargs):
    """Run ``fn(*args, **kwargs)`` to completion and return its result.

    When HAVE_ASYNC is false, ``fn`` is an ordinary function, so it is
    simply called. Otherwise ``fn`` returns a coroutine that is expected
    never to suspend; it is stepped once and its return value is returned.

    Raises:
        RuntimeError: if the coroutine suspends instead of finishing. The
            coroutine is closed first so that its pending ``finally``
            blocks / context managers run instead of being left suspended.
    """
    if not HAVE_ASYNC:
        return fn(*args, **kwargs)
    coro = fn(*args, **kwargs)
    try:
        coro.send(None)
    except StopIteration as exc:
        # Normal completion: the return value travels on StopIteration.
        return exc.value
    # The coroutine is suspended at a yield point; unwind it before failing.
    coro.close()
    raise RuntimeError("coroutine yielded? can't happen")
# Wrinkle 2: It won't be *quite* true that deleting async/await makes | |
# everything under abstract_echo_client into synchronous code... because there | |
# are these annoying async context managers and async iterators that have | |
# different method names than their synchronous-colored equivalents. So we | |
# also need to add some code to adapt these, maybe little decorators like: | |
def synchronizable_async_cm(cls):
    """Class decorator making an async context manager usable with ``with``.

    When the module-level HAVE_ASYNC flag is false, ``__aenter__`` and
    ``__aexit__`` are aliased to ``__enter__`` and ``__exit__`` on ``cls``.
    When HAVE_ASYNC is true the class is left untouched. The (possibly
    modified) class is returned.
    """
    # HAVE_ASYNC is the flag this file actually defines; the previously
    # referenced SYNCHRONOUS_MODE does not exist anywhere in the module.
    if not HAVE_ASYNC:
        cls.__enter__ = cls.__aenter__
        cls.__exit__ = cls.__aexit__
    return cls
# Placeholder example of an async context manager passed through the
# synchronizable_async_cm decorator; both method bodies are intentionally
# left as "..." stubs.
@synchronizable_async_cm
class SomeAsyncManager(object):
    async def __aenter__(self):
        ...

    # Takes the usual (type, value, traceback) exit arguments.
    async def __aexit__(self, cls, value, tb):
        ...
# The equivalent for iterators is a bit more convoluted because of the | |
# iterable/iterator split, but not in any deep way. | |
# | |
# An alternative approach would be to adapt our script to also replace the | |
# tokens __aenter__/__aexit__/__aiter__/__anext__ and StopAsyncIteration with | |
# their sync equivalents. These are just names and not keywords so our script | |
# would in theory be a little bit less robust – it could have false positives | |
# if someone used these as a regular variable name or false negatives if | |
# someone were to like, setattr(someclass, "__aenter__", ...). But in practice | |
# I bet it would work pretty well. | |
# I think that's all the wrinkles? | |
# Conclusion: | |
# | |
# This is potentially a general technique for writing I/O-ful protocol | |
# libraries that expose an idiomatic synchronous API on python 2/3, and also | |
# idiomatic asynchronous APIs on python 3.5+, with support for multiple
# async backends. | |
# | |
# Downsides: | |
# - code generation is always a bit ugly. In this case the transformation is | |
# particularly transparent (literally nothing but a token-wise | |
# s/async|await//g), but it's still a downside. | |
# | |
# - no support for async on pre-3.5 pythons (so e.g., no | |
# urllib3-on-twisted-on-python-2 -- but urllib3-on-twisted-on-python-3 | |
# *would* be supported) | |
# | |
# Upsides: | |
# - did I mention idiomatic public APIs? trying to use py2-compatible | |
# callbacks internally and expose a nice async/await and backend-agnostic | |
# API externally is going to be super annoying, I suspect? | |
# | |
# - essentially no code duplication, except for whatever's needed at the | |
# "shim" layer to support multiple backends (really more an issue of | |
# supporting multiple backends than the sync/async split per se).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment