Skip to content

Instantly share code, notes, and snippets.

@bryanhelmig
Last active November 4, 2022 16:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bryanhelmig/3d7ef811d82c99a21564101dfdc3dbaa to your computer and use it in GitHub Desktop.
Save bryanhelmig/3d7ef811d82c99a21564101dfdc3dbaa to your computer and use it in GitHub Desktop.
Handle OpenAI "bytes:" tokens (davinci, babbage, curie, ada, etc.)
def try_strip_bytes_prefix(token: str) -> str:
    r"""Drop the leading ``bytes:`` marker from an escaped-bytes token.

    A token counts as an escaped-bytes token only when it both starts with
    ``bytes:`` and contains at least one backslash (e.g. ``bytes:\xf0\x9f``).
    Any other token is handed back unchanged.

    Args:
        token: A single token string.

    Returns:
        The token without its ``bytes:`` prefix, or the original token when
        it is not an escaped-bytes token.
    """
    marker = "bytes:"
    looks_like_bytes = "\\" in token and token.startswith(marker)
    if not looks_like_bytes:
        return token
    return token[len(marker):]
def _token_to_bytes(token: str) -> bytes:
    r"""Return the raw bytes that a single token stands for.

    Escaped-bytes tokens (``bytes:`` prefix plus at least one backslash, e.g.
    ``bytes:\xf0\x9f``) are un-escaped into the bytes they spell out; every
    other token is simply UTF-8 encoded.
    """
    if token.startswith("bytes:") and "\\" in token:
        payload = token[len("bytes:"):]
        # "unicode-escape" turns each \xNN escape into the code point with
        # that value; "latin-1" then maps code points 0-255 back to single
        # bytes, giving the raw byte sequence the token describes.
        return payload.encode().decode("unicode-escape").encode("latin-1")
    return token.encode("utf-8")


def try_bytes_fix(tokens: list[str], *, errors: str = "strict") -> str:
    r"""Join tokens into text, reassembling characters split across tokens.

    A multi-byte character can arrive split over several escaped-bytes
    tokens, e.g. ``bytes: \xf0\x9f\x98`` followed by ``bytes:\x83``. Each
    token is converted to raw bytes, the bytes are concatenated, and the
    result is decoded once as UTF-8 so such pieces recombine into the real
    character (here a space followed by the U+1F603 emoji).

    Only escaped-bytes tokens are un-escaped. Ordinary tokens are kept
    verbatim, so a literal backslash in regular text (a Windows path, regex
    or source code) is not misread as an escape sequence.

    Args:
        tokens: Token strings in output order.
        errors: Error handler for the final UTF-8 decode. The default
            ``"strict"`` raises on invalid or truncated byte sequences;
            ``"replace"`` or ``"ignore"`` tolerate them.

    Returns:
        The decoded text for the whole token sequence.

    Raises:
        UnicodeDecodeError: If an escaped-bytes token holds a malformed
            escape, or if the combined bytes are not valid UTF-8 and
            ``errors`` is ``"strict"``.
        UnicodeEncodeError: If an escaped-bytes token contains an escape for
            a code point above 0xFF, which cannot be a single byte.
    """
    raw = b"".join(_token_to_bytes(token) for token in tokens)
    return raw.decode("utf-8", errors)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment