# Custom tokenizer: adding a special-case rule to the spaCy tokenizer
from spacy.symbols import ORTH

# NOTE(review): `nlp` is not defined in this snippet; it is presumably a loaded
# spaCy pipeline created earlier -- confirm.

# Tokenize with the default rules, before any special case is registered.
doc = nlp("gimme that book")
print([w.text for w in doc])

# Register a special case so the tokenizer splits "gimme" into "gim" + "me".
special_case = [{ORTH: "gim"}, {ORTH: "me"}]
nlp.tokenizer.add_special_case("gimme", special_case)

# Tokenize again: the Doc built above was created before the rule was added and
# is not updated retroactively, so printing it would only repeat the old tokens.
doc = nlp("gimme that book")
print([w.text for w in doc])
