Skip to content

Instantly share code, notes, and snippets.

@iamalbert
Last active October 25, 2016 05:57
Show Gist options
  • Save iamalbert/99ab3fa64f1d7442adfec7290d996edf to your computer and use it in GitHub Desktop.
Save iamalbert/99ab3fa64f1d7442adfec7290d996edf to your computer and use it in GitHub Desktop.
Python Collection
#!/usr/bin/env python3
import argparse
import json
import sys
import collections
import itertools
def main(args):
    """Placeholder entry point: accept the parsed arguments and do nothing.

    Args:
        args: The namespace returned by ``ArgumentParser.parse_args()`` in
            the script's ``__main__`` section.

    Returns:
        None.
    """
    return None
# usage: xx.py [-h] INPUT [OUTPUT]
if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    # Required positional: opened for reading ("-" selects stdin).
    ap.add_argument('input', type=argparse.FileType('r'))
    # Optional positional: opened for writing; defaults to "-" (stdout).
    ap.add_argument('output', type=argparse.FileType('w'),
                    default="-", nargs="?")
    args = ap.parse_args()
    # Debug aid: show the parsed namespace on stderr.
    print(args, file=sys.stderr)
    try:
        main(args)
    finally:
        # argparse.FileType opens the files but nothing ever closes them.
        # Close them here so buffered output is flushed deterministically,
        # but leave the process-wide standard streams alone.
        for stream in (args.input, args.output):
            if stream is not sys.stdin and stream is not sys.stdout:
                stream.close()
import jieba
# Point jieba at the dictionary file 'data/dict.txt.big'. The path is
# relative, so it is presumably resolved against the current working
# directory -- confirm the file exists there before running.
jieba.set_dictionary('data/dict.txt.big')
import re  # required by CJK_REGEXP; the snippet used `re` without importing it

# Coarse chunking pattern. At each position the alternatives are tried in
# this order:
#   1. a run of characters in the range U+4E00..U+FAFF,
#   2. a run of ASCII letters and digits,
#   3. any other single character that is not an ASCII space.
# ASCII spaces themselves are never matched, so they act as separators.
CJK_REGEXP = re.compile(
    '[\u4e00-\ufaff]+'
    '|[A-Za-z0-9]+'
    '|[^ ]'
)
def CJK_chunk(s):
    """Split *s* into chunks with ``CJK_REGEXP`` and discard blank ones.

    The pattern's catch-all alternative matches any single character other
    than an ASCII space, which includes tabs and newlines. Stripping such a
    match leaves an empty string, so empty results are filtered out instead
    of being returned as chunks.

    Args:
        s: The text to split.

    Returns:
        A list of non-empty, whitespace-stripped chunks in the order they
        appear in *s*.
    """
    stripped = (piece.strip() for piece in CJK_REGEXP.findall(s))
    return [piece for piece in stripped if piece]
def tokenize(s):
    """Tokenize *s* by chunking it and segmenting every chunk with jieba.

    Args:
        s: The text to tokenize.

    Returns:
        A flat list holding, in order, everything ``jieba.cut`` yields for
        each chunk produced by ``CJK_chunk(s)``.
    """
    return [token for piece in CJK_chunk(s) for token in jieba.cut(piece)]
$ pip install pipreqs
$ pipreqs /path/to/project
$ pipreqs --force /path/to/project # when requirements.txt already exists
def chunked(iterable, n):
    """Yield successive tuples of up to *n* items taken from *iterable*.

    The final tuple may be shorter than *n*. Nothing is yielded for an
    empty iterable.

    Args:
        iterable: Any iterable to consume.
        n: Maximum number of items per yielded tuple.

    Yields:
        Tuples of consecutive items, in input order.
    """
    source = iter(iterable)

    def take():
        return tuple(itertools.islice(source, n))

    # Two-argument iter() keeps calling take() until it returns the
    # sentinel (), i.e. until the source has nothing left to give.
    for batch in iter(take, ()):
        yield batch
def windowed(iterable, n, step=1):
    """Yield length-*n* tuples from *iterable*, advancing *step* items each time.

    Consecutive windows start *step* items apart. A trailing group with
    fewer than *n* items is not yielded.

    Args:
        iterable: Any iterable to consume.
        n: Number of items in every yielded window.
        step: Distance between the starts of consecutive windows
            (default 1). Intended to be at least 1.

    Yields:
        Tuples of exactly *n* consecutive items.
    """
    window = ()
    skip = 0  # items still to discard before the next window may start
    for ele in iterable:
        if skip:
            skip -= 1
            continue
        window += (ele,)
        if len(window) == n:
            yield window
            # When step > n, slicing empties the window but the gap of
            # (step - n) items between windows must still be skipped;
            # otherwise the next window would start too early.
            skip = max(step - n, 0)
            window = window[step:]
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``set`` and ``frozenset`` as JSON arrays.

    Pass it as ``cls=CustomEncoder`` to ``json.dump`` / ``json.dumps``.
    The element order of the resulting array follows the set's iteration
    order, which is not guaranteed to be stable.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Args:
            obj: An object the base encoder could not serialize.

        Returns:
            A list of the elements when *obj* is a ``set`` or ``frozenset``.

        Raises:
            TypeError: Raised by the base class for any other type.
        """
        if isinstance(obj, (set, frozenset)):
            return list(obj)
        return super().default(obj)
# Example: encode a set by passing the custom encoder class via `cls`.
json.dumps({1, 2, 3, 4, 5}, cls=CustomEncoder)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment