Created
February 27, 2019 06:52
-
-
Save computercolin/52de5dc5349822ca088c40d744ef9718 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Note: in all examples, this could be a class, a function, or a for loop.
# In many cases you can replace it with a list comprehension + fn. But if the fn will never
# be used with scalar input, I usually write it as a generator.
# | |
# There is nothing that special about generators; they're just an easy way to make
# iteration producers or filters that are fairly readable.
# | |
# Can also chain together (producer, transformer, transformer, filter, etc).
# | |
## MK_USER_USER_FRIEND_FOLLOW_ABVR_MAP_FILE_TASK_GENER | |
# | |
# Object that takes paths and outputs file_tasks is simple abstraction so | |
# reasonable place to put code boundary. | |
# Makes calling code much easier to read. | |
# | |
def mk_user_user_inter_abvr_map_file_task_gener(fetchtype: str = None, data_in_dir=None,
                                                data_out_dir=None, stageno=None):
    """Yield one task dict per fetch input file found for ``fetchtype``.

    Input paths and their metadata come from ``get_fetch_infiles_fpaths_fmeta``.
    All output file names are computed up front (before the first task is
    yielded); each input is then validated and turned into a task lazily.

    Each yielded dict has the keys ``name``, ``file_dep``, ``actions`` and
    ``targets``.
    NOTE(review): this looks like the task-dict shape used by the ``doit``
    build tool -- confirm against the caller.

    Raises:
        InputDataError: when a file's metadata ``'td'`` value differs from
            ``fetchtype``.
    """
    def _out_name_for(meta):
        # The stage tag is formatted per file, so nothing is formatted when
        # there are no input files.
        return gen_out_fname_from_fetch_meta(meta, "stg-%d-abvrmap" % stageno, '.tsv')

    src_paths, src_metas = get_fetch_infiles_fpaths_fmeta(data_in_dir, fetchtype)
    dst_names = list(map(_out_name_for, src_metas))

    for src_path, meta, dst_name in zip(src_paths, src_metas, dst_names):
        # Guard clause: refuse files whose recorded data type is not the requested one.
        if meta['td'] != fetchtype:
            problem = ("Err File contains wrong data type! Expected %s"
                       " Got %s -- %s" % (fetchtype, meta['td'], meta['fname']))
            raise InputDataError(problem)

        dst_path = pathj(data_out_dir, dst_name)
        task_name = "stg%d-%s-%s" % (stageno, fetchtype, meta['sn'])
        # An action is a (callable, positional-args) pair.
        action = (extract_write_user_user_abvr_map_f,
                  (src_path, dst_path, stageno, fetchtype))
        yield {
            'name': task_name,
            'file_dep': [src_path],
            'actions': [action],
            'targets': [dst_path],
        }
## UNAME_RAND_GENER | |
# | |
# Generate and don't stop. | |
# Note: Designed for few k outputs. | |
# If this were expected to exhaust int range, would need checks, mem efficiency. | |
# | |
def uname_rand_gener(min_id=100000, max_id=999999):
    """Yield unique pseudo-random user names of the form ``u<id>.urlyte3.so.``.

    Ids are drawn uniformly from the inclusive range ``[min_id, max_id]`` and
    never repeated. Already-used ids are kept in memory and collisions are
    simply re-drawn, so this is meant for taking far fewer names than the
    size of the range.

    Args:
        min_id: smallest id that may be produced (inclusive).
        max_id: largest id that may be produced (inclusive).

    Yields:
        str: a name that has not been yielded before by this generator.

    Raises:
        ValueError: if ``min_id`` is greater than ``max_id`` (raised on the
            first ``next()``, since this is a generator).

    The generator finishes once every id in the range has been handed out,
    instead of looping forever looking for an unused id.
    """
    if min_id > max_id:
        raise ValueError("min_id (%d) must not exceed max_id (%d)" % (min_id, max_id))

    base = 'urlyte3.so.'
    id_count = max_id - min_id + 1
    visited = set()
    while len(visited) < id_count:
        # random.randint includes BOTH endpoints, so no "+ 1" on the upper bound.
        n = random.randint(min_id, max_id)
        if n in visited:
            continue
        visited.add(n)
        yield "u%d.%s" % (n, base)
### CSV_ROWREADER | |
# | |
# Utility rowreader written in relatively few lines. | |
# It is easy to read "transform" code that uses multifile_csv_reader! | |
# It is easy to read these generators. | |
# Everyone wins! | |
# | |
def mk_multifile_csv_rowreader(tgt_fpaths, delimeter=',',
                               quotechar='"', escapechar='\\', lineterminator='\n'):
    """Yield rows, parsed as csv, from each file in tgt_fpaths.

    File contents are "chained" together into one continuous iteration.
    Requires "unix" style csv (each row spans only 1 line).

    Args:
        tgt_fpaths: iterable of file paths, read in order.
        delimeter: field separator. The (misspelled) parameter name is kept
            so existing keyword callers keep working; it is forwarded to
            ``csv.reader`` under the correct ``delimiter`` keyword.
        quotechar: passed through to ``csv.reader``.
        escapechar: passed through to ``csv.reader``.
        lineterminator: used as the ``newline`` argument when opening the
            files, and also passed to ``csv.reader``.

    Yields:
        list[str]: one parsed row per input line.
    """
    linereader = adv_open_r_multifile_linereader(tgt_fpaths, text_mode=True, newline=lineterminator)
    # csv.reader only accepts the keyword spelled "delimiter"; passing
    # "delimeter" raises TypeError as soon as the generator is first advanced.
    # NOTE: per the csv docs the reader ignores lineterminator when parsing;
    # it is still passed for parity with the original call.
    csvr = csv.reader(linereader, delimiter=delimeter, quotechar=quotechar,
                      escapechar=escapechar, lineterminator=lineterminator)
    yield from csvr
def adv_open_r_multifile_linereader(fpaths, text_mode=True, newline='\n', raise_on_missing=True,
                                    gzip_override_assume_gzip=DEF_GZIP_OVERRIDE_ASSUME_GZIPPED,
                                    gzip_heur_enable_file_ext_detect=DEF_GZIP_HEUR_ENABLE_FILE_EXT_DETECT,
                                    gzip_heur_enable_file_startbytes_detect=DEF_GZIP_HEUR_ENABLE_FILE_STARTBYTES_DETECT):
    """Iterate over the lines of several files as if they were one stream.

    Each path in ``fpaths`` is opened in turn with ``adv_open_r_fh`` (which
    receives the text-mode, newline and gzip options unchanged) and its
    contents are yielded one item at a time; the handle is closed before the
    next file is opened.

    A path whose open raises ``FileNotFoundError`` is skipped when
    ``raise_on_missing`` is false; otherwise the error propagates.
    """
    # The open options are identical for every file, so collect them once.
    open_opts = dict(
        text_mode=text_mode,
        newline=newline,
        gzip_override_assume_gzip=gzip_override_assume_gzip,
        gzip_heur_enable_file_ext_detect=gzip_heur_enable_file_ext_detect,
        gzip_heur_enable_file_startbytes_detect=gzip_heur_enable_file_startbytes_detect,
    )
    for path in fpaths:
        try:
            handle = adv_open_r_fh(path, **open_opts)
        except FileNotFoundError:
            if not raise_on_missing:
                continue
            raise
        # Only the open call is guarded; errors raised while reading propagate.
        with handle:
            yield from handle
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment