Skip to content

Instantly share code, notes, and snippets.

🚀
Working from Earth...

Mindey mindey

Block or report user

Report or block mindey

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View schematize.py
### For finding schema
def schematize(obj):
    """
    Get schema of nested JSON, assuming first item in lists.

    Dicts are mapped key by key. Lists are reduced to a one-element list
    holding the schema of their first item (an empty list stays empty).
    Any other value matches neither branch and is represented as ``None``.
    """
    if isinstance(obj, dict):
        return {k: schematize(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        # Slice before recursing: only the first item contributes to the
        # result, so the remaining items are never walked.
        return [schematize(elem) for elem in obj[:1]]
    # Non-container values carry no nested structure.
    return None
View schematize.py
### For finding schema
def schematize(obj):
    """
    Get schema of nested JSON, assuming first item in lists.

    Dicts are mapped key by key. Lists are reduced to a one-element list
    holding the schema of their first item (an empty list stays empty).
    Any other value matches neither branch and is represented as ``None``.
    """
    if isinstance(obj, dict):
        return {k: schematize(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        # Slice before recursing: only the first item contributes to the
        # result, so the remaining items are never walked.
        return [schematize(elem) for elem in obj[:1]]
    # Non-container values carry no nested structure.
    return None
View md_language_splitter_autodetect.py
import os
import collections
import langdetect
LANGUAGE_CODES = os.listdir(langdetect.PROFILES_DIRECTORY)
def detect_language(text, max_length=2):
""" Make sure we return N-letter keys for languages"""
shorter = {'zh-cn': 'cn', 'zh-tw': 'zh'}
code = langdetect.detect(text)
View xarray_fundamentals.py
import pandas
import xarray
ds = xarray.Dataset(
{'x': ([None], [1,2,3] ),
'y': ([None], [4,5,6] )},
)
# *is equivalent to*
View parallelize_df.py
import pandas
from dask import dataframe
from dask.diagnostics import ProgressBar
def parallel_apply(df, func, progress=True, chunkrows=100, scheduler_address=None, *args, **kwargs):
if scheduler_address:
from dask.distributed import Client
client = Client(scheduler_address)
View complex_frames.py
# Practically, it's useful if we have complex observations (rows) and variables (columns):
df = pandas.DataFrame(
data=pandas.np.array(
[[1,2,3,4,5],
[6,7,8,9,10],
[11,12,13,14,15]]).T,
index=pandas.MultiIndex.from_arrays(
[['x','x','x','y','z'],
['a','a','b','b','c'],
View grouped_apply.py
import pandas
import multiprocessing
def apply_parallel(grouped_df, func):
    """Apply ``func`` to every group in worker processes and concatenate.

    Iterating ``grouped_df`` must yield ``(name, group)`` pairs; the name is
    ignored and only the group is handed to ``func``. The pool is sized with
    ``multiprocessing.cpu_count()`` and the per-group results are combined
    with ``pandas.concat``.

    NOTE(review): ``func`` and the groups are sent to worker processes, so
    they presumably need to be picklable -- confirm for your callers.
    """
    worker_count = multiprocessing.cpu_count()
    with multiprocessing.Pool(worker_count) as pool:
        frames = [frame for _, frame in grouped_df]
        results = pool.map(func, frames)
    return pandas.concat(results)
View keybase.md

Keybase proof

I hereby claim:

  • I am mindey on github.
  • I am mindey (https://keybase.io/mindey) on keybase.
  • I have a public key whose fingerprint is 5AFD B16B 8980 5133 F450 688B DA58 0D1D 5F5C C7AD

To claim this, I am signing this object:

View keybase.md

Keybase proof

I hereby claim:

  • I am mindey on github.
  • I am mindey (https://keybase.io/mindey) on keybase.
  • I have a public key whose fingerprint is 5AFD B16B 8980 5133 F450 688B DA58 0D1D 5F5C C7AD

To claim this, I am signing this object:

View recordscreen.py
#!/usr/bin/env python
""" A simple screen-capture utility. Utilizes ffmpeg or avconv with h264
support. By default it captures the entire desktop.
"""
################################ LICENSE BLOCK ################################
# Copyright (c) 2011 Nathan Vegdahl
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
You can’t perform that action at this time.