This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
deactive_proxy () { | |
if [ ! "$1" = "nondestructive" ] ; then | |
unset -f deactive_proxy | |
if [ -n "$_OLD_HTTP_PROXY" ] ; then | |
http_proxy="$_OLD_HTTP_PROXY" | |
export http_proxy | |
unset _OLD_HTTP_PROXY | |
else |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
The path is a directory by default | |
''' | |
def hdfs_list(path, subtract_one=True): | |
fs = spark._jvm.org.apache.hadoop.fs.FileSystem.get(spark._jsc.hadoopConfiguration()) | |
list_status = fs.listStatus(spark._jvm.org.apache.hadoop.fs.Path(path)) | |
# file.getPath().getName(), file.getBlockSize(), file.getLen() | |
files_size = [file.getLen() for file in list_status] | |
totol_size_in_MB = sum(files_size) / 1024.0 / 1024.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Abort on the first failing command (-e) and trace each command (-x).
set -ex

# Method 1: print every date in the closed range [2020-01-01, 2020-01-31]
# by adding 0..30 days to the start date.
# NOTE: `-I` and `-d "<date> +N days"` are GNU date syntax.
for i in {0..30}; do
    thedate=$(date -I -d "2020-01-01 +$i days")
    # Quoted so the value is never subject to word splitting or globbing.
    echo "$thedate"
done
# method 2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def unique_everseen(iterable, key=None): | |
"List unique elements, preserving order. Remember all elements ever seen." | |
# unique_everseen('AAAABBBCCDAABBB') --> A B C D | |
# unique_everseen('ABBCcAD', str.lower) --> A B C D | |
from itertools import filterfalse | |
seen = set() | |
seen_add = seen.add | |
if key is None: | |
for element in filterfalse(seen.__contains__, iterable): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
*~ | |
*.swp | |
*.pyc | |
*.pyo | |
__pycache__/ | |
*.bak | |
*.backup | |
bak/ | |
backup/ | |
*.tmp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Enable mouse support
set -g mouse on
# Maximum number of lines kept in the scrollback history
set-option -g history-limit 10000
# Inside tmux, pressing prefix + r reloads this configuration file
bind r source-file ~/.tmux.conf \; display "Reloaded!"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class dotdict(dict):
    """Dictionary whose items can also be accessed with dot notation.

    ``d.key`` reads ``d["key"]``, ``d.key = value`` stores ``d["key"]`` and
    ``del d.key`` removes ``d["key"]``.

    Reading a key that does not exist returns ``None`` instead of raising,
    except for absent double-underscore names (``__deepcopy__``,
    ``__getstate__`` ...), which raise ``AttributeError`` so that protocol
    probes done with ``hasattr``/``getattr`` are not answered with ``None``.
    Deleting a missing key raises ``KeyError`` (from ``dict.__delitem__``).
    """

    def __getattr__(self, name):
        # Only reached when normal attribute lookup has already failed,
        # so real dict methods (get, items, ...) are never shadowed.
        try:
            return self[name]
        except KeyError:
            if name.startswith("__") and name.endswith("__"):
                raise AttributeError(name) from None
            return None

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
class TrieTree: | |
""" 字典树 """ | |
class Node: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from glob import glob | |
import multiprocessing | |
from tqdm import tqdm | |
import pandas as pd | |
def json_reader(fname):
    """Load a JSON Lines source (one JSON object per line) into a DataFrame.

    Args:
        fname: Path or file-like object handed straight to ``pd.read_json``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_json(..., lines=True)``.
    """
    return pd.read_json(fname, lines=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_yesterday(n_days_ago=1, FMT='%Y-%m-%d'):
    """Return the local date ``n_days_ago`` days before now as a string.

    Args:
        n_days_ago: Number of days subtracted from the current local time
            (the default 1 gives yesterday, 0 gives today).
        FMT: ``strftime`` format used to render the result.

    Returns:
        The resulting date formatted with ``FMT``.
    """
    import datetime

    target = datetime.datetime.now() - datetime.timedelta(days=n_days_ago)
    return target.strftime(FMT)
def dateAdd(thedate, num, FMT='%Y-%m-%d'): | |
import datetime | |
strptime, strftime = datetime.datetime.strptime, datetime.datetime.strftime |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import base64 | |
from IPython.core.display import display, HTML | |
# pip install facets_overview | |
from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator | |
def display_overview(df_train, df_test=None): | |
gfsg = GenericFeatureStatisticsGenerator() | |
if df_test is not None: | |
proto = gfsg.ProtoFromDataFrames([{'name': 'train', 'table': df_train}, |
OlderNewer