Skip to content

Instantly share code, notes, and snippets.

@xialu4820723
Last active April 25, 2023 14:46
Show Gist options
  • Save xialu4820723/2bdafee2422752b85858bc49f01deb7d to your computer and use it in GitHub Desktop.
Save xialu4820723/2bdafee2422752b85858bc49f01deb7d to your computer and use it in GitHub Desktop.
util snippet
# csv sort and remove column
sort -h --field-separator=',' 374160.csv | cut -d, -f6 --complement | less
# shell print for download csv file
sort -h --field-separator=',' 1532881.csv | cut -d, -f6 --complement | perl -pe 's/((?<=,)|(?<=^)),/ ,/g;' | column -t -s, | less -S
# run a locally defined function on a remote machine
ssh ${machine} "$(typeset -f function_name); function_name"
# kill process
# Lists every process, keeps the lines that contain both "flume" and "java",
# drops any line containing "grep" (the filter commands of this pipeline),
# prints the second column of what is left (the PID in `ps aux` output) and
# hands those PIDs to `kill`, which sends its default signal.
ps -aux | grep flume | grep java | grep -v grep | awk '{print $2}' | xargs kill
# multi folder proto compile
# Runs protoc once for every immediate sub-directory of the current directory,
# compiling that directory's *.proto files with Go output rooted at ".".
# A glob is used instead of `$(ls)` so that a directory name containing
# whitespace or glob characters stays a single word.
for filename in *; do
  # Skip non-directories (this also skips the literal "*" that an empty
  # directory leaves behind when the glob matches nothing).
  [ -d "${filename}" ] || continue
  printf '%s\n' "${filename}"
  protoc "${filename}"/*.proto --go_out=.
done
# branch delete
# Deletes branches on the remote "origin": lists the remote-tracking branches,
# drops every line containing "master" or "develop" (a substring match, so a
# name such as "develop-foo" is kept as well), strips the "origin/" prefix and
# pushes an empty source ref (`:name`) for each remaining branch name.
# NOTE(review): destructive — run the pipeline without the final xargs stage
# first to preview which branches would be removed.
git branch -r | grep -v -e master -e develop | sed 's/origin\///' | xargs -I {} git push origin :{}
# sqlite3 to mysql
# https://github.com/athlite/sqlite3-to-mysql
sqlite3 login.db .dump > sqlite.sql && bash sqlite3-to-mysql.txt sqlite.sql > mysql.sql && rm sqlite.sql
# move mongo collection
mongoexport -h ${mongo_host} -d spider -c spider_tasks | mongoimport -h ${mongo_host} -d editortool -c spider_tasks --drop
# move all files except backup to backup
# Iterates with a glob instead of parsing `ls` through xargs, so names that
# contain quotes, backslashes or newlines are moved intact.
for entry in *; do
  case "${entry}" in
    # Same filter as `grep -v backup`: skip every name containing "backup".
    *backup*) continue ;;
  esac
  # An empty directory leaves the pattern "*" unexpanded; skip that placeholder.
  [ -e "${entry}" ] || [ -L "${entry}" ] || continue
  # The trailing slash makes mv fail when the backup directory is missing,
  # instead of renaming the first file to "backup".
  mv -- "${entry}" backup/
done
# Make `mvn` resolvable: when the shell cannot find an `mvn` command, prepend
# /data/apache-maven-3.5.3/bin to PATH and export it. PATH is left untouched
# when `mvn` is already available.
add_mvn_path_if_not() {
  if ! command -v mvn >/dev/null; then
    export PATH=/data/apache-maven-3.5.3/bin:$PATH
  fi
}
# get the absolute path of the directory containing this script
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
# input params parse
# Consumes all positional parameters of the script. Recognised flags, in any
# order:
#   -pack | --package   sets skip_package=false
#   -push | --push      sets skip_push=false
# Any other argument prints an error and the usage line to stderr and exits
# with status 1. Both skip_* variables default to true.
skip_package=true
skip_push=true
# Loop on the argument count rather than on "$1" being non-empty: an empty
# argument is then reported as unknown instead of silently ending the parse
# (and hiding every flag after it), and no unset "$1" is read under `set -u`.
while [ "$#" -gt 0 ]; do
  PARAM=$1
  case "${PARAM}" in
    -pack | --package)
      skip_package=false
      ;;
    -push | --push)
      skip_push=false
      ;;
    *)
      # Diagnostics go to stderr so they do not pollute captured stdout.
      echo "ERROR: unknown parameter \"$PARAM\"" >&2
      echo "USAGE: indexer|ugc|serving|cpp [-pack][-push]" >&2
      exit 1
      ;;
  esac
  shift
done
# Dump a single table (table_name of database dbname) from the MySQL server on
# localhost into table_name.sql; the bare -p makes mysqldump prompt for the
# password. The dump file is then fed to mycli_dev on stdin.
# NOTE(review): mycli_dev is not defined in this snippet — presumably an alias
# or wrapper for a client connected to the dev database; confirm.
mysqldump -h localhost -uuser -p dbname table_name > table_name.sql
mycli_dev < table_name.sql
# Search only Go source files, excluding Go unit-test sources. Official documentation: https://www.jetbrains.com/help/go/settings-scopes.html#f511650a
file[engine]:*/*.go&&!file[engine]:*/*test.go
# find duplicate fields in mongo
# Aggregation pipeline that reports every `url` value shared by more than one
# document, together with the `_id`s of the documents that share it.
pipeline = [
# Stage 1: group by `url`; collect the distinct `_id`s and count the members.
{'$group': {
'_id': '$url',
'uniqueIds': {'$addToSet': '$_id'},
'count': {'$sum': 1},
}},
# Stage 2: keep only the groups that contain more than one document.
{'$match': {'count': {'$gt': 1}}}
]
# NOTE(review): `s` is not defined in this snippet — presumably a pymongo
# collection object; confirm.
result = s.aggregate(pipeline)
# print source code of python function
import inspect
lines = inspect.getsource(foo)
print(lines)
# python3 compatible with mysqldb
import pymysql
pymysql.install_as_MySQLdb()
import MySQLdb
conn = MySQLdb.connect(host='...',
<connection info>,
cursorclass=MySQLdb.cursors.DictCursor)
# log quick start
import logging
logging.basicConfig(level = logging.INFO,
format = '%(asctime)s [%(name)s] %(levelname)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S')
logging.info("start")
# fetch kafka
# Read messages from the 'logtopic' Kafka topic and pretty-print the JSON
# records whose 'module' field equals 'module_name'.
from kafka import KafkaConsumer
import json
from pprint import pprint

bootstrap = 'bootstrap host name'
kafka_consumer = KafkaConsumer('logtopic',
                               group_id='test',
                               bootstrap_servers=bootstrap)
for log_message in kafka_consumer:
    # The message value is bytes; decode it as UTF-8 before parsing the JSON.
    log_json = json.loads(log_message.value.decode('utf-8'))
    # .get() lets records without a 'module' key be skipped instead of
    # aborting the loop with a KeyError.
    if log_json.get('module') == 'module_name':
        pprint(log_json)
# pandas gist
# %run ../library_py3.ipynb
import os
import sys
# sys.path entries are used verbatim: "~" is a shell expansion that Python's
# import machinery does not perform, so resolve the home directory explicitly
# or the directory is never searched.
sys.path.insert(0, os.path.expanduser("~/.local/lib/python3.5/site-packages"))
import pandas as pd
# Do not wrap wide frames across several lines in the text representation.
pd.set_option('display.expand_frame_repr', False)
import numpy as np
from IPython.display import display, HTML


def pp(df):
    """Show `df` in the notebook by displaying the HTML produced by `df.to_html()`."""
    display(HTML(df.to_html()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment