Skip to content

Instantly share code, notes, and snippets.

@Miopas
Miopas / download_file_from_gcp.py
Created December 9, 2020 20:38
Download and upload files for Google Cloud Platform.
'''
To access your bucket on Linux, download the project credentials (a JSON key file) from GCP and set the following environment variable to that file's path:
> export GOOGLE_APPLICATION_CREDENTIALS='/path/to/dir/*.json'
Same for uploading files.
'''
import sys
from google.cloud import storage
def download_blob(bucket_name, source_blob_name, destination_file_name):
'''Downloads a blob from the bucket.'''
@Miopas
Miopas / download_file_from_google_drive.py
Created December 9, 2020 20:31
Download files from Google Drive in Python
import requests
import pdb
import sys
def download_file_from_google_drive(id, destination):
URL = "https://docs.google.com/uc?export=download"
session = requests.Session()
response = session.get(URL, params = { 'id' : id }, stream = True)
@Miopas
Miopas / parse_bz2.py
Created December 9, 2020 20:28
Process compressed files in Python.
'''
python parse_bz2.py *.bz2 ${dest}
'''
import sys
from bz2 import BZ2File as bzopen
import json
import pandas as pd
infile = sys.argv[1]
@Miopas
Miopas / read_zst.py
Created May 20, 2020 13:51
read zst files using stream
# python 3.6
import zstandard
import pathlib
import shutil
import os
import math
import pandas as pd
import sys
def decompress_zstandard_to_folder(input_file, destination_dir):
@Miopas
Miopas / latex.txt
Last active February 5, 2020 21:21
LaTeX tips.
# fix the order of citations
\bibliographystyle{unsrtnat}
\usepackage[numbers,sort&compress]{natbib}
# figures layout
# credit: https://tex.stackexchange.com/questions/129077/figure-in-beamer
\begin{tabular}{p{.3\textwidth} p{.7\textwidth}}
\adjincludegraphics[width=.8\linewidth,valign=t]{example-image}
&
@Miopas
Miopas / awk.sh
Last active June 8, 2020 01:21
awk quick notes
# word count
cat result | awk -F"\t" '{key=$1"\t"$2; c[key]++} END {for (i in c) print c[i],i}'
# sum, count, mean, median, min, max
# credit:https://unix.stackexchange.com/questions/13731/is-there-a-way-to-get-the-min-max-median-and-average-of-a-list-of-numbers-in
#!/bin/sh
cat test.txt | sort -n | awk '
BEGIN {
c = 0;
sum = 0;
@Miopas
Miopas / my_iTerm2.json
Created February 28, 2019 02:53
my_iTerm2.json
{
"Ansi 5 Color" : {
"Green Component" : 0.10802463442087173,
"Red Component" : 0.77738940715789795,
"Blue Component" : 0.43516635894775391
},
"Tags" : [
],
"Ansi 12 Color" : {
@Miopas
Miopas / html_entity_convert.py
Created February 27, 2019 06:48
HTML entity convert
# reference: https://stackoverflow.com/questions/2087370/decode-html-entities-in-python-string
from bs4 import BeautifulSoup
BeautifulSoup("<p>&pound;682m</p>")
# output is like:
# <html><body><p>£682m</p></body></html>
@Miopas
Miopas / gensim.py
Created February 27, 2019 06:23
Train a word2vec model with gensim
# train
import logging
import os
from gensim.models import word2vec
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
sentences = word2vec.LineSentence('/path/to/your/data')
model = word2vec.Word2Vec(sentences,
@Miopas
Miopas / neo4j.sh
Created February 25, 2019 07:29
command for neo4j
./cypher-shell -a bolt://localhost:1688 -u neo4j -p work "match(n)-[r]->(m) return n, r, m limit 10;" >/tmp/a.txt