Skip to content

Instantly share code, notes, and snippets.

@Miopas
Miopas / download_file_from_gcp.py
Created Dec 9, 2020
Download and upload files for Google Cloud Platform.
View download_file_from_gcp.py
'''
To access your bucket on Linux, download the project credentials (a JSON key file) from GCP and point the environment variable at it:
> export GOOGLE_APPLICATION_CREDENTIALS='/path/to/dir/*.json'
The same setup is required for uploading files.
'''
import sys
from google.cloud import storage
def download_blob(bucket_name, source_blob_name, destination_file_name):
'''Downloads a blob from the bucket.'''
@Miopas
Miopas / download_file_from_google_drive.py
Created Dec 9, 2020
Download files from Google Drive in Python
View download_file_from_google_drive.py
import requests
import pdb
import sys
def download_file_from_google_drive(id, destination):
URL = "https://docs.google.com/uc?export=download"
session = requests.Session()
response = session.get(URL, params = { 'id' : id }, stream = True)
@Miopas
Miopas / parse_bz2.py
Created Dec 9, 2020
Process compressed files in Python.
View parse_bz2.py
'''
python parse_bz2.py *.bz2 ${dest}
'''
import sys
from bz2 import BZ2File as bzopen
import json
import pandas as pd
infile = sys.argv[1]
@Miopas
Miopas / read_zst.py
Created May 20, 2020
Read zst files using a stream
View read_zst.py
# python 3.6
import zstandard
import pathlib
import shutil
import os
import math
import pandas as pd
import sys
def decompress_zstandard_to_folder(input_file, destination_dir):
@Miopas
Miopas / latex.txt
Last active Feb 5, 2020
LaTeX tips.
View latex.txt
# fix the order of citations
\bibliographystyle{unsrtnat}
\usepackage[numbers,sort&compress]{natbib}
# figures layout
# credit: https://tex.stackexchange.com/questions/129077/figure-in-beamer
\begin{tabular}{p{.3\textwidth} p{.7\textwidth}}
\adjincludegraphics[width=.8\linewidth,valign=t]{example-image}
&
@Miopas
Miopas / awk.sh
Last active Jun 8, 2020
awk quick notes
View awk.sh
# word count
cat result | awk -F"\t" '{key=$1"\t"$2; c[key]++} END {for (i in c) print c[i],i}'
# sum, count, mean, median, min, max
# credit:https://unix.stackexchange.com/questions/13731/is-there-a-way-to-get-the-min-max-median-and-average-of-a-list-of-numbers-in
#!/bin/sh
cat test.txt | sort -n | awk '
BEGIN {
c = 0;
sum = 0;
View my_iTerm2.json
{
"Ansi 5 Color" : {
"Green Component" : 0.10802463442087173,
"Red Component" : 0.77738940715789795,
"Blue Component" : 0.43516635894775391
},
"Tags" : [
],
"Ansi 12 Color" : {
View html_entity_convert.py
# reference: https://stackoverflow.com/questions/2087370/decode-html-entities-in-python-string
from bs4 import BeautifulSoup
BeautifulSoup("<p>&pound;682m</p>")
# output is like:
# <html><body><p>£682m</p></body></html>
@Miopas
Miopas / gensim.py
Created Feb 27, 2019
Train a word2vec model with gensim
View gensim.py
# train
import logging
import os
from gensim.models import word2vec
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
sentences = word2vec.LineSentence('/path/to/your/data')
model = word2vec.Word2Vec(sentences,
@Miopas
Miopas / neo4j.sh
Created Feb 25, 2019
command for neo4j
View neo4j.sh
./cypher-shell -a bolt://localhost:1688 -u neo4j -p work "match(n)-[r]->(m) return n, r, m limit 10;" >/tmp/a.txt