This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
To access your bucket in Linux, you need to download the project credential from GCP and set the environment variable as | |
> export GOOGLE_APPLICATION_CREDENTIALS='/path/to/dir/*.json' | |
Same for uploading files. | |
''' | |
import sys | |
from google.cloud import storage | |
def download_blob(bucket_name, source_blob_name, destination_file_name): | |
'''Downloads a blob from the bucket.''' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import pdb | |
import sys | |
def download_file_from_google_drive(id, destination): | |
URL = "https://docs.google.com/uc?export=download" | |
session = requests.Session() | |
response = session.get(URL, params = { 'id' : id }, stream = True) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
python parse_bz2.py *.bz2 ${dest} | |
''' | |
import sys | |
from bz2 import BZ2File as bzopen | |
import json | |
import pandas as pd | |
infile = sys.argv[1] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# python 3.6 | |
import zstandard | |
import pathlib | |
import shutil | |
import os | |
import math | |
import pandas as pd | |
import sys | |
def decompress_zstandard_to_folder(input_file, destination_dir): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# fix the order of citations | |
\bibliographystyle{unsrtnat} | |
\usepackage[numbers,sort&compress]{natbib} | |
# figures layout | |
# credit: https://tex.stackexchange.com/questions/129077/figure-in-beamer | |
\begin{tabular}{p{.3\textwidth} p{.7\textwidth}} | |
\adjincludegraphics[width=.8\linewidth,valign=t]{example-image} | |
& |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# word count | |
cat result | awk -F"\t" '{key=$1"\t"$2; c[key]++} END {for (i in c) print c[i],i}' | |
# sum, count, mean, median, min, max | |
# credit: https://unix.stackexchange.com/questions/13731/is-there-a-way-to-get-the-min-max-median-and-average-of-a-list-of-numbers-in | |
#!/bin/sh | |
cat test.txt | sort -n | awk ' | |
BEGIN { | |
c = 0; | |
sum = 0; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"Ansi 5 Color" : { | |
"Green Component" : 0.10802463442087173, | |
"Red Component" : 0.77738940715789795, | |
"Blue Component" : 0.43516635894775391 | |
}, | |
"Tags" : [ | |
], | |
"Ansi 12 Color" : { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# reference: https://stackoverflow.com/questions/2087370/decode-html-entities-in-python-string | |
from bs4 import BeautifulSoup | |
BeautifulSoup("<p>£682m</p>") | |
# output is like: | |
# <html><body><p>£682m</p></body></html> | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# train | |
import logging | |
import os | |
from gensim.models import word2vec | |
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) | |
sentences = word2vec.LineSentence('/path/to/your/data') | |
model = word2vec.Word2Vec(sentences, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
./cypher-shell -a bolt://localhost:1688 -u neo4j -p work "match(n)-[r]->(m) return n, r, m limit 10;" >/tmp/a.txt |
NewerOlder