Skip to content

Instantly share code, notes, and snippets.

View vepetkov's full-sized avatar

V. Petkov vepetkov

  • Munich, Germany
View GitHub Profile
@vepetkov
vepetkov / pyhive_sample.py
Last active May 19, 2020 12:59
PyHive Sample
from pyhive import hive
import pandas as pd
from vdom import pre
# Nteract Data Explorer settings, applied through pandas' option API.
pd.set_option("display.html.table_schema", True)  # Data Explorer On!
pd.set_option("display.max_rows", None)  # Send all the data! (careful!)
def getHiveConn(host, username, port=10000, schema="db_user1"):
    """Open a Hive connection through PyHive's ``hive.connect``.

    Args:
        host: Hostname of the Hive server to connect to.
        username: User name passed to the server.
        port: Server port; defaults to 10000.
        schema: Database selected for the connection; defaults to
            "db_user1".

    Returns:
        The connection object returned by ``hive.connect``.
    """
    # auth=None is passed explicitly, so PyHive's own default
    # authentication mode is used.
    return hive.connect(
        host=host,
        port=port,
        username=username,
        database=schema,
        auth=None,
    )
@vepetkov
vepetkov / hive_csv2avro.py
Last active November 12, 2020 20:45
Convert a CSV to Hive DDL + AVRO Schema (with type inference)
#!/usr/bin/python
import pandas
import sys
import argparse
import string
import subprocess
import json
import textwrap
import re
@vepetkov
vepetkov / gitlab_backup.py
Created September 4, 2018 11:29
Backup All GitLab Projects
## pip install python-gitlab gitpython
import gitlab # python-gitlab
from git import Repo # gitpython
import os, time
##########################
### Python Gitlab Config: ~/.python-gitlab.cfg
# [global]
# default = GitLab
# ssl_verify = true
@vepetkov
vepetkov / hdfs_pq_access.py
Created September 4, 2018 11:10
Python HDFS + Parquet (hdfs3, PyArrow + libhdfs, HdfsCLI + Knox)
##################################################################
## Native hdfs access (only on the cluster)
# conda install -c conda-forge libhdfs3=2.3.0=1 hdfs3 --yes
import hdfs3
import pandas as pd
# NameNode host name and IPC port used to reach HDFS natively.
nameNodeHost = 'hadoopnn1.localdomain'
nameNodeIPCPort = 8020
# Build the hdfs3 filesystem object for the NameNode configured above.
hdfs = hdfs3.HDFileSystem(nameNodeHost, port=nameNodeIPCPort)