Skip to content

Instantly share code, notes, and snippets.

@nikkisharma536
nikkisharma536 / pandas_import.py
Last active October 18, 2018 09:08
pandas import
# Minimal example: read a CSV file into a pandas DataFrame.
import pandas as pd
# NOTE(review): 'data path url' looks like a placeholder — replace it with the
# actual file path or URL of the CSV before running.
data = pd.read_csv('data path url')
@nikkisharma536
nikkisharma536 / core-site.xml
Created December 28, 2018 11:04
Hadoop- core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/Users/nikki/hadoop/hdfs/tmp</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>fs.default.name</name>
@nikkisharma536
nikkisharma536 / mapred-site.xml
Created December 28, 2018 11:07
hadoop-mapred-site.xml
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:9010</value>
</property>
<property>
<name>fs.s3a.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
@nikkisharma536
nikkisharma536 / hdfs-site.xml
Created December 28, 2018 11:08
hadoop-hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value></value>
</property>
<property>
<name>fs.s3a.access.key</name>
<value>ACCESS_KEY_HERE</value>
@nikkisharma536
nikkisharma536 / .profile.sh
Last active December 28, 2018 11:14
hadoop-Env variable
##Add to ~/.profile:
# Set JAVA_HOME to whatever path the /usr/libexec/java_home helper prints.
export JAVA_HOME=$(/usr/libexec/java_home)
# Point Hive's auxiliary-jar path at the Hadoop 3.1.1 tools/lib directory.
# The version number is hard-coded in the path; adjust it to the installed release.
export HIVE_AUX_JARS_PATH=/usr/local/Cellar/hadoop/3.1.1/libexec/share/hadoop/tools/lib/
# Append the same tools/lib directory (as a "*" wildcard entry) to any existing HADOOP_CLASSPATH.
# NOTE(review): presumably this is what makes the S3A connector jars visible — confirm for your install.
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/usr/local/Cellar/hadoop/3.1.1/libexec/share/hadoop/tools/lib/*
##Note : That should work,
# else, if that doesn't work,
@nikkisharma536
nikkisharma536 / script.sql
Created December 28, 2018 11:16
SQL script
-- Create the schema:
CREATE SCHEMA IF NOT EXISTS <schema_name>;
-- Create the external table:
CREATE EXTERNAL TABLE IF NOT EXISTS <schema_name.table_name>
(<column_name> STRING)
LOCATION 's3a://<your-S3-bucket>/raw/access-log/2018-12-28/';
@nikkisharma536
nikkisharma536 / hive-site.xml
Created December 28, 2018 11:21
Add this to hive-site.xml
<!-- Credentials for the S3A filesystem connector: the access key and the secret key. -->
<!-- ACCESS_KEY_HERE and ACCESS_SECRET_HERE are placeholders; substitute real values. -->
<!-- NOTE(review): these are stored in plain text here; consider restricting read access to this file. -->
<property>
<name>fs.s3a.access.key</name>
<value>ACCESS_KEY_HERE</value>
</property>
<property>
<name>fs.s3a.secret.key</name>
<value>ACCESS_SECRET_HERE</value>
</property>
@nikkisharma536
nikkisharma536 / system_utils.py
Created January 14, 2019 09:26
ETL project - system utility
import subprocess
import os
import uuid
# Run a command on the local machine and print its captured output.
# args: sequence of strings making up the command line; it is joined with
#       spaces for the log message and passed unchanged to subprocess.Popen.
def execute_local(args):
print('running command : %s' % ( ' '.join(args) ))
# stderr is redirected into stdout, so both streams are read from one pipe.
process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
# communicate() waits for the process to finish and returns a (stdout, stderr)
# tuple; stderr is None here because it was merged into stdout.
output = process.communicate()
# Prints the whole tuple returned above, not only the stdout bytes.
print('STDOUT:{}'.format(output))
@nikkisharma536
nikkisharma536 / ssh_utils.py
Created January 14, 2019 09:29
ETL project - ssh utility function
import paramiko
def execute_remote(key_path, instance_ip, username, cmd_arr):
key = paramiko.RSAKey.from_private_key_file(key_path)
client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
# Connect/ssh to an instance
try:
# Here 'ubuntu' is user name and 'instance_ip' is public IP of EC2
@nikkisharma536
nikkisharma536 / s3_utils.py
Created January 14, 2019 09:31
ETL project - S3 utility function
from urllib.parse import urlparse
import boto3
def split_s3_path(s3_path):
    """Split an S3 URL into its bucket name and object key.

    Args:
        s3_path: URL such as ``s3://bucket/path/to/key``. Leading and
            trailing whitespace is ignored.

    Returns:
        A ``(bucket, key)`` tuple: the URL's network location and its path
        with the single leading ``/`` removed. The key is ``''`` when the
        URL has no path component.
    """
    parsed = urlparse(s3_path.strip())
    # Slice off only the first character (the "/" separating bucket and key)
    # so that any further slashes inside the key are preserved verbatim.
    return parsed.netloc, parsed.path[1:]