This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#snippet to get the date N days before a particular date | |
------------------------------------------------------------------- | |
start = datetime.strptime("20171012", "%Y%m%d") | |
end = start - timedelta(days=10) | |
print start,end | |
#snippet to get the previous day's date | |
--------------------------------------- | |
date --date="yesterday" +%Y/%m/%d |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pyhs2 | |
import getpass | |
import sys | |
import csv | |
if __name__ == "__main__": | |
query = sys.argv[1] #query without a semicolon at the end | |
pwd = getpass.getpass() | |
data = [] | |
header = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import subprocess | |
import shlex | |
import time | |
import os,sys,argparse | |
import re,collections | |
DEBUG = True | |
return_collection = collections.namedtuple('multi_return',['x','y','z']) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# Starts and stops script | |
# | |
# | |
# sample Startup script for the shell file | |
# | |
# chkconfig: - 95 07 | |
# description: this script is used to setup nodes in a cluster | |
# . | |
# processname: myscript |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.cloudwick.streaming.spark | |
import org.apache.log4j.{Level, Logger} | |
import org.apache.spark.Logging | |
/** Utility functions for Spark Streaming examples. */ | |
object StreamingExamples extends Logging { | |
/** Set reasonable logging levels for streaming if the user has not configured log4j. */ | |
def setStreamingLogLevels() { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Note: All of this was done on an HDP stand-alone cluster | |
step1 | |
----- | |
-> Keep your data source, i.e., Sample_Data_generator.csv, in the root folder of the project in your IDE (Eclipse in my case) | |
-> maven clean | |
-> maven install | |
-> find your .jar file in the target folder and upload it on to the HDP node | |
step2 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create table ncdcweather( | |
stn int, | |
wban int, | |
yearmoda int, | |
temp double, | |
temp_count int, | |
dewp double, | |
deqp_count int, | |
slp double, | |
slp_count int, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
count=0 | |
for (( i = 1901; i<2016; i++ )) | |
do | |
wget -O $i.tar ftp://ftp.ncdc.noaa.gov/pub/data/gsod/$i/gsod_$i.tar | |
mkdir -p ./tarfiles/extracted/$i | |
tar -xvf ./tarfiles/$i.tar -C ./tarfiles/extracted/$i | |
count=$(ls ./tarfiles/extracted/$i/ | wc -l) | |
if [ $count -eq 0 ] | |
then |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#comma separated file | |
#getting only required columns into an RDD | |
val csv = sc.textFile("C:/Users/avrsa/Downloads/zipcode.csv").map(line => (line.split(",")(0),line.split(",")(3),line.split(",")(4))) | |
#tab separated file | |
input : 01000 123:456:789 | |
output: 01000,123 | |
01000,456 | |
01000,789 | |
val tsv = sc.textFile("C:/Users/avrsa/Downloads/skuData.tsv").filter(_.nonEmpty).map(x => (x.split('\t')(0),x.split('\t')(1))).flatMapValues(x => x.split(':')) |