NOTE: This is a question I found on StackOverflow which I’ve archived here, because the answer is so effing phenomenal.
If you are not into long explanations, see [Paolo Bergantino’s answer][2].
// Compute the per-key average of an RDD of (key, value) pairs.
val data = sc.parallelize(Seq(("A", 2), ("A", 4), ("B", 2), ("Z", 0), ("B", 10)))
// data: org.apache.spark.rdd.RDD[(java.lang.String, Int)] = ParallelCollectionRDD[31] at parallelize at <console>:12

// Pair each value with a count of 1, sum both components per key, then divide.
// FIX: the original `mapValues((_, 1)` was missing its closing parenthesis.
val avgValue = data.mapValues((_, 1))                      // (value, 1)
  .reduceByKey((x, y) => (x._1 + y._1, x._2 + y._2))       // (sum, count) per key
  .mapValues { case (sum, count) => (1.0 * sum) / count }  // multiply by 1.0 to force Double division
  .collectAsMap()
// avgValue: scala.collection.Map[java.lang.String,Double] = Map(Z -> 0.0, B -> 6.0, A -> 3.0)
// Per-customer purchase count: map each record to (customer, 1) and sum per key.
val b = data.map(x => (x._1, 1)).reduceByKey(_ + _).collect // Each customer how many times purchased

// Per-customer (customer, totalSpend, averageSpend).
// FIX: records are (customer, amount) Tuple2 pairs, so the amount is x._2 — `x._3`
// does not exist on a pair and fails to compile.
val z = data.map(x => (x._1, x._2))
  .mapValues(y => (y, 1))                            // (amount, 1)
  .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2)) // (totalSpend, purchaseCount)
  .map { x =>
    val (total, count) = x._2
    // FIX: use floating-point division; `total / count` was integer division
    // and silently truncated the average.
    (x._1, total, 1.0 * total / count)
  }.collect // How much average money spent and how many times purchased
# Install Sublime Text 3 on Ubuntu via the WebUpd8 PPA.
# NOTE(review): webupd8team is a third-party PPA — confirm it is still maintained
# before running; Sublime also ships an official apt repository nowadays.
sudo add-apt-repository ppa:webupd8team/sublime-text-3;
sudo apt-get update;
sudo apt-get install sublime-text-installer;
# Symlink the binary onto PATH so it can be launched as `sublime`.
sudo ln -s /usr/lib/sublime-text-3/sublime_text /usr/local/bin/sublime;
// Two ways to duplicate every element of a list: List(1, 2) -> List(1, 1, 2, 2).

// method 1: replace each element with two copies and flatten.
// FIX: `1 to 10 toList` relies on deprecated postfix operator syntax
// (requires `import scala.language.postfixOps`); call .toList explicitly.
val i = (1 to 10).toList
i.flatMap(List.fill(2)(_))

// method 2: the inner generator runs twice per element, yielding each `e` twice.
for { e <- i; y <- 1 to 2 } yield e
| def f(num : Int, arr : List[Int]) : List[Int] = { |
| #!/bin/bash -e | |
| # | |
| # Copyright 2014 Tony Burns | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # |
NOTE: This is a question I found on StackOverflow which I’ve archived here, because the answer is so effing phenomenal.
If you are not into long explanations, see [Paolo Bergantino’s answer][2].
| This gist includes hive ql scripts to create an external partitioned table for Syslog | |
| generated log files using regex serde; | |
| Use case: Count the number of occurrences of processes that got logged, by year, month, | |
| day and process. | |
| Includes: | |
| --------- | |
| Sample data and structure: 01-SampleDataAndStructure | |
| Data download: 02-DataDownload | |
| Data load commands: 03-DataLoadCommands |
| import argparse, datetime, logging | |
| import apache_beam as beam | |
| from apache_beam.options.pipeline_options import PipelineOptions | |
| from apache_beam.options.pipeline_options import SetupOptions | |
| class GetTimestampFn(beam.DoFn): | |
| """Prints element timestamp""" | |
| def process(self, element, timestamp=beam.DoFn.TimestampParam): |
| #!/bin/bash | |
| # This script will update a new workspace created on Cloud9 IDE with the latest packages. | |
| # In order to use it, create a update_workspace.sh file in your C9 Workspace and then make it executable using the command | |
| # `touch update_workspace.sh && chmod +x update_workspace.sh`. | |
| # Now you can open the update_workspace.sh file and copy/paste this full script, save and close. | |
| # Run the command `./update_workspace.sh` to execute the script. | |
| # | |
| # Alternatively you can use this command to download and make this script executable from github | |
| # wget -O update_workspace.sh https://gist.githubusercontent.com/aubort/836888f8aaeeeff75024c87e9c9199f0/raw && chmod +x update_workspace.sh |