//READ the nodetool snapshot documentation for Cassandra
//READ the Stack Overflow thread (gist mirror or directly on Stack Overflow) -> http://stackoverflow.com/questions/25465904/how-can-i-restore-cassandra-snapshots
//EXPORT the database schema
cqlsh -e "DESCRIBE SCHEMA" > my_backup_name.schema
//Create a snapshot of the whole node (-t names the snapshot; a bare argument would be read as a keyspace)
nodetool snapshot -t my_backup_name
//Compress the backups (a sketch assuming the default data layout; snapshot dirs sit under <data_dir>/<keyspace>/<table>/snapshots/<tag>)
tar czf my_backup_name.tar.gz /var/lib/cassandra/data/*/*/snapshots/my_backup_name
Abhiknoldur / setup.py
Last active January 3, 2020 09:16
Simple script for packaging Python files
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import io
import os
import sys
from shutil import rmtree
from setuptools import find_packages, setup, Command

# Minimal placeholder call (hypothetical metadata) so the script runs end to end.
setup(
    name="my-package",
    version="0.1.0",
    packages=find_packages(),
)
vagrant@new-vm:~/new-vm/dummy-project$ ls
files.binary files.conf files.data files.lib files.log files.tmp
package com.knoldus
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
object PowerPlantDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("PowerPlantDemo")
      .master("local[*]") // assumption: local run; remove when submitting to a cluster
      .getOrCreate()
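    // The gist breaks off above; what follows is a purely illustrative sketch of a
    // typical continuation. The file name, column names, and schema are assumptions,
    // not the original gist's content.
    val schema = StructType(Seq(
      StructField("AT", DoubleType), // ambient temperature
      StructField("V", DoubleType),  // exhaust vacuum
      StructField("AP", DoubleType), // ambient pressure
      StructField("RH", DoubleType), // relative humidity
      StructField("PE", DoubleType)  // net hourly electrical output
    ))
    val readings = spark.read.schema(schema).option("header", "true").csv("power_plant.csv")
    readings.agg(avg(col("PE")).as("avg_output")).show()

    spark.stop()
  }
}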
Abhiknoldur / SparkSql
Created September 13, 2018 07:10
Dataframe and dataset
scala> val rdd1=sc.parallelize(Seq((1,3.6)))
rdd1: org.apache.spark.rdd.RDD[(Int, Double)] = ParallelCollectionRDD[0] at parallelize at <console>:24
scala> val rdd2=sc.parallelize(Seq((1,1.1)))
rdd2: org.apache.spark.rdd.RDD[(Int, Double)] = ParallelCollectionRDD[1] at parallelize at <console>:24
scala> val jo=rdd1 join rdd2
jo: org.apache.spark.rdd.RDD[(Int, (Double, Double))] = MapPartitionsRDD[4] at join at <console>:27
scala> jo.map(r=>(r._1,(r._2._1-r._2._2)))
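To finish the example (a sketch continuing the same spark-shell session), collecting the mapped RDD materializes the per-key difference, since 3.6 - 1.1 = 2.5:

scala> jo.map { case (k, (a, b)) => (k, a - b) }.collect()
res0: Array[(Int, Double)] = Array((1,2.5))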
Abhiknoldur / car.json
Last active September 11, 2018 10:37
Sample JSON
{ "car": "supercar",
"manufacturer": "Porsche",
"model": "911",
"price": 135000,
"wiki": "http://en.wikipedia.org/wiki/Porsche_997"
}
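A small sketch of loading this file with Spark (the session setup and file path are assumptions). Since the object is pretty-printed across several lines, multiLine must be enabled; by default Spark expects one JSON document per line:

import org.apache.spark.sql.SparkSession

object CarJsonDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("CarJsonDemo")
      .master("local[*]")
      .getOrCreate()

    // multiLine is needed because car.json is one pretty-printed object,
    // not line-delimited JSON.
    val cars = spark.read.option("multiLine", "true").json("car.json")
    cars.printSchema()
    cars.select("manufacturer", "model", "price").show()

    spark.stop()
  }
}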
Queries==============>
1. CREATE TABLE assignment.emp_details (
       emp_id bigint,
       emp_joining_date date,
       emp_city text,
       emp_name text,
       emp_salary float,
       // emp_id is the partition key; emp_joining_date is the clustering column
       PRIMARY KEY (emp_id, emp_joining_date)
   );
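A minimal sketch of querying this table from Scala, assuming the DataStax Java driver 3.x on the classpath and a node on localhost (the keyspace, table, and column names come from the CREATE TABLE above; everything else is an assumption):

import com.datastax.driver.core.Cluster
import scala.collection.JavaConverters._

object EmpDetailsQuery {
  def main(args: Array[String]): Unit = {
    val cluster = Cluster.builder().addContactPoint("127.0.0.1").build()
    val session = cluster.connect("assignment")
    try {
      // emp_id is the partition key, so this is a single-partition read.
      val rows = session.execute("SELECT emp_name, emp_salary FROM emp_details WHERE emp_id = 1")
      for (row <- rows.asScala)
        println(s"${row.getString("emp_name")}: ${row.getFloat("emp_salary")}")
    } finally {
      cluster.close()
    }
  }
}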
Abhiknoldur / Problem1.text
Created September 10, 2018 19:06
Spark-assignments
import org.apache.spark.rdd.RDD

val rdd_1 = sc.parallelize(Seq((1, 3.6)))
val rdd_2 = sc.parallelize(Seq((1, 1.1)))

// Join on the key, then subtract the paired values: (1, 3.6 - 1.1) => (1, 2.5)
def findSubOfValues(value: RDD[(Int, Double)], value1: RDD[(Int, Double)]): Array[(Int, Double)] = {
  val combordd = value join value1
  combordd.map(r => (r._1, r._2._1 - r._2._2)).collect()
}

println(s"Wanted Result: ${findSubOfValues(rdd_1, rdd_2).mkString(", ")}")
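The same subtraction reads a bit more idiomatically with mapValues, which leaves the key untouched (a sketch, same session):

val result = rdd_1.join(rdd_2).mapValues { case (a, b) => a - b }.collect()
// result: Array[(Int, Double)] = Array((1,2.5))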