Kipchumba Bett (corneliouzbett)
@corneliouzbett
corneliouzbett / wordCount.py
Created March 15, 2019 14:00
A simple Python script that uses Apache Spark to count the words in a text file
from pyspark import SparkContext, SparkConf

def display_words(words):
    for word, count in words.items():
        print("{} : {}".format(word, count))

if __name__ == "__main__":
    conf = SparkConf().setAppName("word count").setMaster("local[2]")
    sc = SparkContext(conf=conf)
    # Read the input file (placeholder path), split each line into words, and count occurrences
    lines = sc.textFile("input.txt")
    word_counts = lines.flatMap(lambda line: line.split(" ")).countByValue()
    display_words(word_counts)
# A separate snippet: taking the first few elements of an RDD
from pyspark import SparkContext, SparkConf

if __name__ == "__main__":
    conf = SparkConf().setAppName("take").setMaster("local[*]")
    sc = SparkContext(conf=conf)
    inputWords = ["spark", "hadoop", "spark", "hive", "pig", "cassandra", "hadoop"]
    wordRdd = sc.parallelize(inputWords)
    # take(3) returns the first three elements of the RDD as a Python list
    words = wordRdd.take(3)
    print(words)
@corneliouzbett
corneliouzbett / SparkSession.py
Created March 16, 2019 19:07
Creating a Spark session in Python
from pyspark.sql import SparkSession

spark = SparkSession.builder \
    .appName("Python Spark SQL basic example") \
    .config("spark.some.config.option", "") \
    .getOrCreate()
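As a quick check (a hedged sketch, not part of the original gist), the session can build a small DataFrame directly from local data:

# Example data and column names are made up for illustration
df = spark.createDataFrame([(1, "spark"), (2, "hadoop")], ["id", "name"])
df.show()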
@corneliouzbett
corneliouzbett / datasets.java
Created March 16, 2019 19:09
Creating Datasets in Java
import java.util.Arrays;
import java.util.Collections;
import java.io.Serializable;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

SparkSession spark = SparkSession.builder().appName("datasets").getOrCreate();
// Create a Dataset from a Java list using a built-in encoder, then square each element
Dataset<Integer> primitiveDS = spark.createDataset(Arrays.asList(1, 2, 3), Encoders.INT());
Dataset<Integer> squaredDS = primitiveDS.map((MapFunction<Integer, Integer>) x -> x * x, Encoders.INT());
squaredDS.show();
# A separate Python snippet: inferring a DataFrame schema from Row objects
from pyspark.sql import Row

sc = spark.sparkContext

# Load a text file and convert each line to a Row.
lines = sc.textFile("examples/src/main/resources/people.txt")
parts = lines.map(lambda l: l.split(","))
people = parts.map(lambda p: Row(name=p[0], age=int(p[1])))

# Infer the schema, and register the DataFrame as a table.
schemaPeople = spark.createDataFrame(people)
schemaPeople.createOrReplaceTempView("people")
@corneliouzbett
corneliouzbett / JDBCConnector.py
Created March 17, 2019 06:27
JDBC to other database sources
# Load a DataFrame from a JDBC source
jdbcDF = spark.read \
    .format("jdbc") \
    .option("url", "jdbc:postgresql:dbserver") \
    .option("dbtable", "schema.tablename") \
    .option("user", "username") \
    .option("password", "password") \
    .load()
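For completeness, a hedged sketch of the reverse direction: the same connection options can be used to write a DataFrame back over JDBC (URL, table, and credentials are placeholders).

# Write the DataFrame back to a JDBC table (placeholder URL, table, and credentials)
jdbcDF.write \
    .format("jdbc") \
    .option("url", "jdbc:postgresql:dbserver") \
    .option("dbtable", "schema.tablename") \
    .option("user", "username") \
    .option("password", "password") \
    .save()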
# A separate snippet: loading a JSON dataset into a DataFrame
sc = spark.sparkContext

# A JSON dataset is pointed to by path.
# The path can be either a single text file or a directory storing text files.
path = "examples/src/main/resources/people.json"
peopleDF = spark.read.json(path)

# The inferred schema can be visualized using the printSchema() method
peopleDF.printSchema()
# root
#  |-- age: long (nullable = true)
#  |-- name: string (nullable = true)
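A hedged follow-up sketch (assuming the standard people.json shipped with the Spark examples): the DataFrame can be registered as a temporary view and queried with SQL.

# Register the DataFrame as a temporary view and query it with SQL
peopleDF.createOrReplaceTempView("people")
teenagerNamesDF = spark.sql("SELECT name FROM people WHERE age BETWEEN 13 AND 19")
teenagerNamesDF.show()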
# A lambda function can take any number of arguments, but can only have one expression
x = lambda a, b, c: (a + b) * c
print(x(1, 2, 3))
# output = 9
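Lambdas are most useful when passed inline to higher-order functions, as in the Spark snippets above; a small illustrative sketch:

# Sort a list of words by length, using a lambda as the key function
words = ["spark", "pig", "cassandra", "hive"]
print(sorted(words, key=lambda w: len(w)))  # ['pig', 'hive', 'spark', 'cassandra']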
# A RegEx, or Regular Expression, is a sequence of characters that forms a search pattern.
# RegEx can be used to check if a string contains the specified search pattern.
import re

text = "The above code is for dummies like you"
# Check if the string starts with "The" and ends with "you":
x = re.search("^The.*you$", text)
print(x is not None)  # True, the pattern matches
# Get the current date and time, then access individual components
import datetime

dt = datetime.datetime.now()
print(dt)        # full timestamp
print(dt.year)   # year component
print(dt.month)  # month component
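A small follow-up sketch: strftime formats the same datetime object as a string using standard format codes.

# Format the current datetime as a string
print(dt.strftime("%Y-%m-%d %H:%M:%S"))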