Kipchumba Bett corneliouzbett

## JavaRestClientTest.java
import org.json.JSONException;
import org.json.JSONObject;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.Map;

## wordCount.py
from pyspark import SparkContext, SparkConf

def display_words(words):
    """Print each entry of ``words`` on its own line as ``<key> : <value>``.

    ``words`` must expose ``.items()`` (for example a dict); entries are
    printed in the order ``items()`` yields them. Nothing is returned.
    """
    template = "{} : {}"
    for entry in words.items():
        # Each entry is a (key, value) pair; unpack it into the two slots.
        print(template.format(*entry))

if __name__ == "__main__":
    # Build the configuration one setting at a time, then start the context
    # from it. The master string "local[2]" is passed to Spark unchanged.
    conf = SparkConf()
    conf.setAppName("word count")
    conf.setMaster("local[2]")
    sc = SparkContext(conf=conf)

## TakePythonExample.py
from pyspark import SparkContext, SparkConf

if __name__ == "__main__":
    # Configure and start the Spark context for the "take" example.
    conf = SparkConf()
    conf.setAppName("take")
    conf.setMaster("local[*]")
    sc = SparkContext(conf=conf)

    # Turn a small in-memory word list into an RDD.
    inputWords = "spark hadoop spark hive pig cassandra hadoop".split()
    wordRdd = sc.parallelize(inputWords)

    # Ask the RDD for three of its elements.
    words = wordRdd.take(3)

## SparkSession.py
from pyspark.sql import SparkSession

# Build (or reuse) the Spark session. The builder chain is wrapped in
# parentheses so every call belongs to one expression; the original relied on
# backslash continuations and was missing one before `.getOrCreate()`, which
# made the snippet a syntax error.
spark = (
    SparkSession.builder
    .appName("Python Spark SQL basic example")
    .config("spark.some.config.option", "")
    .getOrCreate()
)

## JSONReader.py
# Location of the JSON dataset: either a single text file or a directory
# that stores text files.
path = "examples/src/main/resources/people.json"

# SparkContext behind the already-created `spark` session.
sc = spark.sparkContext

# Read the JSON data at `path` into a DataFrame.
peopleDF = spark.read.json(path)

# printSchema() shows the schema that was inferred for the DataFrame.
peopleDF.printSchema()
# root

## casewhenspark.sbt
 // Create a one-column dataframe of color names and expose it to SQL as the view "data".
 val df = Seq("Red", "Green", "Blue").map(Tuple1.apply).toDF("color")
 df.createOrReplaceTempView("data")
 // Flag the green rows with 1 and everything else with 0. The comparison is
 // case-sensitive, so the stored value "Green" is lower-cased before matching
 // 'green'; without that, Green_ind would be 0 for every row.
 val df4 = sql(""" select *, case when lower(color) = 'green' then 1 else 0 end as Green_ind from data """)
 df4.show()


   df4:org.apache.spark.sql.DataFrame = [color: string, Green_ind: integer]

## colors.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- Named color resources; each entry maps a name to a six-digit hex color value. -->
<!-- NOTE(review): the names look like Android theme color slots; confirm against the theme that references them. -->
<resources>
    <color name="colorPrimary">#008577</color>
    <color name="colorPrimaryDark">#00574B</color>
    <color name="colorAccent">#FBC02D</color>
</resources>

## docker-compose.yml
# Compose file (format version 3) that declares a single service, "postgres".
version: '3'
services:
  postgres:
    image: postgres:9.6
    # User, password and database name are all set to "airflow".
    # NOTE(review): credentials are hard-coded in plain text; presumably meant
    # for local development only. Confirm before reusing this file elsewhere.
    environment:
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow
      - POSTGRES_DB=airflow
    # Port mapping 5432 -> 5432 (Compose short syntax is HOST:CONTAINER).
    ports:
      - "5432:5432"

## database.py
def shared_locations(self):
        """
        A dictionary of shared locations whose keys are in the set 'prefix',
        'purelib', 'platlib', 'scripts', 'headers', 'data' and 'namespace'.
        The corresponding value is the absolute path of that category for
        this distribution, and takes into account any paths selected by the
        user at installation time (e.g. via command-line arguments). In the
        case of the 'namespace' key, this would be a list of absolute paths
        for the roots of namespace packages in this distribution.

## scheduler.java
/**
 * Holds a single scheduled task that reports the current time on standard output.
 */
public class Scheduler {
   /**
    * Formats the current date and time as {@code yyyy-MM-dd HH:mm:ss.SSS} and prints it
    * after the prefix {@code "Fixed Delay scheduler:: "}.
    *
    * <p>NOTE(review): {@code @Scheduled} is presumably Spring's annotation, in which case
    * both attribute values are milliseconds. Confirm against the project's imports.
    */
   @Scheduled(initialDelay = 3000, fixedDelay = 1000)
   public void fixedDelaySch() {
      // A fresh formatter is created on every run, so no formatter state is shared.
      final String timestamp =
            new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date());
      System.out.println("Fixed Delay scheduler:: " + timestamp);
   }
}
	import org.json.JSONException;
	import org.json.JSONObject;
	import java.io.BufferedReader;
	import java.io.DataOutputStream;
	import java.io.FileNotFoundException;
	import java.io.InputStreamReader;
	import java.net.HttpURLConnection;
	import java.net.URL;
	import java.util.Map;
	from pyspark import SparkContext, SparkConf

	def display_words(words):
	    """Print each entry of ``words`` on its own line as ``<key> : <value>``.

	    ``words`` must expose ``.items()`` (for example a dict). The body is
	    indented again here; the flattened copy had lost its indentation and
	    was not valid Python.
	    """
	    for w, we in words.items():
	        print("{} : {}".format(w, we))

	if __name__ == "__main__":
	    # Body re-indented under the guard; the flattened copy left it at the
	    # same level as the `if`, which is a syntax error.
	    conf = SparkConf().setAppName("word count").setMaster("local[2]")
	    sc = SparkContext(conf=conf)
	from pyspark import SparkContext, SparkConf

	if __name__ == "__main__":
	    # Body re-indented under the guard; the flattened copy left it at the
	    # same level as the `if`, which is a syntax error.
	    conf = SparkConf().setAppName("take").setMaster("local[*]")
	    sc = SparkContext(conf=conf)

	    # Turn a small in-memory word list into an RDD.
	    inputWords = ["spark", "hadoop", "spark", "hive", "pig", "cassandra", "hadoop"]
	    wordRdd = sc.parallelize(inputWords)

	    # Ask the RDD for three of its elements.
	    words = wordRdd.take(3)
	from pyspark.sql import SparkSession

	# Build (or reuse) the Spark session. Parentheses keep the whole builder
	# chain in one expression; the original was missing a backslash before
	# `.getOrCreate()`, which made it a syntax error.
	spark = (
	    SparkSession.builder
	    .appName("Python Spark SQL basic example")
	    .config("spark.some.config.option", "")
	    .getOrCreate()
	)
	# Location of the JSON dataset: either a single text file or a directory
	# that stores text files.
	path = "examples/src/main/resources/people.json"

	# SparkContext behind the already-created `spark` session.
	sc = spark.sparkContext

	# Read the JSON data at `path` into a DataFrame.
	peopleDF = spark.read.json(path)

	# printSchema() shows the schema that was inferred for the DataFrame.
	peopleDF.printSchema()
	# root
	// Create a one-column dataframe of color names and expose it to SQL as the view "data".
	val df = Seq("Red", "Green", "Blue").map(Tuple1.apply).toDF("color")
	df.createOrReplaceTempView("data")
	// Flag the green rows with 1 and everything else with 0. The comparison is
	// case-sensitive, so the stored value "Green" is lower-cased before matching
	// 'green'; without that, Green_ind would be 0 for every row.
	val df4 = sql(""" select *, case when lower(color) = 'green' then 1 else 0 end as Green_ind from data """)
	df4.show()




	df4:org.apache.spark.sql.DataFrame = [color: string, Green_ind: integer]
	<?xml version="1.0" encoding="utf-8"?>
	<resources>
	<color name="colorPrimary">#008577</color>
	<color name="colorPrimaryDark">#00574B</color>
	<color name="colorAccent">#FBC02D</color>
	</resources>
	version: '3'
	services:
	postgres:
	image: postgres:9.6
	environment:
	- POSTGRES_USER=airflow
	- POSTGRES_PASSWORD=airflow
	- POSTGRES_DB=airflow
	ports:
	- "5432:5432"
	def shared_locations(self):
	"""
	A dictionary of shared locations whose keys are in the set 'prefix',
	'purelib', 'platlib', 'scripts', 'headers', 'data' and 'namespace'.
	The corresponding value is the absolute path of that category for
	this distribution, and takes into account any paths selected by the
	user at installation time (e.g. via command-line arguments). In the
	case of the 'namespace' key, this would be a list of absolute paths
	for the roots of namespace packages in this distribution.
	/**
	 * Holds a single scheduled task that reports the current time on standard output.
	 */
	public class Scheduler {
	    /**
	     * Formats the current date and time as {@code yyyy-MM-dd HH:mm:ss.SSS} and prints it
	     * after the prefix {@code "Fixed Delay scheduler:: "}.
	     *
	     * <p>NOTE(review): {@code @Scheduled} is presumably Spring's annotation, in which case
	     * both attribute values are milliseconds. Confirm against the project's imports.
	     */
	    @Scheduled(initialDelay = 3000, fixedDelay = 1000)
	    public void fixedDelaySch() {
	        // A fresh formatter is created on every run, so no formatter state is shared.
	        final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
	        final String timestamp = formatter.format(new Date());
	        System.out.println("Fixed Delay scheduler:: " + timestamp);
	    }
	}