-
-
Save joyadauche/a4d1b03cafd224ae2644f26f19ede126 to your computer and use it in GitHub Desktop.
The Big Data Bravura: Introducing Apache Spark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark import SparkConf, SparkContext

# Configure Spark with the "local" master and the application name
# "SubjectsByClass", then create the context used for all RDD work below.
conf = SparkConf().setMaster("local").setAppName("SubjectsByClass")
sc = SparkContext(conf=conf)
def get_class_and_subject(entry):
    """Parse one comma-separated record into a (class id, subject count) pair.

    Args:
        entry: A single line of text with at least three comma-separated
            fields. The second field is used as the class id and the third
            as the number of subjects; the first field is ignored.

    Returns:
        A tuple ``(class_id, number_of_subjects)`` where ``class_id`` is the
        second field with surrounding whitespace removed and
        ``number_of_subjects`` is the third field converted to ``int``.

    Raises:
        IndexError: If the line has fewer than three fields.
        ValueError: If the third field is not a valid integer.
    """
    fields = entry.split(',')
    # Strip padding so "c201" and " c201" end up under the same key when the
    # pairs are later grouped by class id.
    class_id = fields[1].strip()
    # int() already tolerates surrounding whitespace and a trailing newline.
    number_of_subjects = int(fields[2])
    return (class_id, number_of_subjects)
# Read the input file line by line and turn every line into a
# (class_id, number_of_subjects) pair.
student_subject_rdd = sc.textFile("./student_subject.csv")
class_subject_rdd = student_subject_rdd.map(get_class_and_subject)

# Attach a counter of 1 to every value so that a single reduce can carry
# both the running sum of subjects and the number of records per class.
modified_class_subject_rdd = class_subject_rdd.mapValues(
    lambda value: (value, 1))

# Element-wise add the (sum, count) tuples that share the same class id.
total_by_class_rdd = modified_class_subject_rdd.reduceByKey(
    lambda currentValue, nextvalue: (
        currentValue[0] + nextvalue[0], currentValue[1] + nextvalue[1]))

# Average per class = summed subjects / number of records.
average_by_class_rdd = total_by_class_rdd.mapValues(
    lambda value: value[0] / value[1])

# Bring the per-class averages back to the driver and print them with two
# decimal places.
average_by_class_array = average_by_class_rdd.collect()
for result in average_by_class_array:
    print('The average number of subjects for class ',
          result[0], ' is ', f'{result[1]:.2f}')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
s101 | c201 | 10 | |
---|---|---|---|
s102 | c202 | 8 | |
s103 | c201 | 7 | |
s104 | c201 | 9 | |
s105 | c202 | 10 | |
s106 | c201 | 8 | |
s107 | c202 | 7 | |
s108 | c201 | 9 | |
s201 | c203 | 7 | |
s202 | c203 | 8 | |
s203 | c205 | 8 | |
s204 | c203 | 8 | |
s205 | c205 | 7 | |
s206 | c203 | 10 | |
s207 | c203 | 8 | |
s208 | c205 | 9 | |
s302 | c203 | 8 | |
s304 | c205 | 8 | |
s905 | c205 | 9 | |
s904 | c203 | 6 | |
s801 | c201 | 7 | |
s400 | c204 | 10 | |
s807 | c204 | 8 | |
s803 | c201 | 9 | |
s802 | c201 | 9 | |
s709 | c202 | 7 | |
s605 | c205 | 10 | |
s603 | c203 | 8 | |
s602 | c205 | 7 | |
s509 | c204 | 7 | |
s507 | c204 | 6 | |
s505 | c205 | 7 | |
s504 | c205 | 8 | |
s503 | c205 | 10 | |
s502 | c202 | 9 | |
s501 | c203 | 9 | |
s409 | c203 | 8 | |
s408 | c205 | 11 | |
s407 | c202 | 10 | |
s406 | c204 | 7 | |
s405 | c205 | 8 | |
s404 | c203 | 9 | |
s403 | c205 | 6 | |
s402 | c201 | 5 | |
s401 | c202 | 6 | |
s309 | c201 | 8 | |
s308 | c202 | 7 | |
s306 | c205 | 10 | |
s307 | c201 | 11 | |
s305 | c203 | 11 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment