Created
June 22, 2018 09:42
-
-
Save nithyadurai87/9a8f5a595f9beb9451eb5c01ebe90378 to your computer and use it in GitHub Desktop.
Age Grouping with spark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pyspark import SparkContext, SparkConf | |
# Build the application configuration, then start the Spark context from it.
conf = SparkConf()
conf.setAppName('MyFirstStandaloneApp')
sc = SparkContext(conf=conf)

# Load the input text file from the local filesystem as an RDD of lines.
fle = sc.textFile("file:///home/nithya/girls.txt")
def age_calc(data):
    """Parse one comma-separated record and tag it with an age bracket.

    ``data`` is a single text line that must split on "," into exactly
    seven fields, which the code names sno, fname, lname, age, desig, mob
    and location. The sno and mob fields are read but not returned.

    Returns the 6-tuple ``(fname, lname, bracket, desig, location, age)``
    where ``age`` is converted to ``int`` and ``bracket`` is the label
    returned by ``age_group`` for that age.

    Raises ``ValueError`` when the line does not contain exactly seven
    fields or when the age field cannot be converted by ``int``.
    """
    fields = data.split(",")
    _sno, fname, lname, age_text, desig, _mob, location = fields
    years = int(age_text)
    return fname, lname, age_group(years), desig, location, years
def age_group(age):
    """Return the age-bracket label for ``age``.

    Each bracket excludes its upper bound: anything below 30 maps to
    '0-30', 30 up to (not including) 40 maps to '30-40', then '40-50',
    '50-60', and everything from 60 upwards maps to '60+'.
    """
    # (exclusive upper bound, label) pairs, checked in ascending order.
    brackets = (
        (30, '0-30'),
        (40, '30-40'),
        (50, '40-50'),
        (60, '50-60'),
    )
    for upper_bound, label in brackets:
        if age < upper_bound:
            return label
    return '60+'
# Turn every input line into the tuple produced by age_calc:
# (fname, lname, age bracket, desig, location, age).
age_map = fle.map(age_calc)

# Count how many records carry each designation (tuple index 3).
freq = age_map.map(lambda line: line[3]).countByValue()

# A single %-formatted argument prints the same text under the Python 2
# print statement and the Python 3 print function; the original
# `print "text", value` form is a SyntaxError on Python 3.
print("Total no.of ladies %s" % (fle.count(),))
print("Total no.of ladies on each designation %s" % (dict(freq),))

# Accumulators that promotion_check increments, both starting at zero.
Under_age = sc.accumulator(0)
Over_age = sc.accumulator(0)
def promotion_check(data):
    """Tally one record against the promotion age limits and pass it through.

    Reads the age-bracket label at ``data[2]``. A label of "50-60" adds one
    to the module-level ``Over_age`` accumulator, a label of "0-30" adds one
    to ``Under_age``, and any other label leaves both untouched.

    Returns ``data`` unchanged.
    """
    # NOTE(review): the "60+" bracket is not counted as over age here --
    # confirm whether that is intended.
    global Over_age, Under_age
    bracket = data[2]
    if bracket == "0-30":
        Under_age += 1
    elif bracket == "50-60":
        Over_age += 1
    return data
# Run promotion_check over every record purely for its accumulator updates.
# foreach is an action, and Spark only guarantees that each task's
# accumulator update is applied exactly once inside actions; updates made in
# a map() transformation can be re-applied if a task is re-executed.
# It also avoids collecting every row to the driver into a list (`df`) that
# nothing in this script reads.
age_map.foreach(promotion_check)

# Read the totals on the driver. A single %-formatted argument prints the
# same text under the Python 2 print statement and the Python 3 print
# function.
print("Not qualified for promotions due to under age %s" % (Under_age.value,))
print("Not qualified for promotions due to over age %s" % (Over_age.value,))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment