Last active
December 22, 2015 20:18
-
-
Save yadudoc/6525175 to your computer and use it in GitHub Desktop.
MapReduce-style programming with Swift/T
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import files; | |
import io; | |
import random; | |
import string; | |
import sys; | |
import location; | |
import assert; | |
app (file out, file err) gen_data (file run, int recsize){ | |
"/bin/bash" run recsize @stdout=out @stderr=err | |
} | |
app (file out, file err) comb_data (file comb, file array[]){ | |
"/bin/bash" comb array @stdout=out @stderr=err | |
} | |
app (file out, file err) comb_data_local (file comb, string array){ | |
"/bin/bash" comb array @stdout=out @stderr=err | |
} | |
app (file out) get_uniq_sites (file sites, file array[]){ | |
"/bin/bash" sites array @stdout=out | |
} | |
main | |
{ | |
file wrapper = input_file("teragen_wrap.sh"); | |
file tgen_out[]; | |
//int loop = 10; | |
int loop=toint(argv("loop")); | |
//int fsize = 1; | |
int fsize = toint(argv("fsize")); | |
//Call gendata <loop> times. gendata creates a large file with ~10mb worth of numbers | |
//and returns a file-pointer which contains the hostname and full path of actual result | |
//file. | |
foreach item,i in [0:loop-1] { | |
file out <sprintf("intermediate/tgen_%i.out", i)>; | |
file err <sprintf("intermediate/tgen_%i.err", i)>; | |
(out, err) = gen_data(wrapper, fsize); | |
tgen_out[i]=out; | |
} | |
//get_uniq_sites takes the file-pointers from gendata and returns the site and files | |
//present there as a list in following format | |
//site1 <file1> <file2> | |
//site2 <file3> ... | |
file jobs_per_site = input_file("jobs_per_site.sh"); | |
file combiner_list <"uniq_sites">; | |
combiner_list = get_uniq_sites(jobs_per_site, tgen_out); | |
// We split the output to lines, each line will go to the site specified. | |
// eg site1 <filex> <filey> | |
string sites[] = split(read(combiner_list), "\n"); | |
// Iterate over the lines per site and send a combiner task to the site | |
// to combine all the files at that site | |
file local_combine = input_file("local_combiner.sh"); | |
//Here we are forcing jobs to sites to run local combiners | |
file comb_out[]; | |
foreach site,i in sites { | |
printf("Site %d : [%s]\n", i, site); | |
file out1 <sprintf("intermediate/sitecomb_%i.out", i)>; | |
file err1 <sprintf("intermediate/sitecomb_%i.err", i)>; | |
string site_info[] = split(site, " "); | |
location rank = hostmap_one_worker(site_info[0]); | |
printf("Site: %s Rank: %i String: %s\n", site_info[0], rank, site); | |
(out1, err1) = @location=rank comb_data_local(local_combine, site); | |
comb_out[i] = out1; | |
} | |
/* | |
foreach file_item in comb_out { | |
printf("Item in comb_out : %s \n", filename(file_item)); | |
} | |
file combine = input_file("combiner.sh"); | |
// We are doing a simple single step reduce here | |
file final <"final_result">; | |
file errs <"final_errs">; | |
(final, errs) = comb_data (combine, comb_out ); | |
*/ | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment