Skip to content

Instantly share code, notes, and snippets.

@yadudoc
Last active December 22, 2015 20:18
Show Gist options
  • Save yadudoc/6525175 to your computer and use it in GitHub Desktop.
Save yadudoc/6525175 to your computer and use it in GitHub Desktop.
MapReduce-style programming with Swift/T
import files;
import io;
import random;
import string;
import sys;
import location;
import assert;
// gen_data: invoke /bin/bash on the script file `run`, passing `recsize`
// as its argument. The command's standard output is captured in `out`
// and its standard error in `err`.
app (file out, file err) gen_data (file run, int recsize)
{
    "/bin/bash" run recsize
        @stdout=out
        @stderr=err
}
// comb_data: invoke /bin/bash on the script file `comb`, passing the
// files in `array` as its arguments. Standard output is captured in
// `out` and standard error in `err`.
app (file out, file err) comb_data (file comb, file array[])
{
    "/bin/bash" comb array
        @stdout=out
        @stderr=err
}
// comb_data_local: same command shape as comb_data, except that the
// inputs are described by the string `array` rather than by a file array.
// Standard output is captured in `out` and standard error in `err`.
app (file out, file err) comb_data_local (file comb, string array)
{
    "/bin/bash" comb array
        @stdout=out
        @stderr=err
}
// get_uniq_sites: invoke /bin/bash on the script file `sites`, passing the
// files in `array` as its arguments. Only standard output is captured
// (in `out`); standard error is not redirected.
app (file out) get_uniq_sites (file sites, file array[])
{
    "/bin/bash" sites array
        @stdout=out
}
// Driver for a map/reduce-style workflow:
//   1. map:     run the generator wrapper `loop` times;
//   2. group:   summarise the generator outputs into one line per site;
//   3. combine: run a local combiner for each site line, pinned to a
//               worker on that site.
// Required command-line arguments: --loop=<number of generator tasks> and
// --fsize=<size argument handed to the generator wrapper>.
main
{
    file wrapper = input_file("teragen_wrap.sh");
    file tgen_out[];

    // Both values come from the command line (e.g. loop=10, fsize=1).
    int loop = toint(argv("loop"));
    int fsize = toint(argv("fsize"));

    // Call gen_data <loop> times. Per the original author's notes, each call
    // creates a large file (~10 MB of numbers) and its stdout is a
    // "file pointer": the hostname and full path of the actual result file.
    foreach i in [0:loop-1] {
        file out <sprintf("intermediate/tgen_%i.out", i)>;
        file err <sprintf("intermediate/tgen_%i.err", i)>;
        (out, err) = gen_data(wrapper, fsize);
        tgen_out[i] = out;
    }

    // get_uniq_sites takes the file pointers from gen_data and returns the
    // sites and the files present there, one site per line:
    //   site1 <file1> <file2>
    //   site2 <file3> ...
    file jobs_per_site = input_file("jobs_per_site.sh");
    file combiner_list <"uniq_sites">;
    combiner_list = get_uniq_sites(jobs_per_site, tgen_out);

    // Split the listing into lines; each line is sent to the site it names,
    // e.g. "site1 <filex> <filey>".
    string sites[] = split(read(combiner_list), "\n");

    // Script that combines all the files located at one site.
    file local_combine = input_file("local_combiner.sh");

    // Force one combiner job onto each site so it runs next to its data.
    file comb_out[];
    foreach site,i in sites {
        // A trailing newline or a blank line in the listing produces an
        // empty entry; there is no host to look up for it, so skip it
        // instead of handing "" to hostmap_one_worker.
        if (site != "") {
            printf("Site %d : [%s]\n", i, site);
            file out1 <sprintf("intermediate/sitecomb_%i.out", i)>;
            file err1 <sprintf("intermediate/sitecomb_%i.err", i)>;

            // The first space-separated field of the line is the site name.
            string site_info[] = split(site, " ");
            location rank = hostmap_one_worker(site_info[0]);
            printf("Site: %s Rank: %i String: %s\n", site_info[0], rank, site);

            // The whole line (site name plus file list) is passed to the
            // combiner script as its argument.
            (out1, err1) = @location=rank comb_data_local(local_combine, site);
            comb_out[i] = out1;
        }
    }

    // Final single-step reduce over the per-site results; currently disabled.
    /*
    foreach file_item in comb_out {
    printf("Item in comb_out : %s \n", filename(file_item));
    }
    file combine = input_file("combiner.sh");
    // We are doing a simple single step reduce here
    file final <"final_result">;
    file errs <"final_errs">;
    (final, errs) = comb_data (combine, comb_out );
    */
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment