Skip to content

Instantly share code, notes, and snippets.

@omar-3
Last active April 5, 2020 07:23
Show Gist options
  • Save omar-3/47e01762740436aa482257f7045756c9 to your computer and use it in GitHub Desktop.
Save omar-3/47e01762740436aa482257f7045756c9 to your computer and use it in GitHub Desktop.
This is an example of Python's groupby function implemented in Chapel, with two use cases.
// This function would be super cool for databases, for example, in a world without SQL's GROUP BY command :D
use Set;
use List;
use RangeChunk;
// `iterable` is any iterable collection of objects or other values.
// `function` extracts the trait that several elements of `iterable`
// may have in common: given one element, it must return that element's trait,
// so that elements sharing a trait can be grouped together into one array.
// serial
// Serial overload: yields one array per distinct trait, in the order in which
// each trait is first encountered while traversing `iterable`.
//   iterable - the collection to group; it is indexed once (`iterable[1]`) to
//              obtain the trait type and traversed with `for`
//   function - maps an element to the trait used as its grouping key
iter groupby(iterable, function) {
  // Traits whose group has already been yielded.
  var seenTraits : set(function(iterable[1]).type);

  for candidate in iterable {
    const trait = function(candidate);

    if !seenTraits.contains(trait) {
      seenTraits.add(trait);

      // Second pass over the input: gather every element that maps to `trait`.
      // Each match is inserted at index 1 of the list.
      var members = new list(candidate.type);
      for element in iterable do
        if function(element) == trait then
          members.insert(1, element);

      yield members.toArray();
    }
  }
}
// standalone
// Standalone parallel overload, selected when `tag` is iterKind.standalone.
// It first collects the distinct traits of `iterable`, then divides them among
// `here.maxTaskPar` tasks; each task yields one array per trait in its share.
//   iterable - the collection to group; it is indexed (`iterable[1]`) to obtain
//              the trait and element types and traversed with `for`
//   function - maps an element to the trait used as its grouping key
iter groupby(param tag:iterKind, iterable, function)
where tag == iterKind.standalone {
  const numTasks = here.maxTaskPar;

  // Distinct traits found in `iterable`. The set is kept under a name distinct
  // from the per-element `commonTrait` below, so that add() targets the set
  // rather than the element's own trait value.
  var commonTraits : set(function(iterable[1]).type);
  for i in iterable {
    const commonTrait = function(i);
    commonTraits.add(commonTrait); // a set keeps only one copy of each trait
  }

  // An indexed data structure is needed so that every task can be responsible
  // for a portion of the trait list and yield the groups for the traits that
  // fall in its own sub-range.
  const commonTraitArr = commonTraits.toArray();
  const Range = commonTraitArr.domain.low..commonTraitArr.domain.high;

  coforall tid in 0..#numTasks {
    // The slice of trait indices assigned to task `tid`.
    const tidRange = chunk(Range, numTasks, tid);
    for i in tidRange {
      // Gather every element whose trait equals commonTraitArr[i].
      var commonTraitObjects: list((iterable[1]).type) = new list((iterable[1]).type);
      for element in iterable {
        if function(element) == commonTraitArr[i] {
          commonTraitObjects.insert(1, element);
        }
      }
      yield commonTraitObjects.toArray();
    }
  }
}
// Sample input for the first example: integers, several of them repeated.
var used = [1, 2, 3, 4, 5, 1, 3, 5, 3, 6, 8, 9, 10,
            12, 14, 6, 3, 1, 7, 2, 9, 10, 1, 5, 4];
// Trait function for the integer example: returns true when `x` is even
// and false otherwise.
proc funcUsed_1(x: int) : bool {
  const remainder = x % 2;
  return remainder == 0;
}
// Group `used` by parity: `a` now holds two arrays, one for the even numbers
// and one for the odd numbers. Print each group, then a blank separator.
var a = groupby(used, funcUsed_1);
for group in a do
  writeln(group);
writeln("\n");
// Sample input for the second example: pairs of strings.
var things = [
  ("animal", "bear"),
  ("plant", "cactus"),
  ("vehicle", "speed boat"),
  ("animal", "duck"),
  ("vehicle", "school bus")
];
// Trait function for the tuple example: returns component 1 of the
// string pair `x`.
proc function_1(x: 2*string) : string {
  const trait = x[1];
  return trait;
}
// Group the tuples by the string that function_1 extracts from each of them:
// the animal tuples end up together, the plant tuples together,
// and the vehicle tuples together. Print each group, then a blank separator.
var b = groupby(things, function_1);
for group in b do
  writeln(group);
writeln("\n");
// Banner separating the serial output above from the parallel output below.
writeln("that was serial stuff");
writeln("this is parallel stuff");
writeln("\n");
// NOTE(review): these forall loops iterate over `a` and `b`, the variables
// initialized earlier, rather than over a groupby(...) call — confirm whether
// the standalone groupby overload is actually meant to be exercised here.
forall group in a do
  writeln(group);
writeln("\n");
forall group in b do
  writeln(group);
// In a leader/follower parallel iterator, the leader does all the thinking while the followers
// are literally just followers. There should be a global synchronization object shared between the followers.
// This is more obvious in the implementation in the mason package, where I wrote explicit leader/follower iterators rather than just a standalone one.
// It is also MUCH cleaner in the package.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment