Last active
June 18, 2024 08:47
-
-
Save eguiraud/77a0ca3566e66bc6b8cd0f9e156c983b to your computer and use it in GitHub Desktop.
A thread-safe stateful Filter for RDataFrame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ROOT | |
ROOT.gInterpreter.Declare(""" | |
// A thread-safe stateful filter that lets only one event pass for each value of | |
// "category" (where "category" is a random character). | |
// It is using gCoreMutex, which is a read-write lock, to have a bit less contention between threads. | |
class FilterOnePerKind { | |
std::unordered_set<char> _seenCategories; | |
public: | |
bool operator()(char category) { | |
{ | |
R__READ_LOCKGUARD(ROOT::gCoreMutex); // many threads can take a read lock concurrently | |
if (_seenCategories.count(category) == 1) | |
return false; | |
} | |
// if we are here, `category` was not already in _seenCategories | |
R__WRITE_LOCKGUARD(ROOT::gCoreMutex); // only one thread at a time can take the write lock | |
_seenCategories.insert(category); | |
return true; | |
} | |
}; | |
""") | |
# Turn on ROOT's implicit multi-threading before building the dataframe.
ROOT.EnableImplicitMT()

# 100 entries; "category" cycles through the 10 values of rdfentry_ % 10.
base_df = ROOT.RDataFrame(100).Define("category", "char(rdfentry_ % 10)")

# Filter is given the functor instance plus the list of columns it reads.
filter_columns = ROOT.std.vector['string'](["category"])
df_with_unique_categories = base_df.Filter(ROOT.FilterOnePerKind(), filter_columns)

# Count the entries that passed; the filter is meant to accept one entry per category.
n_selected = df_with_unique_categories.Count().GetValue()
print(n_selected)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi @eguiraud ,
Thanks for your answer! I have opened a thread here: https://root-forum.cern.ch/t/select-unique-candidates-based-on-their-id-and-the-value-of-a/59668/3
I'm not currently running with `ROOT::EnableImplicitMT()`. For now I just want to get the expected result; then I will extend the functionality to MT execution.
Best,
Davide