Created
January 17, 2022 19:27
-
-
Save MaskRay/540e7bb31408afcee2b827140bef33e3 to your computer and use it in GitHub Desktop.
[Support] Add parallelForEach with fixed TaskSize and use it in lld/ELF's Writer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From 7a28e9d1cc99e997ca8e2d463776aaa7e8c1d6da Mon Sep 17 00:00:00 2001
From: Fangrui Song <>
Date: Mon, 17 Jan 2022 11:25:51 -0800
Subject: [PATCH] [Support] Add parallelForEach with fixed TaskSize and use it
 in lld/ELF's Writer
---
 lld/ELF/OutputSections.cpp           |  2 +-
 llvm/include/llvm/Support/Parallel.h | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp | |
index 07ee7d84a2cd..57916a08ca2d 100644 | |
--- a/lld/ELF/OutputSections.cpp | |
+++ b/lld/ELF/OutputSections.cpp | |
@@ -353,7 +353,7 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) { | |
if (nonZeroFiller) | |
fill(buf, sections.empty() ? size : sections[0]->outSecOff, filler); | |
- parallelForEachN(0, sections.size(), [&](size_t i) { | |
+ parallelForEachN(0, sections.size(), 128, [&](size_t i) { | |
InputSection *isec = sections[i]; | |
isec->writeTo<ELFT>(buf + isec->outSecOff); | |
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h | |
index 5c3b26d5754c..122b68018d36 100644 | |
--- a/llvm/include/llvm/Support/Parallel.h | |
+++ b/llvm/include/llvm/Support/Parallel.h | |
@@ -274,6 +274,27 @@ void parallelForEachN(size_t Begin, size_t End, FuncTy Fn) { | |
Fn(I); | |
} | |
+template <class IndexTy, class FuncTy> | |
+void parallelForEach(IndexTy Begin, IndexTy End, IndexTy TaskSize, FuncTy Fn) { | |
+ IndexTy I = Begin; | |
+#if LLVM_ENABLE_THREADS | |
+ parallel::detail::TaskGroup TG; | |
+ for (; I + TaskSize < End; I += TaskSize) { | |
+ TG.spawn([=, &Fn] { | |
+ for (IndexTy J = I, E = I + TaskSize; J != E; ++J) | |
+ Fn(J); | |
+ }); | |
+ } | |
+#endif | |
+ for (; I != End; ++I) | |
+ Fn(I); | |
+} | |
+ | |
+template <class FuncTy> | |
+void parallelForEachN(size_t Begin, size_t End, size_t TaskSize, FuncTy Fn) { | |
+ parallelForEach<size_t>(Begin, End, TaskSize, Fn); | |
+} | |
+ | |
template <class IterTy, class ResultTy, class ReduceFuncTy, | |
class TransformFuncTy> | |
ResultTy parallelTransformReduce(IterTy Begin, IterTy End, ResultTy Init, | |
-- | |
2.34 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This gives a 2% speedup when linking a -DCMAKE_BUILD_TYPE=Release build of clang.
There is no significant difference when linking a default build of chrome.