Skip to content

Instantly share code, notes, and snippets.

@MaskRay
Created January 17, 2022 19:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MaskRay/540e7bb31408afcee2b827140bef33e3 to your computer and use it in GitHub Desktop.
Save MaskRay/540e7bb31408afcee2b827140bef33e3 to your computer and use it in GitHub Desktop.
[Support] Add parallelForEach with fixed TaskSize and use it in lld/ELF's Writer
From 7a28e9d1cc99e997ca8e2d463776aaa7e8c1d6da Mon Sep 17 00:00:00 2001
From: Fangrui Song <>
Date: Mon, 17 Jan 2022 11:25:51 -0800
Subject: [PATCH] [Support] Add parallelForEach with fixed TaskSize and use it
in lld/ELF's Writer
---
lld/ELF/OutputSections.cpp | 2 +-
llvm/include/llvm/Support/Parallel.h | 21 +++++++++++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 07ee7d84a2cd..57916a08ca2d 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -353,7 +353,7 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) {
if (nonZeroFiller)
fill(buf, sections.empty() ? size : sections[0]->outSecOff, filler);
- parallelForEachN(0, sections.size(), [&](size_t i) {
+ parallelForEachN(0, sections.size(), 128, [&](size_t i) {
InputSection *isec = sections[i];
isec->writeTo<ELFT>(buf + isec->outSecOff);
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
index 5c3b26d5754c..122b68018d36 100644
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -274,6 +274,27 @@ void parallelForEachN(size_t Begin, size_t End, FuncTy Fn) {
Fn(I);
}
+/// Calls \p Fn(I) for every index I in the half-open range [Begin, End),
+/// grouping the work into chunks of \p TaskSize consecutive indices.
+///
+/// With LLVM_ENABLE_THREADS, every full chunk that is followed by at least
+/// one more index is spawned on a parallel::detail::TaskGroup; the remaining
+/// tail (at most TaskSize indices when chunking is active) runs on the calling
+/// thread. Without threads every index is visited serially in increasing
+/// order.
+///
+/// Spawned tasks capture \p Fn by reference, so \p Fn must tolerate being
+/// invoked concurrently. An empty or inverted range (Begin >= End) invokes
+/// nothing, and a non-positive \p TaskSize disables chunking so the whole
+/// range runs on the calling thread.
+// NOTE(review): the by-reference capture of Fn assumes TaskGroup's destructor
+// waits for all spawned tasks before this function returns -- confirm.
+template <class IndexTy, class FuncTy>
+void parallelForEach(IndexTy Begin, IndexTy End, IndexTy TaskSize, FuncTy Fn) {
+  IndexTy I = Begin;
+#if LLVM_ENABLE_THREADS
+  // Declared at function scope so the serial tail below runs while the
+  // spawned chunks are still in flight.
+  parallel::detail::TaskGroup TG;
+  // A zero (or negative) TaskSize would never advance I, so skip chunking.
+  if (IndexTy(0) < TaskSize) {
+    // Compare against the remaining distance instead of computing
+    // I + TaskSize, which can wrap around for large TaskSize values.
+    for (; I < End && TaskSize < End - I; I += TaskSize) {
+      TG.spawn([=, &Fn] {
+        for (IndexTy J = I, E = I + TaskSize; J != E; ++J)
+          Fn(J);
+      });
+    }
+  }
+#endif
+  // Tail (or the entire range when threads/chunking are unavailable). Using
+  // '<' rather than '!=' keeps an inverted range from looping indefinitely.
+  for (; I < End; ++I)
+    Fn(I);
+}
+
+/// size_t convenience overload: runs \p Fn over [Begin, End) by forwarding to
+/// parallelForEach with the index type fixed to size_t and the given
+/// \p TaskSize chunk length.
+template <class FuncTy>
+void parallelForEachN(size_t Begin, size_t End, size_t TaskSize, FuncTy Fn) {
+  return parallelForEach<size_t, FuncTy>(Begin, End, TaskSize, Fn);
+}
+
template <class IterTy, class ResultTy, class ReduceFuncTy,
class TransformFuncTy>
ResultTy parallelTransformReduce(IterTy Begin, IterTy End, ResultTy Init,
--
2.34
@MaskRay
Copy link
Author

MaskRay commented Jan 17, 2022

2% speedup when linking a clang built with -DCMAKE_BUILD_TYPE=Release.

No significant difference when linking Chrome in its default build configuration.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment