Skip to content

Instantly share code, notes, and snippets.

@dberlin
Created March 30, 2014 04:37
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dberlin/9867614 to your computer and use it in GitHub Desktop.
Save dberlin/9867614 to your computer and use it in GitHub Desktop.
diff --git a/lib/Basic/SourceManager.cpp b/lib/Basic/SourceManager.cpp
index b78e9f5..f6aa264 100644
--- a/lib/Basic/SourceManager.cpp
+++ b/lib/Basic/SourceManager.cpp
@@ -1200,8 +1200,11 @@ unsigned SourceManager::getPresumedColumnNumber(SourceLocation Loc,
if (isInvalid(Loc, Invalid)) return 0;
return getPresumedLoc(Loc).getColumn();
}
-
-#ifdef __SSE2__
+#ifdef __SSE4_2__
+#include <nmmintrin.h>
+#elif __AVX2__
+#include <immintrin.h>
+#elif __SSE2__
#include <emmintrin.h>
#endif
@@ -1232,7 +1235,63 @@ static void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI,
// Skip over the contents of the line.
const unsigned char *NextBuf = (const unsigned char *)Buf;
-#ifdef __SSE2__
+#ifdef __SSE4_2__
+ __m128i CRLF = _mm_set_epi8(0,0,0,0,0,0,0,0,0,0,0,0,0,0,'\r','\n');
+ // First fix up the alignment to 16 bytes.
+ while (((uintptr_t)NextBuf & (0xf)) != 0) {
+ if (*NextBuf == '\n' || *NextBuf == '\r' || *NextBuf == '\0')
+ goto FoundSpecialChar;
+ ++NextBuf;
+ }
+ // Scan 16 byte chunks for '\r' and '\n'. Ignore '\0'.
+ while (NextBuf+16 <= End) {
+ const __m128i Chunk = *(const __m128i*)NextBuf;
+ int index = _mm_cmpestri(CRLF, 2, Chunk, 16,
+ _SIDD_UBYTE_OPS|
+ _SIDD_CMP_EQUAL_ANY|
+ _SIDD_MASKED_POSITIVE_POLARITY|
+ _SIDD_LEAST_SIGNIFICANT);
+
+ // If we found a newline, adjust the pointer and jump to the
+ // handling code.
+ if (index != 16) {
+ NextBuf += index;
+ goto FoundSpecialChar;
+ }
+ NextBuf += 16;
+ }
+#elif __AVX2__
+ // Try to skip to the next newline using AVX2 instructions. This is very
+ // performance sensitive for programs with lots of diagnostics and in -E
+ // mode.
+ __m128i CRs128 = _mm_set1_epi8('\r');
+ __m256i CRs = _mm256_broadcastb_epi8(CRs128);
+ __m128i LFs128 = _mm_set1_epi8('\n');
+ __m256i LFs = _mm256_broadcastb_epi8(LFs128);
+
+ // First fix up the alignment to 32 bytes.
+ while (((uintptr_t)NextBuf & (0x1f)) != 0) {
+ if (*NextBuf == '\n' || *NextBuf == '\r' || *NextBuf == '\0')
+ goto FoundSpecialChar;
+ ++NextBuf;
+ }
+
+ // Scan 32 byte chunks for '\r' and '\n'. Ignore '\0'.
+ while (NextBuf+32 <= End) {
+ const __m256i Chunk = *(const __m256i*)NextBuf;
+ __m256i Cmp = _mm256_or_si256(_mm256_cmpeq_epi8(Chunk, CRs),
+ _mm256_cmpeq_epi8(Chunk, LFs));
+ unsigned Mask = _mm256_movemask_epi8(Cmp);
+
+ // If we found a newline, adjust the pointer and jump to the handling code.
+ if (Mask != 0) {
+ NextBuf += llvm::countTrailingZeros(Mask);
+ goto FoundSpecialChar;
+ }
+ NextBuf += 32;
+ }
+
+#elif __SSE2__
// Try to skip to the next newline using SSE instructions. This is very
// performance sensitive for programs with lots of diagnostics and in -E
// mode.
@@ -1265,7 +1324,7 @@ static void ComputeLineNumbers(DiagnosticsEngine &Diag, ContentCache *FI,
while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
++NextBuf;
-#ifdef __SSE2__
+#if defined(__SSE2__) || defined(__AVX2__)
FoundSpecialChar:
#endif
Offs += NextBuf-Buf;
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 0955cc5..10805bb 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -2249,7 +2249,9 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
return true;
}
-#ifdef __SSE2__
+#ifdef __AVX2__
+#include <immintrin.h>
+#elif __SSE2__
#include <emmintrin.h>
#elif __ALTIVEC__
#include <altivec.h>
@@ -2306,13 +2308,32 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
// If there is a code-completion point avoid the fast scan because it
// doesn't check for '\0'.
!(PP && PP->getCodeCompletionFileLoc() == FileLoc)) {
- // While not aligned to a 16-byte boundary.
- while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0)
+
+#ifdef __AVX2__
+ const int alignment = 32;
+#else
+ const int alignment = 16;
+#endif
+ // While CurPtr is not yet aligned to `alignment` bytes (32 with AVX2, 16 otherwise).
+ while (C != '/' && ((intptr_t)CurPtr & (alignment-1)) != 0)
C = *CurPtr++;
if (C == '/') goto FoundSlash;
-
-#ifdef __SSE2__
+#ifdef __AVX2__
+ __m128i Slashes128 = _mm_set1_epi8('/');
+ __m256i Slashes = _mm256_broadcastb_epi8(Slashes128);
+ while (CurPtr+32 <= BufferEnd) {
+ int cmp = _mm256_movemask_epi8(_mm256_cmpeq_epi8(*(const __m256i*)CurPtr, Slashes));
+ if (cmp != 0) {
+ // Adjust the pointer to point directly after the first slash. It's
+ // not necessary to set C here; it will be overwritten at the end of
+ // the outer loop.
+ CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1;
+ goto FoundSlash;
+ }
+ CurPtr += 32;
+ }
+#elif __SSE2__
__m128i Slashes = _mm_set1_epi8('/');
while (CurPtr+16 <= BufferEnd) {
int cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(*(const __m128i*)CurPtr,
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment