Skip to content

Instantly share code, notes, and snippets.

@methane
Last active July 30, 2019 16:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save methane/f6077bd1b0b04d40a9c790d9ed670a44 to your computer and use it in GitHub Desktop.
Save methane/f6077bd1b0b04d40a9c790d9ed670a44 to your computer and use it in GitHub Desktop.
json.loads performance

environment

$ ./python -m pyperf system
Show the system configuration
Unit irqbalance.service could not be found.
Unit irqbalance.service could not be found.

System state
============

CPU: use 8 logical CPUs: 0-7
Perf event: Maximum sample rate: 100000 per second
ASLR: Full randomization
Linux scheduler: No CPU is isolated
CPU Frequency: 0-7=min=800 MHz, max=3400 MHz
CPU scaling governor (intel_pstate): performance
Turbo Boost (intel_pstate): Turbo Boost disabled
IRQ affinity: Default IRQ affinity: CPU 0-7
IRQ affinity: IRQ affinity: IRQ 0-16,122-130=CPU 0-7; IRQ 120-121=CPU 0

Advices
=======

Perf event: Set max sample rate to 1
Linux scheduler: Use isolcpus=<cpu list> kernel parameter to isolate CPUs
Linux scheduler: Use rcu_nocbs=<cpu list> kernel parameter (with isolcpus) to not schedule RCU on isolated CPUs

Run "python -m pyperf system tune" to tune the system configuration to run benchmarks

$ gcc --version
gcc (Ubuntu 8.3.0-6ubuntu1) 8.3.0

$ ./configure --prefix=/home/inada-n/local/py39 --enable-optimizations --with-lto

master (old)

commit: 9211e2fd81

$ ./python -m pyperf timeit  -s "import json; x = json.dumps({'k': '1' * 2 ** 20})" "json.loads(x)"
.....................
Mean +- std dev: 1.01 ms +- 0.00 ms

perf:

       │                  c = PyUnicode_READ(kind, buf, next);
  0.02 │10bb:   cmp    $0x1,%ebp
       │      ↓ jne    226f
  4.22 │        movzbl (%r14,%r12,1),%eax
 33.62 │10c9:   mov    %eax,0x78(%rsp)
       │                  if (c == '"' || c == '\\') {
 14.16 │        cmp    $0x22,%eax
       │      ↑ je     897
  0.02 │        cmp    $0x5c,%eax
       │      ↑ je     897
       │                  else if (strict && c <= 0x1f) {
       │        cmp    $0x1f,%eax
  0.41 │      ↓ ja     10ed
       │        test   %r10b,%r10b
       │      ↓ jne    1c9b
       │              for (next = end; next < len; next++) {
 18.41 │10ed:   add    $0x1,%r12
       │        cmp    %r13,%r12
 29.14 │      ↑ jl     10bb
       │      ↑ jmpq   ddb

master (new)

commit: 8a758f5b99c5fc3fd32edeac049d7d4a4b7cc163

$ ./python -m pyperf timeit  -s "import json; x = json.dumps({'k': '1' * 2 ** 20})" "json.loads(x)"
.....................
Mean +- std dev: 1.01 ms +- 0.00 ms

perf:

       │                  c = PyUnicode_READ(kind, buf, next);
  0.04 │10bb:   cmp    $0x1,%ebp
       │      ↓ jne    226f
  4.10 │        movzbl (%r14,%r12,1),%eax
 35.26 │10c9:   mov    %eax,0x78(%rsp)
       │                  if (c == '"' || c == '\\') {
 13.72 │        cmp    $0x22,%eax
       │      ↑ je     897
  0.05 │        cmp    $0x5c,%eax
       │      ↑ je     897
       │                  else if (c <= 0x1f && strict) {
       │        cmp    $0x1f,%eax
  0.35 │      ↓ ja     10ed
       │        test   %r10b,%r10b
       │      ↓ jne    1c9b
       │              for (next = end; next < len; next++) {
 18.86 │10ed:   add    $0x1,%r12
       │        cmp    %r13,%r12
 27.61 │      ↑ jl     10bb
       │      ↑ jmpq   ddb
       │        nop
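
master (new) + experimental patch 1: track the minimum character (minc) and check for control characters after the loop
(Note: the diff shown after the perf output is not behavior-equivalent; it drops the "Unterminated string" check and reports the error at `next`, the end of the scanned chunk, rather than at the control character.)

$ ./python -m pyperf timeit  -s "import json; x = json.dumps({'k': '1' * 2 ** 20})" "json.loads(x)"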
.....................
Mean +- std dev: 1.01 ms +- 0.00 ms

perf:

       │                  c = PyUnicode_READ(kind, buf, next);
  0.05 │1064:   cmp    $0x1,%r12d
       │      ↓ jne    23f6
 33.40 │        movzbl (%r15,%rbx,1),%edi
  0.08 │1073:   mov    %edi,0x78(%rsp)
       │                  if (c == '"' || c == '\\') {
       │        cmp    $0x22,%edi
       │      ↑ je     84c
  0.04 │        cmp    $0x5c,%edi
       │      ↑ je     84c
       │                  minc = c < minc ? c : minc;
 33.31 │        cmp    %edi,%r8d
 33.08 │        cmova  %edi,%r8d
       │              for (next = end; next < len; next++) {
  0.00 │        add    $0x1,%rbx
       │        cmp    %r14,%rbx
       │      ↑ jl     1064
       │      ↑ jmpq   84c
       │        xchg   %ax,%ax


$ git diff
diff --git a/Modules/_json.c b/Modules/_json.c
index 76da1d345e..eab72c6a62 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -433,19 +433,16 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
     }
     while (1) {
         /* Find the end of the string or the next escape */
-        Py_UCS4 c = 0;
+        Py_UCS4 c = 0, minc = 0x20;
         for (next = end; next < len; next++) {
             c = PyUnicode_READ(kind, buf, next);
             if (c == '"' || c == '\\') {
                 break;
             }
-            else if (c <= 0x1f && strict) {
-                raise_errmsg("Invalid control character at", pystr, next);
-                goto bail;
-            }
+            minc = c < minc ? c : minc;
         }
-        if (!(c == '"' || c == '\\')) {
-            raise_errmsg("Unterminated string starting at", pystr, begin);
+        if (strict && minc <= 0x1f) {
+            raise_errmsg("Invalid control character at", pystr, next);
             goto bail;
         }
         /* Pick up this chunk if it's not zero length */
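
master (new) + experimental patch 2: scan with a block-local variable `d` and copy it to `c` after the loop (diff shown after the perf output)

$ ./python -m pyperf timeit  -s "import json; x = json.dumps({'k': '1' * 2 ** 20})" "json.loads(x)"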
.....................
Mean +- std dev: 1.03 ms +- 0.01 ms

perf:

       │                      d = PyUnicode_READ(kind, buf, next);
  0.52 │ f4c:   cmp    $0x1,%r14d
       │      ↓ jne    22e0
  8.44 │        movzbl 0x0(%r13,%rbp,1),%r9d
       │                      if (d == '"' || d == '\\') {
  1.80 │ f5c:   cmp    $0x22,%r9d
       │      ↑ je     af4
 22.57 │        cmp    $0x5c,%r9d
       │      ↑ je     af4
       │                      if (d <= 0x1f && strict) {
  0.00 │        cmp    $0x1f,%r9d
  1.99 │      ↓ ja     f7f
       │        test   %sil,%sil
       │      ↓ jne    1aea
       │                  for (next = end; next < len; next++) {
  8.41 │ f7f:   lea    0x1(%rbp),%rdi
 29.51 │        cmp    %r15,%rdi
       │      ↑ jge    7d8
  1.95 │        mov    %rdi,%rbp
 24.78 │      ↑ jmp    f4c

$ git diff
diff --git a/Modules/_json.c b/Modules/_json.c
index 76da1d345e..651bf58037 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -433,21 +433,22 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
     }
     while (1) {
         /* Find the end of the string or the next escape */
-        Py_UCS4 c = 0;
-        for (next = end; next < len; next++) {
-            c = PyUnicode_READ(kind, buf, next);
-            if (c == '"' || c == '\\') {
-                break;
-            }
-            else if (c <= 0x1f && strict) {
-                raise_errmsg("Invalid control character at", pystr, next);
-                goto bail;
+        Py_UCS4 c;
+        {
+            Py_UCS4 d = 0;
+            for (next = end; next < len; next++) {
+                d = PyUnicode_READ(kind, buf, next);
+                if (d == '"' || d == '\\') {
+                    break;
+                }
+                if (d <= 0x1f && strict) {
+                    raise_errmsg("Invalid control character at", pystr, next);
+                    goto bail;
+                }
             }
+            c = d;
         }
-        if (!(c == '"' || c == '\\')) {
-            raise_errmsg("Unterminated string starting at", pystr, begin);
-            goto bail;
-        }
+
         /* Pick up this chunk if it's not zero length */
         if (next != end) {
             APPEND_OLD_CHUNK

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment