Skip to content

Instantly share code, notes, and snippets.

@ikitommi
Last active April 21, 2018 08:24
Show Gist options
  • Save ikitommi/ced75f3e9448c0c6a0c3e893feb60abe to your computer and use it in GitHub Desktop.
Save ikitommi/ced75f3e9448c0c6a0c3e893feb60abe to your computer and use it in GitHub Desktop.
Tuning Aleph TechEmpower entry

Current

The original code looks like this:

(ns hello.handler
  (:require
    [byte-streams :as bs]
    [clojure.tools.cli :as cli]
    [aleph.http :as http]
    [cheshire.core :as json]
    [clj-tuple :as t])
  (:gen-class))

;; Complete, pre-built response map for the "/plaintext" route.
;; Because this is a top-level def, the body bytes are produced once when the
;; namespace loads, and the same map is returned for every matching request.
(def plaintext-response
  (t/hash-map
    :status 200
    :headers (t/hash-map "content-type" "text/plain; charset=utf-8")
    :body (bs/to-byte-array "Hello, World!")))

;; Response template for the "/json" route: status and headers only.
;; It has no :body; the handler assocs a freshly encoded body onto it per request.
(def json-response
  (t/hash-map
    :status 200
    :headers (t/hash-map "content-type" "application/json")))

;; Request handler: takes a request map and returns a response map, chosen by
;; the request's :uri.
;;   "/plaintext" -> the shared plaintext-response map
;;   "/json"      -> json-response plus a :body encoded on every call
;;   anything else -> {:status 404}
;; `req` is bound via :as but is not used in the body.
(defn handler [{:keys [uri] :as req}]
  (cond
    (= "/plaintext" uri) plaintext-response
    (= "/json" uri) (assoc json-response
                      :body (json/encode (t/hash-map :message "Hello, World!")))
    :else {:status 404}))

;;;

;; Command-line entry point.
;; Options: -p/--port (default 8080, parsed with Integer/parseInt) and
;; -h/--[no-]help. With help set, prints the usage banner and exits with
;; status 0; otherwise starts the HTTP server with `handler` on the chosen port.
(defn -main [& args]

  ;; The first element of cli/cli's result is destructured as the options map,
  ;; the second is ignored, and the third is the usage banner.
  (let [[{:keys [help port]} _ banner]
        (cli/cli args
          ["-p" "--port" "Server port"
           :default 8080
           :parse-fn #(Integer/parseInt %)]
          ["-h" "--[no-]help"])]

    (when help
      (println banner)
      (System/exit 0))

    ;; NOTE(review): aleph.netty is referenced fully qualified but is not listed
    ;; in this namespace's :require; presumably it is loaded transitively by
    ;; aleph.http -- confirm.
    (aleph.netty/leak-detector-level! :disabled)
    ;; NOTE(review): `:executor :none` presumably makes Aleph run the handler
    ;; without handing off to a separate executor -- confirm against Aleph docs.
    (http/start-server handler {:port port, :executor :none})))

With my laptop:

;; start repl with `lein perf repl`
;; perf measured with the following setup:
;;
;; Model Name:            MacBook Pro
;; Model Identifier:      MacBookPro11,3
;; Processor Name:        Intel Core i7
;; Processor Speed:       2,5 GHz
;; Number of Processors:  1
;; Total Number of Cores: 4
;; L2 Cache (per Core):   256 KB
;; L3 Cache:              6 MB
;; Memory:                16 GB

using

wrk -t2 -c100 -d2s http://localhost:8080/json

serves about 82700 req/sec.

Tuning

Benchmarking with Criterium and Aleph 0.4.4, using the following JVM opts:

;; JVM options used for the REPL benchmarks:
;;   -server      select the HotSpot server VM
;;   -Xmx4096m    cap the maximum heap size at 4096 MB
;;   -Dclojure.compiler.direct-linking=true
;;                compile Clojure code with direct linking enabled
["-server"
 "-Xmx4096m"
 "-Dclojure.compiler.direct-linking=true"]

Setup:

(require '[byte-streams :as bs]
         '[aleph.http :as http]
         '[cheshire.core :as cheshire]
         '[clj-tuple :as t])

;; Pre-built response map for the "/plaintext" route, as in the original entry.
;; The body bytes are produced once, when this def is evaluated.
(def plaintext-response
  (t/hash-map
    :status 200
    :headers (t/hash-map "content-type" "text/plain; charset=utf-8")
    :body (bs/to-byte-array "Hello, World!")))

;; Response template for the "/json" route (status and headers, no :body).
;; The handlers that use it assoc a per-request :body onto this map.
(def json-response
  (t/hash-map
    :status 200
    :headers (t/hash-map "content-type" "application/json")))

we need to capture a real Aleph request:

(class +request+)
; aleph.http.core.NettyRequest

+request+
;{:aleph/request-arrived 9336693625230,
; :aleph/keep-alive? true,
; :remote-addr nil,
; :headers {"host" "localhost:8080",
;           "user-agent" "HTTPie/0.9.9",
;           "connection" "keep-alive",
;           "accept" "*/*",
;           "accept-encoding" "gzip, deflate"},
; :server-port 8080,
; :uri "/json",
; :server-name "0.0.0.0",
; :query-string nil,
; :body nil,
; :scheme :http,
; :request-method :get}

Initial code

(require '[criterium.core :as cc])

;; Baseline handler, equivalent to the original entry: map destructuring to get
;; :uri, clojure.core/= for the route comparison, and Cheshire for encoding.
;; For "/json" the :body is a JSON String produced on every call.
(defn handler1 [{:keys [uri] :as req}]
  (cond
    (= "/plaintext" uri) plaintext-response
    (= "/json" uri) (assoc json-response
                      :body (cheshire/encode (t/hash-map :message "Hello, World!")))
    :else {:status 404}))

(handler1 +request+)
; => {:status 200, :headers {"content-type" "application/json"}, :body "{\"message\":\"Hello, World!\"}"}

;; 1.40µs
(cc/quick-bench (handler1 +request+))

Jsonista

https://github.com/metosin/jsonista is a Clojure library for fast JSON encoding and decoding.

(require '[jsonista.core :as jsonista])

;; Same shape as handler1, but the "/json" body is encoded with
;; jsonista/write-value-as-bytes, so :body is a byte array rather than a String.
;; Destructuring and clojure.core/= are unchanged from the baseline.
(defn handler2 [{:keys [uri] :as req}]
  (cond
    (= "/plaintext" uri) plaintext-response
    (= "/json" uri) (assoc json-response
                      :body (jsonista/write-value-as-bytes (t/hash-map :message "Hello, World!")))
    :else {:status 404}))

(handler2 +request+)
; => {:status 200, :headers {"content-type" "application/json"}, :body #object["[B" 0x78d5200b "[B@78d5200b"]}

;; 371ns
(cc/quick-bench (handler2 +request+))

-74%, nice. But let's not stop here.

No Destructure

Destructuring maps is slow in Clojure. I have a patch bubblin' for it, but let's destructure manually here to see the difference:

;; Variant of handler2 that avoids map destructuring in the argument vector:
;; :uri is read with a plain keyword lookup in a let binding instead.
;; Routing and encoding are otherwise the same as handler2.
(defn handler3 [req]
  (let [uri (:uri req)]
    (cond
      (= "/plaintext" uri) plaintext-response
      (= "/json" uri) (assoc json-response
                        :body (jsonista/write-value-as-bytes (t/hash-map :message "Hello, World!")))
      :else {:status 404})))

(handler3 +request+)
; {:status 200, :headers {"content-type" "application/json"}, :body #object["[B" 0x2e58e486 "[B@2e58e486"]}

;; 344ns
(cc/quick-bench (handler3 +request+))

-8%. Easy win.

Fast equals

= is over 100x slower than .equals for Strings - which is backed with a JVM optimization. Let's see if that counts:

;; Variant of handler3 that compares routes with Java's String.equals via
;; interop instead of clojure.core/=.
;; The string literal is the receiver of .equals, so a nil uri simply fails the
;; comparison and falls through to the 404 branch.
(defn handler4 [req]
  (let [uri (:uri req)]
    (cond
      (.equals "/plaintext" uri) plaintext-response
      (.equals "/json" uri) (assoc json-response
                              :body (jsonista/write-value-as-bytes (t/hash-map :message "Hello, World!")))
      :else {:status 404})))

(handler4 +request+)
; {:status 200, :headers {"content-type" "application/json"}, :body #object["[B" 0x2b98ab27 "[B@2b98ab27"]}

;; 269ns
(cc/quick-bench (handler4 +request+))

-21%, nice!

Plain Clojure?

What if we used plain Clojure maps instead? That would be more like the code we write in practice. Let's try:

;; Variant of handler4 that uses plain Clojure map literals written inline,
;; instead of the clj-tuple maps and the shared response defs.
;; Routing still uses keyword lookup plus String.equals.
(defn handler5 [req]
  (let [uri (:uri req)]
    (cond
      ;; NOTE(review): unlike the plaintext-response def, bs/to-byte-array is
      ;; called here each time this branch is taken, not once at load time.
      (.equals "/plaintext" uri) {:status 200
                                  :headers {"content-type" "text/plain; charset=utf-8"}
                                  :body (bs/to-byte-array "Hello, World!")}
      ;; Only the encoded :body is computed per call; the other values are literals.
      (.equals "/json" uri) {:status 200
                             :headers {"content-type" "application/json"}
                             :body (jsonista/write-value-as-bytes {:message "Hello, World!"})}
      :else {:status 404})))

(handler5 +request+)
; {:status 200, :headers {"content-type" "application/json"}, :body #object["[B" 0x798ba1b2 "[B@798ba1b2"]}

;; 265ns
(cc/quick-bench (handler5 +request+))

Seems to make no difference. But it's 80% faster than the original code.

New benchmark

With the last code, the original benchmark:

wrk -t2 -c100 -d2s http://localhost:8080/json

serves about 84500 req/sec, which is +2%.

not much, but could be better on a real test setup.

Tuning JVM opts

from:

java -server -Xmx2g -XX:+UseG1GC -XX:MaxGCPauseMillis=10 -jar target/hello-aleph-standalone.jar

to:

java -server -XX:+UseNUMA -XX:+UseParallelGC -XX:+AggressiveOpts -jar target/hello-aleph-standalone.jar

serves about 87000 req/sec, which is +3% more.

(no actual idea how the different JVM parameters affect this, but this seems better...)

all tests

original

Requests/sec:  82651.72
Transfer/sec:     14.19MB

update deps

Requests/sec:  82110.08
Transfer/sec:     14.10MB

optimized

Requests/sec:  84551.86
Transfer/sec:     14.51MB

tuned jvm-opts

Requests/sec:  86997.37
Transfer/sec:     14.93MB

draft of a PR

https://github.com/ikitommi/FrameworkBenchmarks/pull/1

t/hash-map

;; Encodes a message map to JSON bytes, where the map is built by a call to
;; t/hash-map. That call is an ordinary function invocation, so it runs every
;; time handler1 is called.
(defn handler1 []
  (json/write-value-as-bytes (t/hash-map :message "Hello, World!")))

will generate java code ~like this:

package hello;

import clojure.lang.AFunction;
import clojure.lang.IFn;
import clojure.lang.Keyword;
import clojure.lang.RT;
import clojure.lang.Var;

// Approximate decompiled form of the t/hash-map version of handler1.
// The two Vars and the :message keyword are static constants, but no map
// constant exists: the map is produced inside invokeStatic().
public final class test$handler1 extends AFunction {
  public static final Var const__0 = (Var)RT.var("jsonista.core", "write-value-as-bytes");
  public static final Var const__1 = (Var)RT.var("clj-tuple", "hash-map");
  public static final Keyword const__2 = (Keyword)RT.keyword((String)null, "message");

  public test$handler1() {
  }

  // Each call invokes hash-map (const__1) with the keyword and the string, then
  // passes the resulting map to write-value-as-bytes (const__0).
  public static Object invokeStatic() {
    return ((IFn)const__0.getRawRoot()).invoke(((IFn)const__1.getRawRoot()).invoke(const__2, "Hello, World!"));
  }

  // Instance entry point; delegates to the static implementation.
  public Object invoke() {
    return invokeStatic();
  }
}

... the inner map is generated on each function call

normal clojure map

;; Encodes a message map to JSON bytes, where the map is written as a literal
;; whose key and value are both constants.
(defn handler2 []
  (json/write-value-as-bytes {:message "Hello, World!"}))

will generate java code ~like this:

package hello;

import clojure.lang.AFn;
import clojure.lang.AFunction;
import clojure.lang.IFn;
import clojure.lang.RT;
import clojure.lang.Var;

// Approximate decompiled form of the literal-map version of handler2.
// The whole {:message "Hello, World!"} map is a static constant (const__2),
// created once when the class is initialized.
public final class test$handler2 extends AFunction {
  public static final Var const__0 = (Var)RT.var("jsonista.core", "write-value-as-bytes");
  public static final AFn const__2 = (AFn)RT.map(new Object[]{RT.keyword((String)null, "message"), "Hello, World!"});

  public test$handler2() {
  }

  // Each call only invokes write-value-as-bytes on the pre-built map constant.
  public static Object invokeStatic() {
    return ((IFn)const__0.getRawRoot()).invoke(const__2);
  }

  // Instance entry point; delegates to the static implementation.
  public Object invoke() {
    return invokeStatic();
  }
}

=> the inner map is defined once.

@ikitommi
Copy link
Author

The whole inlined response:

;; Builds the whole JSON response as a single map literal.
;; :status and :headers are constants; only :body is computed on each call.
(defn handler3 []
  {:status 200
   :headers {"content-type" "application/json"}
   :body (json/write-value-as-bytes {:message "Hello, World!"})})

will generate java code ~like this:

package hello;

import clojure.lang.AFn;
import clojure.lang.AFunction;
import clojure.lang.IFn;
import clojure.lang.Keyword;
import clojure.lang.RT;
import clojure.lang.Var;

// Approximate decompiled form of the fully inlined response version of handler3.
// Keys, the status value, the headers map and the message map are all static
// constants; the response map itself is assembled on every call.
public final class test$handler3 extends AFunction {
  public static final Keyword const__0 = (Keyword)RT.keyword((String)null, "status");
  public static final Object const__1 = 200L;
  public static final Keyword const__2 = (Keyword)RT.keyword((String)null, "headers");
  public static final AFn const__3 = (AFn)RT.map(new Object[]{"content-type", "application/json"});
  public static final Keyword const__4 = (Keyword)RT.keyword((String)null, "body");
  public static final Var const__5 = (Var)RT.var("jsonista.core", "write-value-as-bytes");
  public static final AFn const__7 = (AFn)RT.map(new Object[]{RT.keyword((String)null, "message"), "Hello, World!"});

  public test$handler3() {
  }

  // Each call allocates a six-element key/value array (three entries) and hands
  // it to RT.mapUniqueKeys; the body value comes from invoking
  // write-value-as-bytes on the constant message map.
  public static Object invokeStatic() {
    return RT.mapUniqueKeys(new Object[]{const__0, const__1, const__2, const__3, const__4, ((IFn)const__5.getRawRoot()).invoke(const__7)});
  }

  // Instance entry point; delegates to the static implementation.
  public Object invoke() {
    return invokeStatic();
  }
}

... where the RT.mapUniqueKeys looks like:

// Builds a persistent map from a flat array of alternating keys and values.
//   null array                          -> the shared empty PersistentArrayMap
//   length <= HASHTABLE_THRESHOLD       -> a PersistentArrayMap constructed from init
//   longer arrays                       -> PersistentHashMap.create(init)
// NOTE(review): the name suggests callers guarantee the keys are distinct;
// no duplicate check is visible in this method -- confirm against RT.java.
static public IPersistentMap mapUniqueKeys(Object... init){
	if(init == null)
		return PersistentArrayMap.EMPTY;
	else if(init.length <= PersistentArrayMap.HASHTABLE_THRESHOLD)
		return new PersistentArrayMap(init);
	return PersistentHashMap.create(init);
}

... while the assoc-version:

;; Builds the JSON response by assoc-ing :body onto a literal base map holding
;; the constant :status and :headers, instead of writing one three-key literal.
(defn handler4 []
  (assoc
    {:status 200
     :headers {"content-type" "application/json"}}
    :body (json/write-value-as-bytes {:message "Hello, World!"})))

would ~produce:

package hello;

import clojure.lang.AFn;
import clojure.lang.AFunction;
import clojure.lang.IFn;
import clojure.lang.Keyword;
import clojure.lang.RT;
import clojure.lang.Var;

// Approximate decompiled form of the assoc version of handler4.
// The base {:status ... :headers ...} map (const__5) and the message map
// (const__9) are static constants created once at class initialization.
public final class test$handler4 extends AFunction {
  public static final Var const__0 = (Var)RT.var("clojure.core", "assoc");
  public static final AFn const__5 = (AFn)RT.map(new Object[]{RT.keyword((String)null, "status"), 200L, RT.keyword((String)null, "headers"), RT.map(new Object[]{"content-type", "application/json"})});
  public static final Keyword const__6 = (Keyword)RT.keyword((String)null, "body");
  public static final Var const__7 = (Var)RT.var("jsonista.core", "write-value-as-bytes");
  public static final AFn const__9 = (AFn)RT.map(new Object[]{RT.keyword((String)null, "message"), "Hello, World!"});

  public test$handler4() {
  }

  // Each call goes through the clojure.core/assoc Var, passing the constant
  // base map, the :body keyword and the freshly encoded body.
  public static Object invokeStatic() {
    return ((IFn)const__0.getRawRoot()).invoke(const__5, const__6, ((IFn)const__7.getRawRoot()).invoke(const__9));
  }

  // Instance entry point; delegates to the static implementation.
  public Object invoke() {
    return invokeStatic();
  }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment