Skip to content

Instantly share code, notes, and snippets.

@tgxworld
Last active August 29, 2015 14:20
Show Gist options
  • Save tgxworld/140bf583f1b6d8bda66d to your computer and use it in GitHub Desktop.
Save tgxworld/140bf583f1b6d8bda66d to your computer and use it in GitHub Desktop.
Benchmarks for indexing and searching documents in Elasticsearch: splitting documents by type vs. splitting them by index
# 10 indexes
guoxiang@guoxiang-GS60-2PC-Ghost moe $ ruby split_by_types.rb
Cleaning all documents on existing cluster
{"acknowledged":true}
Searching through all indexes and types took 37 ms
Searching through known index and type took 13 ms
Indexing a document into an existing index took 1.182491 ms
guoxiang@guoxiang-GS60-2PC-Ghost moe $ ruby split_by_indexes.rb
Cleaning all documents on existing cluster
{"acknowledged":true}
Searching through all indexes and types took 44 ms
Searching through known index and type took 11 ms
Indexing a document into an existing index took 1.513686 ms
# 20 indexes
guoxiang@guoxiang-GS60-2PC-Ghost moe $ ruby /home/guoxiang/moe/split_by_types.rb
Cleaning all documents on existing cluster
{"acknowledged":true}
Searching through all indexes and types took 46 ms
Searching through known index and type took 12 ms
Indexing a document into an existing index took 1.5609879999999998 ms
guoxiang@guoxiang-GS60-2PC-Ghost moe $ ruby split_by_indexes.rb
Cleaning all documents on existing cluster
{"acknowledged":true}
Searching through all indexes and types took 57 ms
Searching through known index and type took 9 ms
Indexing a document into an existing index took 1.163458 ms
require 'elasticsearch'
require 'ffaker'
# Source of fake field values for a forum post document.
#
# Each reader delegates to FFaker on every call; nothing is cached on the
# instance, so two calls to the same reader go through FFaker twice.
class ForumPost
  # @return the value of FFaker::Name.name, used as the author's nickname
  def nickname
    FFaker::Name.name
  end

  # @return the value of FFaker::Lorem.paragraph, used as the post body
  def content
    FFaker::Lorem.paragraph
  end

  # @return [Array] two values, each taken from a separate FFaker::Color.name call
  def tags
    Array.new(2) { FFaker::Color.name }
  end

  # @return the value of FFaker::Time.date
  def date
    FFaker::Time.date
  end
end
# Benchmark script: every group of documents is stored in its OWN index, and
# all documents share the single type 'document'.
#
# It wipes the local cluster, seeds it with fake forum posts, then prints:
#   * the `took` value reported for a phrase search across all indexes,
#   * the `took` value reported for the same search scoped to one index/type,
#   * the client-side time of one additional index request.

# Date.today is used for the last document; load it explicitly instead of
# relying on one of the gems to have required it.
require 'date'

docs_per_index = 10_000
extra_index_count = 20

client = Elasticsearch::Client.new

puts "Cleaning all documents on existing cluster"
system("curl -XDELETE 'http://localhost:9200/_all'")
puts "\n"

# Indexes one post with freshly generated fake field values into +index_name+.
seed_post = lambda do |index_name|
  forum = ForumPost.new
  client.index(
    index: index_name,
    type: 'document',
    body: {
      content: forum.content,
      nickname: forum.nickname,
      date: forum.date,
      tags: forum.tags
    }
  )
end

# Seed data where every group of documents lives in its own index and all
# documents use the same type.
docs_per_index.times { seed_post.call('forum_feedback') }

extra_index_count.times do |time|
  index_name = "a" * (time + 1) # "a", "aa", "aaa", ...
  docs_per_index.times { seed_post.call(index_name) }
end

# The one document the phrase searches below are looking for.
client.index(
  index: 'forum_feedback',
  type: 'document',
  body: {
    content: "some lame content",
    nickname: "Guo Xiang",
    date: ForumPost.new.date,
    tags: ['purple unicorn']
  }
)

# NOTE(review): no explicit refresh is issued between the index call above and
# the searches below, so the document may not be searchable yet; the `took`
# value is printed regardless of whether there was a hit. Confirm whether a
# refresh is wanted for this benchmark.
response = client.search(
  index: '_all',
  body: {
    query: {
      match_phrase: {
        _all: "some lame content"
      }
    }
  }
)
puts "Searching through all indexes and types took #{response["took"]} ms"

response = client.search(
  index: 'forum_feedback',
  type: 'document',
  body: {
    query: {
      match_phrase: {
        _all: "some lame content"
      }
    }
  }
)
puts "Searching through known index and type took #{response["took"]} ms"

# Use the monotonic clock for the duration: unlike Time.now it cannot jump when
# the system clock is adjusted while the request is in flight.
started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
client.index(
  index: 'forum_feedback',
  type: 'document',
  body: {
    content: "lalal",
    nickname: "donkey",
    date: Date.today,
    tags: ["tag"]
  }
)
elapsed_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000.0
puts "Indexing a document into an existing index took #{elapsed_ms} ms"
require 'elasticsearch'
require 'ffaker'
# Source of fake field values for a forum post document.
#
# Each reader delegates to FFaker on every call; nothing is cached on the
# instance, so two calls to the same reader go through FFaker twice.
class ForumPost
  # @return the value of FFaker::Name.name, used as the author's nickname
  def nickname
    FFaker::Name.name
  end

  # @return the value of FFaker::Lorem.paragraph, used as the post body
  def content
    FFaker::Lorem.paragraph
  end

  # @return [Array] two values, each taken from a separate FFaker::Color.name call
  def tags
    Array.new(2) { FFaker::Color.name }
  end

  # @return the value of FFaker::Time.date
  def date
    FFaker::Time.date
  end
end
# Benchmark script: all documents are stored in the SAME index ('documents')
# and each group of documents is told apart by its type.
#
# It wipes the local cluster, seeds it with fake forum posts, then prints:
#   * the `took` value reported for a phrase search across all indexes,
#   * the `took` value reported for the same search scoped to one index/type,
#   * the client-side time of one additional index request.

# Date.today is used for the last document; load it explicitly instead of
# relying on one of the gems to have required it.
require 'date'

docs_per_type = 10_000
extra_type_count = 20

client = Elasticsearch::Client.new

puts "Cleaning all documents on existing cluster"
system("curl -XDELETE 'http://localhost:9200/_all'")
puts "\n"

# Indexes one post with freshly generated fake field values into the
# 'documents' index under +type_name+.
seed_post = lambda do |type_name|
  forum = ForumPost.new
  client.index(
    index: 'documents',
    type: type_name,
    body: {
      content: forum.content,
      nickname: forum.nickname,
      date: forum.date,
      tags: forum.tags
    }
  )
end

# Seed data where we use the same index but differentiate documents by type.
docs_per_type.times { seed_post.call('forum_feedback') }

extra_type_count.times do
  # One generated name per batch is used as the type for the whole batch.
  type = FFaker::Name.name
  docs_per_type.times { seed_post.call(type) }
end

# The one document the phrase searches below are looking for.
client.index(
  index: 'documents',
  type: 'forum_feedback',
  body: {
    content: "some lame content",
    nickname: "Guo Xiang",
    date: ForumPost.new.date,
    tags: ['purple unicorn']
  }
)

# NOTE(review): no explicit refresh is issued between the index call above and
# the searches below, so the document may not be searchable yet; the `took`
# value is printed regardless of whether there was a hit. Confirm whether a
# refresh is wanted for this benchmark.
response = client.search(
  index: '_all',
  body: {
    query: {
      match_phrase: {
        _all: "some lame content"
      }
    }
  }
)
puts "Searching through all indexes and types took #{response["took"]} ms"

response = client.search(
  index: 'documents',
  type: 'forum_feedback',
  body: {
    query: {
      match_phrase: {
        _all: "some lame content"
      }
    }
  }
)
puts "Searching through known index and type took #{response["took"]} ms"

# Use the monotonic clock for the duration: unlike Time.now it cannot jump when
# the system clock is adjusted while the request is in flight.
started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
client.index(
  index: 'documents',
  type: 'forum_feedback',
  body: {
    content: "lalal",
    nickname: "donkey",
    date: Date.today,
    tags: ["tag"]
  }
)
elapsed_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000.0
puts "Indexing a document into an existing index took #{elapsed_ms} ms"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment