Skip to content

Instantly share code, notes, and snippets.

@vhyza
Last active December 18, 2015 16:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vhyza/5813445 to your computer and use it in GitHub Desktop.
Save vhyza/5813445 to your computer and use it in GitHub Desktop.
Term vectors test
# Term vector test for Elasticsearch.
#
# Rebuilds the `twitter` index with term vectors (positions, offsets and
# payloads) enabled on the `text` and `fullname` fields, indexes two tweets,
# and requests the term vector of document 2 twice: once right after indexing
# and once after overwriting the document with different text. Comparing the
# two responses shows whether the term vector API reflects the update.
#
# Environment:
#   ES_URL  Base URL of the Elasticsearch node
#           (default: http://localhost:9200).
#
# No exit codes are checked: every step runs even if an earlier one failed
# (for example the initial delete when the index does not exist yet).

ES_URL="${ES_URL:-http://localhost:9200}"
ES_URL="${ES_URL%/}"  # tolerate a trailing slash in an overridden URL

# Index (create or overwrite) a tweet and pretty-print the response.
#   $1 - document id
#   $2 - JSON document body
put_tweet() {
  curl -XPUT "${ES_URL}/twitter/tweet/$1?pretty=true" -d "$2"
  echo
}

# Refresh the twitter index so the preceding writes are visible to the
# requests that follow.
refresh_index() {
  curl -XPOST "${ES_URL}/twitter/_refresh"
  echo
}

# Request the term vector of the `text` field of a tweet, including offsets,
# payloads, positions and both term-level and field-level statistics.
#   $1 - document id
get_termvector() {
  # -XGET is explicit because -d alone would make curl send a POST.
  curl -XGET "${ES_URL}/twitter/tweet/$1/_termvector?pretty=true" -d '{
"fields" : ["text"],
"offsets" : true,
"payloads" : true,
"positions" : true,
"term_statistics" : true,
"field_statistics" : true
}'
  echo
}

echo "[!] delete twitter index"
curl -XDELETE "${ES_URL}/twitter/" ; echo

echo "[*] create twitter index"
# Single shard, no replicas. Both fields are analyzed with a custom analyzer
# (whitespace tokenizer + lowercase + type_as_payload filters) and store full
# term vectors.
curl -XPUT "${ES_URL}/twitter/" -d '{
"mappings": {
"tweet": {
"properties": {
"text": {
"type": "string",
"term_vector": "with_positions_offsets_payloads",
"store": "yes",
"index_analyzer": "fulltext_analyzer"
},
"fullname": {
"type": "string",
"term_vector": "with_positions_offsets_payloads",
"index_analyzer": "fulltext_analyzer"
}
}
}
},
"settings": {
"index": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"type_as_payload"
]
}
}
}
}
}' ; echo

echo "[*] create document 1"
put_tweet 1 '{
"fullname" : "John Doe",
"text" : "twitter test test test "
}'

echo "[*] create document 2"
put_tweet 2 '{
"fullname" : "Jane Doe",
"text" : "Another twitter test"
}'

echo "[*] refresh index"
refresh_index

echo "[*] get termvector for document 2 (with text: Another twitter test)"
get_termvector 2

echo "[!] update document 2"
# Same id, new text: the second term vector request below should describe
# this text rather than the original one.
put_tweet 2 '{
"fullname" : "Jane Doe",
"text" : "Completely different text"
}'

echo "[*] refresh index"
refresh_index

echo "[!] get termvector for document 2 (with text: Completely different text)"
get_termvector 2
[!] delete twitter index
{"ok":true,"acknowledged":true}
[*] create twitter index
{"ok":true,"acknowledged":true}
[*] create document 1
{
"ok" : true,
"_index" : "twitter",
"_type" : "tweet",
"_id" : "1",
"_version" : 1
}
[*] create document 2
{
"ok" : true,
"_index" : "twitter",
"_type" : "tweet",
"_id" : "2",
"_version" : 1
}
[*] refresh index
{"ok":true,"_shards":{"total":1,"successful":1,"failed":0}}
[*] get termvector for document 2 (with text: Another twitter test)
{
"_index" : "twitter",
"_type" : "tweet",
"_id" : "2",
"_version" : 1,
"exists" : true,
"term_vectors" : {
"text" : {
"field_statistics" : {
"sum_doc_freq" : 5,
"doc_count" : 2,
"sum_ttf" : 7
},
"terms" : {
"another" : {
"doc_freq" : 1,
"ttf" : 1,
"term_freq" : 1,
"pos" : [ 0 ],
"start" : [ 0 ],
"end" : [ 7 ],
"payload" : [ "d29yZA==" ]
},
"test" : {
"doc_freq" : 2,
"ttf" : 4,
"term_freq" : 1,
"pos" : [ 2 ],
"start" : [ 16 ],
"end" : [ 20 ],
"payload" : [ "d29yZA==" ]
},
"twitter" : {
"doc_freq" : 2,
"ttf" : 2,
"term_freq" : 1,
"pos" : [ 1 ],
"start" : [ 8 ],
"end" : [ 15 ],
"payload" : [ "d29yZA==" ]
}
}
}
}
}
[!] update document 2
{
"ok" : true,
"_index" : "twitter",
"_type" : "tweet",
"_id" : "2",
"_version" : 2
}
[*] refresh index
{"ok":true,"_shards":{"total":1,"successful":1,"failed":0}}
[!] get termvector for document 2 (with text: Completely different text)
{
"_index" : "twitter",
"_type" : "tweet",
"_id" : "2",
"_version" : 2,
"exists" : true,
"term_vectors" : {
"text" : {
"field_statistics" : {
"sum_doc_freq" : 8,
"doc_count" : 3,
"sum_ttf" : 10
},
"terms" : {
"test" : {
"doc_freq" : 2,
"ttf" : 4,
"term_freq" : 3,
"pos" : [ 1, 2, 3 ],
"start" : [ 8, 13, 18 ],
"end" : [ 12, 17, 22 ],
"payload" : [ "d29yZA==", "d29yZA==", "d29yZA==" ]
},
"twitter" : {
"doc_freq" : 2,
"ttf" : 2,
"term_freq" : 1,
"pos" : [ 0 ],
"start" : [ 0 ],
"end" : [ 7 ],
"payload" : [ "d29yZA==" ]
}
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment