Skip to content

Instantly share code, notes, and snippets.

@dubeyji10
Forked from Verina-Armanyous/ingest_data.ipynb
Created June 18, 2022 06:49
Show Gist options
  • Save dubeyji10/45469dbcd8f1eb1596a8401340b3699f to your computer and use it in GitHub Desktop.
Save dubeyji10/45469dbcd8f1eb1596a8401340b3699f to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Hands-on Example"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"from elasticsearch import Elasticsearch\n",
"# uncomment the next line to install elasticsearch\n",
"# pip install elasticsearch "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Connect to the cluster\n",
"es = Elasticsearch([{'host': 'localhost', 'port': 9200}])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# check the status of the cluster \n",
"# if you just started the cluster, wait a bit before running this command. \n",
"# Otherwise, you will get a \"no response\" error. If that happens, retry in a bit. \n",
"print(es.cluster.health())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es.indices.create(index='pokemon_characters', body={\"number_of_replicas\": 2, \"number_of_shards\":3})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# fetch data from programmingquotesapi and ingest the data into Elasticsearch \n",
"r = requests.get('http://localhost:9200')\n",
"i = 1\n",
"while i <= 898:\n",
" r = requests.get('https://pokeapi.co/api/v2/pokemon/'+ str(i))\n",
" es.index(index='pokemon_characters', doc_type='pokemon', id=i, body=json.loads(r.content))\n",
" i=i+1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# return 3 documents \n",
"res = es.search(index=\"pokemon_characters\", query = {\n",
" 'match_all' : {}}, size = '3')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"documents = res['hits']['hits']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for num, doc in enumerate(documents):\n",
" for key, value in doc.items():\n",
" print (key, \":\", value)\n",
" \n",
" print (\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# return the character with id = 5\n",
"es.get(index='pokemon_characters', doc_type='pokemon', id=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"es.search(index=\"pokemon_characters\", body={\"query\": {\"match\" : { \"name\" : 'cat'}}})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment