Skip to content

Instantly share code, notes, and snippets.

@armonge
Last active December 19, 2018 00:10
Show Gist options
  • Save armonge/edf0182b8823a8dc56af03044d45f64f to your computer and use it in GitHub Desktop.
Save armonge/edf0182b8823a8dc56af03044d45f64f to your computer and use it in GitHub Desktop.
Count the total number of words across all distinct phrases (intent "usersays" texts plus entity values and synonyms) of a Dialogflow agent
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"import re\n",
"\n",
"def is_number(string):\n",
" try:\n",
" float(string)\n",
" return True\n",
" except Exception:\n",
" return False\n",
" \n",
" \n",
"def words(string):\n",
" return re.sub(\"[^\\w]\", \" \", string).split()\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"files = os.listdir('google-action/intents')\n",
"files = [filename for filename in files if 'usersays' in filename]\n",
"\n",
"usersays = set()\n",
"for filename in files:\n",
" with open('google-action/intents/' + filename) as file:\n",
" intent_data = json.load(file)\n",
" for phrase_data in intent_data:\n",
"\n",
" for phrase_variation in phrase_data['data']:\n",
" usersays.add(phrase_variation['text'])\n",
" \n",
" \n",
"usersays = set(string.strip() for string in usersays) \n",
"usersays = set(string for string in usersays if len(string) > 1) \n",
"usersays = set(string for string in usersays if not is_number(string)) "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"files = os.listdir('google-action/entities')\n",
"\n",
"entities = set()\n",
"for filename in files:\n",
" with open('google-action/entities/' + filename) as file:\n",
" entity_data = json.load(file)\n",
" for entity_value in entity_data:\n",
" entities.add(entity_value['value'])\n",
" for entity_synonym in entity_value['synonyms']:\n",
" entities.add(entity_synonym)\n",
" \n",
" \n",
"entities = set(string.strip() for string in entities) \n",
"entities = set(string for string in entities if len(string) > 1) \n",
"entities = set(string for string in entities if not is_number(string)) "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"count = 0\n",
"all_phrases = set(usersays).union(entities)\n",
"for phrase in all_phrases:\n",
" count += len(words(phrase))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"912"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"count"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"with open('phrases.txt', 'w') as out:\n",
" out.write('\\n'.join(all_phrases))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment