Skip to content

Instantly share code, notes, and snippets.

@splch
Created May 9, 2023 21:20
Show Gist options
  • Save splch/b363c3aff982b46fdc96df9d071a9ebb to your computer and use it in GitHub Desktop.
Save splch/b363c3aff982b46fdc96df9d071a9ebb to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "48cf9c83-9b29-4057-95f1-b56016d201a0",
"metadata": {},
"outputs": [],
"source": [
"import nltk\n",
"from nltk.corpus import cmudict\n",
"import gensim.downloader as api\n",
"from tqdm import tqdm"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4385f53a-b58b-4e99-a0a2-bcb6623b5e2c",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package cmudict to\n",
"[nltk_data] nltk_data...\n",
"[nltk_data] Package cmudict is already up-to-date!\n"
]
}
],
"source": [
"# Ensure NLTK's CMUdict corpus is downloaded (a no-op when already up to date)\n",
"nltk.download('cmudict')\n",
"\n",
"# Load the CMU Pronouncing Dictionary as a word -> pronunciations mapping\n",
"pron_dict = cmudict.dict()\n",
"\n",
"# Pre-trained Word2Vec embeddings from gensim-data. The previous name,\n",
"# 'ideal-model', is not a published gensim-data identifier, so api.load()\n",
"# could not resolve it on a fresh kernel (Restart & Run All failed here).\n",
"# NOTE(review): this is a large download on first use; swap MODEL_NAME for a\n",
"# smaller gensim-data model if needed.\n",
"MODEL_NAME = 'word2vec-google-news-300'\n",
"model = api.load(MODEL_NAME)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5dee77d1-629f-41b0-9af9-61dcc5780634",
"metadata": {},
"outputs": [],
"source": [
"def find_possible_contronyms(topn=10):\n",
"    \"\"\"Collect word pairs that share a pronunciation entry and lie in opposite embedding directions.\n",
"\n",
"    For every key of the global ``pron_dict`` that is also present in the\n",
"    global ``model``, the ``topn`` nearest neighbours of the *negated* word\n",
"    vector are fetched, and each neighbour whose ``pron_dict`` entry equals\n",
"    the word's own entry is recorded.\n",
"\n",
"    Args:\n",
"        topn: Number of neighbours requested from ``model.similar_by_vector``\n",
"            for each word.\n",
"\n",
"    Returns:\n",
"        A list of ``(word, opp_word)`` tuples, in the order they were found.\n",
"    \"\"\"\n",
"    candidates = []\n",
"    for entry in tqdm(pron_dict):\n",
"        # Words without an embedding cannot be compared; skip them.\n",
"        if entry not in model:\n",
"            continue\n",
"        entry_pron = pron_dict[entry]\n",
"        # Query the neighbourhood of the negated vector, i.e. the opposite direction.\n",
"        neighbours = model.similar_by_vector(-model[entry], topn=topn)\n",
"        # A neighbour absent from pron_dict gives None from .get() and so is not kept.\n",
"        candidates.extend(\n",
"            (entry, other)\n",
"            for other, _score in neighbours\n",
"            if pron_dict.get(other) == entry_pron\n",
"        )\n",
"    return candidates"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "15393569-8ac2-455f-9641-c1a3ecf4d175",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████| 123455/123455 [01:00:00<01:00:00, 34.3it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"anabasis and anabasis might be contronyms\n",
"apology and apology might be contronyms\n",
"aught and aught might be contronyms\n",
"bolt and bolt might be contronyms\n",
"by and by might be contronyms\n",
"chuffed and chuffed might be contronyms\n",
"cleave and cleave might be contronyms\n",
"clip and clip might be contronyms\n",
"consult and consult might be contronyms\n",
"copemate and copemate might be contronyms\n",
"custom and custom might be contronyms\n",
"dike and dike might be contronyms\n",
"discursive and discursive might be contronyms\n",
"dollop and dollop might be contronyms\n",
"dust and dust might be contronyms\n",
"enjoin and enjoin might be contronyms\n",
"fast and fast might be contronyms\n",
"fix and fix might be contronyms\n",
"flog and flog might be contronyms\n",
"garnish and garnish might be contronyms\n",
"grade and grade might be contronyms\n",
"handicap and handicap might be contronyms\n",
"help and help might be contronyms\n",
"left and left might be contronyms\n",
"liege and liege might be contronyms\n",
"mean and mean might be contronyms\n",
"off and off might be contronyms\n",
"out and out might be contronyms\n",
"oversight and oversight might be contronyms\n",
"pitted and pitted might be contronyms\n",
"quiddity and quiddity might be contronyms\n",
"quite and quite might be contronyms\n",
"ravel and ravel might be contronyms\n",
"rent and rent might be contronyms\n",
"sanction and sanction might be contronyms\n",
"sanguine and sanguine might be contronyms\n",
"screen and screen might be contronyms\n",
"seed and seed might be contronyms\n",
"skinned and skinned might be contronyms\n",
"strike and strike might be contronyms\n",
"table and table might be contronyms\n",
"transparent and transparent might be contronyms\n",
"unbending and unbending might be contronyms\n",
"variety and variety might be contronyms\n",
"wear and wear might be contronyms\n",
"weather and weather might be contronyms\n",
"with and with might be contronyms\n",
"aural and oral might be contronyms\n",
"erupt and irrupt might be contronyms\n",
"petalless and petalous might be contronyms\n",
"raise and raze might be contronyms\n"
]
}
],
"source": [
"# Run the scan; it issues one nearest-neighbour query per embedded word, so it can be slow\n",
"possible_contronyms = find_possible_contronyms()\n",
"\n",
"# Report every candidate pair on its own line\n",
"for candidate in possible_contronyms:\n",
"    word, opp_word = candidate\n",
"    print(f\"{word} and {opp_word} might be contronyms\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment