Skip to content

Instantly share code, notes, and snippets.

@kavinduchamiran
Created May 7, 2019 15:42
Show Gist options
  • Save kavinduchamiran/e6e4069c7b2dc50c3c7f857bbc0161d6 to your computer and use it in GitHub Desktop.
/Users/kavinduchamiran/PycharmProjects/fyp/venv/bin/python "/Users/kavinduchamiran/Library/Application Support/JetBrains/Toolbox/apps/PyCharm-P/ch-0/183.5429.31/PyCharm.app/Contents/helpers/pydev/pydevconsole.py" --mode=client --port=50317
import sys; print('Python %s on %s' % (sys.version, sys.platform))
sys.path.extend(['/Users/kavinduchamiran/PycharmProjects/fyp'])
PyDev console: starting.
Python 2.7.10 (default, Feb 22 2019, 21:17:52)
[GCC 4.2.1 Compatible Apple LLVM 10.0.1 (clang-1001.0.37.14)] on darwin
>>> model
Traceback (most recent call last):
File "<input>", line 1, in <module>
NameError: name 'model' is not defined
>>> import gensim
>>> model = gensim.models.KeyedVectors.load_word2vec_format('models/wiki/WikipediaClean5Negative300Skip10.txt', binary=False)
>>> import csv
>>> csv2 = csv.reader(open('models/wiki/id_title_map.csv', 'r'))
>>> csv2
<_csv.reader object at 0x12b2de910>
>>> csv2 = csv.reader(open('models/wiki/id_title_map.csv', 'r'))
>>> ids = {line[0].split(':')[1]: line[1] for line in csv2}
>>> concepts = {v:k for k,v in ids.items()}
>>> '31695' in model.vocab
False
>>> '24110' in model.vocab
True
>>> model.most_similar(positive=['24110', 'president'], topn=1)
[(u'52113', 0.8172774910926819)]
>>> model.most_similar(positive=['24110', 'president'], topn=10)
[(u'52113', 0.8172774910926819), (u'vice', 0.7803899049758911), (u'presidents', 0.7550864815711975), (u'chairman', 0.7382392883300781), (u'3452243', 0.7200016975402832), (u'presidency', 0.7125098705291748), (u'52234', 0.6992216110229492), (u'4822', 0.6988502144813538), (u'34674550', 0.6745897531509399), (u'executive', 0.6706091165542603)]
>>> concepts['52113']
Traceback (most recent call last):
File "<input>", line 1, in <module>
KeyError: '52113'
>>> ids['52113']
'Vice president'
>>> ids['3452243']
'Chairman'
>>> concepts['Animal']
'48305670'
>>> concepts['Animal'] in model.vocab
False
>>> model.most_similar(positive=['animal'], topn=10)
[(u'animals', 0.865334153175354), (u'human', 0.7011067867279053), (u'dogs', 0.6440911889076233), (u'dog', 0.6118031740188599), (u'38084714', 0.5994277000427246), (u'domesticated', 0.5949822664260864), (u'174438', 0.5927456617355347), (u'175596', 0.5910624265670776), (u'pigs', 0.5904308557510376), (u'humans', 0.5867228507995605)]
>>> ids['175596']
'Animal testing'
>>> ids['174438']
'Animal welfare'
>>> ids['38084714']
'Phnom Tamao Wildlife Rescue Centre'
>>> animals = ['canidae', 'felidae', 'cat', 'cattle', 'dog', 'donkey', 'goat', 'guinea pig', 'horse', 'pig', 'rabbit', 'fancy rat varieties', 'laboratory rat strains', 'sheep breeds', 'water buffalo breeds', 'chicken breeds', 'duck breeds', 'goose breeds', 'pigeon breeds', 'turkey breeds', 'aardvark', 'aardwolf', 'african buffalo', 'african elephant', 'african leopard', 'albatross', 'alligator', 'alpaca', 'american buffalo (bison)', 'american robin', 'amphibian', 'list', 'anaconda', 'angelfish', 'anglerfish', 'ant', 'anteater', 'antelope', 'antlion', 'ape', 'aphid', 'arabian leopard', 'arctic fox', 'arctic wolf', 'armadillo', 'arrow crab', 'asp', 'ass (donkey)', 'baboon', 'badger', 'bald eagle', 'bandicoot', 'barnacle', 'barracuda', 'basilisk', 'bass', 'bat', 'beaked whale', 'bear', 'list', 'beaver', 'bedbug', 'bee', 'beetle', 'bird', 'list', 'bison', 'blackbird', 'black panther', 'black widow spider', 'blue bird', 'blue jay', 'blue whale', 'boa', 'boar', 'bobcat', 'bobolink', 'bonobo', 'booby', 'box jellyfish', 'bovid', 'buffalo, african', 'buffalo, american (bison)', 'bug', 'butterfly', 'buzzard', 'camel', 'canid', 'cape buffalo', 'capybara', 'cardinal', 'caribou', 'carp', 'cat', 'list', 'catshark', 'caterpillar', 'catfish', 'cattle', 'list', 'centipede', 'cephalopod', 'chameleon', 'cheetah', 'chickadee', 'chicken', 'list', 'chimpanzee', 'chinchilla', 'chipmunk', 'clam', 'clownfish', 'cobra', 'cockroach', 'cod', 'condor', 'constrictor', 'coral', 'cougar', 'cow', 'coyote', 'crab', 'crane', 'crane fly', 'crawdad', 'crayfish', 'cricket', 'crocodile', 'crow', 'cuckoo', 'cicada', 'damselfly', 'deer', 'dingo', 'dinosaur', 'list', 'dog', 'list', 'dolphin', 'donkey', 'list', 'dormouse', 'dove', 'dragonfly', 'dragon', 'duck', 'list', 'dung beetle', 'eagle', 'earthworm', 'earwig', 'echidna', 'eel', 'egret', 'elephant', 'elephant seal', 'elk', 'emu', 'english pointer', 'ermine', 'falcon', 'ferret', 'finch', 'firefly', 'fish', 'flamingo', 'flea', 'fly', 'flyingfish', 'fowl', 
'fox', 'frog', 'fruit bat', 'gamefowl', 'list', 'galliform', 'list', 'gazelle', 'gecko', 'gerbil', 'giant panda', 'giant squid', 'gibbon', 'gila monster', 'giraffe', 'goat', 'list', 'goldfish', 'goose', 'list', 'gopher', 'gorilla', 'grasshopper', 'great blue heron', 'great white shark', 'grizzly bear', 'ground shark', 'ground sloth', 'grouse', 'guan', 'list', 'guanaco', 'guineafowl', 'list', 'guinea pig', 'list', 'gull', 'guppy', 'haddock', 'halibut', 'hammerhead shark', 'hamster', 'hare', 'harrier', 'hawk', 'hedgehog', 'hermit crab', 'heron', 'herring', 'hippopotamus', 'hookworm', 'hornet', 'horse', 'list', 'hoverfly', 'hummingbird', 'humpback whale', 'hyena', 'iguana', 'impala', 'irukandji jellyfish', 'jackal', 'jaguar', 'jay', 'jellyfish', 'junglefowl', 'kangaroo', 'kangaroo mouse', 'kangaroo rat', 'kingfisher', 'kite', 'kiwi', 'koala', 'koi', 'komodo dragon', 'krill', 'ladybug', 'lamprey', 'landfowl', 'land snail', 'lark', 'leech', 'lemming', 'lemur', 'leopard', 'leopon', 'limpet', 'lion', 'lizard', 'llama', 'lobster', 'locust', 'loon', 'louse', 'lungfish', 'lynx', 'macaw', 'mackerel', 'magpie', 'mammal', 'manatee', 'mandrill', 'manta ray', 'marlin', 'marmoset', 'marmot', 'marsupial', 'marten', 'mastodon', 'meadowlark', 'meerkat', 'mink', 'minnow', 'mite', 'mockingbird', 'mole', 'mollusk', 'mongoose', 'monitor lizard', 'monkey', 'moose', 'mosquito', 'moth', 'mountain goat', 'mouse', 'mule', 'muskox', 'narwhal', 'newt', 'new world quail', 'nightingale', 'ocelot', 'octopus', 'old world quail', 'opossum', 'orangutan', 'orca', 'ostrich', 'otter', 'owl', 'ox', 'panda', 'panther', 'panthera hybrid', 'parakeet', 'parrot', 'parrotfish', 'partridge', 'peacock', 'peafowl', 'pelican', 'penguin', 'perch', 'peregrine falcon', 'pheasant', 'pig', 'pigeon', 'list', 'pike', 'pilot whale', 'pinniped', 'piranha', 'planarian', 'platypus', 'polar bear', 'pony', 'porcupine', 'porpoise', "portuguese man o' war", 'possum', 'prairie dog', 'prawn', 'praying mantis', 'primate', 
'ptarmigan', 'puffin', 'puma', 'python', 'quail', 'quelea', 'quokka', 'rabbit', 'list', 'raccoon', 'rainbow trout', 'rat', 'rattlesnake', 'raven', 'ray (batoidea)', 'ray (rajiformes)', 'red panda', 'reindeer', 'reptile', 'rhinoceros', 'right whale', 'roadrunner', 'rodent', 'rook', 'rooster', 'roundworm', 'saber-toothed cat', 'sailfish', 'salamander', 'salmon', 'sawfish', 'scale insect', 'scallop', 'scorpion', 'seahorse', 'sea lion', 'sea slug', 'sea snail', 'shark', 'list', 'sheep', 'list', 'shrew', 'shrimp', 'silkworm', 'silverfish', 'skink', 'skunk', 'sloth', 'slug', 'smelt', 'snail', 'snake', 'list', 'snipe', 'snow leopard', 'sockeye salmon', 'sole', 'sparrow', 'sperm whale', 'spider', 'spider monkey', 'spoonbill', 'squid', 'squirrel', 'starfish', 'star-nosed mole', 'steelhead trout', 'stingray', 'stoat', 'stork', 'sturgeon', 'sugar glider', 'swallow', 'swan', 'swift', 'swordfish', 'swordtail', 'tahr', 'takin', 'tapir', 'tarantula', 'tarsier', 'tasmanian devil', 'termite', 'tern', 'thrush', 'tick', 'tiger', 'tiger shark', 'tiglon', 'toad', 'tortoise', 'toucan', 'trapdoor spider', 'tree frog', 'trout', 'tuna', 'turkey', 'list', 'turtle', 'tyrannosaurus', 'urial', 'vampire bat', 'vampire squid', 'vicuna', 'viper', 'vole', 'vulture', 'wallaby', 'walrus', 'wasp', 'warbler', 'water boa', 'water buffalo', 'weasel', 'whale', 'whippet', 'whitefish', 'whooping crane', 'wildcat', 'wildebeest', 'wildfowl', 'wolf', 'wolverine', 'wombat', 'woodpecker', 'worm', 'wren', 'xerinae', 'x-ray fish', 'yak', 'yellow perch', 'zebra', 'zebra finch', 'animals by number of neurons', 'animals by size', 'common household pests', 'common names of poisonous animals', 'alpaca', 'bali cattle', 'cat', 'cattle', 'chicken', 'dog', 'domestic bactrian camel', 'domestic canary', 'domestic dromedary camel', 'domestic duck', 'domestic goat', 'domestic goose', 'domestic guineafowl', 'domestic hedgehog', 'domestic pig', 'domestic pigeon', 'domestic rabbit']
>>> animal_vectors = [model[a] for a in animals if a in model.vocab]
>>> import numpy as np
>>> animal_sum - reduce(np.add, animal_vectors)
Traceback (most recent call last):
File "<input>", line 1, in <module>
NameError: name 'animal_sum' is not defined
>>> animal_sum = reduce(np.add, animal_vectors)
>>> animal_mean = animal_sum / 500
>>> model.similar_by_vector(animal_sum, topn=5)
[(u'12521276', 0.8072807788848877), (u'blesbuck', 0.8051647543907166), (u'whitetailed', 0.8028668165206909), (u'4419203', 0.8010376691818237), (u'3074113', 0.7959646582603455)]
>>> ids['12521276']
'Least pygmy squirrel'
>>> ids['4419203']
'Tantilla'
>>> []
[]
>>> ids['3074113']
'Antelope jackrabbit'
>>> model.similar_by_vector(animal_mean, topn=5)
[(u'12521276', 0.8072807788848877), (u'blesbuck', 0.8051648139953613), (u'whitetailed', 0.8028668165206909), (u'4419203', 0.8010377287864685), (u'3074113', 0.7959646582603455)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment