Skip to content

Instantly share code, notes, and snippets.

Helw150 /
Last active Apr 13, 2019
Saves a dictionary of vectors into the Gensim KeyedVectors format
from gensim import utils
def save2gensim(fname, word2vec_dict):
vectors = list(word2vec_dict.values())
vector_size = vectors[0].shape[0]
total_vec = len(vectors)
with utils.smart_open(fname, 'wb') as fout:
fout.write(utils.to_utf8("%s %s\n" % (total_vec, vector_size)))
# store in sorted order: most frequent words at the top
for word, vector in word2vec_dict.items():
Helw150 /
Last active Jul 28, 2018
A Python Script which multi-processes large files with a rough progress bar
#!/usr/bin/env python
"""Counts the number of times a word occurs in a very large text file"""
from __future__ import print_function
import os
import sys
import argparse
import textacy
import multiprocessing
from tqdm import tqdm
Helw150 / zenburn.js
Last active Nov 6, 2017
Changing Chrome OS Shell to utilize the Zenburn Color Theme
View zenburn.js
// Disable bold.
term_.prefs_.set('enable-bold', false)
// Use this for Zenburn
term_.prefs_.set('background-color', "#3F3F3F");
term_.prefs_.set('foreground-color', "#DCDCCC");
base03 = "#002b36";
base02 = "#073642";
base01 = "#586e75";
Helw150 /
Created Oct 24, 2017
Array to Min-Heap with In-Order Traversal the same as the Array
# i/p = array of numbers
# create a binary tree such that each subtree is a min-heap and the in-order traversal of the binary tree is the same as the array provided
# [5, 7, 10, 8, 1, 4]
#         1
#       /   \
#      5     4
#       \
#        7
Helw150 /
Last active Sep 15, 2017
Functions to work with Open Graph efficiently
# This function takes Open Graph info and just returns the articles
def returnArticles(og_array):
article_urls = []
for og in og_array:
is_article = False
for prop in og:
if hasattr(prop, "property"):
if prop["property"] == "og:type":
if prop["content"] == "article":
is_article = True
Helw150 /
Created Jul 18, 2017
Some more human controls for boto3
import boto3
def get_id_from_name(name):
    """Return the ID of the first EC2 instance whose ``Name`` tag equals *name*.

    Args:
        name: Exact value of the instance's ``Name`` tag.

    Returns:
        The ``InstanceId`` string of the first matching instance.

    Raises:
        LookupError: If no instance carries a ``Name`` tag equal to *name*.
    """
    # Fetch the instance descriptions here rather than relying on a
    # ``response`` variable that is never defined in this function.
    # NOTE(review): only the first page of results is inspected -- confirm
    # whether the account is large enough to need pagination.
    ec2 = boto3.client('ec2')
    response = ec2.describe_instances()

    for reservation in response['Reservations']:
        for instance in reservation['Instances']:
            # Untagged instances have no 'Tags' key at all.
            for tag in instance.get('Tags', []):
                if tag['Key'] == 'Name' and tag['Value'] == name:
                    return instance['InstanceId']

    raise LookupError("No EC2 instance found with Name tag %r" % (name,))
def start_instance_by_name(name):
ec2 = boto3.client('ec2')
instance_id = get_id_from_name(name)
# Do a dryrun first to verify permissions
def brand_from_url(url):
    """Return the brand label from the host part of *url*.

    The network location is split on dots. With fewer than three labels
    (e.g. ``example.com``) the first label is the brand; otherwise the first
    label is treated as a subdomain (e.g. ``www.example.com``) and the second
    label is returned.

    Args:
        url: A URL string including its scheme, so that ``urlparse`` can
            populate ``netloc``.

    Returns:
        The brand label as a string (empty if the URL has no network location).
    """
    domain_list = urlparse(url).netloc.split('.')
    if len(domain_list) < 3:
        # No subdomain present: "example.com" -> "example".
        brand = domain_list[0]
    else:
        # Skip the leading subdomain: "www.example.com" -> "example".
        brand = domain_list[1]
    return brand
Helw150 /
Last active Apr 25, 2017
Scrapes all repos that have descriptions, aren't private and aren't forks. Places them in a JS list of Objects for import.
This file is a helper script that scrapes GitHub for repos to add to my website.
It can be set to run whenever Gatsby is called in order to pick up new repos as they are created.
Usage: python SampleOutput.js
from sys import argv
from github import Github
import re
Helw150 /
Created Apr 25, 2017
A simple article creator for gatsby
import os
import re
from datetime import datetime
import errno
def titleToUrl(title):
    """Build the blog URL path for an article *title*.

    The title is lower-cased and its words are joined with hyphens. Runs of
    whitespace collapse to a single hyphen and leading/trailing whitespace is
    dropped, so a title like ``"  My  Post "`` does not yield stray or doubled
    hyphens in the path.

    Args:
        title: Human-readable article title.

    Returns:
        A path of the form ``"/blog/<slug>/"``.
    """
    # str.split() with no arguments splits on any whitespace run and ignores
    # leading/trailing whitespace, which is what keeps the slug clean.
    slug = "-".join(title.lower().split())
    return "/blog/" + slug + "/"