Skip to content

Instantly share code, notes, and snippets.

@Helw150
Helw150 / NewArticle.py
Created April 25, 2017 14:49
A simple article creator for gatsby
import os
import re
from datetime import datetime
import errno
def titleToUrl(title):
    """Build the blog URL path for an article title.

    The title is lower-cased and every space is replaced with a hyphen; the
    result is wrapped as ``/blog/<slug>/``. No other characters are altered,
    so consecutive spaces become consecutive hyphens.

    Args:
        title: The article title as a string.

    Returns:
        The URL path string, e.g. ``"/blog/hello-world/"``.
    """
    slug = title.lower().replace(" ", "-")
    return "/blog/" + slug + "/"
@Helw150
Helw150 / RepoScraper.py
Last active April 25, 2017 18:58
Scrapes all repos that have descriptions, aren't private and aren't forks. Places them in a JS list of Objects for import.
'''
This file is a helper script that scrapes GitHub for repos to add to my website.
It can be set to run whenever Gatsby is called in order to add new repos as they arrive.
Usage: python RepoScraper.py SampleOutput.js
'''
from sys import argv
from github import Github
import re
def brand_from_url(url):
    """Return the brand portion of a URL's network location.

    The network location is split on dots. With fewer than three parts
    (e.g. ``github.com``) the first part is returned; otherwise the second
    part is returned (e.g. ``www.github.com`` -> ``github``).

    Args:
        url: The URL string to inspect.

    Returns:
        The selected dot-separated component of the URL's netloc. This is an
        empty string when the URL has no network location.
    """
    # Imported locally so the function does not depend on a module-level
    # import of ``urlparse``; the fallback covers Python 2.
    try:
        from urllib.parse import urlparse
    except ImportError:
        from urlparse import urlparse

    domain_parts = urlparse(url).netloc.split('.')
    if len(domain_parts) < 3:
        return domain_parts[0]
    return domain_parts[1]
@Helw150
Helw150 / aws_control.py
Created July 18, 2017 18:41
Some more human controls for boto3
import boto3
def get_id_from_name(name):
    """Return the ID of the EC2 instance whose ``Name`` tag equals *name*.

    Args:
        name: Value of the instance's ``Name`` tag.

    Returns:
        The ``InstanceId`` string of the first matching instance.

    Raises:
        ValueError: If no instance carries a ``Name`` tag equal to *name*.
    """
    ec2 = boto3.client('ec2')
    # Let EC2 filter on the Name tag so untagged instances are never
    # inspected client-side.
    response = ec2.describe_instances(
        Filters=[{'Name': 'tag:Name', 'Values': [name]}]
    )
    for reservation in response['Reservations']:
        for instance in reservation['Instances']:
            # Return the ID of the first match rather than indexing a list.
            return instance['InstanceId']
    raise ValueError("No EC2 instance found with Name tag %r" % (name,))
def start_instance_by_name(name):
ec2 = boto3.client('ec2')
instance_id = get_id_from_name(name)
# Do a dryrun first to verify permissions
@Helw150
Helw150 / OG-articles.py
Last active September 15, 2017 17:28
Functions to work with Open Graph efficiently
# This function takes Open Graph info and just returns the articles
def returnArticles(og_array):
article_urls = []
for og in og_array:
is_article = False
for prop in og:
if hasattr(prop, "property"):
if prop["property"] == "og:type":
if prop["content"] == "article":
is_article = True
@Helw150
Helw150 / createTree.py
Created October 24, 2017 22:37
Array to Min-Heap with In-Order Traversal the same as the Array
# Input: an array of numbers
# Create a binary tree such that each subtree is a min-heap and the in-order traversal of the binary tree is the same as the array provided
# [5, 7, 10, 8, 1, 4]
# 1
# / \
# 5 4
# \
# 7
@Helw150
Helw150 / zenburn.js
Last active November 6, 2017 15:07
Changing Chrome OS Shell to utilize the Zenburn Color Theme
// Sets terminal preferences through `term_.prefs_.set(name, value)`.
// Disable bold.
term_.prefs_.set('enable-bold', false)
// Use this for Zenburn: background and foreground colors.
term_.prefs_.set('background-color', "#3F3F3F");
term_.prefs_.set('foreground-color', "#DCDCCC");
// Named color values, assigned without a declaration keyword.
// NOTE(review): where these names are consumed is outside this excerpt — confirm they are still used.
base03 = "#002b36";
base02 = "#073642";
base01 = "#586e75";
@Helw150
Helw150 / large-file-processing.py
Last active July 28, 2018 22:34
A Python Script which multi-processes large files with a rough progress bar
#!/usr/bin/env python
"""Counts the number of times a word occurs in a very large text file"""
from __future__ import print_function
import os
import sys
import argparse
import textacy
import multiprocessing
from tqdm import tqdm
@Helw150
Helw150 / save2gensim.py
Last active April 13, 2019 12:32
Saves a dictionary of vectors into the Gensim KeyedVectors format
from gensim import utils
def save2gensim(fname, word2vec_dict):
vectors = list(word2vec_dict.values())
vector_size = vectors[0].shape[0]
total_vec = len(vectors)
with utils.smart_open(fname, 'wb') as fout:
fout.write(utils.to_utf8("%s %s\n" % (total_vec, vector_size)))
# store in sorted order: most frequent words at the top
for word, vector in word2vec_dict.items():
@Helw150
Helw150 / upload_csv.py
Created September 16, 2022 15:50
Lab Meeting Dataset upload Code
# See https://huggingface.co/docs/datasets/upload_dataset for more details
from datasets import load_dataset

# Name of the dataset repository to push to under the SALT-NLP namespace.
dataset_name = "PUT_YOUR_NAME_HERE"
# CSV file for each split of the dataset.
data_files = {"train": "train.csv", "dev": "dev.csv", "test": "test.csv"}
# The "csv" builder reads the CSV files listed above; a Hub repository id here
# would try to fetch the files from that repository instead.
dataset = load_dataset("csv", data_files=data_files)
# push_to_hub is a method of the loaded dataset object; the `datasets` module
# name itself is never imported in this script.
dataset.push_to_hub(f"SALT-NLP/{dataset_name}", private=True)