A personal diary of DataFrame munging over the years.
Convert a Series' datatype to numeric (this will raise an error if the column contains non-numeric values).
(h/t @makmanalp)
# coding: utf-8
# Imports and constants for a matplotlib 3-D animation that plots
# system metrics sampled via psutil.
import psutil
# Imported for its side effect of enabling the '3d' projection in
# matplotlib; the name itself is unused.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

# Number of time samples kept in the plot window.
TIME_NUM = 90
# Total number of frames the animation renders.
FRAMES = 360
# Kill every running Docker container.
# The original wrote IDs to tmp.txt via an awk with fragile quoting
# (`awk {' print $1 '}`) and word-split the file back with $(cat ...).
# `docker ps -q` already prints bare container IDs (no header), and
# xargs handles the iteration; -r skips the kill when nothing is running.
docker ps -q | xargs -r docker kill
import string | |
import nltk | |
from nltk.tokenize import RegexpTokenizer | |
from nltk.corpus import stopwords | |
import re | |
def preprocess(sentence):
    """Lowercase, tokenize, and strip English stopwords from *sentence*.

    :param str sentence: raw input text
    :rtype: str -- the cleaned, space-joined tokens

    NOTE(review): the original snippet was truncated right after
    tokenization; the stopword filtering below is reconstructed from the
    snippet's own imports (``nltk.corpus.stopwords``) -- confirm against
    the original source.
    """
    sentence = sentence.lower()
    # \w+ keeps word characters only, so punctuation is dropped here.
    tokenizer = RegexpTokenizer(r'\w+')
    tokens = tokenizer.tokenize(sentence)
    filtered_words = [w for w in tokens if w not in stopwords.words('english')]
    return " ".join(filtered_words)
A personal diary of DataFrame munging over the years.
Convert a Series' datatype to numeric (this will raise an error if the column contains non-numeric values).
(h/t @makmanalp)
# CoffeeScript version of Google Spreadsheet Driver for Tableau Data Web Connector
# Bootstrap for the Tableau WDC shim: verify the shim loaded, then report
# our script version and signal that initialisation finished.
init = ->
  # `tableau` is a global injected by the WDC shim. The original's
  # `if !tableau` throws a ReferenceError when the global is missing --
  # the exact case it guards against. The existential check compiles to
  # `typeof tableau === "undefined" || tableau === null`, which is safe.
  unless tableau?
    alert 'init- tableau NOT defined!'
    return
  tableau.scriptVersion = '1.0'
  tableau.log 'init'
  tableau.initCallback()
def multireplace(string, replacements, ignore_case=False):
    """
    Given a string and a replacement map, it returns the replaced string.

    :param str string: string to execute replacements on
    :param dict replacements: replacement dictionary {value to find: value to replace}
    :param bool ignore_case: whether the match should be case insensitive
    :rtype: str

    NOTE(review): the body was missing from this snippet (truncated after
    the docstring); the implementation below fulfils the documented
    contract, matching longer keys first so e.g. 'abc' wins over 'ab'.
    """
    if not replacements:
        # Edge case: an empty pattern alternation would match everywhere.
        return string

    if ignore_case:
        normalize = str.lower
        re_mode = re.IGNORECASE
    else:
        normalize = str  # identity for str input
        re_mode = 0

    # Normalize keys so lookups succeed regardless of the matched casing.
    lookup = {normalize(key): val for key, val in replacements.items()}

    # Longest keys first, so overlapping patterns prefer the longer match.
    keys = sorted(lookup, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(key) for key in keys), re_mode)

    # One pass over the string; each match is swapped via the lookup table.
    return pattern.sub(lambda match: lookup[normalize(match.group(0))], string)
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer | |
from sklearn.decomposition import NMF, LatentDirichletAllocation | |
import numpy as np | |
def display_topics(H, W, feature_names, documents, no_top_words, no_top_documents):
    """Print the top words and the top documents for every topic.

    :param H: topic-term matrix (n_topics x n_terms), e.g. a fitted
        NMF/LDA model's ``components_``
    :param W: document-topic matrix (n_documents x n_topics)
    :param feature_names: term strings indexed like H's columns
    :param documents: document strings indexed like W's rows
    :param int no_top_words: number of top-weighted words to show per topic
    :param int no_top_documents: number of top-weighted documents to show per topic

    The original used Python 2 ``print`` statements (a syntax error on
    Python 3) and was truncated before the document-printing loop.
    """
    for topic_idx, topic in enumerate(H):
        print("Topic %d:" % topic_idx)
        # argsort is ascending, so walk it backwards for the heaviest terms.
        print(" ".join(feature_names[i]
                       for i in topic.argsort()[:-no_top_words - 1:-1]))
        # Documents with the largest weight for this topic, heaviest first.
        top_doc_indices = np.argsort(W[:, topic_idx])[::-1][0:no_top_documents]
        for doc_index in top_doc_indices:
            print(documents[doc_index])
import random | |
import sys | |
def build_chain(text, chain=None):
    """Build a first-order Markov chain from *text*.

    :param str text: input corpus; words are split on single spaces
    :param dict chain: optional existing chain to extend in place
    :rtype: dict -- maps each word to the list of words observed after it

    The original signature used a mutable default (``chain={}``), so
    state silently accumulated across calls; replaced with the standard
    ``None`` sentinel. The snippet was also truncated before the branch
    that creates a new key -- ``setdefault`` covers both branches.
    """
    if chain is None:
        chain = {}
    words = text.split(' ')
    # Each adjacent pair (prev, word) records that `word` follows `prev`.
    for prev, word in zip(words, words[1:]):
        chain.setdefault(prev, []).append(word)
    return chain
from gensim.models import KeyedVectors

# Load gensim word2vec
# NOTE(review): '<Gensim File Path>' is a placeholder -- point it at a real
# word2vec-format file before running.
w2v_path = '<Gensim File Path>'
w2v = KeyedVectors.load_word2vec_format(w2v_path)

import io

# Vector file, `\t` seperated the vectors and `\n` seperate the words
# NOTE(review): the snippet was truncated here; a dangling unterminated
# triple-quote from the extraction was removed to keep the file parseable.