A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
#-*- coding: utf-8 -*- | |
import re | |
import nltk | |
from nltk.tokenize import RegexpTokenizer | |
from nltk import bigrams, trigrams | |
import math | |
stopwords = nltk.corpus.stopwords.words('portuguese') |
# $Id: Nielsen2012Python_case.py,v 1.2 2012/09/02 16:55:25 fn Exp $ | |
# Define a url as a Python string (note we are only getting 100 documents) | |
url = "http://wikilit.referata.com/" + \ | |
"wiki/Special:Ask/" + \ | |
"-5B-5BCategory:Publications-5D-5D/" + \ | |
"-3FHas-20author%3DAuthor(s)/-3FYear/" + \ | |
"-3FPublished-20in/-3FAbstract/-3FHas-20topic%3DTopic(s)/" + \ | |
"-3FHas-20domain%3DDomain(s)/" + \ | |
"format%3D-20csv/limit%3D-20100/offset%3D0" |
############################################################################# | |
# Full Imports | |
from __future__ import division | |
import math | |
import random | |
""" | |
This is a pure Python implementation of the K-means Clustering algorithm. The | |
original can be found here: |
'''Implementation of K Means Clustering | |
Requires : python 2.7.x, Numpy 1.7.1+''' | |
import numpy as np | |
def kMeans(X, K, maxIters = 10, plot_progress = None): | |
centroids = X[np.random.choice(np.arange(len(X)), K), :] | |
for i in range(maxIters): | |
# Cluster Assignment step | |
C = np.array([np.argmin([np.dot(x_i-y_k, x_i-y_k) for y_k in centroids]) for x_i in X]) |
package org.logician.sorta | |
import scala.util.Random | |
import scala.math | |
import scala.collection.mutable | |
import scala.collection.mutable.ArrayBuffer | |
/** | |
* Created with IntelliJ IDEA. | |
* User: Austin |
Codes for Machine Learning Foundations(NTU) | |
台湾国立大学《机器学习基石》(Coursera版)相关的代码、编程作业等。 | |
课程地址:https://class.coursera.org/ntumlone-001/ |
git fetch --all | |
git reset --hard origin/master | |
git pull origin master |
A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
#!/bin/bash | |
##################################################### | |
# Name: Bash CheatSheet for Mac OSX | |
# | |
# A little overview of the Bash basics | |
# | |
# Usage: | |
# | |
# Author: J. Le Coupanec | |
# Date: 2014/11/04 |
#!/bin/bash | |
sudo apt-get -y install cups | |
sudo apt-get -y install cups-pdf | |
# add pdf printer to cups | |
# - named files end up in ~/PDF/ | |
# - unnamed files are stored in /var/spool/cups-pdf/ANONYMOUS/, such as PDF:s created by streaming bytes over an API | |
sudo lpadmin -p cups-pdf -v cups-pdf:/ -E -P /usr/share/ppd/cups-pdf/CUPS-PDF.ppd |