Instantly share code, notes, and snippets.

View remove_numbers.py
# Function designed to strip out all numbers (alphabetic - English only - and numeric) from a string as part of a
# text normalisation process.
# Based on the text2num package (https://github.com/ghewgill/text2num) and using code from
# here (http://stackoverflow.com/questions/25346058/removing-list-of-words-from-a-string)
from string import digits
# List of number terms
nums = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven',
View contraction cleaner.py
# This code is not mine! Copied from https://gist.github.com/nealrs/96342d8231b75cf4bb82, but with suggested alteration to include text.lower() in the function.
import re
cList = {
"ain't": "am not",
"aren't": "are not",
"can't": "cannot",
"can't've": "cannot have",
"'cause": "because",
"could've": "could have",
View Analyses of resolutions.R
library(ggplot2)
positions = c("Travel and Holidays", "Finances", "Learning and Career", "Mental Wellbeing",
"Relationships", "Physical Health")
# What are the most popular resolutions?
p1 <- ggplot(twitter_df[twitter_df$Resolution.type != "",], aes(x = Resolution.type, fill = Resolution.type)) +
geom_bar() +
coord_flip() +
ggtitle("Number of tweets by resolution type") +
View Resolutions matching.py
# Import the relevant packages
import numpy as np
import re
# Create 6 new dummy variables which indicate whether one of the words associated with a resolution is present in the tweet.
# Notes on the patterns:
#   * They are raw strings, so `\W` reaches the regex engine instead of being parsed as an (invalid) string escape.
#   * The keyword alternation is a non-capturing group; `str.contains` only needs a yes/no answer and pandas warns
#     when a pattern carries capture groups.
#   * `na=False` makes a missing tweet count as "no match". Without it `str.contains` yields NaN, which is truthy
#     inside `np.where` and would be coded as 1.
twitter_df['Physical Health'] = np.where(
    twitter_df['Tweet'].str.contains(
        r'(?:^|\W)(?:weight|fit|exercise|gym|muscle|health|water|smoking|alcohol|drinking|walk|run|swim)(?:$|\W)',
        flags=re.IGNORECASE, na=False),
    1, 0)
# The keyword list previously contained ' read' (with a leading space), which required an extra literal space after
# the word boundary and so almost never matched; it also listed 'study' twice.
twitter_df['Learning and Career'] = np.where(
    twitter_df['Tweet'].str.contains(
        r'(?:^|\W)(?:business|job|career|professional|study|learn|develop|advance|grades|school|university|read|skill|education)(?:$|\W)',
        flags=re.IGNORECASE, na=False),
    1, 0)
View gohan.go
package main
import (
"fmt"
)
type Saiyan struct {
Name string
Power int
Father *Saiyan
View christmas_tree.R
# Load in the packages
library(ggplot2)
library(extrafont)
# Make fonts available to the plotting device via extrafont.
# NOTE(review): font_import() presumably scans the system fonts and can be slow / prompt for
# confirmation; it likely only needs to be run once per machine - confirm before running in batch.
font_import()
loadfonts()
# Read in the base Christmas tree data
# (fetched from a remote URL, so this step requires network access)
ChristmasTree <- read.csv("https://raw.githubusercontent.com/t-redactyl/Blog-posts/master/Christmas%20tree%20base%20data.csv")
# Generate the "lights"
View data_extraction.py
import urllib2
import json
import math
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
import matplotlib.pyplot as plt
def expired_listings(site, searchterm):
View mysql_setup.sql
-- Create tables
-- ratingsdata: one row per (userid, itemid) pair, enforced by the composite primary key.
-- The table is dropped first so the script can be re-run from scratch.
DROP TABLE IF EXISTS ratingsdata;
CREATE TABLE ratingsdata (
userid INT,
itemid INT,
rating INT,
-- NOTE(review): stored as a plain integer, presumably a Unix epoch value; confirm against the source data.
timestamp INT,
PRIMARY KEY (userid, itemid));
DROP TABLE IF EXISTS movies;
View web-scraping.py
import lxml.html
from lxml.cssselect import CSSSelector
import requests
def get_title(node):
'''
Extracts the movie title from the URL http://www.timeout.com/london/film/the-50-best-christmas-movies
taking into account that some titles are tagged as h3, and some as h3 a.
'''
h3_elem = node.cssselect('div.feature-item__text h3')[0]
View cat_class_1_method.py
def name_print(cat):
    '''Print the name of the cat.

    ``cat`` is any object exposing a ``name`` attribute. The sentence is
    written to standard output and nothing is returned.
    '''
    # The parenthesised single-argument form behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    print("The cat is called %s." % cat.name)
name_print(felix)