Skip to content

Instantly share code, notes, and snippets.

View t-redactyl's full-sized avatar

Jodie Burchell t-redactyl

  • JetBrains
  • Berlin, Germany
View GitHub Profile
require(ggplot2); require(gridExtra)
# Set the colours for the graphs
barfill <- "#4271AE"
barlines <- "#1F3552"
line1 <- "black"
line2 <- "#FF3721"
# Plotting histogram of sample of daily page views
g1 <- ggplot(data=as.data.frame(sample), aes(sample)) +
# Defining lambda and n
lambda <- 220
n <- 30
# Calculating SEM
sem <- sqrt(lambda / n)
set.seed(567)
# Sample of 30 (29 from the Poisson distribution and an outlier of 260)
sample1 <- c(rpois(29, lambda = 220), 260)
# Sample of 10 (9 from the Poisson distribution and an outlier of 260)
sample2 <- c(rpois(9, lambda = 220), 260)
@t-redactyl
t-redactyl / remove_numbers.py
Last active June 22, 2017 08:04
Function designed to strip out all numbers (alphabetic - English only - and numeric) from a string as part of a text normalisation process.
# Function designed to strip out all numbers (alphabetic - English only - and numeric) from a string as part of a
# text normalisation process.
# Based on the text2num package (https://github.com/ghewgill/text2num) and using code from
# here (http://stackoverflow.com/questions/25346058/removing-list-of-words-from-a-string)
from string import digits
# List of number terms
nums = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven',
library(ggplot2)
positions = c("Travel and Holidays", "Finances", "Learning and Career", "Mental Wellbeing",
"Relationships", "Physical Health")
# What are the most popular resolutions?
p1 <- ggplot(twitter_df[twitter_df$Resolution.type != "",], aes(x = Resolution.type, fill = Resolution.type)) +
geom_bar() +
coord_flip() +
ggtitle("Number of tweets by resolution type") +
package main
import (
"fmt"
)
type Saiyan struct {
Name string
Power int
Father *Saiyan
@t-redactyl
t-redactyl / data_extraction.py
Created April 12, 2016 09:50
Companion code for blog post
import urllib2
import json
import math
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
import matplotlib.pyplot as plt
def expired_listings(site, searchterm):
@t-redactyl
t-redactyl / mysql_setup.sql
Last active December 18, 2015 02:07
MySQL code for the blog post: Finding the highest rated Christmas movies in MovieLens 10M (23/12/2015)
-- Create tables
-- Rebuild the ratings table from scratch: drop any previous copy, then recreate it.
DROP TABLE IF EXISTS ratingsdata;
-- Each (userid, itemid) pair may appear at most once (see the primary key below).
CREATE TABLE ratingsdata (
userid INT,
itemid INT,
-- NOTE(review): INT cannot store fractional values; MovieLens 10M is reported to use half-star ratings, confirm before loading.
rating INT,
-- NOTE(review): presumably seconds since the Unix epoch; confirm against the data file.
timestamp INT,
-- Composite key: inserting the same userid/itemid pair twice is rejected.
PRIMARY KEY (userid, itemid));
DROP TABLE IF EXISTS movies;
@t-redactyl
t-redactyl / web-scraping.py
Created December 18, 2015 01:02
Web scraping code for the blog post: Finding the highest rated Christmas movies in MovieLens 10M (23/12/2015)
import lxml.html
from lxml.cssselect import CSSSelector
import requests
def get_title(node):
'''
Extracts the movie title from the URL http://www.timeout.com/london/film/the-50-best-christmas-movies
taking into account that some titles are tagged as h3, and some as h3 a.
'''
h3_elem = node.cssselect('div.feature-item__text h3')[0]
@t-redactyl
t-redactyl / cat_class_1_method.py
Created November 12, 2015 03:02
Code associated with blog post:
def name_print(cat):
    '''Print the name of the cat.

    Writes the sentence "The cat is called <name>." followed by a newline
    to standard output, where <name> is read from ``cat.name``.

    Args:
        cat: Any object exposing a ``name`` attribute.
    '''
    # A single parenthesised argument prints identically on Python 2 and
    # Python 3, whereas the bare ``print "..."`` statement is a SyntaxError
    # on Python 3.
    print("The cat is called %s." % cat.name)
name_print(felix)