Skip to content

Instantly share code, notes, and snippets.

View t-redactyl's full-sized avatar

Jodie Burchell t-redactyl

  • JetBrains
  • Berlin, Germany
View GitHub Profile
# Fix the RNG seed so the simulated samples are reproducible
set.seed(567)

# Poisson rate and the size of the larger sample
lambda <- 220
n <- 30

# sample1: 29 Poisson draws plus one outlier of 260 (30 observations)
sample1 <- c(rpois(n - 1, lambda = lambda), 260)
# sample2: 9 Poisson draws plus one outlier of 260 (10 observations)
sample2 <- c(rpois(9, lambda = lambda), 260)

# Standard error of the mean: sqrt(variance / n), where the variance of a
# Poisson variable equals lambda
sem <- sqrt(lambda / n)
# Load the plotting packages. library() stops with an error when a package is
# not installed, whereas require() only returns FALSE and lets the script
# carry on until a later, less obvious failure.
library(ggplot2)
library(gridExtra)
# Set the colours for the graphs
barfill <- "#4271AE"
barlines <- "#1F3552"
line1 <- "black"
line2 <- "#FF3721"
# Plotting histogram of sample of daily page views
g1 <- ggplot(data=as.data.frame(sample), aes(sample)) +
# Sum Hourly_Counts within each (date, collapsed_sensors_2) combination of
# p.subset, giving one row per combination with the total in n_peds
av_peds_2 <- ddply(p.subset, c("date", "collapsed_sensors_2"), summarise,
                   n_peds = sum(Hourly_Counts))
# Extract weekday versus weekend
# Full (unabbreviated) day name; weekdays() returns it in the session locale
av_peds_2$day <- weekdays(av_peds_2$date, abbreviate = FALSE)
# Classify from the numeric day of week (POSIXlt wday: 0 = Sunday,
# 6 = Saturday) rather than by matching English day names, so the result does
# not depend on the session locale. Missing dates stay NA, as before.
av_peds_2$weekend <- local({
  wday <- as.POSIXlt(av_peds_2$date)$wday
  as.factor(ifelse(wday == 0 | wday == 6, "Weekend", "Weekday"))
})
# Extract time of day
@t-redactyl
t-redactyl / centred_chart.R
Created October 29, 2015 00:23
Code associated with blog post
library(ggplot2); library(gridExtra)
# Scatterplot of mpg against wt from mtcars, with a vertical reference line
# at x = 0 and the y axis restricted to 10-35
g1 <- ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_point(colour = "#0971B2", alpha = 0.7) +
  geom_vline(xintercept = 0) +
  ylim(10, 35) +
  labs(
    x = "Weight (`000 lbs)",
    y = "Miles per gallon",
    title = "Untransformed Weight"
  ) +
  theme_bw()
@t-redactyl
t-redactyl / cleaning_data.R
Created November 4, 2015 05:52
Code associated with blog post
# Labelled factor versions of the coded mtcars columns.
# Each value is mapped to its label explicitly via levels/labels. Assigning
# levels() positionally after as.factor() silently mislabels the data whenever
# one of the expected values is absent (e.g. after subsetting).
mtcars$am.f <- factor(mtcars$am, levels = c(0, 1),
                      labels = c("Automatic", "Manual"))
mtcars$cyl.f <- factor(mtcars$cyl, levels = c(4, 6, 8),
                       labels = c("4 cyl", "6 cyl", "8 cyl"))
mtcars$vs.f <- factor(mtcars$vs, levels = c(0, 1),
                      labels = c("V engine", "Straight engine"))
mtcars$gear.f <- factor(mtcars$gear, levels = c(3, 4, 5),
                        labels = c("3 gears", "4 gears", "5 gears"))
# carb keeps its numeric values as the level labels
mtcars$carb.f <- as.factor(mtcars$carb)
# Load in the packages
library(ggplot2)
library(extrafont)
# NOTE(review): font_import() and loadfonts() come from extrafont; presumably
# they register system fonts for use in plots, and font_import() may be slow or
# ask for confirmation - confirm before running this non-interactively.
font_import()
loadfonts()
# Read in the base Christmas tree data
# (downloaded from the t-redactyl/Blog-posts GitHub repository on every run,
# so this line needs network access)
ChristmasTree <- read.csv("https://raw.githubusercontent.com/t-redactyl/Blog-posts/master/Christmas%20tree%20base%20data.csv")
# Generate the "lights"
# Import the relevant packages
import numpy as np
import re
# Create 6 new dummy variables which indicate whether one of the words associated with a resolution is present in the tweet.
# Notes on the patterns:
#   - raw strings keep `\W` as a regex escape instead of an invalid string escape;
#   - the keyword alternation is a non-capturing group, so pandas does not warn
#     about match groups in str.contains;
#   - na=False makes a missing tweet count as "no match" (0); without it the NaN
#     returned by str.contains is treated as truthy by np.where and becomes 1;
#   - the stray space before "read" was removed (it stopped "read" matching
#     after an ordinary word), as was the duplicated "study".
twitter_df['Physical Health'] = np.where(
    twitter_df['Tweet'].str.contains(
        r'(?:^|\W)(?:weight|fit|exercise|gym|muscle|health|water|smoking|alcohol|drinking|walk|run|swim)(?:$|\W)',
        flags=re.IGNORECASE, na=False),
    1, 0)
twitter_df['Learning and Career'] = np.where(
    twitter_df['Tweet'].str.contains(
        r'(?:^|\W)(?:business|job|career|professional|study|learn|develop|advance|grades|school|university|read|skill|education)(?:$|\W)',
        flags=re.IGNORECASE, na=False),
    1, 0)
# This code is not mine! Copied from https://gist.github.com/nealrs/96342d8231b75cf4bb82, but with suggested alteration to include text.lower() in the function.
import re
cList = {
"ain't": "am not",
"aren't": "are not",
"can't": "cannot",
"can't've": "cannot have",
"'cause": "because",
"could've": "could have",
carat cut color clarity depth table price x y z
0.32 Ideal G VVS1 61.2 55.0 814 4.41 4.44 2.71
0.7 Fair I SI1 62.0 67.0 1848 5.54 5.5 3.42
1.46 Premium J SI2 60.1 58.0 6387 7.43 7.34 4.44
0.38 Premium G VS2 60.4 57.0 983 4.7 4.67 2.83
0.7 Very Good F VS2 62.9 56.0 2400 5.66 5.73 3.58
0.32 Ideal E SI2 62.7 55.0 576 4.42 4.39 2.76
0.71 Ideal F VS1 62.1 57.0 3066 5.73 5.76 3.57
0.3 Ideal E VS2 61.5 55.0 844 4.31 4.28 2.64
0.36 Ideal E VVS2 61.8 54.0 928 4.6 4.62 2.85