Jodie Burchell t-redactyl

## comparing_small_and_larger_samples.R
# Seed the RNG so both simulated samples are identical on every run
set.seed(567)

# Larger sample: 29 draws from Poisson(220) followed by one planted outlier
# (260), giving 30 observations in total
sample1 <- append(rpois(n = 29, lambda = 220), 260)

# Smaller sample: 9 draws from Poisson(220) followed by the same outlier (260),
# giving 10 observations in total
sample2 <- append(rpois(n = 9, lambda = 220), 260)

## calculating_sem_formula.R
# Sample size and Poisson rate used in the closed-form standard error
n <- 30
lambda <- 220

# A Poisson variable has variance lambda, so its standard deviation is
# sqrt(lambda) and the standard error of the mean is sqrt(lambda / n)
sem <- sqrt(lambda / n)

## bootstrap_plot_1.R
# Attach the plotting packages. library() stops with an error when a package is
# missing, whereas require() only warns and returns FALSE, which defers the
# failure to the first call that needs the package.
library(ggplot2)
library(gridExtra)

# Colour constants for the graphs (hex codes or R colour names)
line2 <- "#FF3721"
line1 <- "black"
barlines <- "#1F3552"
barfill <- "#4271AE"

# Plotting histogram of sample of daily page views
g1 <- ggplot(data=as.data.frame(sample), aes(sample)) +

## av_peds_2.R
# Sum Hourly_Counts within each date / collapsed_sensors_2 group of p.subset
# and store the total as n_peds
av_peds_2 <- ddply(
  p.subset,
  c("date", "collapsed_sensors_2"),
  summarise,
  n_peds = sum(Hourly_Counts)
)

# Full day name, kept as a display column. weekdays() returns names in the
# session locale, so they are not guaranteed to be English.
av_peds_2$day <- weekdays(av_peds_2$date, abbreviate = FALSE)

# Weekday-versus-weekend flag. It is derived from the numeric day of the week
# (POSIXlt$wday: 0 = Sunday ... 6 = Saturday) rather than from the day name, so
# the classification does not silently become all "Weekday" in a non-English
# locale. Indexing the lookup vector with NA keeps missing dates as NA.
av_peds_2$weekend <- as.factor(
  c("Weekend", rep("Weekday", 5), "Weekend")[
    as.POSIXlt(av_peds_2$date)$wday + 1
  ]
)

# Extract time of day

## centred_chart.R
library(ggplot2); library(gridExtra)
# Scatterplot of mpg against the raw (untransformed) wt column of mtcars, with
# a vertical reference line drawn at x = 0
g1 <- ggplot(mtcars, aes(wt, mpg)) +
  geom_point(colour = "#0971B2", alpha = 0.7) +
  geom_vline(xintercept = 0) +
  scale_y_continuous(limits = c(10, 35)) +
  labs(
    x = "Weight (`000 lbs)",
    y = "Miles per gallon",
    title = "Untransformed Weight"
  ) +
  theme_bw()

## cleaning_data.R
# Recode the numerically coded mtcars columns as labelled factors (suffix ".f").
# Every code is mapped to its label explicitly through levels/labels, so a label
# cannot slide onto the wrong code when a value is absent from the data (which
# positional `levels(x) <- ...` renaming would allow).
mtcars$am.f <- factor(mtcars$am, levels = c(0, 1),
                      labels = c("Automatic", "Manual"))
mtcars$cyl.f <- factor(mtcars$cyl, levels = c(4, 6, 8),
                       labels = c("4 cyl", "6 cyl", "8 cyl"))
mtcars$vs.f <- factor(mtcars$vs, levels = c(0, 1),
                      labels = c("V engine", "Straight engine"))
mtcars$gear.f <- factor(mtcars$gear, levels = c(3, 4, 5),
                        labels = c("3 gears", "4 gears", "5 gears"))
# carb keeps its numeric codes as the level names
mtcars$carb.f <- as.factor(mtcars$carb)

## christmas_tree.R
# Load in the packages
library(ggplot2)
library(extrafont)
# NOTE(review): font_import() and loadfonts() come from extrafont and presumably
# register system fonts for use in plots. font_import() looks like a one-off
# setup step -- confirm whether it really needs to run on every execution.
font_import()
loadfonts()

# Read in the base Christmas tree data
# The CSV is fetched over HTTPS from GitHub, so this line needs network access.
ChristmasTree <- read.csv("https://raw.githubusercontent.com/t-redactyl/Blog-posts/master/Christmas%20tree%20base%20data.csv")

# Generate the "lights"

## Resolutions matching.py
# Import the relevant packages
import numpy as np
import re

# Create 6 new dummy variables which indicate whether one of the words associated with a resolution is present in the tweet.
# Each column is 1 when a keyword occurs delimited by a non-word character or by the start/end of the tweet, else 0.
#   * Raw strings stop `\W` from being read as an (invalid) Python string escape.
#   * The keyword group is non-capturing, so pandas does not warn about unused match groups.
#   * na=False scores missing tweets as 0; without it NaN is truthy inside np.where and they would score 1.
twitter_df['Physical Health'] = np.where(
    twitter_df['Tweet'].str.contains(
        r'(?:^|\W)(?:weight|fit|exercise|gym|muscle|health|water|smoking|alcohol|drinking|walk|run|swim)(?:$|\W)',
        flags=re.IGNORECASE, na=False),
    1, 0)

# 'read' previously carried a leading space (' read'), which combined with the \W prefix meant it only matched
# after two consecutive separators; the duplicate 'study' alternative was redundant and is listed once.
twitter_df['Learning and Career'] = np.where(
    twitter_df['Tweet'].str.contains(
        r'(?:^|\W)(?:business|job|career|professional|study|learn|develop|advance|grades|school|university|read|skill|education)(?:$|\W)',
        flags=re.IGNORECASE, na=False),
    1, 0)

## contraction cleaner.py
# This code is not mine! Copied from https://gist.github.com/nealrs/96342d8231b75cf4bb82, but with suggested alteration to include text.lower() in the function.

import re
cList = {
  "ain't": "am not",
  "aren't": "are not",
  "can't": "cannot",
  "can't've": "cannot have",
  "'cause": "because",
  "could've": "could have",

## diamonds_sample.csv

          
            carat
            cut
            color
            clarity
            depth
            table
            price
            x
            y
            z

            
              0.32
              Ideal
              G
              VVS1
              61.2
              55.0
              814
              4.41
              4.44
              2.71

            
              0.7
              Fair
              I
              SI1
              62.0
              67.0
              1848
              5.54
              5.5
              3.42

            
              1.46
              Premium
              J
              SI2
              60.1
              58.0
              6387
              7.43
              7.34
              4.44

            
              0.38
              Premium
              G
              VS2
              60.4
              57.0
              983
              4.7
              4.67
              2.83

            
              0.7
              Very Good
              F
              VS2
              62.9
              56.0
              2400
              5.66
              5.73
              3.58

            
              0.32
              Ideal
              E
              SI2
              62.7
              55.0
              576
              4.42
              4.39
              2.76

            
              0.71
              Ideal
              F
              VS1
              62.1
              57.0
              3066
              5.73
              5.76
              3.57

            
              0.3
              Ideal
              E
              VS2
              61.5
              55.0
              844
              4.31
              4.28
              2.64

            
              0.36
              Ideal
              E
              VVS2
              61.8
              54.0
              928
              4.6
              4.62
              2.85
	# Seed the RNG so both simulated samples are identical on every run
	set.seed(567)

	# Larger sample: 29 draws from Poisson(220) followed by one planted outlier
	# (260), giving 30 observations in total
	sample1 <- append(rpois(n = 29, lambda = 220), 260)

	# Smaller sample: 9 draws from Poisson(220) followed by the same outlier (260),
	# giving 10 observations in total
	sample2 <- append(rpois(n = 9, lambda = 220), 260)
	# Sample size and Poisson rate used in the closed-form standard error
	n <- 30
	lambda <- 220

	# A Poisson variable has variance lambda, so its standard deviation is
	# sqrt(lambda) and the standard error of the mean is sqrt(lambda / n)
	sem <- sqrt(lambda / n)
	# Attach the plotting packages. library() stops with an error when a package is
	# missing, whereas require() only warns and returns FALSE, which defers the
	# failure to the first call that needs the package.
	library(ggplot2)
	library(gridExtra)

	# Colour constants for the graphs (hex codes or R colour names)
	line2 <- "#FF3721"
	line1 <- "black"
	barlines <- "#1F3552"
	barfill <- "#4271AE"

	# Plotting histogram of sample of daily page views
	g1 <- ggplot(data=as.data.frame(sample), aes(sample)) +
	# Sum Hourly_Counts within each date / collapsed_sensors_2 group of p.subset
	# and store the total as n_peds
	av_peds_2 <- ddply(
	  p.subset,
	  c("date", "collapsed_sensors_2"),
	  summarise,
	  n_peds = sum(Hourly_Counts)
	)

	# Full day name, kept as a display column. weekdays() returns names in the
	# session locale, so they are not guaranteed to be English.
	av_peds_2$day <- weekdays(av_peds_2$date, abbreviate = FALSE)

	# Weekday-versus-weekend flag. It is derived from the numeric day of the week
	# (POSIXlt$wday: 0 = Sunday ... 6 = Saturday) rather than from the day name, so
	# the classification does not depend on the locale. This also removes the
	# markdown-escaped `\|` operator, which is not valid R. Indexing the lookup
	# vector with NA keeps missing dates as NA.
	av_peds_2$weekend <- as.factor(
	  c("Weekend", rep("Weekday", 5), "Weekend")[
	    as.POSIXlt(av_peds_2$date)$wday + 1
	  ]
	)

	# Extract time of day
	library(ggplot2); library(gridExtra)
	# Scatterplot of mpg against the raw (untransformed) wt column of mtcars, with
	# a vertical reference line drawn at x = 0
	g1 <- ggplot(mtcars, aes(wt, mpg)) +
	  geom_point(colour = "#0971B2", alpha = 0.7) +
	  geom_vline(xintercept = 0) +
	  scale_y_continuous(limits = c(10, 35)) +
	  labs(
	    x = "Weight (`000 lbs)",
	    y = "Miles per gallon",
	    title = "Untransformed Weight"
	  ) +
	  theme_bw()
	# Recode the numerically coded mtcars columns as labelled factors (suffix ".f").
	# Every code is mapped to its label explicitly through levels/labels, so a label
	# cannot slide onto the wrong code when a value is absent from the data (which
	# positional `levels(x) <- ...` renaming would allow).
	mtcars$am.f <- factor(mtcars$am, levels = c(0, 1),
	                      labels = c("Automatic", "Manual"))
	mtcars$cyl.f <- factor(mtcars$cyl, levels = c(4, 6, 8),
	                       labels = c("4 cyl", "6 cyl", "8 cyl"))
	mtcars$vs.f <- factor(mtcars$vs, levels = c(0, 1),
	                      labels = c("V engine", "Straight engine"))
	mtcars$gear.f <- factor(mtcars$gear, levels = c(3, 4, 5),
	                        labels = c("3 gears", "4 gears", "5 gears"))
	# carb keeps its numeric codes as the level names
	mtcars$carb.f <- as.factor(mtcars$carb)
	# Load in the packages
	library(ggplot2)
	library(extrafont)
	# NOTE(review): font_import() and loadfonts() come from extrafont and presumably
	# register system fonts for use in plots. font_import() looks like a one-off
	# setup step -- confirm whether it really needs to run on every execution.
	font_import()
	loadfonts()

	# Read in the base Christmas tree data
	# The CSV is fetched over HTTPS from GitHub, so this line needs network access.
	ChristmasTree <- read.csv("https://raw.githubusercontent.com/t-redactyl/Blog-posts/master/Christmas%20tree%20base%20data.csv")

	# Generate the "lights"
	# Import the relevant packages
	import numpy as np
	import re

	# Create 6 new dummy variables which indicate whether one of the words associated with a resolution is present in the tweet.
	# Each column is 1 when a keyword occurs delimited by a non-word character or by the start/end of the tweet, else 0.
	#   * The alternations use a plain `|`; the markdown-escaped `\|` matched a literal pipe character instead.
	#   * Raw strings stop `\W` from being read as an (invalid) Python string escape.
	#   * The keyword group is non-capturing, so pandas does not warn about unused match groups.
	#   * na=False scores missing tweets as 0; without it NaN is truthy inside np.where and they would score 1.
	twitter_df['Physical Health'] = np.where(
	    twitter_df['Tweet'].str.contains(
	        r'(?:^|\W)(?:weight|fit|exercise|gym|muscle|health|water|smoking|alcohol|drinking|walk|run|swim)(?:$|\W)',
	        flags=re.IGNORECASE, na=False),
	    1, 0)

	# 'read' previously carried a leading space (' read'), which combined with the \W prefix meant it only matched
	# after two consecutive separators; the duplicate 'study' alternative was redundant and is listed once.
	twitter_df['Learning and Career'] = np.where(
	    twitter_df['Tweet'].str.contains(
	        r'(?:^|\W)(?:business|job|career|professional|study|learn|develop|advance|grades|school|university|read|skill|education)(?:$|\W)',
	        flags=re.IGNORECASE, na=False),
	    1, 0)
	# This code is not mine! Copied from https://gist.github.com/nealrs/96342d8231b75cf4bb82, but with suggested alteration to include text.lower() in the function.

	import re
	cList = {
	"ain't": "am not",
	"aren't": "are not",
	"can't": "cannot",
	"can't've": "cannot have",
	"'cause": "because",
	"could've": "could have",
carat	cut	color	clarity	depth	table	price	x	y	z
0.32	Ideal	G	VVS1	61.2	55.0	814	4.41	4.44	2.71
0.7	Fair	I	SI1	62.0	67.0	1848	5.54	5.5	3.42
1.46	Premium	J	SI2	60.1	58.0	6387	7.43	7.34	4.44
0.38	Premium	G	VS2	60.4	57.0	983	4.7	4.67	2.83
0.7	Very Good	F	VS2	62.9	56.0	2400	5.66	5.73	3.58
0.32	Ideal	E	SI2	62.7	55.0	576	4.42	4.39	2.76
0.71	Ideal	F	VS1	62.1	57.0	3066	5.73	5.76	3.57
0.3	Ideal	E	VS2	61.5	55.0	844	4.31	4.28	2.64
0.36	Ideal	E	VVS2	61.8	54.0	928	4.6	4.62	2.85