Jodie Burchell t-redactyl

## diamonds_sample_weighted.csv

          
            carat
            cut
            color
            clarity
            depth
            table
            price
            x
            y
            z
            nclarity

            
              0.42
              Ideal
              G
              VS1
              61.4
              56.0
              921
              4.82
              4.85
              2.97
              5

            
              0.3
              Ideal
              D
              VS1
              62.2
              56.0
              835
              4.31
              4.27
              2.67
              5

            
              0.31
              Ideal
              G
              IF
              61.5
              54.0
              871
              4.4
              4.41
              2.71
              1

            
              1.79
              Premium
              H
              VS1
              62.6
              56.0
              14904
              7.81
              7.76
              4.87
              5

            
              0.28
              Very Good
              F
              VS1
              62.1
              59.0
              487
              4.16
              4.21
              2.6
              5

            
              1.2
              Premium
              E
              VS1
              60.7
              57.0
              10053
              6.89
              6.81
              4.16
              5

            
              1.52
              Fair
              J
              VS1
              65.4
              58.0
              7186
              7.22
              7.17
              4.7
              5

            
              0.31
              Ideal
              E
              VS1
              61.8
              55.0
              692
              4.36
              4.38
              2.7
              5

            
              0.52
              Ideal
              G
              VS1
              60.9
              55.0
              1815
              5.22
              5.16
              3.16
              5

## copper_data.csv

          
            product
            year
            export
            percentage
            sum

            
              copper
              2006
              4176
              79
              5255

            
              copper
              2007
              8560
              81
              10505

            
              copper
              2008
              6473
              76
              8519

            
              copper
              2009
              10465
              80
              13027

            
              copper
              2010
              14977
              86
              17325

            
              copper
              2011
              15421
              83
              18629

            
              copper
              2012
              14805
              82
              18079

            
              copper
              2013
              15183
              80
              19088

            
              copper
              2014
              14012
              76
              18437

## diamonds_sample_weighted.csv

          
            0.42
            Ideal
            G
            VS1
            61.4
            56.0
            921
            4.82
            4.85
            2.97
            5

            
              0.3
              Ideal
              D
              VS1
              62.2
              56.0
              835
              4.31
              4.27
              2.67
              5

            
              0.31
              Ideal
              G
              IF
              61.5
              54.0
              871
              4.4
              4.41
              2.71
              1

            
              1.79
              Premium
              H
              VS1
              62.6
              56.0
              14904
              7.81
              7.76
              4.87
              5

            
              0.28
              Very Good
              F
              VS1
              62.1
              59.0
              487
              4.16
              4.21
              2.6
              5

            
              1.2
              Premium
              E
              VS1
              60.7
              57.0
              10053
              6.89
              6.81
              4.16
              5

            
              1.52
              Fair
              J
              VS1
              65.4
              58.0
              7186
              7.22
              7.17
              4.7
              5

            
              0.31
              Ideal
              E
              VS1
              61.8
              55.0
              692
              4.36
              4.38
              2.7
              5

            
              0.52
              Ideal
              G
              VS1
              60.9
              55.0
              1815
              5.22
              5.16
              3.16
              5

## diamonds_sample.csv

          
            carat
            cut
            color
            clarity
            depth
            table
            price
            x
            y
            z

            
              0.32
              Ideal
              G
              VVS1
              61.2
              55.0
              814
              4.41
              4.44
              2.71

            
              0.7
              Fair
              I
              SI1
              62.0
              67.0
              1848
              5.54
              5.5
              3.42

            
              1.46
              Premium
              J
              SI2
              60.1
              58.0
              6387
              7.43
              7.34
              4.44

            
              0.38
              Premium
              G
              VS2
              60.4
              57.0
              983
              4.7
              4.67
              2.83

            
              0.7
              Very Good
              F
              VS2
              62.9
              56.0
              2400
              5.66
              5.73
              3.58

            
              0.32
              Ideal
              E
              SI2
              62.7
              55.0
              576
              4.42
              4.39
              2.76

            
              0.71
              Ideal
              F
              VS1
              62.1
              57.0
              3066
              5.73
              5.76
              3.57

            
              0.3
              Ideal
              E
              VS2
              61.5
              55.0
              844
              4.31
              4.28
              2.64

            
              0.36
              Ideal
              E
              VVS2
              61.8
              54.0
              928
              4.6
              4.62
              2.85

## remove_numbers.py
# Function designed to strip out all numbers (alphabetic - English only - and numeric) from a string as part of a
# text normalisation process.

# Based on the text2num package (https://github.com/ghewgill/text2num) and using code from
# here (http://stackoverflow.com/questions/25346058/removing-list-of-words-from-a-string)

from string import digits

# List of number terms
nums = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven',

## contraction cleaner.py
# This code is not mine! Copied from https://gist.github.com/nealrs/96342d8231b75cf4bb82, but with suggested alteration to include text.lower() in the function.

import re
cList = {
  "ain't": "am not",
  "aren't": "are not",
  "can't": "cannot",
  "can't've": "cannot have",
  "'cause": "because",
  "could've": "could have",

## Analyses of resolutions.R
library(ggplot2)

# Names of the six resolution categories.
# NOTE(review): presumably the display order for the resolution types in the
# plots below; the code that consumes `positions` is not visible here - confirm.
positions <- c("Travel and Holidays", "Finances", "Learning and Career", "Mental Wellbeing",
               "Relationships", "Physical Health")

# What are the most popular resolutions?
p1 <- ggplot(twitter_df[twitter_df$Resolution.type != "",], aes(x = Resolution.type, fill = Resolution.type)) +
        geom_bar() +
        coord_flip() +
        ggtitle("Number of tweets by resolution type") +

## Resolutions matching.py
# Import the relevant packages
import numpy as np
import re

# Create 6 new dummy variables which indicate whether one of the words associated with a resolution is present in the tweet.
#
# Each pattern matches a keyword only as a whole word: it must be preceded by the start of the
# text or a non-word character, and followed by the end of the text or a non-word character.
#   * The patterns are raw strings so that `\W` reaches the regex engine unchanged instead of
#     being parsed as an (invalid) Python string escape.
#   * The keyword group is non-capturing: str.contains only needs a yes/no answer, and pandas
#     warns when a pattern passed to it contains capture groups.
#   * na = False treats a missing tweet as "no match"; otherwise str.contains returns NaN for it,
#     which np.where evaluates as truthy and would flag as 1.
twitter_df['Physical Health'] = np.where(twitter_df['Tweet'].str.contains(r'(?:^|\W)(?:weight|fit|exercise|gym|muscle|health|water|smoking|alcohol|drinking|walk|run|swim)(?:$|\W)',
    flags = re.IGNORECASE, na = False), 1, 0)

# 'read' previously had a leading space (' read'), which demanded an extra space on top of the
# word boundary and so missed ordinary text such as "will read more"; the repeated 'study'
# alternative was redundant and has been dropped.
twitter_df['Learning and Career'] = np.where(twitter_df['Tweet'].str.contains(r'(?:^|\W)(?:business|job|career|professional|study|learn|develop|advance|grades|school|university|read|skill|education)(?:$|\W)',
    flags = re.IGNORECASE, na = False), 1, 0)

## gohan.go
package main

import (
	"fmt"
)

type Saiyan struct {
	Name   string
	Power  int
	Father *Saiyan

## christmas_tree.R
# Load in the packages
library(ggplot2)
library(extrafont)
# NOTE(review): font_import() and loadfonts() are called unconditionally on
# every run. They presumably come from extrafont; font_import() looks like a
# one-off machine setup step - confirm whether it needs to run each time.
font_import()
loadfonts()

# Read in the base Christmas tree data
# The CSV is fetched from GitHub over HTTPS, so this step needs network access.
ChristmasTree <- read.csv("https://raw.githubusercontent.com/t-redactyl/Blog-posts/master/Christmas%20tree%20base%20data.csv")

# Generate the "lights"
carat	cut	color	clarity	depth	table	price	x	y	z	nclarity
0.42	Ideal	G	VS1	61.4	56.0	921	4.82	4.85	2.97	5
0.3	Ideal	D	VS1	62.2	56.0	835	4.31	4.27	2.67	5
0.31	Ideal	G	IF	61.5	54.0	871	4.4	4.41	2.71	1
1.79	Premium	H	VS1	62.6	56.0	14904	7.81	7.76	4.87	5
0.28	Very Good	F	VS1	62.1	59.0	487	4.16	4.21	2.6	5
1.2	Premium	E	VS1	60.7	57.0	10053	6.89	6.81	4.16	5
1.52	Fair	J	VS1	65.4	58.0	7186	7.22	7.17	4.7	5
0.31	Ideal	E	VS1	61.8	55.0	692	4.36	4.38	2.7	5
0.52	Ideal	G	VS1	60.9	55.0	1815	5.22	5.16	3.16	5
product	year	export	percentage	sum
copper	2006	4176	79	5255
copper	2007	8560	81	10505
copper	2008	6473	76	8519
copper	2009	10465	80	13027
copper	2010	14977	86	17325
copper	2011	15421	83	18629
copper	2012	14805	82	18079
copper	2013	15183	80	19088
copper	2014	14012	76	18437
	# Function designed to strip out all numbers (alphabetic - English only - and numeric) from a string as part of a
	# text normalisation process.

	# Based on the text2num package (https://github.com/ghewgill/text2num) and using code from
	# here (http://stackoverflow.com/questions/25346058/removing-list-of-words-from-a-string)

	from string import digits

	# List of number terms
	nums = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten', 'eleven',
	# This code is not mine! Copied from https://gist.github.com/nealrs/96342d8231b75cf4bb82, but with suggested alteration to include text.lower() in the function.

	import re
	cList = {
	"ain't": "am not",
	"aren't": "are not",
	"can't": "cannot",
	"can't've": "cannot have",
	"'cause": "because",
	"could've": "could have",
	library(ggplot2)

	positions = c("Travel and Holidays", "Finances", "Learning and Career", "Mental Wellbeing",
	"Relationships", "Physical Health")

	# What are the most popular resolutions?
	p1 <- ggplot(twitter_df[twitter_df$Resolution.type != "",], aes(x = Resolution.type, fill = Resolution.type)) +
	geom_bar() +
	coord_flip() +
	ggtitle("Number of tweets by resolution type") +
	# Import the relevant packages
	import numpy as np
	import re

	# Create 6 new dummy variables which indicate whether one of the words associated with a resolution is present in the tweet.
	twitter_df['Physical Health'] = np.where(twitter_df['Tweet'].str.contains('(?:^|\W)(weight|fit|exercise|gym|muscle|health|water|smoking|alcohol|drinking|walk|run|swim)(?:$|\W)',
	flags = re.IGNORECASE), 1, 0)

	twitter_df['Learning and Career'] = np.where(twitter_df['Tweet'].str.contains('(?:^|\W)(business|job|career|professional|study|learn|develop|advance|grades|school|university| read|study|skill|education)(?:$|\W)',
	flags = re.IGNORECASE), 1, 0)
	package main

	import (
	"fmt"
	)

	type Saiyan struct {
	Name string
	Power int
	Father *Saiyan
	# Load in the packages
	library(ggplot2)
	library(extrafont)
	font_import()
	loadfonts()

	# Read in the base Christmas tree data
	ChristmasTree <- read.csv("https://raw.githubusercontent.com/t-redactyl/Blog-posts/master/Christmas%20tree%20base%20data.csv")

	# Generate the "lights"