Skip to content

Instantly share code, notes, and snippets.

View kendravant's full-sized avatar

Kendra Vant kendravant

View GitHub Profile
# FILE: Classifying Breast Cancer as Benign or Malignant
# AUTHOR: Timothy P. Jurka
library(RTextTools);
# GET THE BREAST CANCER DATA FROM http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.names
data <- read.csv("http://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data",header=FALSE)
data <- data[-1]
# ADD TEXTUAL DESCRIPTORS FOR EACH MASS CHARACTERISTIC FOR THE DOCUMENT-TERM MATRIX
rm(list = ls())
doInstall <- TRUE # Change to FALSE if you don't want packages installed.
toInstall <- c("zoo", "tm", "ggplot2", "Snowball")
if(doInstall){install.packages(toInstall, repos = "http://cran.r-project.org")}
lapply(toInstall, library, character.only = TRUE)
# From: http://www.cnn.com/2012/10/03/politics/debate-transcript/index.html
Transcript <- readLines("https://raw.github.com/dsparks/Test_image/master/Denver_Debate_Transcript.txt")
head(Transcript, 20)
doInstall <- TRUE
toInstall <- c("twitteR", "dismo", "maps", "ggplot2")
if(doInstall){install.packages(toInstall, repos = "http://cran.us.r-project.org")}
lapply(toInstall, library, character.only = TRUE)
searchTerm <- "#rstats"
searchResults <- searchTwitter(searchTerm, n = 1000) # Gather Tweets
tweetFrame <- twListToDF(searchResults) # Convert to a nice dF
userInfo <- lookupUsers(tweetFrame$screenName) # Batch lookup of user info