mcorey/Corey Diss Paper 3

## Corey Diss Paper 3
#Build 03-11 Respondent Data [12-Feb-2013 Cut to 2000 RND cases]

rm(list=ls()) #Clear Workspace

#Load Libraries
library(utils)
library(foreign)
library(car)
library(TraMineR)
library(plyr)
library(TraMineRextras)
library(parallel)
library(cluster)
library(stats)
library(descr)
library(gmodels)
library(texreg)

#Set WD and load data
#MAC
setwd("/Users/michaelcorey/Dropbox/School Work")
# *nix
# setwd("/home/michael/Dropbox/School Work/")

  atus.resp.raw<-read.dta("ATUS data/atusresp_0311/atus_0311_resp.dta",
                      convert.factors=TRUE, warn.missing.labels = FALSE)
  atus.summ<-read.dta("ATUS data/atusresp_0311/atus_0311_summ.dta",
                      convert.factors=TRUE, warn.missing.labels = FALSE)
  ls()  #check what is loaded.

  atus.summ.m <- atus.summ[, c("tucaseid", "teage", "ptdtrace", "peeduca",
                               "pehspnon", "tesex", "t050101", "t050102",
                               "t050103")]

  atus.resp <- merge(atus.resp.raw, atus.summ.m, by="tucaseid", all.x=F, all.y=T)

#Get codebook for new file
  colnames(atus.resp) #show column names

  var.labels   <- attr(atus.resp.raw, "var.labels")  #Make variable label object
  var.lbl.summ <- attr(atus.summ.m, "var.labels")
  var.labels <- c(var.labels, 1:8)  #need to add dummy labels until I can pass
                                    #the partial list of var.names from summ.m
   var.labels
    codebook.resp <- data.frame(var.name=names(atus.resp), var.labels)  #Make codebook
    codebook.resp  #Print codebook

#Look around the data
  #Look at line numbers
    table(atus.resp$tulineno) #Looks good, all line 1 (ATUS respondents)


#Build smaller dataset
    resp.0311     <- atus.resp[, c("tucaseid")] #Make dataframe
    resp.0311     <- as.data.frame(resp.0311)

  #HH Characteristics
    resp.0311$id              <- atus.resp$tucaseid       #Assign ID
    resp.0311$sex             <- atus.resp$tesex          #Resp Sex
    resp.0311$kid.lt18.own    <- atus.resp$trohhchild     #Own HH kid <18
    resp.0311$nonhh.kid.lt18. <- atus.resp$trnhhchild     #Own Non-HH kid <18
    resp.0311$kid.lt18.hh     <- atus.resp$trhhchild      #Any HH child under 18
    resp.0311$kid.yngst       <- atus.resp$tryhhchild     #Youngest Child in HH
    resp.0311$hh.size         <- atus.resp$trnumhou       #HH Size
    resp.0311$kid.lt13.wake.ts  <- atus.resp$tucc2    #1st kid <13 awake [ts]
    resp.0311$kid.lt13.sleep.ts <- atus.resp$tucc4    #last kid <13 asleep [ts]

  #Respondent Characteristics
    resp.0311$hourly          <- atus.resp$teernhry       #Hourly Flag
    resp.0311$year            <- atus.resp$tuyear         #Year of Intv
    resp.0311$day             <- atus.resp$tudiaryday     #Week of Intv
    resp.0311$holiday         <- atus.resp$trholiday      #Is it a holiday?
    resp.0311$earn.weekly     <- atus.resp$trernwa        #Weekly Earnings
    resp.0311$industry        <- atus.resp$trdtind1       #Indstry Recode (main)
    resp.0311$occup           <- atus.resp$trdtocc1       #Occup Recode (main)
    resp.0311$work.7d         <- atus.resp$tufwk          #Work last 7 days
    resp.0311$month           <- atus.resp$tumonth        #Month of Intv
    resp.0311$job.7d          <- atus.resp$tuabsot        #Job last 7 days
    resp.0311$h35.usual       <- atus.resp$tehrftpt       #35 hrs usually
    resp.0311$whrs.usual      <- atus.resp$tehruslt       #Usual work hours (tot)
    resp.0311$age             <- atus.resp$teage          #Age of respondent
    resp.0311$race            <- atus.resp$ptdtrace       #Race of respondent
    resp.0311$hisp            <- atus.resp$pehspnon       #Hispanic
    resp.0311$lf.edit         <- atus.resp$telfs          #Edited labor force
    resp.0311$workj1          <- atus.summ$t050101        #Time in 1st Job on Day
    resp.0311$workj2          <- atus.summ$t050102        #Time in 2nd Job on Day
    resp.0311$workOth         <- atus.summ$t050103        #Time in 3rd Job on Day


  #Fix Respondent Characteristics
    #Recode day to weekday
      resp.0311$weekday <- recode(resp.0311$day,
              ' c("Sunday", "Saturday") = "Weekend"; else = "Weekday" ')
      table(atus.resp$tudiaryday, resp.0311$weekday)

  #Partner Information
    resp.0311$ptner.pres      <- atus.resp$trsppres       #Is spouse/ptnr present
    resp.0311$ptner.empl      <- atus.resp$tespempnot     #Sp/Ptnr Employment Status
    resp.0311$ptner.job7d     <- atus.resp$tuspabs        #Sp/Ptnr Job Last 7d
    resp.0311$ptner.35hwk     <- atus.resp$tuspusft       #Sp/Ptnr Usually 35h+
    resp.0311$ptner.whrs      <- atus.resp$tespuhrs       #Sp/Ptnr Usual Work Hours
    resp.0311$ptner.ftpt      <- atus.resp$trspftpt       #Sp/Ptnr Full/Part Time


#Save and pass to build activity data
  save(resp.0311, file="Diss Paper 1 -- weekend work over time/Diss Paper 1 Data/resp0311.Rdata" )

  rm(list=ls())
    load(file="Diss Paper 1 -- weekend work over time/Diss Paper 1 Data/resp0311.Rdata")
#----------------------------------------------------------------------------

  #Setup Activity Data
    act.0311<-read.dta("ATUS data/atusact_0311/atus_0311_act.dta",
                       convert.factors=TRUE, warn.missing.labels = FALSE)
      save(act.0311, file="Diss Paper 1 -- weekend work over time/Diss Paper 1 Data/act0311-raw.Rdata" )

  #OR
    #Load Pre-Set Activity Data
#    load(file = "/Users/michaelcorey/Dropbox/School Work/Diss Paper 1 -- weekend work over time/Diss Paper 1 Data/act0311-raw.Rdata")

  #Convert Start Time Factor into minutes (1-1440)
    #Use a function from StackOverflow to split times to minutes (from 4am as zero)
      act.0311$startmin <- sapply(strsplit(as.character(act.0311$tustarttim),":"),
                            function(x) {
                              x <- as.numeric(x)
                              ((x[1]-4)*60)+x[2]+(x[3]/60)+1
                            } )

      act.0311$stopmin <- sapply(strsplit(as.character(act.0311$tustoptime),":"),
                           function(x) {
                             x <- as.numeric(x)
                             ((x[1]-4)*60)+x[2]+(x[3]/60)+1
                           } )

  #Then adjust after midnight times to be greater than 1200 (e.g.20h*60m) minutes
    act.0311$startmin[113:120] #look at a negative case

  #Add 1440 to the cases where the start minute is less than zero
    act.0311$startmin[which(act.0311$startmin<=0)] <-
      act.0311$startmin[which(act.0311$startmin<=0)] + 1440
      act.0311$startmin[113:120] #check correct conversion
      act.0311$stopmin[113:120] #look at a negative case

  #Add 1440 to the cases where the stop minute is less than zero
    act.0311$stopmin[which(act.0311$stopmin<=0)] <-
      act.0311$stopmin[which(act.0311$stopmin<=0)] + 1440
        act.0311$stopmin[113:120] #check correct conversion
  #Move Anytime the last activity moves to the next day into being 1440
    act.0311$stopmin[which(act.0311$stopmin < act.0311$startmin)] <- 1440

    act.0311[1:10,] #Show first ten rows, conversions look OK
      summary(act.0311$stopmin) #And the summary makes sense

    #make short id name1
      act.0311$id <- act.0311$tucaseid
      act.0311[1:10, c("id", "tucaseid")]

  #Make a simple 4-category activity ledger (1=sleep, 2=care, 3=work, 4=other)
    act.0311$simpleacts <- recode(act.0311$trcodep, "50101:59999='work';
                                  else='other' " )
      act.0311$simpleacts <- factor(act.0311$simpleacts, c( "other", "work"))
  #Care
    act.0311$careacts <- recode(act.0311$trcodep, "030101:030399='care';
                                  else='other' " )
      act.0311$careacts <- factor(act.0311$careacts, c( "other", "care"))
  #Leisure
    act.0311$leisureacts <- recode(act.0311$trcodep, "120101:139999='leisure';
                                  else='other' " )
      act.0311$leisureacts <- factor(act.0311$leisureacts, c( "other", "leisure"))
  #Sleep
    act.0311$sleepacts <- recode(act.0311$trcodep, "010101:101099='sleep';
                                  else='other' " )
      act.0311$sleepacts <- factor(act.0311$sleepacts, c( "other", "sleep"))

  #Check activity codings for overlaps and accuracy
      act.0311[25:50, c("id", "simpleacts", "careacts", "leisureacts",
                        "sleepacts", "trcodep")]


  #make backup of full file
    act.0311.full <- act.0311
      save(act.0311, file="act0311.Rdata")
    act.0311 <- act.0311[,c("id", "startmin", "stopmin", "simpleacts",
                            "careacts", "leisureacts", "sleepacts")]

  #Reducing the activity file to weekdays and individuals with (own) children
    #Subset the respondents to only thos with kids and a weekday diary
    resp.0311.wkdy.kids <- subset(resp.0311, kid.lt18.own=="Yes" &
                                    weekday=="Weekday" &
                                    workj1>=30 & lf.edit=="Employed - at work")
## ADD Working to ^^ & below
      resp.0311.wkdy.kids[1:10, c("id", "kid.lt18.own", "kid.lt18.hh",
                                  "day", "weekday", "workj1") ]

  #Select only 4000 of cases
  ###Why not do 1000 at a time and then rbind???!?!?!, could even foreach it
  ####And get some multi-core going on???
      set.seed(85)
      resp.0311.wkdy.kids <- resp.0311.wkdy.kids[sample(nrow(resp.0311.wkdy.kids),
                                                4000,),]

  #Subset the activity file to only those ids matching the wkdy.kids resp file
    act.0311.wkdy.kids <- subset(act.0311, act.0311$id %in% resp.0311.wkdy.kids$id)
  #Check my work (act.0311.wkdy.kids$id should be identical to that of resp...kids)
    check <- act.0311.wkdy.kids$id %in% resp.0311.wkdy.kids$id
      summary(check)  #Yep, all match.

  #Format the activity file as a sequence
    act.seqf.simple.wkdy.kids <- seqformat(act.0311.wkdy.kids, id="id",
                                from = "SPELL", to="STS",
                                begin = "startmin", end="stopmin",
                                status ="simpleacts", process="FALSE")

  #Sequence
    act.seq.simp.wkdy.kids <- seqdef(act.seqf.simple.wkdy.kids, xtstep=120)

  #Granulate to 15 minute intervals
    act.seq.swk.gran <- seqgranularity(act.seq.simp.wkdy.kids,
                                       tspan = 15, method = "first")

  #Implement a label for granular y axis
    time.lab.gran <- c(1:97)
      time.lab.gran[1]="4am"; time.lab.gran[9]="6am"; time.lab.gran[17]="8am";
      time.lab.gran[25]="10am"; time.lab.gran[33]="12pm"; time.lab.gran[41]="2pm";
      time.lab.gran[49]="4pm"; time.lab.gran[57]="6pm"; time.lab.gran[65]="8pm";
      time.lab.gran[73]="10pm"; time.lab.gran[81]="12am"; time.lab.gran[89]="2am";
      time.lab.gran[97]="4am";

  #Make sequence distance matrix
    seqdist.mtx <- seqdist(act.seq.swk.gran, method="DHD", sm="CONSTANT")

#Look at sequence data & etc

  #Hierarchical Aglomative (agnes) cluster ala Laurent and Kan
    act.seq.simp.clstr <- agnes(seqdist.mtx, diss=T, method="ward",
                                keep.diss = F, keep.data=F)
  #Why did Lesnard use a -.3 linkage?!?!?!?

  #Plot the agnes in PDF
  pdf("Diss Paper 1 -- weekend work over time/agnes3.pdf", width=40, height=15)
    plot(act.seq.simp.clstr, ask=F )
  dev.off()

  #Breakup clusters and plot:
    act.seq.swk.gran$clust12 <- cutree(act.seq.simp.clstr, k=7)
    act.seq.swk.gran$clust12 <- factor(act.seq.swk.gran$clust12,
                                      labels=paste("Cluster", 1:7))
    resp.0311.wkdy.kids$work.clust <- act.seq.swk.gran$clust12


  seqdplot(act.seq.swk.gran, group=resp.0311.wkdy.kids$work.clust,
           xtlab=time.lab.gran, border=NA)
  #Try it with 4 clusters...
    act.seq.swk.gran$clust7 <- cutree(act.seq.simp.clstr, k=9)
    act.seq.swk.gran$clust7 <- factor(act.seq.swk.gran$clust7,
                                      labels=paste("Work Clust", 1:9))
  #Plot State Distribution Plots
      seqdplot(act.seq.swk.gran, group=act.seq.swk.gran$clust7,
           xtlab=time.lab.gran, border=NA)

  #Plot Modal States Plots
      seqplot(act.seq.swk.gran, type="i", group=act.seq.swk.gran$clust7,
           xtlab=time.lab.gran)

  #Table to look at cuml and total percentage in each cluster
    freq(ordered(act.seq.swk.gran$clust7), plot=F)

  #Table to look at clusters against certain factors
    CrossTable(act.seq.swk.gran$clust7, resp.0311.wkdy.kids$hh.size,
               prop.c=F, prop.t=F, prop.chi=F)

 #Plot representative sequences (why 35 seqs?)
    seqrplot(act.seq.swk.gran, criterion = "prob",
             xtlab=time.lab.gran, dist.matrix=seqdist.mtx,
             withlegend = "right")


  #Format the care activity file as a sequence
    act.seqf.care <- seqformat(act.0311.wkdy.kids, id="id",
                                from = "SPELL", to="STS",
                                begin = "startmin", end="stopmin",
                                status ="careacts", process="FALSE")

  #Sequence
    act.seq.care <- seqdef(act.seqf.care, xtstep=120)

  #Granulate to 15 minute intervals
    act.seq.care.gran <- seqgranularity(act.seq.care,
                                       tspan = 15, method = "first")

  #Make sequence distance matrix
    seqdist.mtx.care <- seqdist(act.seq.care.gran, method="DHD", sm="CONSTANT")

  #Look at sequence data for care, etc

  #Hierarchical Aglomative (agnes) cluster ala Laurent and Kan
    act.seq.care.clstr <- agnes(seqdist.mtx.care, diss=T, method="ward",
                                keep.diss = F, keep.data=F)
  #Why did Lesnard use the beta, and/or a -.3 linkage?!?!?!?

  #Plot the Care agnes in PDF
  pdf("Diss Paper 1 -- weekend work over time/CareAgnes.pdf", width=40, height=15)
    plot(act.seq.care.clstr, ask=F )
  dev.off()

  #Make useable clusters
    act.seq.care.clstr$care.clust <- cutree(act.seq.care.clstr, k=7)
    resp.0311.wkdy.kids$care.clust <- factor(act.seq.care.clstr$care.clust,
                                      labels=paste("Care Clust", 1:7))

  #Plot State Distribution Plots for Care
      seqdplot(act.seq.care.gran, group=resp.0311.wkdy.kids$care.clust,
           xtlab=time.lab.gran, border=NA)  #NEAT!

  CrossTable(act.seq.care.gran$care.clust, act.seq.swk.gran$clust12,
             prop.r=F, prop.t=F, prop.chisq=T)

    short.clst <- act.seq.swk.gran[c("clust7", "care.clust")]

  analyze.this <- merge(resp.0311.wkdy.kids, short.clst)

#Trying out texreg
  testreg <- lm(whrs.usual ~ sex, data=resp.0311.wkdy.kids)
  screenreg(testreg)

#collapse work clusters
  resp.0311.wkdy.kids$work.5c <- recode(resp.0311.wkdy.kids$work.clust,
                                        "c('Cluster 1','Cluster 4','Cluster 5')='Day';
                                         c('Cluster 3')= 'Night Shift';
                                        'Cluster 2'='AM part-time';
                                        c('Cluster 6')='Eve Shift';
                                        c('Cluster 7')='Long Day'")

  resp.0311.wkdy.kids$work.5c <-relevel(resp.0311.wkdy.kids$work.5c, ref="Day")

  #Plot Collapsed Work Clusters
  seqplot(act.seq.swk.gran, type="d", group=resp.0311.wkdy.kids$work.5c,
         xtlab=time.lab.gran)

#multilevel reg for work clusters?
  work.mlgt <- multinom(work.5c ~ age + sex + hh.size + kid.yngst, data=resp.0311.wkdy.kids)
    summary(work.mlgt)
    exp(coef(work.mlgt))

write.dta(dataframe=resp.0311.wkdy.kids, file="clustForStata.dta")

#Try to find a simple kmeans elbow graph
#    kMax<-20; varexpl<-NULL
#      for(i in 1:kMax) {
#      kMeansResults<-kmeans(seqdist.mtx, i)
#      varexpl <- c(varexpl, kMeansResults$betweenss/kMeansResults$totss)
#        }

#    plot(1:kMax, varexpl*100, xlab = "Number of clusters",
#      ylab = "Percent of variance explained",type="l")
  #hmm, 3?

#  act.kmeans <- kmeans(seqdist.mtx, 7)
#    plot(seqdist.mtx, act.kmeans$cluster)
	#Build 03-11 Respondent Data [12-Feb-2013 Cut to 2000 RND cases]

	rm(list=ls()) #Clear Workspace

	#Load Libraries
	library(utils)
	library(foreign)
	library(car)
	library(TraMineR)
	library(plyr)
	library(TraMineRextras)
	library(parallel)
	library(cluster)
	library(stats)
	library(descr)
	library(gmodels)
	library(texreg)

	#Set WD and load data
	#MAC
	setwd("/Users/michaelcorey/Dropbox/School Work")
	# *nix
	# setwd("/home/michael/Dropbox/School Work/")

	atus.resp.raw<-read.dta("ATUS data/atusresp_0311/atus_0311_resp.dta",
	convert.factors=TRUE, warn.missing.labels = FALSE)
	atus.summ<-read.dta("ATUS data/atusresp_0311/atus_0311_summ.dta",
	convert.factors=TRUE, warn.missing.labels = FALSE)
	ls() #check what is loaded.

	atus.summ.m <- atus.summ[, c("tucaseid", "teage", "ptdtrace", "peeduca",
	"pehspnon", "tesex", "t050101", "t050102",
	"t050103")]

	atus.resp <- merge(atus.resp.raw, atus.summ.m, by="tucaseid", all.x=F, all.y=T)

	#Get codebook for new file
	colnames(atus.resp) #show column names

	var.labels <- attr(atus.resp.raw, "var.labels") #Make variable label object
	var.lbl.summ <- attr(atus.summ.m, "var.labels")
	var.labels <- c(var.labels, 1:8) #need to add dummy labels until I can pass
	#the partial list of var.names from summ.m
	var.labels
	codebook.resp <- data.frame(var.name=names(atus.resp), var.labels) #Make codebook
	codebook.resp #Print codebook

	#Look around the data
	#Look at line numbers
	table(atus.resp$tulineno) #Looks good, all line 1 (ATUS respondents)


	#Build smaller dataset
	resp.0311 <- atus.resp[, c("tucaseid")] #Make dataframe
	resp.0311 <- as.data.frame(resp.0311)

	#HH Characteristics
	resp.0311$id <- atus.resp$tucaseid #Assign ID
	resp.0311$sex <- atus.resp$tesex #Resp Sex
	resp.0311$kid.lt18.own <- atus.resp$trohhchild #Own HH kid <18
	resp.0311$nonhh.kid.lt18. <- atus.resp$trnhhchild #Own Non-HH kid <18
	resp.0311$kid.lt18.hh <- atus.resp$trhhchild #Any HH child under 18
	resp.0311$kid.yngst <- atus.resp$tryhhchild #Youngest Child in HH
	resp.0311$hh.size <- atus.resp$trnumhou #HH Size
	resp.0311$kid.lt13.wake.ts <- atus.resp$tucc2 #1st kid <13 awake [ts]
	resp.0311$kid.lt13.sleep.ts <- atus.resp$tucc4 #last kid <13 asleep [ts]

	#Respondent Characteristics
	resp.0311$hourly <- atus.resp$teernhry #Hourly Flag
	resp.0311$year <- atus.resp$tuyear #Year of Intv
	resp.0311$day <- atus.resp$tudiaryday #Week of Intv
	resp.0311$holiday <- atus.resp$trholiday #Is it a holiday?
	resp.0311$earn.weekly <- atus.resp$trernwa #Weekly Earnings
	resp.0311$industry <- atus.resp$trdtind1 #Indstry Recode (main)
	resp.0311$occup <- atus.resp$trdtocc1 #Occup Recode (main)
	resp.0311$work.7d <- atus.resp$tufwk #Work last 7 days
	resp.0311$month <- atus.resp$tumonth #Month of Intv
	resp.0311$job.7d <- atus.resp$tuabsot #Job last 7 days
	resp.0311$h35.usual <- atus.resp$tehrftpt #35 hrs usually
	resp.0311$whrs.usual <- atus.resp$tehruslt #Usual work hours (tot)
	resp.0311$age <- atus.resp$teage #Age of respondent
	resp.0311$race <- atus.resp$ptdtrace #Race of respondent
	resp.0311$hisp <- atus.resp$pehspnon #Hispanic
	resp.0311$lf.edit <- atus.resp$telfs #Edited labor force
	resp.0311$workj1 <- atus.summ$t050101 #Time in 1st Job on Day
	resp.0311$workj2 <- atus.summ$t050102 #Time in 2nd Job on Day
	resp.0311$workOth <- atus.summ$t050103 #Time in 3rd Job on Day


	#Fix Respondent Characteristics
	#Recode day to weekday
	resp.0311$weekday <- recode(resp.0311$day,
	' c("Sunday", "Saturday") = "Weekend"; else = "Weekday" ')
	table(atus.resp$tudiaryday, resp.0311$weekday)

	#Partner Information
	resp.0311$ptner.pres <- atus.resp$trsppres #Is spouse/ptnr present
	resp.0311$ptner.empl <- atus.resp$tespempnot #Sp/Ptnr Employment Status
	resp.0311$ptner.job7d <- atus.resp$tuspabs #Sp/Ptnr Job Last 7d
	resp.0311$ptner.35hwk <- atus.resp$tuspusft #Sp/Ptnr Usually 35h+
	resp.0311$ptner.whrs <- atus.resp$tespuhrs #Sp/Ptnr Usual Work Hours
	resp.0311$ptner.ftpt <- atus.resp$trspftpt #Sp/Ptnr Full/Part Time


	#Save and pass to build activity data
	save(resp.0311, file="Diss Paper 1 -- weekend work over time/Diss Paper 1 Data/resp0311.Rdata" )

	rm(list=ls())
	load(file="Diss Paper 1 -- weekend work over time/Diss Paper 1 Data/resp0311.Rdata")
	#----------------------------------------------------------------------------

	#Setup Activity Data
	act.0311<-read.dta("ATUS data/atusact_0311/atus_0311_act.dta",
	convert.factors=TRUE, warn.missing.labels = FALSE)
	save(act.0311, file="Diss Paper 1 -- weekend work over time/Diss Paper 1 Data/act0311-raw.Rdata" )

	#OR
	#Load Pre-Set Activity Data
	# load(file = "/Users/michaelcorey/Dropbox/School Work/Diss Paper 1 -- weekend work over time/Diss Paper 1 Data/act0311-raw.Rdata")

	#Convert Start Time Factor into minutes (1-1440)
	#Use a function from StackOverflow to split times to minutes (from 4am as zero)
	act.0311$startmin <- sapply(strsplit(as.character(act.0311$tustarttim),":"),
	function(x) {
	x <- as.numeric(x)
	((x[1]-4)*60)+x[2]+(x[3]/60)+1
	} )

	act.0311$stopmin <- sapply(strsplit(as.character(act.0311$tustoptime),":"),
	function(x) {
	x <- as.numeric(x)
	((x[1]-4)*60)+x[2]+(x[3]/60)+1
	} )

	#Then adjust after midnight times to be greater than 1200 (e.g.20h*60m) minutes
	act.0311$startmin[113:120] #look at a negative case

	#Add 1440 to the cases where the start minute is less than zero
	act.0311$startmin[which(act.0311$startmin<=0)] <-
	act.0311$startmin[which(act.0311$startmin<=0)] + 1440
	act.0311$startmin[113:120] #check correct conversion
	act.0311$stopmin[113:120] #look at a negative case

	#Add 1440 to the cases where the stop minute is less than zero
	act.0311$stopmin[which(act.0311$stopmin<=0)] <-
	act.0311$stopmin[which(act.0311$stopmin<=0)] + 1440
	act.0311$stopmin[113:120] #check correct conversion
	#Move Anytime the last activity moves to the next day into being 1440
	act.0311$stopmin[which(act.0311$stopmin < act.0311$startmin)] <- 1440

	act.0311[1:10,] #Show first ten rows, conversions look OK
	summary(act.0311$stopmin) #And the summary makes sense

	#make short id name1
	act.0311$id <- act.0311$tucaseid
	act.0311[1:10, c("id", "tucaseid")]

	#Make a simple 4-category activity ledger (1=sleep, 2=care, 3=work, 4=other)
	act.0311$simpleacts <- recode(act.0311$trcodep, "50101:59999='work';
	else='other' " )
	act.0311$simpleacts <- factor(act.0311$simpleacts, c( "other", "work"))
	#Care
	act.0311$careacts <- recode(act.0311$trcodep, "030101:030399='care';
	else='other' " )
	act.0311$careacts <- factor(act.0311$careacts, c( "other", "care"))
	#Leisure
	act.0311$leisureacts <- recode(act.0311$trcodep, "120101:139999='leisure';
	else='other' " )
	act.0311$leisureacts <- factor(act.0311$leisureacts, c( "other", "leisure"))
	#Sleep
	act.0311$sleepacts <- recode(act.0311$trcodep, "010101:101099='sleep';
	else='other' " )
	act.0311$sleepacts <- factor(act.0311$sleepacts, c( "other", "sleep"))

	#Check activity codings for overlaps and accuracy
	act.0311[25:50, c("id", "simpleacts", "careacts", "leisureacts",
	"sleepacts", "trcodep")]


	#make backup of full file
	act.0311.full <- act.0311
	save(act.0311, file="act0311.Rdata")
	act.0311 <- act.0311[,c("id", "startmin", "stopmin", "simpleacts",
	"careacts", "leisureacts", "sleepacts")]

	#Reducing the activity file to weekdays and individuals with (own) children
	#Subset the respondents to only thos with kids and a weekday diary
	resp.0311.wkdy.kids <- subset(resp.0311, kid.lt18.own=="Yes" &
	weekday=="Weekday" &
	workj1>=30 & lf.edit=="Employed - at work")
	## ADD Working to ^^ & below
	resp.0311.wkdy.kids[1:10, c("id", "kid.lt18.own", "kid.lt18.hh",
	"day", "weekday", "workj1") ]

	#Select only 4000 of cases
	###Why not do 1000 at a time and then rbind???!?!?!, could even foreach it
	####And get some multi-core going on???
	set.seed(85)
	resp.0311.wkdy.kids <- resp.0311.wkdy.kids[sample(nrow(resp.0311.wkdy.kids),
	4000,),]

	#Subset the activity file to only those ids matching the wkdy.kids resp file
	act.0311.wkdy.kids <- subset(act.0311, act.0311$id %in% resp.0311.wkdy.kids$id)
	#Check my work (act.0311.wkdy.kids$id should be identical to that of resp...kids)
	check <- act.0311.wkdy.kids$id %in% resp.0311.wkdy.kids$id
	summary(check) #Yep, all match.

	#Format the activity file as a sequence
	act.seqf.simple.wkdy.kids <- seqformat(act.0311.wkdy.kids, id="id",
	from = "SPELL", to="STS",
	begin = "startmin", end="stopmin",
	status ="simpleacts", process="FALSE")

	#Sequence
	act.seq.simp.wkdy.kids <- seqdef(act.seqf.simple.wkdy.kids, xtstep=120)

	#Granulate to 15 minute intervals
	act.seq.swk.gran <- seqgranularity(act.seq.simp.wkdy.kids,
	tspan = 15, method = "first")

	#Implement a label for granular y axis
	time.lab.gran <- c(1:97)
	time.lab.gran[1]="4am"; time.lab.gran[9]="6am"; time.lab.gran[17]="8am";
	time.lab.gran[25]="10am"; time.lab.gran[33]="12pm"; time.lab.gran[41]="2pm";
	time.lab.gran[49]="4pm"; time.lab.gran[57]="6pm"; time.lab.gran[65]="8pm";
	time.lab.gran[73]="10pm"; time.lab.gran[81]="12am"; time.lab.gran[89]="2am";
	time.lab.gran[97]="4am";

	#Make sequence distance matrix
	seqdist.mtx <- seqdist(act.seq.swk.gran, method="DHD", sm="CONSTANT")

	#Look at sequence data & etc

	#Hierarchical Aglomative (agnes) cluster ala Laurent and Kan
	act.seq.simp.clstr <- agnes(seqdist.mtx, diss=T, method="ward",
	keep.diss = F, keep.data=F)
	#Why did Lesnard use a -.3 linkage?!?!?!?

	#Plot the agnes in PDF
	pdf("Diss Paper 1 -- weekend work over time/agnes3.pdf", width=40, height=15)
	plot(act.seq.simp.clstr, ask=F )
	dev.off()

	#Breakup clusters and plot:
	act.seq.swk.gran$clust12 <- cutree(act.seq.simp.clstr, k=7)
	act.seq.swk.gran$clust12 <- factor(act.seq.swk.gran$clust12,
	labels=paste("Cluster", 1:7))
	resp.0311.wkdy.kids$work.clust <- act.seq.swk.gran$clust12


	seqdplot(act.seq.swk.gran, group=resp.0311.wkdy.kids$work.clust,
	xtlab=time.lab.gran, border=NA)
	#Try it with 4 clusters...
	act.seq.swk.gran$clust7 <- cutree(act.seq.simp.clstr, k=9)
	act.seq.swk.gran$clust7 <- factor(act.seq.swk.gran$clust7,
	labels=paste("Work Clust", 1:9))
	#Plot State Distribution Plots
	seqdplot(act.seq.swk.gran, group=act.seq.swk.gran$clust7,
	xtlab=time.lab.gran, border=NA)

	#Plot Modal States Plots
	seqplot(act.seq.swk.gran, type="i", group=act.seq.swk.gran$clust7,
	xtlab=time.lab.gran)

	#Table to look at cuml and total percentage in each cluster
	freq(ordered(act.seq.swk.gran$clust7), plot=F)

	#Table to look at clusters against certain factors
	CrossTable(act.seq.swk.gran$clust7, resp.0311.wkdy.kids$hh.size,
	prop.c=F, prop.t=F, prop.chi=F)

	#Plot representative sequences (why 35 seqs?)
	seqrplot(act.seq.swk.gran, criterion = "prob",
	xtlab=time.lab.gran, dist.matrix=seqdist.mtx,
	withlegend = "right")


	#Format the care activity file as a sequence
	act.seqf.care <- seqformat(act.0311.wkdy.kids, id="id",
	from = "SPELL", to="STS",
	begin = "startmin", end="stopmin",
	status ="careacts", process="FALSE")

	#Sequence
	act.seq.care <- seqdef(act.seqf.care, xtstep=120)

	#Granulate to 15 minute intervals
	act.seq.care.gran <- seqgranularity(act.seq.care,
	tspan = 15, method = "first")

	#Make sequence distance matrix
	seqdist.mtx.care <- seqdist(act.seq.care.gran, method="DHD", sm="CONSTANT")

	#Look at sequence data for care, etc

	#Hierarchical Aglomative (agnes) cluster ala Laurent and Kan
	act.seq.care.clstr <- agnes(seqdist.mtx.care, diss=T, method="ward",
	keep.diss = F, keep.data=F)
	#Why did Lesnard use the beta, and/or a -.3 linkage?!?!?!?

	#Plot the Care agnes in PDF
	pdf("Diss Paper 1 -- weekend work over time/CareAgnes.pdf", width=40, height=15)
	plot(act.seq.care.clstr, ask=F )
	dev.off()

	#Make useable clusters
	act.seq.care.clstr$care.clust <- cutree(act.seq.care.clstr, k=7)
	resp.0311.wkdy.kids$care.clust <- factor(act.seq.care.clstr$care.clust,
	labels=paste("Care Clust", 1:7))

	#Plot State Distribution Plots for Care
	seqdplot(act.seq.care.gran, group=resp.0311.wkdy.kids$care.clust,
	xtlab=time.lab.gran, border=NA) #NEAT!

	CrossTable(act.seq.care.gran$care.clust, act.seq.swk.gran$clust12,
	prop.r=F, prop.t=F, prop.chisq=T)

	short.clst <- act.seq.swk.gran[c("clust7", "care.clust")]

	analyze.this <- merge(resp.0311.wkdy.kids, short.clst)

	#Trying out texreg
	testreg <- lm(whrs.usual ~ sex, data=resp.0311.wkdy.kids)
	screenreg(testreg)

	#collapse work clusters
	resp.0311.wkdy.kids$work.5c <- recode(resp.0311.wkdy.kids$work.clust,
	"c('Cluster 1','Cluster 4','Cluster 5')='Day';
	c('Cluster 3')= 'Night Shift';
	'Cluster 2'='AM part-time';
	c('Cluster 6')='Eve Shift';
	c('Cluster 7')='Long Day'")

	resp.0311.wkdy.kids$work.5c <-relevel(resp.0311.wkdy.kids$work.5c, ref="Day")

	#Plot Collapsed Work Clusters
	seqplot(act.seq.swk.gran, type="d", group=resp.0311.wkdy.kids$work.5c,
	xtlab=time.lab.gran)

	#multilevel reg for work clusters?
	work.mlgt <- multinom(work.5c ~ age + sex + hh.size + kid.yngst, data=resp.0311.wkdy.kids)
	summary(work.mlgt)
	exp(coef(work.mlgt))

	write.dta(dataframe=resp.0311.wkdy.kids, file="clustForStata.dta")

	#Try to find a simple kmeans elbow graph
	# kMax<-20; varexpl<-NULL
	# for(i in 1:kMax) {
	# kMeansResults<-kmeans(seqdist.mtx, i)
	# varexpl <- c(varexpl, kMeansResults$betweenss/kMeansResults$totss)
	# }

	# plot(1:kMax, varexpl*100, xlab = "Number of clusters",
	# ylab = "Percent of variance explained",type="l")
	#hmm, 3?

	# act.kmeans <- kmeans(seqdist.mtx, 7)
	# plot(seqdist.mtx, act.kmeans$cluster)