The data consists of remission survival times on 42 leukemia patients, half of whom get a certain new treatment therapy and the other half of whom get a standard treatment therapy.
Variable Name | Description |
---|---|
rx |
0 = New Treatment |
1 = Standard Treatment | |
logwbc |
Log of White Blood Cell Counts |
sex |
0 = Female |
1 = Male | |
survt |
Survival Time |
status |
0 = Censored |
1 = Relapsed | |
lwbc3 |
1 = Low White Blood Cell Count (<10) |
2 = Medium White Blood Cell Count [10 - 20) | |
3 = High White Blood Cell Count [20+ ) |
library(foreign)
leuk <- read.dta("http://web1.sph.emory.edu/dkleinb/allDatasets/surv2datasets/anderson.dta")
leuk$lwbc3 <- factor(leuk$lwbc3)
library(survival)
leuk$SurvObj <- with(leuk, Surv(survt, status))
model <- survfit(SurvObj~ sex, data = leuk)
library(survminer)
ggsurvplot(model, conf.int=TRUE, risk.table=TRUE,
risk.table.col="strata", break.time.by=5)
mod1 <- coxph(SurvObj~sex, data=leuk)
mod2 <- coxph(SurvObj~logwbc, data=leuk)
mod3 <- coxph(SurvObj~rx, data=leuk)
mod4 <- coxph(SurvObj~lwbc3, data=leuk)
tidy1 <- tidy(mod1, exponentiate=T)[,-c(3,4)]
tidy2 <- tidy(mod2, exponentiate=T)[,-c(3,4)]
tidy3 <- tidy(mod3, exponentiate=T)[,-c(3,4)]
tidy4 <- tidy(mod4, exponentiate=T)[,-c(3,4)]
knitr::kable(bind_rows(tidy1, tidy2, tidy3, tidy4))
cox.zph(mod1)
cox.zph(mod2)
cox.zph(mod3)
cox.zph(mod4)
anova(mod2, mod4)
mod5 <- coxph(SurvObj~sex + lwbc3 + rx, data=leuk)
tidy5 <- tidy(mod5, exponentiate=T)[,-c(3,4)]
knitr::kable(tidy5)
cox.zph(mod5)
mod6 <- coxph(formula = SurvObj ~ lwbc3 + rx + strata(sex), data = leuk)
mod7 <- coxph(formula = SurvObj ~ logwbc + rx + strata(sex), data = leuk)
anova(mod6, mod7)
cox.zph(mod6)
mod6.sep <- lapply(split(leuk, leuk$sex),
FUN = function(DF) {
coxph(SurvObj ~ lwbc3 + rx, DF)
})
mod6.sep
An interaction model allows for different coefficients for different strata.
mod6.int <- coxph(formula = SurvObj ~ (lwbc3 + rx)*sex - sex + strata(sex),
data = leuk)
mod6.int
anova(mod6.int, mod6)
The dataset “vets.dat” considers survival times in days for 137 patients from the Veteran’s Administration Lung Cancer Trial cited by Kalbfleisch and Prentice in their text (The Statistical Analysis of Survival Time Data, Wiley, 2002). A complete list of the variables is given below.
Variable Name | Description |
---|---|
tx |
Treatment |
1 = Standard | |
2= Test | |
ct1 |
Indicator of Large Cell |
ct2 |
Indicator of Adeno Cell |
ct3 |
Indicator of Small Cell |
ct4 |
Indicator of Squamos Cell |
survt |
Survival Time (days) |
perf |
Performance (0 - 100) |
dd |
Disease Duration (Months) |
age |
Age in Years |
status |
0 = Censored |
1 = Died |
vets <- read.dta("http://web1.sph.emory.edu/dkleinb/allDatasets/surv2datasets/vets.dta")
head(vets)
vets <- within(vets, {
tx <- factor(tx, levels = 1:2, labels = c("standard","test"))
priortx <- factor(priortx, levels = c(0,10), labels = c("none","some"))
require(survival)
SurvObj <- Surv(survt, status)
})
## Multiple dummy variables to one variable
## http://stackoverflow.com/questions/5450538/going-from-multiple-dummy-variables-to-a-single-variable
dummies <- vets[, paste0("ct", 1:4)]
vets$CellType <- names(dummies)[apply(dummies == 1, MARGIN = 1, FUN = which)]
vets$CellType <- factor(vets$CellType)
all.additive.model <- coxph(SurvObj ~ tx + ct1 + ct2 + ct3 + perf + dd + age + priortx, data = vets)
res.zph <- cox.zph(all.additive.model)
res.zph
plot(res.zph)
vets$PSbin <- as.numeric(with(vets, perf >= 60))
vets$Z <- with(vets, interaction(CellType, PSbin))
vets$Z <- relevel(vets$Z, ref = "ct4.0")
stratified1 <- coxph(SurvObj ~ tx + age + strata(Z), data = vets)
stratified1
vets$tx.num <- as.numeric(vets$tx == "test")
strat.int1 <- coxph(SurvObj ~ (tx.num + age)*Z - Z + strata(Z), data = vets)
strat.int1
anova(stratified1, strat.int1)
strat.int2 <- coxph(SurvObj ~ tx.num + age*Z - Z + strata(Z), data = vets)
strat.int2
anova(strat.int1, strat.int2)
strat.int3 <- coxph(SurvObj ~ age + tx.num*Z - Z + strata(Z), data = vets)
strat.int3
anova(strat.int1, strat.int3)
A graphical view of the stratified Cox approach
## Create dataset to predict survival for
newdat <- expand.grid(rx = 0:1, sex = 0:1)
## Define log(-log(S)) vs log(time) plot function
plot.fun <- function(MODEL, newdata = newdat) {
## Plot
plot(survfit(MODEL, newdata = newdata),
log = "x", fun = function(S) log(-log(S)),
lty = rep(1:2, 2), col = rep(c("red","blue"), each = 2)
)
## Add legend
legend("topleft", legend = strata(newdat, sep = ":"),
lty = rep(1:2, 2), col = rep(c("red","blue"), each = 2))
}
model1 <- coxph(formula = SurvObj ~ rx + sex,
data = leuk)
model1
plot.fun(model1)
title("Same baselines, same effects
all parallel with same distance")
model2 <- coxph(formula = SurvObj ~ rx * sex,
data = leuk)
model2
plot.fun(model2)
title("Same baselines, different effects
all parallel with different distances")
model3 <- coxph(formula = SurvObj ~ rx + strata(sex),
data = leuk)
model3
plot.fun(model3, newdata = data.frame(rx = 0:1))
title("Different baselines, same effects
parallel within each stratum with equal distance")
model4 <- coxph(formula = SurvObj ~ rx + rx:sex + strata(sex),
data = leuk)
model4
model4.0 <- coxph(formula = SurvObj ~ rx,
data = leuk, subset = (sex == 0))
model4.1 <- coxph(formula = SurvObj ~ rx,
data = leuk, subset = (sex == 1))
plot.fun2 <- function(MODEL, ...){
plot(survfit(MODEL, newdata = data.frame(rx = 0:1)),
log = "x", fun = function(S) log(-log(S)),
lty = 1:2, xlim = c(1,35), ylim = c(-4,1.5), ...
)
}
plot.fun2(model4.0, col = "red")
## Add male curves
par(new = T)
plot.fun2(model4.1, col = "blue", bty = "n", xaxt = "n", yaxt = "n")
## Add legend
legend("topleft", legend = strata(newdat, sep = ":"),
lty = rep(1:2, 2), col = rep(c("blue","red"), each = 2))
title("Different baselines, Different effects
parallel within each stratum with different distances")