rm
(
list
=
ls
(
)
)
## 50 patients and 200 genes
dat
=
matrix
(
rnorm
(
10000
,
mean
=
8
,
sd
=
4
)
,
nrow
=
200
)
rownames
(
dat
)
=
paste
0
(
'gene_'
,
1
:
nrow
(
dat
)
)
colnames
(
dat
)
=
paste
0
(
'sample_'
,
1
:
ncol
(
dat
)
)
os_years
=
abs
(
floor
(
rnorm
(
ncol
(
dat
)
,
mean
=
50
,
sd
=
20
)
)
)
os_status
=
sample
(
rep
(
c
(
'LIVING'
,
'DECEASED'
)
,
25
)
)
library
(
survival
)
my
.surv
<
-
Surv
(
os_years
,
os_status
=
=
'DECEASED'
)
## The status indicator, normally 0=alive, 1=dead. Other choices are TRUE/FALSE (TRUE = death) or 1/2 (2=death).
## And most of the time we just care about the time od DECEASED .
fit.KM
=
survfit
(
my
.surv~
1
)
fit.KM
plot
(
fit.KM
)
log_rank_p
<
-
apply
(
dat
,
1
,
function
(
values
1
)
{
group
=
ifelse
(
values
1
>
median
(
values
1
)
,
'high'
,
'low'
)
kmfit
2
<
-
survfit
(
my
.surv~
group
)
#plot(kmfit2)
data
.survdiff
=
survdiff
(
my
.surv~
group
)
p.val
=
1
-
pchisq
(
data
.survdiff$chisq
,
length
(
data
.survdiff$n
)
-
1
)
}
)
names
(
log_rank_p[log_rank_p
<
0.0
5
]
)
gender
=
ifelse
(
rnorm
(
ncol
(
dat
)
)
>
1
,
'male'
,
'female'
)
age
=
abs
(
floor
(
rnorm
(
ncol
(
dat
)
,
mean
=
50
,
sd
=
20
)
)
)
## gender and age are similar with group(by gene expression)
cox_results
<
-
apply
(
dat
,
1
,
function
(
values
1
)
{
group
=
ifelse
(
values
1
>
median
(
values
1
)
,
'high'
,
'low'
)
survival_dat
<
-
data
.frame
(
group
=
group
,
gender
=
gender
,
age
=
age
,
stringsAsFactors
=
F
)
m
=
coxph
(
my
.surv ~ age
+
gender
+
group
,
data
=
survival_dat
)
beta
<
-
coef
(
m
)
se
<
-
sqrt
(
diag
(
vcov
(
m
)
)
)
HR
<
-
exp
(
beta
)
HRse
<
-
HR
*
se
#summary(m)
tmp
<
-
round
(
cbind
(
coef
=
beta
,
se
=
se
,
z
=
beta
/
se
,
p
=
1
-
pchisq
(
(
beta
/
se
)
^
2
,
1
)
,
HR
=
HR
,
HRse
=
HRse
,
HRz
=
(
HR
-
1
)
/
HRse
,
HRp
=
1
-
pchisq
(
(
(
HR
-
1
)
/
HRse
)
^
2
,
1
)
,
HRCILL
=
exp
(
beta
-
qnorm
(
.
975
,
0
,
1
)
*
se
)
,
HRCIUL
=
exp
(
beta
+
qnorm
(
.
975
,
0
,
1
)
*
se
)
)
,
3
)
return
(
tmp['grouplow'
,
]
)
}
)
cox_results[
,
cox_results[
4
,
]
<
0.0
5
]