R Markdown
#For our analysis, select: 1. european; 2. age by category; 3. see the complete dataset (no NA) number; 4. incident cancer cases
dat0=pheno.clean.final[which(pheno.clean.final$race=="White"),]
table(dat0$colorectal_cancer)
##
## 0 1
## 377373 4033
dat0$agegroups<-cut(dat0$age, breaks=c( 40, 45,50,55,60,65,70,75), right = FALSE)
dat0=dat0[-which(dat0$colorectal_cancer==1 & dat0$colorectal_cancer_incident!=1),] #exclude non-incident cc
dat0$prs_scaled=scale(dat0$PRS_colorectal,center = T,scale = T)
fit <- glm(colorectal_cancer~prs_scaled+PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10, data=dat0,family = binomial(), model = FALSE, x = FALSE, y = FALSE)
summary(fit)
##
## Call:
## glm(formula = colorectal_cancer ~ prs_scaled + PC1 + PC2 + PC3 +
## PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10, family = binomial(),
## data = dat0, model = FALSE, x = FALSE, y = FALSE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3164 -0.1242 -0.1061 -0.0922 3.5321
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.2860486 0.0819907 -64.471 < 2e-16 ***
## prs_scaled 0.4101889 0.0204574 20.051 < 2e-16 ***
## PC1 -0.0134804 0.0079532 -1.695 0.090082 .
## PC2 -0.0063994 0.0080499 -0.795 0.426629
## PC3 0.0118942 0.0099428 1.196 0.231593
## PC4 0.0030475 0.0045931 0.663 0.507011
## PC5 0.0116118 0.0027331 4.249 2.15e-05 ***
## PC6 0.0006845 0.0073662 0.093 0.925959
## PC7 0.0047842 0.0054692 0.875 0.381704
## PC8 -0.0228026 0.0063326 -3.601 0.000317 ***
## PC9 -0.0039339 0.0043521 -0.904 0.366045
## PC10 -0.0078056 0.0087750 -0.890 0.373718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 28788 on 379742 degrees of freedom
## Residual deviance: 28339 on 379731 degrees of freedom
## AIC: 28363
##
## Number of Fisher Scoring iterations: 8
exp( 0.4101952)
## [1] 1.507112
dat0$alcohol=NA
dat0$alcohol[which(dat0$AlcoholFreq=="Never" | dat0$AlcoholFreq== "Special occasions only")]=0
dat0$alcohol[which(dat0$AlcoholFreq=="Daily or almost daily" | dat0$AlcoholFreq== "Three or four times a week" |
dat0$AlcoholFreq== "Once or twice a week" | dat0$AlcoholFreq=="One to three times a month" )]=1
#let's make NA to be 0 for the sum up
dat0$beef[which(dat0$beef<0)]=0
dat0$pork[which(dat0$pork<0)]=0
dat0$lamb[which(dat0$lamb<0)]=0
dat0$redmeat.numeric=dat0$beef+dat0$pork+dat0$lamb
dat0$cooked_veg[which(dat0$cooked_veg=="-10")]=0
dat0$raw_veg[which(dat0$raw_veg=="-10")]=0
dat0$raw_veg[which(dat0$raw_veg<0)]=NA
dat0$cooked_veg[which(dat0$cooked_veg<0)]=NA
dat0$veg.numeric=dat0$cooked_veg+dat0$raw_veg
dat0$process_meat[which(dat0$process_meat<0)]=NA
dat0$birth.x=as.numeric(as.character(dat0$birth.x))
dat0$birth.y=as.numeric(as.character(dat0$birth.y))
dat0$smoke.status[which(dat0$smoke.status<0)]=NA
#Add social economic and birth location/assessment center location
dat_test=dat0[,c("IID","PRS_colorectal","sex","age","height","waist","smoke.status","activity",
"colorectal_cancer","colorectal_cancer_incident","redmeat.numeric","veg.numeric","process_meat",
"assessment_center","agegroups", "birth.y","birth.x",
"alcohol","PC1","PC2","PC3","PC4","PC5","PC6","PC7","PC8","PC9","PC10")]
dat_test=dat_test[complete.cases(dat_test),]
table(dat_test$colorectal_cancer)
##
## 0 1
## 282429 1743
table(dat_test$colorectal_cancer_incident)
##
## 0 1
## 282429 1743
dim(dat_test) #[1] 284169 28
## [1] 284172 28
dat_test$prs_scaled=as.vector(scale(dat_test$PRS_colorectal,center = T,scale = T))
dat_test$age.scale=as.vector(scale(dat_test$age,center = T,scale=T))
dat_test$height.scale=as.vector(scale(dat_test$height,center=T,scale = T))
dat_test$waist.scale=as.vector(scale(dat_test$waist,center=T,scale = T))
dat_test$birth.y.scale=as.vector(scale(dat_test$birth.y,center=T,scale = T))
dat_test$birth.x.scale=as.vector(scale(dat_test$birth.x,center=T,scale = T))
dat_test$veg.numeric.scale=as.vector(scale(dat_test$veg.numeric,center=T,scale = T))
dat_test$process_meat.scale=as.vector(scale(dat_test$process_meat,center=T,scale = T))
dat_test$redmeat.numeric.scale=as.vector(scale(dat_test$redmeat.numeric,center=T,scale = T))
table(dat_test$assessment_center) #note that 10003 is too small, we set 11001 as the reference group
##
## 10003 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012
## 4 7886 8470 10620 11303 10351 10971 18282 16615 21052 25872 25990 5802
## 11013 11014 11016 11017 11018 11020 11021 11022 11023
## 20047 17843 18834 12272 13376 13338 13567 1288 389
#dat_test$assessment_center=factor(dat_test$assessment_center)
#dat_test$assessment_center=relevel(dat_test$assessment_center,ref = "11001")
fit <- glm(colorectal_cancer~prs_scaled+PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+factor(activity)+agegroups+sex+alcohol+height.scale+relevel(factor(dat_test$assessment_center),ref = "11001")+waist.scale+factor(smoke.status)+redmeat.numeric.scale+process_meat.scale+veg.numeric.scale+birth.x.scale+birth.y.scale+prs_scaled:birth.x.scale+prs_scaled:birth.y.scale+prs_scaled:factor(activity)+prs_scaled:agegroups+prs_scaled:sex+prs_scaled:alcohol+prs_scaled:height.scale+prs_scaled:waist.scale+prs_scaled:veg.numeric.scale+prs_scaled:redmeat.numeric.scale+prs_scaled:factor(smoke.status)+prs_scaled:process_meat.scale, data=dat_test,family = binomial(), model = FALSE, x = FALSE, y = FALSE)
summary(fit)
##
## Call:
## glm(formula = colorectal_cancer ~ prs_scaled + PC1 + PC2 + PC3 +
## PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10 + factor(activity) +
## agegroups + sex + alcohol + height.scale + relevel(factor(dat_test$assessment_center),
## ref = "11001") + waist.scale + factor(smoke.status) + redmeat.numeric.scale +
## process_meat.scale + veg.numeric.scale + birth.x.scale +
## birth.y.scale + prs_scaled:birth.x.scale + prs_scaled:birth.y.scale +
## prs_scaled:factor(activity) + prs_scaled:agegroups + prs_scaled:sex +
## prs_scaled:alcohol + prs_scaled:height.scale + prs_scaled:waist.scale +
## prs_scaled:veg.numeric.scale + prs_scaled:redmeat.numeric.scale +
## prs_scaled:factor(smoke.status) + prs_scaled:process_meat.scale,
## family = binomial(), data = dat_test, model = FALSE, x = FALSE,
## y = FALSE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.4718 -0.1291 -0.0979 -0.0696 3.9891
##
## Coefficients:
## Estimate
## (Intercept) -7.101399
## prs_scaled 0.655239
## PC1 -0.024151
## PC2 0.013532
## PC3 0.007292
## PC4 -0.003510
## PC5 0.009853
## PC6 0.002921
## PC7 0.003657
## PC8 -0.026568
## PC9 -0.006859
## PC10 -0.013153
## factor(activity)1 0.023742
## factor(activity)2 0.017799
## agegroups[45,50) 0.709890
## agegroups[50,55) 1.383273
## agegroups[55,60) 1.650811
## agegroups[60,65) 1.895994
## agegroups[65,70) 2.148956
## agegroups[70,75) 2.398049
## sexMale 0.227228
## alcohol -0.015469
## height.scale 0.056854
## relevel(factor(dat_test$assessment_center), ref = "11001")10003 -6.431823
## relevel(factor(dat_test$assessment_center), ref = "11001")11002 -0.038508
## relevel(factor(dat_test$assessment_center), ref = "11001")11003 -0.260607
## relevel(factor(dat_test$assessment_center), ref = "11001")11004 -0.295590
## relevel(factor(dat_test$assessment_center), ref = "11001")11005 -0.171025
## relevel(factor(dat_test$assessment_center), ref = "11001")11006 -0.259218
## relevel(factor(dat_test$assessment_center), ref = "11001")11007 -0.168623
## relevel(factor(dat_test$assessment_center), ref = "11001")11008 -0.233069
## relevel(factor(dat_test$assessment_center), ref = "11001")11009 -0.273151
## relevel(factor(dat_test$assessment_center), ref = "11001")11010 -0.292404
## relevel(factor(dat_test$assessment_center), ref = "11001")11011 -0.240929
## relevel(factor(dat_test$assessment_center), ref = "11001")11012 -0.208486
## relevel(factor(dat_test$assessment_center), ref = "11001")11013 -0.347291
## relevel(factor(dat_test$assessment_center), ref = "11001")11014 -0.349974
## relevel(factor(dat_test$assessment_center), ref = "11001")11016 -0.318156
## relevel(factor(dat_test$assessment_center), ref = "11001")11017 -0.443371
## relevel(factor(dat_test$assessment_center), ref = "11001")11018 -0.359613
## relevel(factor(dat_test$assessment_center), ref = "11001")11020 -0.449314
## relevel(factor(dat_test$assessment_center), ref = "11001")11021 -0.687207
## relevel(factor(dat_test$assessment_center), ref = "11001")11022 -0.953463
## relevel(factor(dat_test$assessment_center), ref = "11001")11023 -1.338564
## waist.scale 0.129664
## factor(smoke.status)1 0.173482
## factor(smoke.status)2 -0.084341
## redmeat.numeric.scale 0.073132
## process_meat.scale 0.066610
## veg.numeric.scale 0.005826
## birth.x.scale -0.056858
## birth.y.scale 0.004340
## prs_scaled:birth.x.scale 0.005865
## prs_scaled:birth.y.scale 0.024590
## prs_scaled:factor(activity)1 -0.117096
## prs_scaled:factor(activity)2 -0.073661
## prs_scaled:agegroups[45,50) -0.095915
## prs_scaled:agegroups[50,55) -0.108503
## prs_scaled:agegroups[55,60) -0.035762
## prs_scaled:agegroups[60,65) -0.133928
## prs_scaled:agegroups[65,70) -0.159267
## prs_scaled:agegroups[70,75) -0.800574
## prs_scaled:sexMale 0.018454
## prs_scaled:alcohol -0.056909
## prs_scaled:height.scale 0.017142
## prs_scaled:waist.scale -0.019931
## prs_scaled:veg.numeric.scale -0.011235
## prs_scaled:redmeat.numeric.scale -0.008710
## prs_scaled:factor(smoke.status)1 0.021224
## prs_scaled:factor(smoke.status)2 0.063039
## prs_scaled:process_meat.scale -0.030052
## Std. Error
## (Intercept) 0.282510
## prs_scaled 0.185517
## PC1 0.011810
## PC2 0.013633
## PC3 0.014603
## PC4 0.007020
## PC5 0.004138
## PC6 0.013928
## PC7 0.009036
## PC8 0.010570
## PC9 0.005874
## PC10 0.011748
## factor(activity)1 0.073977
## factor(activity)2 0.075635
## agegroups[45,50) 0.227717
## agegroups[50,55) 0.209969
## agegroups[55,60) 0.205157
## agegroups[60,65) 0.201298
## agegroups[65,70) 0.202005
## agegroups[70,75) 0.330289
## sexMale 0.082133
## alcohol 0.074986
## height.scale 0.038566
## relevel(factor(dat_test$assessment_center), ref = "11001")10003 98.388597
## relevel(factor(dat_test$assessment_center), ref = "11001")11002 0.184021
## relevel(factor(dat_test$assessment_center), ref = "11001")11003 0.184971
## relevel(factor(dat_test$assessment_center), ref = "11001")11004 0.185246
## relevel(factor(dat_test$assessment_center), ref = "11001")11005 0.182502
## relevel(factor(dat_test$assessment_center), ref = "11001")11006 0.177249
## relevel(factor(dat_test$assessment_center), ref = "11001")11007 0.162411
## relevel(factor(dat_test$assessment_center), ref = "11001")11008 0.161748
## relevel(factor(dat_test$assessment_center), ref = "11001")11009 0.160575
## relevel(factor(dat_test$assessment_center), ref = "11001")11010 0.154377
## relevel(factor(dat_test$assessment_center), ref = "11001")11011 0.155995
## relevel(factor(dat_test$assessment_center), ref = "11001")11012 0.221011
## relevel(factor(dat_test$assessment_center), ref = "11001")11013 0.162058
## relevel(factor(dat_test$assessment_center), ref = "11001")11014 0.164782
## relevel(factor(dat_test$assessment_center), ref = "11001")11016 0.158828
## relevel(factor(dat_test$assessment_center), ref = "11001")11017 0.183390
## relevel(factor(dat_test$assessment_center), ref = "11001")11018 0.178403
## relevel(factor(dat_test$assessment_center), ref = "11001")11020 0.182685
## relevel(factor(dat_test$assessment_center), ref = "11001")11021 0.187201
## relevel(factor(dat_test$assessment_center), ref = "11001")11022 0.472268
## relevel(factor(dat_test$assessment_center), ref = "11001")11023 1.011719
## waist.scale 0.030524
## factor(smoke.status)1 0.055744
## factor(smoke.status)2 0.100561
## redmeat.numeric.scale 0.028190
## process_meat.scale 0.029215
## veg.numeric.scale 0.026081
## birth.x.scale 0.028572
## birth.y.scale 0.034810
## prs_scaled:birth.x.scale 0.022996
## prs_scaled:birth.y.scale 0.023222
## prs_scaled:factor(activity)1 0.065899
## prs_scaled:factor(activity)2 0.066910
## prs_scaled:agegroups[45,50) 0.198006
## prs_scaled:agegroups[50,55) 0.181515
## prs_scaled:agegroups[55,60) 0.176573
## prs_scaled:agegroups[60,65) 0.173597
## prs_scaled:agegroups[65,70) 0.174258
## prs_scaled:agegroups[70,75) 0.294442
## prs_scaled:sexMale 0.074247
## prs_scaled:alcohol 0.066819
## prs_scaled:height.scale 0.034799
## prs_scaled:waist.scale 0.027616
## prs_scaled:veg.numeric.scale 0.024105
## prs_scaled:redmeat.numeric.scale 0.025640
## prs_scaled:factor(smoke.status)1 0.050630
## prs_scaled:factor(smoke.status)2 0.089542
## prs_scaled:process_meat.scale 0.026427
## z value
## (Intercept) -25.137
## prs_scaled 3.532
## PC1 -2.045
## PC2 0.993
## PC3 0.499
## PC4 -0.500
## PC5 2.381
## PC6 0.210
## PC7 0.405
## PC8 -2.514
## PC9 -1.168
## PC10 -1.120
## factor(activity)1 0.321
## factor(activity)2 0.235
## agegroups[45,50) 3.117
## agegroups[50,55) 6.588
## agegroups[55,60) 8.047
## agegroups[60,65) 9.419
## agegroups[65,70) 10.638
## agegroups[70,75) 7.260
## sexMale 2.767
## alcohol -0.206
## height.scale 1.474
## relevel(factor(dat_test$assessment_center), ref = "11001")10003 -0.065
## relevel(factor(dat_test$assessment_center), ref = "11001")11002 -0.209
## relevel(factor(dat_test$assessment_center), ref = "11001")11003 -1.409
## relevel(factor(dat_test$assessment_center), ref = "11001")11004 -1.596
## relevel(factor(dat_test$assessment_center), ref = "11001")11005 -0.937
## relevel(factor(dat_test$assessment_center), ref = "11001")11006 -1.462
## relevel(factor(dat_test$assessment_center), ref = "11001")11007 -1.038
## relevel(factor(dat_test$assessment_center), ref = "11001")11008 -1.441
## relevel(factor(dat_test$assessment_center), ref = "11001")11009 -1.701
## relevel(factor(dat_test$assessment_center), ref = "11001")11010 -1.894
## relevel(factor(dat_test$assessment_center), ref = "11001")11011 -1.544
## relevel(factor(dat_test$assessment_center), ref = "11001")11012 -0.943
## relevel(factor(dat_test$assessment_center), ref = "11001")11013 -2.143
## relevel(factor(dat_test$assessment_center), ref = "11001")11014 -2.124
## relevel(factor(dat_test$assessment_center), ref = "11001")11016 -2.003
## relevel(factor(dat_test$assessment_center), ref = "11001")11017 -2.418
## relevel(factor(dat_test$assessment_center), ref = "11001")11018 -2.016
## relevel(factor(dat_test$assessment_center), ref = "11001")11020 -2.459
## relevel(factor(dat_test$assessment_center), ref = "11001")11021 -3.671
## relevel(factor(dat_test$assessment_center), ref = "11001")11022 -2.019
## relevel(factor(dat_test$assessment_center), ref = "11001")11023 -1.323
## waist.scale 4.248
## factor(smoke.status)1 3.112
## factor(smoke.status)2 -0.839
## redmeat.numeric.scale 2.594
## process_meat.scale 2.280
## veg.numeric.scale 0.223
## birth.x.scale -1.990
## birth.y.scale 0.125
## prs_scaled:birth.x.scale 0.255
## prs_scaled:birth.y.scale 1.059
## prs_scaled:factor(activity)1 -1.777
## prs_scaled:factor(activity)2 -1.101
## prs_scaled:agegroups[45,50) -0.484
## prs_scaled:agegroups[50,55) -0.598
## prs_scaled:agegroups[55,60) -0.203
## prs_scaled:agegroups[60,65) -0.771
## prs_scaled:agegroups[65,70) -0.914
## prs_scaled:agegroups[70,75) -2.719
## prs_scaled:sexMale 0.249
## prs_scaled:alcohol -0.852
## prs_scaled:height.scale 0.493
## prs_scaled:waist.scale -0.722
## prs_scaled:veg.numeric.scale -0.466
## prs_scaled:redmeat.numeric.scale -0.340
## prs_scaled:factor(smoke.status)1 0.419
## prs_scaled:factor(smoke.status)2 0.704
## prs_scaled:process_meat.scale -1.137
## Pr(>|z|)
## (Intercept) < 2e-16 ***
## prs_scaled 0.000412 ***
## PC1 0.040847 *
## PC2 0.320899
## PC3 0.617517
## PC4 0.617127
## PC5 0.017269 *
## PC6 0.833909
## PC7 0.685715
## PC8 0.011953 *
## PC9 0.242883
## PC10 0.262860
## factor(activity)1 0.748254
## factor(activity)2 0.813953
## agegroups[45,50) 0.001824 **
## agegroups[50,55) 4.46e-11 ***
## agegroups[55,60) 8.51e-16 ***
## agegroups[60,65) < 2e-16 ***
## agegroups[65,70) < 2e-16 ***
## agegroups[70,75) 3.86e-13 ***
## sexMale 0.005664 **
## alcohol 0.836560
## height.scale 0.140431
## relevel(factor(dat_test$assessment_center), ref = "11001")10003 0.947878
## relevel(factor(dat_test$assessment_center), ref = "11001")11002 0.834248
## relevel(factor(dat_test$assessment_center), ref = "11001")11003 0.158863
## relevel(factor(dat_test$assessment_center), ref = "11001")11004 0.110563
## relevel(factor(dat_test$assessment_center), ref = "11001")11005 0.348700
## relevel(factor(dat_test$assessment_center), ref = "11001")11006 0.143617
## relevel(factor(dat_test$assessment_center), ref = "11001")11007 0.299155
## relevel(factor(dat_test$assessment_center), ref = "11001")11008 0.149603
## relevel(factor(dat_test$assessment_center), ref = "11001")11009 0.088929 .
## relevel(factor(dat_test$assessment_center), ref = "11001")11010 0.058213 .
## relevel(factor(dat_test$assessment_center), ref = "11001")11011 0.122474
## relevel(factor(dat_test$assessment_center), ref = "11001")11012 0.345514
## relevel(factor(dat_test$assessment_center), ref = "11001")11013 0.032112 *
## relevel(factor(dat_test$assessment_center), ref = "11001")11014 0.033682 *
## relevel(factor(dat_test$assessment_center), ref = "11001")11016 0.045162 *
## relevel(factor(dat_test$assessment_center), ref = "11001")11017 0.015622 *
## relevel(factor(dat_test$assessment_center), ref = "11001")11018 0.043828 *
## relevel(factor(dat_test$assessment_center), ref = "11001")11020 0.013913 *
## relevel(factor(dat_test$assessment_center), ref = "11001")11021 0.000242 ***
## relevel(factor(dat_test$assessment_center), ref = "11001")11022 0.043497 *
## relevel(factor(dat_test$assessment_center), ref = "11001")11023 0.185815
## waist.scale 2.16e-05 ***
## factor(smoke.status)1 0.001858 **
## factor(smoke.status)2 0.401633
## redmeat.numeric.scale 0.009480 **
## process_meat.scale 0.022606 *
## veg.numeric.scale 0.823244
## birth.x.scale 0.046597 *
## birth.y.scale 0.900771
## prs_scaled:birth.x.scale 0.798686
## prs_scaled:birth.y.scale 0.289651
## prs_scaled:factor(activity)1 0.075587 .
## prs_scaled:factor(activity)2 0.270941
## prs_scaled:agegroups[45,50) 0.628097
## prs_scaled:agegroups[50,55) 0.549995
## prs_scaled:agegroups[55,60) 0.839499
## prs_scaled:agegroups[60,65) 0.440416
## prs_scaled:agegroups[65,70) 0.360731
## prs_scaled:agegroups[70,75) 0.006549 **
## prs_scaled:sexMale 0.803711
## prs_scaled:alcohol 0.394380
## prs_scaled:height.scale 0.622294
## prs_scaled:waist.scale 0.470468
## prs_scaled:veg.numeric.scale 0.641150
## prs_scaled:redmeat.numeric.scale 0.734070
## prs_scaled:factor(smoke.status)1 0.675066
## prs_scaled:factor(smoke.status)2 0.481425
## prs_scaled:process_meat.scale 0.255469
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 21233 on 284171 degrees of freedom
## Residual deviance: 20188 on 284101 degrees of freedom
## AIC: 20330
##
## Number of Fisher Scoring iterations: 10
#Now let's select the control samples for the case/control study
table(dat_test$colorectal_cancer)
##
## 0 1
## 282429 1743
control=dat_test[which(dat_test$colorectal_cancer==0),]
set.seed(10252022)
#set.seed(11022022)
control=control[sample(1:dim(control)[1],size=1743,replace = F),]
dat_test_casecontrol=rbind(control,dat_test[which(dat_test$colorectal_cancer==1),])
table(dat_test_casecontrol$colorectal_cancer)
##
## 0 1
## 1743 1743
dat_test_casecontrol$prs_scaled=as.vector(scale(dat_test_casecontrol$PRS_colorectal,center = T,scale = T))
dat_test_casecontrol$age.scale=as.vector(scale(dat_test_casecontrol$age,center = T,scale=T))
dat_test_casecontrol$height.scale=as.vector(scale(dat_test_casecontrol$height,center=T,scale = T))
dat_test_casecontrol$waist.scale=as.vector(scale(dat_test_casecontrol$waist,center=T,scale = T))
dat_test_casecontrol$birth.y.scale=as.vector(scale(dat_test_casecontrol$birth.y,center=T,scale = T))
dat_test_casecontrol$birth.x.scale=as.vector(scale(dat_test_casecontrol$birth.x,center=T,scale = T))
dat_test_casecontrol$veg.numeric.scale=as.vector(scale(dat_test_casecontrol$veg.numeric,center=T,scale = T))
dat_test_casecontrol$process_meat.scale=as.vector(scale(dat_test_casecontrol$process_meat,center=T,scale = T))
dat_test_casecontrol$redmeat.numeric.scale=as.vector(scale(dat_test_casecontrol$redmeat.numeric,center=T,scale = T))
fit_normal<-prs_e_function_gr(data=dat_test_casecontrol,
formula = colorectal_cancer~prs_scaled+PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+factor(activity)+agegroups+sex+alcohol+height.scale+assessment_center+waist.scale+factor(smoke.status)+redmeat.numeric.scale+process_meat.scale+veg.numeric.scale+birth.x.scale+birth.y.scale+prs_scaled:birth.x.scale+prs_scaled:birth.y.scale+prs_scaled:factor(activity)+prs_scaled:agegroups+prs_scaled:sex+prs_scaled:alcohol+prs_scaled:height.scale+prs_scaled:waist.scale+prs_scaled:veg.numeric.scale+prs_scaled:redmeat.numeric.scale+prs_scaled:factor(smoke.status)+prs_scaled:process_meat.scale,
formula_prs = prs_scaled ~ PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+birth.x.scale+birth.y.scale,
numDeriv = F,
facVar="assessment_center")
## After removing missing values, the number of observations is 3486
## initial value 7079.971407
## iter 50 value 7058.783575
## final value 7057.612449
## converged
fit_cc_glm <- glm(colorectal_cancer~prs_scaled+PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9+PC10+factor(activity)+agegroups+sex+alcohol+height.scale+assessment_center+waist.scale+factor(smoke.status)+redmeat.numeric.scale+process_meat.scale+veg.numeric.scale+birth.x.scale+birth.y.scale+prs_scaled:birth.x.scale+prs_scaled:birth.y.scale+prs_scaled:factor(activity)+prs_scaled:agegroups+prs_scaled:sex+prs_scaled:alcohol+prs_scaled:height.scale+prs_scaled:waist.scale+prs_scaled:veg.numeric.scale+prs_scaled:redmeat.numeric.scale+prs_scaled:factor(smoke.status)+prs_scaled:process_meat.scale, data=dat_test_casecontrol,family = binomial(), model = FALSE, x = FALSE, y = FALSE)
#print results
fit_normal$res_normal
## Estimate Std.Error Z.value
## (Intercept) -1.8624592303 0.372146401 -5.004641256
## prs_scaled 0.7494519119 0.196056025 3.822641575
## PC1 -0.0328349339 0.017714973 -1.853513054
## PC2 -0.0025914285 0.020716835 -0.125088053
## PC3 0.0023456304 0.022389950 0.104762642
## PC4 -0.0081625534 0.010991280 -0.742639031
## PC5 0.0121852278 0.006360451 1.915780523
## PC6 0.0044850555 0.021470378 0.208895042
## PC7 -0.0054891427 0.012909426 -0.425204230
## PC8 -0.0358245037 0.015510612 -2.309677020
## PC9 -0.0180150297 0.009044649 -1.991788670
## PC10 -0.0129227671 0.017843378 -0.724233213
## factor(activity)1 -0.0764839442 0.103639558 -0.737980225
## factor(activity)2 -0.0667222143 0.105673711 -0.631398422
## agegroups[45,50) 0.6465500883 0.237398203 2.723483500
## agegroups[50,55) 1.2740540143 0.220569487 5.776202467
## agegroups[55,60) 1.7144391654 0.216458437 7.920408127
## agegroups[60,65) 1.9273764498 0.211578467 9.109511372
## agegroups[65,70) 2.2121974290 0.214557067 10.310531662
## agegroups[70,75) 2.2321323853 0.508886132 4.386310113
## sexMale 0.2004358175 0.115802577 1.730840730
## alcohol 0.0167083502 0.102961531 0.162277600
## height.scale 0.0384969521 0.054420669 0.707395789
## assessment_center11002 -0.1686919280 0.291866288 -0.577976747
## assessment_center11003 -0.3582593073 0.287849082 -1.244608128
## assessment_center11004 -0.1219663054 0.292953815 -0.416332880
## assessment_center11005 -0.0004021919 0.290539906 -0.001384291
## assessment_center11006 -0.2419307211 0.280712875 -0.861844049
## assessment_center11007 -0.1746538535 0.260269362 -0.671050377
## assessment_center11008 -0.3179623068 0.256398561 -1.240109559
## assessment_center11009 -0.3171081063 0.252479815 -1.255974092
## assessment_center11010 -0.3835740938 0.244467200 -1.569020687
## assessment_center11011 -0.2148826657 0.250534289 -0.857697630
## assessment_center11012 -0.3182985756 0.333113613 -0.955525573
## assessment_center11013 -0.3609548944 0.256604149 -1.406660396
## assessment_center11014 -0.2974060282 0.260774925 -1.140470191
## assessment_center11016 -0.5095579379 0.250514758 -2.034043587
## assessment_center11017 -0.2572761827 0.287347733 -0.895347878
## assessment_center11018 -0.3339763493 0.278990198 -1.197089904
## assessment_center11020 -0.5180131591 0.281790322 -1.838292941
## assessment_center11021 -0.7375506705 0.280730200 -2.627258020
## assessment_center11022 -1.0696518474 0.659346845 -1.622290083
## assessment_center11023 -1.4376261721 1.463113595 -0.982580011
## waist.scale 0.1170583807 0.044041991 2.657881231
## factor(smoke.status)1 0.1680032918 0.078872353 2.130065678
## factor(smoke.status)2 -0.0870663714 0.133616490 -0.651613969
## redmeat.numeric.scale 0.0572790745 0.039495405 1.450271870
## process_meat.scale 0.1019869442 0.040878919 2.494854221
## veg.numeric.scale -0.0115146116 0.037069724 -0.310620375
## birth.x.scale -0.0385289512 0.042340161 -0.909985939
## birth.y.scale -0.0047980723 0.052045005 -0.092190832
## prs_scaled:birth.x.scale -0.0039143295 0.035829409 -0.109249068
## prs_scaled:birth.y.scale 0.0305684431 0.036986733 0.826470482
## prs_scaled:factor(activity)1 -0.1321675541 0.068734664 -1.922866073
## prs_scaled:factor(activity)2 -0.0840250729 0.070035955 -1.199741952
## prs_scaled:agegroups[45,50) -0.1268838784 0.205333087 -0.617941708
## prs_scaled:agegroups[50,55) -0.1234983412 0.188409370 -0.655478766
## prs_scaled:agegroups[55,60) -0.0519407917 0.183019229 -0.283799642
## prs_scaled:agegroups[60,65) -0.1724790531 0.179985960 -0.958291711
## prs_scaled:agegroups[65,70) -0.1918974806 0.180517864 -1.063038728
## prs_scaled:agegroups[70,75) -0.9689031226 0.314308519 -3.082649891
## prs_scaled:sexMale 0.0086383467 0.078271013 0.110364571
## prs_scaled:alcohol -0.0710648216 0.070469673 -1.008445463
## prs_scaled:height.scale 0.0176433534 0.036039787 0.489552104
## prs_scaled:waist.scale -0.0119408331 0.029409240 -0.406023180
## prs_scaled:veg.numeric.scale -0.0143603947 0.025599873 -0.560955696
## prs_scaled:redmeat.numeric.scale -0.0140805779 0.026564866 -0.530045122
## prs_scaled:factor(smoke.status)1 0.0189597650 0.052979183 0.357871981
## prs_scaled:factor(smoke.status)2 0.0651317037 0.093276271 0.698266592
## prs_scaled:process_meat.scale -0.0328308507 0.027466171 -1.195319525
## eta_X.Intercept. -0.1201589913 0.085503044 -1.405318290
## eta_PC1 0.0065416887 0.007198291 0.908783590
## eta_PC2 -0.0060058771 0.008740841 -0.687105217
## eta_PC3 0.0016542777 0.009759041 0.169512324
## eta_PC4 0.0001886295 0.004844563 0.038936323
## eta_PC5 -0.0019827462 0.002727663 -0.726902998
## eta_PC6 -0.0002292867 0.009622912 -0.023827161
## eta_PC7 0.0095141354 0.005493888 1.731767346
## eta_PC8 -0.0004760320 0.006774566 -0.070267518
## eta_PC9 -0.0011852478 0.003846085 -0.308169956
## eta_PC10 0.0049047785 0.007766306 0.631545848
## eta_birth.x.scale -0.0056059954 0.024847847 -0.225612922
## eta_birth.y.scale 0.0195512376 0.026080744 0.749642626
## sigma_stratadata[, facVar]11001 0.9824778839 0.060636419 16.202768771
## sigma_stratadata[, facVar]11002 0.9443578708 0.055867199 16.903619506
## sigma_stratadata[, facVar]11003 1.0057258040 0.054926018 18.310553746
## sigma_stratadata[, facVar]11004 0.9794200032 0.052675825 18.593349247
## sigma_stratadata[, facVar]11005 0.9537129785 0.052615789 18.125984408
## sigma_stratadata[, facVar]11006 0.9070368536 0.051031199 17.774163062
## sigma_stratadata[, facVar]11007 0.9843861926 0.042022081 23.425451091
## sigma_stratadata[, facVar]11008 0.9580022583 0.041689405 22.979513757
## sigma_stratadata[, facVar]11009 0.9637053159 0.038063748 25.318192648
## sigma_stratadata[, facVar]11010 0.9873322180 0.035731109 27.632285649
## sigma_stratadata[, facVar]11011 0.9643073100 0.035793949 26.940511671
## sigma_stratadata[, facVar]11012 0.9965794024 0.075173284 13.257095432
## sigma_stratadata[, facVar]11013 1.0300137708 0.043467484 23.696190016
## sigma_stratadata[, facVar]11014 0.8927168012 0.040410188 22.091379415
## sigma_stratadata[, facVar]11016 0.9789042156 0.039860426 24.558297956
## sigma_stratadata[, facVar]11017 0.8727957140 0.050802546 17.180156804
## sigma_stratadata[, facVar]11018 0.9639861527 0.051032064 18.889813277
## sigma_stratadata[, facVar]11020 1.0242921321 0.055896763 18.324712725
## sigma_stratadata[, facVar]11021 1.0251153058 0.056234973 18.229142010
## sigma_stratadata[, facVar]11022 0.8472679240 0.170236628 4.977001334
## sigma_stratadata[, facVar]11023 1.3340849771 0.565234336 2.360233434
## Pvalue
## (Intercept) 5.596616e-07
## prs_scaled 1.320296e-04
## PC1 6.380886e-02
## PC2 9.004538e-01
## PC3 9.165642e-01
## PC4 4.577003e-01
## PC5 5.539304e-02
## PC6 8.345302e-01
## PC7 6.706878e-01
## PC8 2.090604e-02
## PC9 4.639425e-02
## PC10 4.689226e-01
## factor(activity)1 4.605265e-01
## factor(activity)2 5.277800e-01
## agegroups[45,50) 6.459744e-03
## agegroups[50,55) 7.640543e-09
## agegroups[55,60) 2.367322e-15
## agegroups[60,65) 8.275383e-20
## agegroups[65,70) 6.315099e-25
## agegroups[70,75) 1.152897e-05
## sexMale 8.348018e-02
## alcohol 8.710873e-01
## height.scale 4.793206e-01
## assessment_center11002 5.632798e-01
## assessment_center11003 2.132758e-01
## assessment_center11004 6.771664e-01
## assessment_center11005 9.988955e-01
## assessment_center11006 3.887733e-01
## assessment_center11007 5.021884e-01
## assessment_center11008 2.149349e-01
## assessment_center11009 2.091254e-01
## assessment_center11010 1.166431e-01
## assessment_center11011 3.910594e-01
## assessment_center11012 3.393120e-01
## assessment_center11013 1.595281e-01
## assessment_center11014 2.540905e-01
## assessment_center11016 4.194720e-02
## assessment_center11017 3.706012e-01
## assessment_center11018 2.312715e-01
## assessment_center11020 6.601925e-02
## assessment_center11021 8.607604e-03
## assessment_center11022 1.047413e-01
## assessment_center11023 3.258142e-01
## waist.scale 7.863360e-03
## factor(smoke.status)1 3.316619e-02
## factor(smoke.status)2 5.146502e-01
## redmeat.numeric.scale 1.469827e-01
## process_meat.scale 1.260089e-02
## veg.numeric.scale 7.560892e-01
## birth.x.scale 3.628299e-01
## birth.y.scale 9.265464e-01
## prs_scaled:birth.x.scale 9.130049e-01
## prs_scaled:birth.y.scale 4.085373e-01
## prs_scaled:factor(activity)1 5.449687e-02
## prs_scaled:factor(activity)2 2.302396e-01
## prs_scaled:agegroups[45,50) 5.366138e-01
## prs_scaled:agegroups[50,55) 5.121596e-01
## prs_scaled:agegroups[55,60) 7.765639e-01
## prs_scaled:agegroups[60,65) 3.379157e-01
## prs_scaled:agegroups[65,70) 2.877644e-01
## prs_scaled:agegroups[70,75) 2.051664e-03
## prs_scaled:sexMale 9.121202e-01
## prs_scaled:alcohol 3.132407e-01
## prs_scaled:height.scale 6.244509e-01
## prs_scaled:waist.scale 6.847256e-01
## prs_scaled:veg.numeric.scale 5.748277e-01
## prs_scaled:redmeat.numeric.scale 5.960806e-01
## prs_scaled:factor(smoke.status)1 7.204391e-01
## prs_scaled:factor(smoke.status)2 4.850105e-01
## prs_scaled:process_meat.scale 2.319622e-01
## eta_X.Intercept. 1.599267e-01
## eta_PC1 3.634644e-01
## eta_PC2 4.920164e-01
## eta_PC3 8.653937e-01
## eta_PC4 9.689412e-01
## eta_PC5 4.672854e-01
## eta_PC6 9.809905e-01
## eta_PC7 8.331499e-02
## eta_PC8 9.439807e-01
## eta_PC9 7.579530e-01
## eta_PC10 5.276837e-01
## eta_birth.x.scale 8.215025e-01
## eta_birth.y.scale 4.534700e-01
## sigma_stratadata[, facVar]11001 4.820683e-59
## sigma_stratadata[, facVar]11002 4.231180e-64
## sigma_stratadata[, facVar]11003 6.817297e-75
## sigma_stratadata[, facVar]11004 3.637392e-77
## sigma_stratadata[, facVar]11005 1.987540e-73
## sigma_stratadata[, facVar]11006 1.120575e-70
## sigma_stratadata[, facVar]11007 2.352523e-121
## sigma_stratadata[, facVar]11008 7.471907e-117
## sigma_stratadata[, facVar]11009 2.014135e-141
## sigma_stratadata[, facVar]11010 4.556747e-168
## sigma_stratadata[, facVar]11011 7.368652e-160
## sigma_stratadata[, facVar]11012 4.104889e-40
## sigma_stratadata[, facVar]11013 3.947025e-124
## sigma_stratadata[, facVar]11014 3.825351e-108
## sigma_stratadata[, facVar]11016 3.526570e-133
## sigma_stratadata[, facVar]11017 3.738992e-66
## sigma_stratadata[, facVar]11018 1.383342e-79
## sigma_stratadata[, facVar]11020 5.255818e-75
## sigma_stratadata[, facVar]11021 3.030379e-74
## sigma_stratadata[, facVar]11022 6.457688e-07
## sigma_stratadata[, facVar]11023 1.826344e-02
summary(fit_cc_glm)
##
## Call:
## glm(formula = colorectal_cancer ~ prs_scaled + PC1 + PC2 + PC3 +
## PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10 + factor(activity) +
## agegroups + sex + alcohol + height.scale + assessment_center +
## waist.scale + factor(smoke.status) + redmeat.numeric.scale +
## process_meat.scale + veg.numeric.scale + birth.x.scale +
## birth.y.scale + prs_scaled:birth.x.scale + prs_scaled:birth.y.scale +
## prs_scaled:factor(activity) + prs_scaled:agegroups + prs_scaled:sex +
## prs_scaled:alcohol + prs_scaled:height.scale + prs_scaled:waist.scale +
## prs_scaled:veg.numeric.scale + prs_scaled:redmeat.numeric.scale +
## prs_scaled:factor(smoke.status) + prs_scaled:process_meat.scale,
## family = binomial(), data = dat_test_casecontrol, model = FALSE,
## x = FALSE, y = FALSE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.318 -1.052 0.511 1.014 2.429
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.863871 0.373154 -4.995 5.89e-07 ***
## prs_scaled 0.499497 0.238532 2.094 0.03626 *
## PC1 -0.031221 0.017375 -1.797 0.07235 .
## PC2 -0.001645 0.020718 -0.079 0.93672
## PC3 0.001455 0.022458 0.065 0.94835
## PC4 -0.007664 0.011006 -0.696 0.48620
## PC5 0.012662 0.006421 1.972 0.04861 *
## PC6 0.003961 0.021718 0.182 0.85527
## PC7 -0.007049 0.013208 -0.534 0.59355
## PC8 -0.036849 0.015732 -2.342 0.01917 *
## PC9 -0.017327 0.009147 -1.894 0.05821 .
## PC10 -0.010266 0.018004 -0.570 0.56853
## factor(activity)1 -0.069999 0.105282 -0.665 0.50613
## factor(activity)2 -0.044384 0.107349 -0.413 0.67927
## agegroups[45,50) 0.677336 0.238974 2.834 0.00459 **
## agegroups[50,55) 1.262668 0.221802 5.693 1.25e-08 ***
## agegroups[55,60) 1.721197 0.217963 7.897 2.86e-15 ***
## agegroups[60,65) 1.946960 0.213114 9.136 < 2e-16 ***
## agegroups[65,70) 2.228565 0.216040 10.315 < 2e-16 ***
## agegroups[70,75) 2.227709 0.526871 4.228 2.36e-05 ***
## sexMale 0.218233 0.118283 1.845 0.06504 .
## alcohol 0.022692 0.104569 0.217 0.82820
## height.scale 0.034745 0.055813 0.623 0.53360
## assessment_center11002 -0.189062 0.298327 -0.634 0.52625
## assessment_center11003 -0.329364 0.293504 -1.122 0.26179
## assessment_center11004 -0.155371 0.301725 -0.515 0.60659
## assessment_center11005 0.004084 0.298811 0.014 0.98910
## assessment_center11006 -0.234589 0.287361 -0.816 0.41430
## assessment_center11007 -0.162683 0.265678 -0.612 0.54032
## assessment_center11008 -0.360973 0.261192 -1.382 0.16697
## assessment_center11009 -0.327795 0.258923 -1.266 0.20552
## assessment_center11010 -0.411078 0.249548 -1.647 0.09950 .
## assessment_center11011 -0.227754 0.255604 -0.891 0.37291
## assessment_center11012 -0.312083 0.340233 -0.917 0.35901
## assessment_center11013 -0.411595 0.262348 -1.569 0.11667
## assessment_center11014 -0.335384 0.266277 -1.260 0.20784
## assessment_center11016 -0.583197 0.255896 -2.279 0.02266 *
## assessment_center11017 -0.257912 0.293121 -0.880 0.37892
## assessment_center11018 -0.376571 0.285428 -1.319 0.18706
## assessment_center11020 -0.600235 0.288220 -2.083 0.03729 *
## assessment_center11021 -0.713118 0.287651 -2.479 0.01317 *
## assessment_center11022 -0.932136 0.674587 -1.382 0.16704
## assessment_center11023 -1.889872 1.476241 -1.280 0.20048
## waist.scale 0.122917 0.044748 2.747 0.00602 **
## factor(smoke.status)1 0.179899 0.080391 2.238 0.02523 *
## factor(smoke.status)2 -0.098684 0.135728 -0.727 0.46718
## redmeat.numeric.scale 0.065471 0.040199 1.629 0.10338
## process_meat.scale 0.099055 0.041569 2.383 0.01718 *
## veg.numeric.scale -0.014606 0.037740 -0.387 0.69875
## birth.x.scale -0.036320 0.042846 -0.848 0.39662
## birth.y.scale -0.005420 0.052999 -0.102 0.91855
## prs_scaled:birth.x.scale 0.019313 0.038553 0.501 0.61641
## prs_scaled:birth.y.scale 0.036272 0.039235 0.924 0.35524
## prs_scaled:factor(activity)1 -0.091153 0.108538 -0.840 0.40101
## prs_scaled:factor(activity)2 -0.011432 0.111961 -0.102 0.91867
## prs_scaled:agegroups[45,50) -0.016787 0.241636 -0.069 0.94461
## prs_scaled:agegroups[50,55) -0.175490 0.223036 -0.787 0.43139
## prs_scaled:agegroups[55,60) 0.062828 0.220620 0.285 0.77581
## prs_scaled:agegroups[60,65) -0.129553 0.212461 -0.610 0.54201
## prs_scaled:agegroups[65,70) -0.113264 0.216266 -0.524 0.60047
## prs_scaled:agegroups[70,75) -0.874803 0.506897 -1.726 0.08438 .
## prs_scaled:sexMale 0.128997 0.125349 1.029 0.30343
## prs_scaled:alcohol -0.001814 0.106803 -0.017 0.98645
## prs_scaled:height.scale -0.040993 0.059002 -0.695 0.48720
## prs_scaled:waist.scale -0.015674 0.046448 -0.337 0.73578
## prs_scaled:veg.numeric.scale -0.019426 0.037445 -0.519 0.60390
## prs_scaled:redmeat.numeric.scale 0.024491 0.042442 0.577 0.56391
## prs_scaled:factor(smoke.status)1 0.124121 0.085627 1.450 0.14718
## prs_scaled:factor(smoke.status)2 0.121209 0.137469 0.882 0.37793
## prs_scaled:process_meat.scale -0.044091 0.043437 -1.015 0.31008
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4832.6 on 3485 degrees of freedom
## Residual deviance: 4253.3 on 3416 degrees of freedom
## AIC: 4393.3
##
## Number of Fisher Scoring iterations: 4