
百度网盘:链接: https://pan.baidu.com/s/19U1j_fNIV0ILj4MNIkp-bg 密码: 9vbl 百度网盘:链接: https://pan.baidu.com/s/1FhiwB1b5TNaQb9WoOlqQLw 密码: hnag


rm(list = ls())
## 1.安装依赖包
install.packages('glmnet')
install.packages('MASS')
install.packages('survival')
install.packages('rms')
library(glmnet)
library(MASS)
library(survival)
library(rms)
# 2.读取数据
data_exercise <- read.csv('data_exercise.csv')
data <- data_exercise
# 3.空缺值处理
data <- na.omit(data)
# 4.变量转换
data$C6_log <- log(data$C6)
data$C5_log_minus <- log(max(data$C5)+1-(data$C5))
## 5. logistic 回归
### 1.0 full model
### 5.1定义输出和预测变量
Outcome <- "Status_death"
CandidateVariables <- c("B1", "B2", "B3", "B4", "C1", "C2", "C3", "C4", "C5_log_minus" ,"C6_log")
### 5.2建立规则
Formula <- formula(paste(paste(Outcome,"~", collapse=" "),
paste(CandidateVariables, collapse=" + ")))
### 5.3给数据味值
model.full <- glm(Formula, data=data,family=binomial)
summary(model.full)
### 5.4逐步选择变量
model.step <- stepAIC(model.full, direction="both")
summary(model.step)
# 6 Lasso
## 6.1 将数据转化为矩阵
tmp.y <- data$Status_death
tmp.x <- model.matrix(~.,data[CandidateVariables])
### 6.2 模型味数据
model.lasso <- glmnet(tmp.x, tmp.y, family="binomial", nlambda=50, alpha=1, standardize=TRUE)
plot(model.lasso,xvar="lambda",label=TRUE)
### 6.3通过交叉验证找到最佳模型
cv.model <- cv.glmnet(tmp.x, tmp.y, family="binomial", nlambda=50, alpha=1, standardize=TRUE)
plot(cv.model)
cv.model$lambda.min
coef(cv.model, s=cv.model$lambda.min)
### 增加lambda以进一步收缩
cv.model$lambda.1se
coef(cv.model, s=cv.model$lambda.1se)
### 如果最终的变量数为5个
coef(model.lasso , s=model.lasso$lambda[4])
coef(model.lasso , s=0.149000)
# 7. Cox regression
# 2.0 full model
### 7.1定义输出和预测变量值
Outcome <- "Surv(Time_death,Status_death)"
CandidateVariables <- c("B1", "B2", "B3", "B4", "C1", "C2", "C3", "C4", "C5_log_minus" ,"C6_log")
### 7.2 建立规则
Formula <- formula(paste(paste(Outcome,"~", collapse=" "),
paste(CandidateVariables, collapse=" + ")))
### 7.3用所有候选变量拟合模型
model.full <- coxph(Formula, data=data)
summary(model.full)
### 7.4 逐步选择变量
model.step <- stepAIC(model.full, direction="both")
summary(model.step)
# 8 Lasso
### 8.1 将数据转化为矩阵
tmp.y <- Surv(data$Time_death,data$Status_death)
tmp.x <- model.matrix(~.,data[CandidateVariables])
### 模型味数据
model.lasso <- glmnet(tmp.x, tmp.y, family="cox", nlambda=50, alpha=1, standardize=TRUE)
plot(model.lasso,xvar="lambda",label=TRUE)
# 通过正交实验找最优的模型
cv.model <- cv.glmnet(tmp.x, tmp.y, family="cox", nlambda=50, alpha=1, standardize=TRUE)
plot(cv.model)
cv.model$lambda.min
coef(cv.model, s=cv.model$lambda.min)
### 增加lambda以进一步收缩
cv.model$lambda.1se
coef(cv.model, s=cv.model$lambda.1se)
### 加入最终的模型由7个变量
coef(model.lasso , s=model.lasso$lambda[6])
coef(model.lasso , s=0.223)
## 9. 线性回归
# 3.0 full model
### 定义输出和预测值
Outcome <- "X"
CandidateVariables <- c("B1", "B2", "B3", "B4", "C1", "C2", "C3", "C4", "C5_log_minus" ,"C6_log")
### 建立规则
Formula <- formula(paste(paste(Outcome,"~", collapse=" "),
paste(CandidateVariables, collapse=" + ")))
### 所有变量输入到模型
model.full <- lm(Formula, data=data)
summary(model.full)
# 逐步选择
model.step <- stepAIC(model.full, direction="both")
summary(model.step)
# 3.2 Lasso
### 数据转化
tmp.y <- data$X
tmp.x <- model.matrix(~.,data[CandidateVariables])
### 味模型
model.lasso <- glmnet(tmp.x, tmp.y, family="gaussian", nlambda=50, alpha=1, standardize=TRUE)
### 通过交叉验证找到最佳模型
cv.model <- cv.glmnet(tmp.x, tmp.y, family="gaussian", nlambda=50, alpha=1, standardize=TRUE)
plot(cv.model)
cv.model$lambda.min
coef(cv.model, s=cv.model$lambda.min)
### 增加lambda以进一步收缩
cv.model$lambda.1se
coef(cv.model, s=cv.model$lambda.1se)
### 假如最终的模型由8个变量
coef(model.lasso , s=model.lasso$lambda[14])
coef(model.lasso , s=0.06725)