rstanarmを使ってロジスティクス回帰

参考

コード

library(tidyverse)
library(caret)
library(GGally)
library(ggplot2)
library(corrplot)
library(bayesplot)
theme_set(bayesplot::theme_default(base_family = "sans"))
library(rstanarm)
options(mc.cores = 1)
library(loo)
library(projpred)
SEED=14124869

nnn<-40
v1_vec<-round(rnorm(nnn,60,10))
v2_vec<-rbinom(nnn,1,5/10)
v3_vec<-rbinom(nnn,1,5/10)
v4_vec<-rbinom(nnn,1,5/10)
pp<-0.1+v2_vec*0.15
outcome_vec<-apply(matrix(pp),1,function(x) rbinom(1,1,x))
df<-data.frame(v1_vec,v2_vec,v3_vec,v4_vec,outcome_vec)
names(df)<-c("v1","v2","v3","v4","outcome")
df[1]<-scale(df[1])
n=dim(df)[1]
p=dim(df)[2]

corrplot(cor(df[,c(1:4)]))
df$outcome<-factor(df$outcome)

x<-model.matrix(outcome ~. -1, data = df)
y<-df$outcome
(reg_formula <- formula(paste("outcome ~", paste(names(df)[1:(p-1)], collapse = " + "))))

t_prior <- student_t(df = 7, location = 0, scale = 2.5)
post1 <- stan_glm(reg_formula, data = df,
			family = binomial(link = "logit"),
			prior = t_prior, prior_intercept = t_prior, QR=TRUE,
			seed = SEED, refresh = 0)

pplot <- plot(post1, "areas", prob = 0.95, prob_outer = 1)
pplot + geom_vline(xintercept = 0)

round(coef(post1), 2)
round(posterior_interval(post1, prob = 0.9), 2)

logistic_model <- glm(outcome ~v1 + v2 +v3 +v4, data = df, family = "binomial")
summary(logistic_model)

何回かまわした結果

1回目



2回目







3回目







変数をふやしてみる


nnn<-40
v1_vec<-round(rnorm(nnn,60,10))
v2_vec<-rbinom(nnn,1,5/10)
v3_vec<-rbinom(nnn,1,5/10)
v4_vec<-rbinom(nnn,1,5/10)
v5_vec<-rbinom(nnn,1,5/10)
v6_vec<-rbinom(nnn,1,5/10)
v7_vec<-rbinom(nnn,1,5/10)
v8_vec<-rbinom(nnn,1,5/10)
v9_vec<-rbinom(nnn,1,5/10)
v10_vec<-rbinom(nnn,1,5/10)
v11_vec<-rbinom(nnn,1,5/10)
v12_vec<-rbinom(nnn,1,5/10)
v13_vec<-rbinom(nnn,1,5/10)
v14_vec<-rbinom(nnn,1,5/10)
v15_vec<-rbinom(nnn,1,5/10)
v16_vec<-rbinom(nnn,1,5/10)
v17_vec<-rbinom(nnn,1,5/10)
v18_vec<-rbinom(nnn,1,5/10)
v19_vec<-rbinom(nnn,1,5/10)
v20_vec<-rbinom(nnn,1,5/10)
v21_vec<-rbinom(nnn,1,5/10)

pp<-0.1+v2_vec*0.15
outcome_vec<-apply(matrix(pp),1,function(x) rbinom(1,1,x))
df<-data.frame(
	v1_vec,v2_vec,v3_vec,v4_vec,v5_vec,
	v6_vec,v7_vec,v8_vec,v9_vec,v10_vec,
	v11_vec,v12_vec,v13_vec,v14_vec,v15_vec,
	v16_vec,v17_vec,v18_vec,v19_vec,v20_vec,
	v21_vec,
	outcome_vec)
names(df)<-c("v1","v2","v3","v4","v5",
		"v6","v7","v8","v9","v10",
		"v11","v12","v13","v14","v15",
		"v16","v17","v18","v19","v20","v21",
		"outcome")
df[1]<-scale(df[1])
n=dim(df)[1]
p=dim(df)[2]

corrplot(cor(df[,c(1:21)]))
df$outcome<-factor(df$outcome)

x<-model.matrix(outcome ~. -1, data = df)
y<-df$outcome
(reg_formula <- formula(paste("outcome ~", paste(names(df)[1:(p-1)], collapse = " + "))))

t_prior <- student_t(df = 7, location = 0, scale = 2.5)
post1 <- stan_glm(reg_formula, data = df,
			family = binomial(link = "logit"),
			prior = t_prior, prior_intercept = t_prior, QR=TRUE,
			seed = SEED, refresh = 0)

pplot <- plot(post1, "areas", prob = 0.95, prob_outer = 1)
pplot + geom_vline(xintercept = 0)

round(coef(post1), 2)
round(posterior_interval(post1, prob = 0.9), 2)

logistic_model <- glm(outcome ~v1 + v2 +v3 +v4+v5+v6+v7+v8+v9+v10+v11+v12+v13+v14+v15+v16+v17+v18+v19+v20+v21, data = df, family = "binomial")
summary(logistic_model)





いいなと思ったら応援しよう!