이재용






(4)


Initialization

N=1000  # Total number of Gibbs iterations (time)
burn=N/2  # Burn-in period is half of N

# data y is given and it has 52 values therefore n = 52
n = 52  # i = 1,...,n

# hyperparameters
mu = 0           # prior mean of each theta_i
tau_squared = 1  # prior variance of each theta_i
alpha = 1        # inverse-gamma shape hyperparameter for sigma^2
beta = 1         # inverse-gamma scale hyperparameter for sigma^2


# Initial values for parameters of interest:
# all n (= 52) thetas start at 0 and sigma_squared starts at 1.
# Preallocate the whole N x n matrix at once instead of growing it
# column-by-column with cbind(), which copies the matrix on every pass.
theta_matrix = matrix(0, nrow = N, ncol = n)  # Access the i-th theta chain by theta_matrix[,i]
colnames(theta_matrix) = paste0("theta", 1:n)  # label columns so summaries are readable

sigma_squared = rep(1,N)  # Chain for sigma^2 of length N, initialized at 1


Running Gibbs Sampler algorithm

library(MCMCpack)  # for invgamma

# Gibbs sampler: starting from state i, draw each theta_j and then sigma^2
# from their full conditional distributions to form state i+1.
for (i in 1:(N-1)){
  # theta_j | sigma^2, y  ~  Normal(post_mean, 1/post_precision), where the
  # posterior precision is the sum of the data precision 1/sigma^2 and the
  # prior precision 1/tau^2.
  for (j in 1:n){  # j-th theta chain is theta_matrix[,j]; j-th observation is y[j]
    post_precision = (1/sigma_squared[i]) + (1/tau_squared)
    post_mean = ((y[j]/sigma_squared[i]) + (mu/tau_squared)) / post_precision
    # Assign directly into the matrix cell; the original theta_matrix[,j][i+1]
    # extracted and copied the whole column on every assignment.
    theta_matrix[i+1, j] = rnorm(1, post_mean, sqrt(1/post_precision))
  }

  # sigma^2 | theta, y  ~  Inv-Gamma(n/2 + alpha, (1/2)*sum((y - theta)^2) + beta),
  # using the freshly updated thetas from iteration i+1.
  sigma_squared[i+1] = rinvgamma(1, shape=((n/2)+alpha), scale=((1/2)*sum((y-theta_matrix[i+1,])^2)+beta))
}



Convergence Diagnostics


Traceplots of theta1, theta10, theta21, theta30, theta42, theta50 and sigma_squared

dev.new()
par(mar=c(1,1,1,1))  # Shrink margins to avoid "Error in plot.new() : figure margins too large"
par(mfrow=c(7,1))    # Stack all seven traceplots vertically

# Traceplots for a selection of theta chains, followed by sigma^2
for (idx in c(1, 10, 21, 30, 42, 50)) {
  plot(1:N, theta_matrix[, idx], type = "l", main = paste0("theta", idx))
}
plot(1:N, sigma_squared, type = "l", main = "sigma_squared")

None of these traceplots shows the chain getting stuck in a particular region, which is a sign of good mixing; therefore the chains appear to have converged well.



Running mean plots

# Running (cumulative) means: element i is the mean of draws 1..i.
# Computed vectorized with cumsum()/seq_along() in O(N) instead of an O(N^2) loop.
#
# BUG FIX: the original code called mean(theta_matrix[1:i], k). That (a) indexes
# the matrix as a flat vector, so it always reads the first i values of column 1
# regardless of which theta was intended, and (b) positionally passes k to the
# `trim` argument of mean(), silently trimming the sample instead of selecting a
# column. The correct selection is theta_matrix[1:i, k].
running_mean = function(x) cumsum(x) / seq_along(x)  # mean of draws up to each iteration

mean_theta1  = running_mean(theta_matrix[, 1])
mean_theta10 = running_mean(theta_matrix[, 10])
mean_theta21 = running_mean(theta_matrix[, 21])
mean_theta30 = running_mean(theta_matrix[, 30])
mean_theta42 = running_mean(theta_matrix[, 42])
mean_theta50 = running_mean(theta_matrix[, 50])
mean_sigma_squared = running_mean(sigma_squared)

dev.new()
par(mar=c(1,1,1,1))  # Shrink margins to avoid "Error in plot.new() : figure margins too large"
par(mfrow=c(7,1))    # Stack all seven running-mean plots vertically

# Running-mean plots for the selected thetas and sigma^2
running_series = list(theta1 = mean_theta1, theta10 = mean_theta10,
                      theta21 = mean_theta21, theta30 = mean_theta30,
                      theta42 = mean_theta42, theta50 = mean_theta50,
                      sigma_squared = mean_sigma_squared)
for (nm in names(running_series)) {
  plot(1:N, running_series[[nm]], type = "l", main = nm)
}

These are the running mean plots. A good running mean plot has the mean converging without getting stuck in some area for a long period. It looks like all the means are showing a good sign of mixing which means that they have converged well.



Posterior mean and 95% Credible Intervals


Prep work

library(coda)  # for mcmc()

# Wrap the raw chains as coda 'mcmc' objects so that summary() and the other
# coda diagnostics can be applied to them.
theta_mcmc=mcmc(theta_matrix)  # We must turn our chains into MCMC objects.
                               # Given a matrix, mcmc() treats each column
                               # as the chain of one variable.
sigma_squared_mcmc=mcmc(sigma_squared)


Posterior mean and SD of each theta (in order, i.e., theta1–theta52, although the rows are not labeled)

# Columns 1-2 of the statistics table: posterior mean and SD, one row per theta
summary(theta_mcmc)$statistics[,1:2]
            Mean        SD
theta 0.16866215 1.0010291
theta 0.06662427 1.0098908
theta 0.10609239 1.0288480
theta 0.11098265 1.0028352
theta 0.11207815 1.0124509
theta 0.08977234 0.9898377
theta 0.11382065 1.0078150
theta 0.17277891 0.9815877
theta 0.15602034 1.0248586
theta 0.09052712 1.0179474
theta 0.08302068 0.9506850
theta 0.10470803 0.9921187
theta 0.07295253 0.9774395
theta 0.18075740 0.9948902
theta 0.05717448 0.9750635
theta 0.08011310 0.9948006
theta 0.10412684 0.9967510
theta 0.09659759 0.9881780
theta 0.07896604 0.9763653
theta 0.09113349 0.9920818
theta 0.12497041 1.0533581
theta 0.11094601 1.0133544
theta 0.07204923 0.9848513
theta 0.06572016 0.9598377
theta 0.09943317 1.0103351
theta 0.04215786 0.9823270
theta 0.16662591 1.0018879
theta 0.11480913 1.0104026
theta 0.17375106 1.0022958
theta 0.12599156 1.0490034
theta 0.13751327 1.0509961
theta 0.04333467 0.9954232
theta 0.16321795 1.0454928
theta 0.07538786 0.9757666
theta 0.10684127 0.9898285
theta 0.12613391 1.0075094
theta 0.15837362 1.0273960
theta 0.12077104 1.0319959
theta 0.08503282 1.0000584
theta 0.06327342 1.0169710
theta 0.09610743 1.0334569
theta 0.08536585 0.9964378
theta 0.05337960 0.9760025
theta 0.08387485 1.0131051
theta 0.09877131 0.9801875
theta 0.12325634 1.0434862
theta 0.15491046 0.9890931
theta 0.02374481 0.9759991
theta 0.10373518 1.0087258
theta 0.17113957 0.9780961
theta 0.19489864 0.9924494
theta 0.16287883 1.0381615


95% Credible Intervals of each theta

# Columns 1 and 5 of the quantile table are the 2.5% and 97.5% quantiles
summary(theta_mcmc)$quantiles[,c(1,5)]
           2.5%    97.5%
theta -1.810895 2.077111
theta -1.849406 1.984451
theta -1.862935 2.150443
theta -1.889497 2.006077
theta -1.760124 2.030506
theta -1.764752 2.052513
theta -1.828619 1.987939
theta -1.759649 2.021391
theta -1.940154 2.098491
theta -1.884837 2.041454
theta -1.834327 1.864707
theta -1.882342 2.030670
theta -1.805197 1.907382
theta -1.786278 2.152024
theta -1.813243 1.996194
theta -1.776229 2.051804
theta -1.850189 2.157586
theta -1.845776 1.954609
theta -1.796501 1.942548
theta -1.776173 1.931577
theta -1.944257 2.107698
theta -1.837710 2.127958
theta -1.898102 2.024992
theta -1.751929 1.857339
theta -1.799762 1.979638
theta -1.809408 2.006924
theta -1.715998 2.115109
theta -1.742719 2.151597
theta -1.832360 2.096757
theta -1.938203 2.153232
theta -1.874204 2.134105
theta -1.888116 2.159077
theta -1.805354 2.110769
theta -1.813591 1.994042
theta -1.669803 2.146920
theta -1.859552 2.095682
theta -1.844567 2.125155
theta -2.022027 2.179667
theta -1.890258 1.941496
theta -1.916019 2.126840
theta -1.763327 2.163823
theta -1.951211 2.000401
theta -1.893532 1.927870
theta -1.762094 2.108573
theta -1.795246 1.968075
theta -1.872191 2.134931
theta -1.725252 2.119553
theta -1.890692 1.953790
theta -1.744632 2.066899
theta -1.848796 1.966606
theta -1.766885 2.227527
theta -1.844086 2.185829


Posterior mean and SD of sigma_squared

# First two entries of the statistics vector: posterior mean and SD of sigma^2
summary(sigma_squared_mcmc)$statistics[1:2]
     Mean        SD 
101.51997  21.08786 


95% Credible Intervals of sigma_squared

# Entries 1 and 5 of the quantile vector: the 2.5% and 97.5% quantiles of sigma^2
summary(sigma_squared_mcmc)$quantiles[c(1,5)]
     2.5%     97.5% 
 67.58507 150.68130 









(2)


Initialization

N = 1000      # Total number of iterations (time)
burn = N / 2  # Burn-in period: the first half of the chain

# observations
n = 16

# hyperparameters of the beta component
alpha = 2
beta = 4

# initial values: both chains start at 0
x_with_y = rep(0, N)  # draws of X | Y
y_with_x = rep(0, N)  # draws of Y | X

# joint storage for the (X, Y) pairs, one row per iteration
x_and_y = matrix(0, nrow = N, ncol = 2,
                 dimnames = list(NULL, c("x_with_y", "y_with_x")))


Gibbs Sampler algorithm

# Gibbs sampler for the pair, alternating between the two conditionals:
#   Y | X = x  ~  Beta(x + alpha, n - x + beta)
#   X | Y = y  ~  Binomial(n, y)
for (t in 1:(N - 1)) {
  y_new = rbeta(1, x_with_y[t] + alpha, n - x_with_y[t] + beta)
  x_new = rbinom(1, n, y_new)

  y_with_x[t + 1] = y_new
  x_with_y[t + 1] = x_new
  x_and_y[t + 1, ] = c(x_new, y_new)
}


The generated chain for the target joint density

# Last six states of the chain: (x, y) pairs from the end of the run
tail(x_and_y)
        x_with_y   y_with_x
 [995,]        6 0.39895807
 [996,]        6 0.38380821
 [997,]        3 0.42123959
 [998,]        2 0.09455467
 [999,]        1 0.11796922
[1000,]        2 0.18101756


Traceplots for X and Y

# Traceplots for the two chains, stacked vertically
par(mfrow=c(2,1))
plot(1:N,x_and_y[,1],type="l", main="X")  # X chain
plot(1:N,x_and_y[,2],type="l", main="Y")  # Y chain

The traceplots show no sign of the chains getting stuck in certain areas. We can say that the chains have mixed well and therefore converged well.




(3)


Comparison of histograms

library(VGAM)  # for rbetabinom.ab()

# 500 direct draws from the marginal X ~ BetaBinomial(n, alpha, beta)
samples = rbetabinom.ab(500, n, alpha, beta)

par(mfrow=c(2,1))
# Keep only the post-burn-in Gibbs draws: (burn+1):N leaves exactly
# N - burn = 500 draws, matching the 500 direct draws above.
# (The original 500:N kept 501 values, one of them from the burn-in period.)
hist(x_and_y[(burn+1):N, 1], main="Samples obtained from the Gibbs Sampler")
hist(samples, main="Samples directly from f(x)")

We can clearly see that these samples are very much alike.



---
title: "Bayesian Statistics HW"
output: html_notebook
---

\

### 이재용 

\
\

![](/Users/jaeyonglee/Documents/College/수업/4학년 2학기/베이지안/과제/images/스크린샷 2021-12-06 오전 9.48.36.png)

\

![](/Users/jaeyonglee/Documents/College/수업/4학년 2학기/베이지안/과제/images/IMG_0302.jpeg)

![](/Users/jaeyonglee/Documents/College/수업/4학년 2학기/베이지안/과제/images/IMG_0303.jpeg)

\
\

## (4)

\

### Initialization

```{r}
N=1000  # Total number of Gibbs iterations (time)
burn=N/2  # Burn-in period is half of N

# data y is given and it has 52 values therefore n = 52
n = 52  # i = 1,...,n

# hyperparameters
mu = 0           # prior mean of each theta_i
tau_squared = 1  # prior variance of each theta_i
alpha = 1        # inverse-gamma shape hyperparameter for sigma^2
beta = 1         # inverse-gamma scale hyperparameter for sigma^2


# Initial values for parameters of interest:
# all n (= 52) thetas start at 0 and sigma_squared starts at 1.
# Preallocate the whole N x n matrix at once instead of growing it
# column-by-column with cbind(), which copies the matrix on every pass.
theta_matrix = matrix(0, nrow = N, ncol = n)  # Access the i-th theta chain by theta_matrix[,i]
colnames(theta_matrix) = paste0("theta", 1:n)  # label columns so summaries are readable

sigma_squared = rep(1,N)  # Chain for sigma^2 of length N, initialized at 1
```

\

### Running Gibbs Sampler algorithm

```{r}
library(MCMCpack)  # for rinvgamma()

# Gibbs sampler: starting from state i, draw each theta_j and then sigma^2
# from their full conditional distributions to form state i+1.
for (i in 1:(N-1)){
  # theta_j | sigma^2, y  ~  Normal(post_mean, 1/post_precision), where the
  # posterior precision is the sum of the data precision 1/sigma^2 and the
  # prior precision 1/tau^2.
  for (j in 1:n){  # j-th theta chain is theta_matrix[,j]; j-th observation is y[j]
    post_precision = (1/sigma_squared[i]) + (1/tau_squared)
    post_mean = ((y[j]/sigma_squared[i]) + (mu/tau_squared)) / post_precision
    # Assign directly into the matrix cell; the original theta_matrix[,j][i+1]
    # extracted and copied the whole column on every assignment.
    theta_matrix[i+1, j] = rnorm(1, post_mean, sqrt(1/post_precision))
  }

  # sigma^2 | theta, y  ~  Inv-Gamma(n/2 + alpha, (1/2)*sum((y - theta)^2) + beta),
  # using the freshly updated thetas from iteration i+1.
  sigma_squared[i+1] = rinvgamma(1, shape=((n/2)+alpha), scale=((1/2)*sum((y-theta_matrix[i+1,])^2)+beta))
}
```

\
\

## Convergence Diagnostics

\

### Traceplots of theta1, theta10, theta21, theta30, theta42, theta50 and sigma_squared

```{r}
dev.new()
par(mar=c(1,1,1,1))  # Shrink margins to avoid "Error in plot.new() : figure margins too large"
par(mfrow=c(7,1))    # Stack all seven traceplots vertically

# Traceplots for a selection of theta chains, followed by sigma^2
for (idx in c(1, 10, 21, 30, 42, 50)) {
  plot(1:N, theta_matrix[, idx], type = "l", main = paste0("theta", idx))
}
plot(1:N, sigma_squared, type = "l", main = "sigma_squared")
```

#### None of these traceplots shows the chain getting stuck in a particular region, which is a sign of good mixing; therefore the chains appear to have converged well.

\
\

### Running mean plots

```{r}
# Running (cumulative) means: element i is the mean of draws 1..i.
# Computed vectorized with cumsum()/seq_along() in O(N) instead of an O(N^2) loop.
#
# BUG FIX: the original code called mean(theta_matrix[1:i], k). That (a) indexes
# the matrix as a flat vector, so it always reads the first i values of column 1
# regardless of which theta was intended, and (b) positionally passes k to the
# `trim` argument of mean(), silently trimming the sample instead of selecting a
# column. The correct selection is theta_matrix[1:i, k].
running_mean = function(x) cumsum(x) / seq_along(x)  # mean of draws up to each iteration

mean_theta1  = running_mean(theta_matrix[, 1])
mean_theta10 = running_mean(theta_matrix[, 10])
mean_theta21 = running_mean(theta_matrix[, 21])
mean_theta30 = running_mean(theta_matrix[, 30])
mean_theta42 = running_mean(theta_matrix[, 42])
mean_theta50 = running_mean(theta_matrix[, 50])
mean_sigma_squared = running_mean(sigma_squared)

dev.new()
par(mar=c(1,1,1,1))  # Shrink margins to avoid "Error in plot.new() : figure margins too large"
par(mfrow=c(7,1))    # Stack all seven running-mean plots vertically
plot(1:N,mean_theta1,type="l", main="theta1")
plot(1:N,mean_theta10,type="l", main="theta10")
plot(1:N,mean_theta21,type="l", main="theta21")
plot(1:N,mean_theta30,type="l", main="theta30")
plot(1:N,mean_theta42,type="l", main="theta42")
plot(1:N,mean_theta50,type="l", main="theta50")
plot(1:N,mean_sigma_squared,type="l", main="sigma_squared")

```

#### These are the running mean plots. A good running mean plot has the mean converging without getting stuck in some area for a long period. It looks like all the means are showing a good sign of mixing which means that they have converged well.

\
\

### Posterior mean and 95% Credible Intervals

\

#### Prep work

```{r}
library(coda)  # for mcmc()

# Wrap the raw chains as coda 'mcmc' objects so that summary() and the other
# coda diagnostics can be applied to them.
theta_mcmc=mcmc(theta_matrix)  # We must turn our chains into MCMC objects.
                               # Given a matrix, mcmc() treats each column
                               # as the chain of one variable.
sigma_squared_mcmc=mcmc(sigma_squared)
```

\

#### Posterior mean and SD of each theta (in order, i.e., theta1–theta52, although the rows are not labeled)

```{r}
# Columns 1-2 of the statistics table: posterior mean and SD, one row per theta
summary(theta_mcmc)$statistics[,1:2]
```

\

#### 95% Credible Intervals of each theta

```{r}
# Columns 1 and 5 of the quantile table are the 2.5% and 97.5% quantiles
summary(theta_mcmc)$quantiles[,c(1,5)]
```

\

#### Posterior mean and SD of sigma_squared

```{r}
# First two entries of the statistics vector: posterior mean and SD of sigma^2
summary(sigma_squared_mcmc)$statistics[1:2]
```

\

#### 95% Credible Intervals of sigma_squared

```{r}
# Entries 1 and 5 of the quantile vector: the 2.5% and 97.5% quantiles of sigma^2
summary(sigma_squared_mcmc)$quantiles[c(1,5)]
```

\
\
\
\
\
\
\

![](/Users/jaeyonglee/Documents/College/수업/4학년 2학기/베이지안/과제/images/스크린샷 2021-12-06 오전 9.48.48.png)

![](/Users/jaeyonglee/Documents/College/수업/4학년 2학기/베이지안/과제/images/IMG_0304.jpeg)

![](/Users/jaeyonglee/Documents/College/수업/4학년 2학기/베이지안/과제/images/IMG_0305.jpeg)

\

## (2)

\

### Initialization

```{r}
N = 1000      # Total number of iterations (time)
burn = N / 2  # Burn-in period: the first half of the chain

# observations
n = 16

# hyperparameters of the beta component
alpha = 2
beta = 4

# initial values: both chains start at 0
x_with_y = rep(0, N)  # draws of X | Y
y_with_x = rep(0, N)  # draws of Y | X

# joint storage for the (X, Y) pairs, one row per iteration
x_and_y = matrix(0, nrow = N, ncol = 2,
                 dimnames = list(NULL, c("x_with_y", "y_with_x")))
```

\

### Gibbs Sampler algorithm

```{r}
# Gibbs sampler for the pair, alternating between the two conditionals:
#   Y | X = x  ~  Beta(x + alpha, n - x + beta)
#   X | Y = y  ~  Binomial(n, y)
for (t in 1:(N - 1)) {
  y_new = rbeta(1, x_with_y[t] + alpha, n - x_with_y[t] + beta)
  x_new = rbinom(1, n, y_new)

  y_with_x[t + 1] = y_new
  x_with_y[t + 1] = x_new
  x_and_y[t + 1, ] = c(x_new, y_new)
}
```

\

### The generated chain for the target joint density

```{r}
# Last six states of the chain: (x, y) pairs from the end of the run
tail(x_and_y)
```

\

### Traceplots for X and Y

```{r}
# Traceplots for the two chains, stacked vertically
par(mfrow=c(2,1))
plot(1:N,x_and_y[,1],type="l", main="X")  # X chain
plot(1:N,x_and_y[,2],type="l", main="Y")  # Y chain
```

#### The traceplots show no sign of the chains getting stuck in certain areas. We can say that the chains have mixed well and therefore converged well.

\
\
\

## (3)

\

### Comparison of histograms

```{r}
library(VGAM)  # for rbetabinom.ab()

# 500 direct draws from the marginal X ~ BetaBinomial(n, alpha, beta)
samples = rbetabinom.ab(500, n, alpha, beta)

par(mfrow=c(2,1))
# Keep only the post-burn-in Gibbs draws: (burn+1):N leaves exactly
# N - burn = 500 draws, matching the 500 direct draws above.
# (The original 500:N kept 501 values, one of them from the burn-in period.)
hist(x_and_y[(burn+1):N, 1], main="Samples obtained from the Gibbs Sampler")
hist(samples, main="Samples directly from f(x)")
```

#### We can clearly see that these samples are very much alike.

\
\






