Lecture 07 - Applications 01: Simulation
- `None` as a placeholder or missing value in lists
- `list[0] = value`
- `list.append()` and `list.extend()` to add elements to a list
- `*` operator: `list = [1, 2, 3] * 2`
- `+` operator: `list1 + list2`
- `for` loop to iterate over a list
- `for` loops with `if` statements
- `random` module to generate random numbers
- `numpy` functions to simulate different distributions (as we did with the normal distribution)
- `np.random.distribution_name(parameters)`: normal, uniform, binomial, chi-square, etc.

\(X = \begin{pmatrix} X_1 \\ X_2 \\ \vdots \\ X_n \end{pmatrix}\)
# Import necessary packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Set seed for reproducibility
np.random.seed(151)

# Sample size
n = 10000

# Simulate random variables
# All three draws consume the same seeded global generator, so the order of
# these calls determines the values obtained.
vec_normal = np.random.normal(loc = 7, scale = 5, size = n)
vec_chisqr = np.random.chisquare(df = 1, size = n)
vec_unif = np.random.uniform(low = -3, high = 5, size = n)

# Check their means
print("Normal mean: " + str(np.mean(vec_normal)))
print("Chi-square mean: " + str(np.mean(vec_chisqr)))
print("Uniform mean: " + str(np.mean(vec_unif)))

# Output recorded in the original notebook run:
# Normal mean: 7.0273290325979465
# Chi-square mean: 1.0051975023328465
# Uniform mean: 1.0556049464433184
- Use the `plt.subplots()` function
- `fig, ax = plt.subplots(nrows, ncols)`
- You can use any name in place of `fig` or `ax` (e.g., `my_figures`, `axs` (preferred over `axes` to avoid confusion), `list_subfig`, etc.)
- `nrows` is the number of rows of subplots
- `ncols` is the number of columns of subplots
- Then use the `plot()` function, as you would with a single plot

# Create a plot with 1 row, 2 columns
# "list_subfig" will hold the subfigures, one per panel
# (the name is arbitrary; pick whichever you like).
# "figsize" is the (width, height) of the whole graph in inches.
fig, list_subfig = plt.subplots(1, 2, figsize = (10, 3))

# Keep the axes from overlapping, then set the horizontal gap between panels
plt.tight_layout()
plt.subplots_adjust(wspace=0.3) # Adjust the value as needed

# Each entry pairs a sample with the title of its panel, left to right
panels = [
    (vec_normal, "Normal Distribution"),
    (vec_unif, "Uniform Distribution"),
]

# Draw every panel the same way: histogram, title, axis labels
for subfig, (sample, panel_title) in zip(list_subfig, panels):
    subfig.hist(x = sample)
    subfig.set_title(panel_title)
    subfig.set_xlabel("Value")
    subfig.set_ylabel("Frequency")
plt.show()

- Use `list(range(start, stop, step))` to create a sequence of numbers
- `start` is the first number in the sequence
- `stop` is the last number in the sequence (not included)
- `step` is the difference between each number in the sequence
- If `start` is not provided, it defaults to 0
- `list(range(1, 10, 2))` will create the list `[1, 3, 5, 7, 9]`
- `list[0:3]` will return the first three elements of the list
- Use `list(range())` to create the following lists:
- We will use the `range()` function in other ways soon

i = 0, j = 0
i = 0, j = 1
i = 1, j = 0
i = 1, j = 1
i = 2, j = 0
i = 2, j = 1
# One way is to write this with repeated code chunks
# Each time will start the process of generating new data from scratch.

num_simulations = 2000

# Simulate with sample size one
sample_size = 1

# Pre-allocate one placeholder per simulation; the loop overwrites each slot
vec_xbar = [None] * num_simulations

for iteration in range(num_simulations):
    # Draw a fresh uniform(-2, 2) sample and record its mean
    vec_unif = np.random.uniform(low = -2, high=2, size = sample_size)
    vec_xbar[iteration] = vec_unif.mean()

# Histogram of the simulated sample means
plt.hist(vec_xbar)
plt.title("Distribution of Xbar with size 1")
plt.ylabel("Frequency")
plt.xlabel("Values of Xbar")
plt.show()

# Simulate with sample size 10
sample_size = 10

# Pre-allocate one placeholder per simulation; the loop overwrites each slot
vec_xbar = [None] * num_simulations

for iteration in range(num_simulations):
    # Draw a fresh uniform(-2, 2) sample and record its mean
    vec_unif = np.random.uniform(low = -2, high=2, size = sample_size)
    vec_xbar[iteration] = vec_unif.mean()

# Histogram of the simulated sample means
plt.hist(vec_xbar)
plt.title("Distribution of Xbar with size 10")
plt.ylabel("Frequency")
plt.xlabel("Values of Xbar")
plt.show()

# Simulate with sample size 50
sample_size = 50

# Pre-allocate one placeholder per simulation; the loop overwrites each slot
vec_xbar = [None] * num_simulations

for iteration in range(num_simulations):
    # Draw a fresh uniform(-2, 2) sample and record its mean
    vec_unif = np.random.uniform(low = -2, high=2, size = sample_size)
    vec_xbar[iteration] = vec_unif.mean()

# Histogram of the simulated sample means
plt.hist(vec_xbar)
plt.title("Distribution of Xbar with size 50")
plt.ylabel("Frequency")
plt.xlabel("Values of Xbar")
plt.show()

- Why do we use `range(num_simulations)` here and not just `num_simulations`? 🤔
- Let's try using `num_simulations` instead (error!)

# Simulate with sample size 100
sample_size = 100
vec_xbar = [None] * num_simulations

# NOTE: this loop deliberately iterates over the integer "num_simulations"
# instead of range(num_simulations). An int is not iterable, so the "for"
# statement raises a TypeError before the loop body ever runs.
for iteration in num_simulations:
    vec_unif = np.random.uniform(low = -2, high=2, size = sample_size)
    vec_xbar[iteration] = vec_unif.mean()

plt.hist(vec_xbar)
plt.title("Distribution of Xbar with size 100")
plt.ylabel("Frequency")
plt.xlabel("Values of Xbar")
plt.show()

An error occurred while executing the following cell:
------------------
# Simulate with sample size 100
sample_size = 100
vec_xbar = [None] * num_simulations
for iteration in num_simulations:
vec_unif = np.random.uniform(low = -2, high=2, size = sample_size)
vec_xbar[iteration] = vec_unif.mean()
plt.hist(vec_xbar)
plt.title("Distribution of Xbar with size 100")
plt.ylabel("Frequency")
plt.xlabel("Values of Xbar")
plt.show()
------------------
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[10], line 4
2 sample_size = 100
3 vec_xbar = [None] * num_simulations
----> 4 for iteration in num_simulations:
5 vec_unif = np.random.uniform(low = -2, high=2, size = sample_size)
6 vec_xbar[iteration] = vec_unif.mean()
TypeError: 'int' object is not iterable

num_simulations = 2000
sample_size_list = [1,10,50,100,200]

# Outer loop: one full simulation study (and one histogram) per sample size
for sample_size in sample_size_list:

    # The following command creates a vector of null values, of length "num_simulations"
    vec_xbar = [None] * num_simulations

    # Inner loop: draw a fresh uniform(-2, 2) sample and record its mean
    for iteration in range(num_simulations):
        vec_unif = np.random.uniform(low = -2, high=2, size = sample_size)
        vec_xbar[iteration] = vec_unif.mean()

    # Plot and show inside the outer loop so each sample size gets its own figure
    plt.hist(vec_xbar)
    plt.title("Distribution of Xbar when n is " + str(sample_size))
    plt.ylabel("Frequency")
    plt.xlabel("Values of Xbar")
    plt.show()

fig, list_subfig = plt.subplots(1, 3, figsize = (15, 3))
# The tight layout option ensures that the axes are not overlapping
plt.tight_layout()

# Adjust the spacing between subplots
plt.subplots_adjust(wspace=0.3) # Adjust the value as needed

# Each entry pairs a sample with the title of its panel, left to right.
# NOTE(review): "vec_unif" is reassigned inside the simulation loops earlier in
# this file, so when run top to bottom this may plot the last simulated sample
# rather than the original draws -- confirm this is intended.
panels = [
    (vec_normal, "Normal Distribution"),
    (vec_unif, "Uniform Distribution"),
    (vec_chisqr, "Chi-square Distribution"),
]

# Draw every panel the same way: histogram, title, axis labels
for subfig, (sample, panel_title) in zip(list_subfig, panels):
    subfig.hist(x = sample)
    subfig.set_title(panel_title)
    subfig.set_xlabel("Value")
    subfig.set_ylabel("Frequency")

plt.show()

fig, list_subfig = plt.subplots(1, 3, figsize = (15, 3))
# Adjust the spacing between subplots
# (this variant does not call plt.tight_layout())
plt.subplots_adjust(wspace=0.3) # Adjust the value as needed

# Each entry pairs a sample with the title of its panel, left to right.
# NOTE(review): "vec_unif" is reassigned inside the simulation loops earlier in
# this file, so when run top to bottom this may plot the last simulated sample
# rather than the original draws -- confirm this is intended.
panels = [
    (vec_normal, "Normal Distribution"),
    (vec_unif, "Uniform Distribution"),
    (vec_chisqr, "Chi-square Distribution"),
]

# Draw every panel the same way: histogram, title, axis labels
for subfig, (sample, panel_title) in zip(list_subfig, panels):
    subfig.hist(x = sample)
    subfig.set_title(panel_title)
    subfig.set_xlabel("Value")
    subfig.set_ylabel("Frequency")

plt.show()

fig, list_subfig = plt.subplots(1, 3, figsize = (10, 3))
# Keep the axes from overlapping, then set the horizontal gap between panels
plt.tight_layout()
plt.subplots_adjust(wspace=0.3) # Adjust the value as needed

# Samples and panel titles, listed left to right
samples = [vec_normal, vec_unif, vec_chisqr]
titles = ["Normal Distribution", "Uniform Distribution", "Chi-square Distribution"]

# Fill the three panels in order: histogram, title, axis labels
for position in range(3):
    subfig = list_subfig[position]
    subfig.hist(x = samples[position])
    subfig.set_title(titles[position])
    subfig.set_xlabel("Value")
    subfig.set_ylabel("Frequency")
plt.show()

- Why do we need `range(num_simulations)` here?
- The reason we use `range(num_simulations)` is that the `for` loop expects an iterable object
- If we use `num_simulations`, which is an integer (2000), we will get a `TypeError` because an integer is not iterable
- Therefore, the `for` loop will not work as expected and we need to use `range(num_simulations)` to iterate over the numbers from 0 to 1999

num_simulations = 2000
sample_size_list = [1,10,50,100,200]

# Outer loop: one full simulation study (and one histogram) per sample size
for sample_size in sample_size_list:

    # The following command creates a vector of null values, of length "num_simulations"
    vec_xbar = [None] * num_simulations

    # Inner loop: draw a fresh chi-square (df = 1) sample and record its mean
    for iteration in range(num_simulations):
        vec_chisqr = np.random.chisquare(df = 1, size = sample_size)
        vec_xbar[iteration] = vec_chisqr.mean()

    # Plot and show inside the outer loop so each sample size gets its own figure
    plt.hist(vec_xbar)
    plt.title("Distribution of Xbar when n is " + str(sample_size))
    plt.ylabel("Frequency")
    plt.xlabel("Values of Xbar")
    plt.show()

fig, list_subfig = plt.subplots(1, 5, figsize = (20, 3))
# The tight layout option ensures that the axes are not overlapping
plt.tight_layout()

# Adjust the spacing between subplots
plt.subplots_adjust(wspace=0.3) # Adjust the value as needed

# Start the loop
num_simulations = 2000
sample_size_list = [1,10,50,100,200]

# enumerate() supplies the subplot position alongside each sample size,
# replacing a hand-maintained counter
for index, sample_size in enumerate(sample_size_list):

    # The following command creates a vector of null values, of length "num_simulations"
    vec_xbar = [None] * num_simulations

    # Draw a fresh chi-square (df = 1) sample and record its mean
    for iteration in range(num_simulations):
        vec_chisqr = np.random.chisquare(df = 1, size = sample_size)
        vec_xbar[iteration] = vec_chisqr.mean()

    # Each sample size is drawn on its own panel of the shared figure
    list_subfig[index].hist(vec_xbar)
    list_subfig[index].set_title("Distribution of Xbar when n is " + str(sample_size))
    list_subfig[index].set_ylabel("Frequency")
    list_subfig[index].set_xlabel("Values of Xbar")

# Show once, after every panel has been filled
plt.show()