Lecture 07 - Applications 01: Simulation
23 September, 2024
- `None` as a placeholder or missing value in lists
- `list[0] = value`
- `list.append()` and `list.extend()` to add elements to a list
- `*` operator: `list = [1, 2, 3] * 2`
- `+` operator: `list1 + list2`
- `for` loop to iterate over a list
- `for` loops with `if` statements
- `random` module to generate random numbers
- `numpy` function to simulate different distributions (as we did with the normal distribution)
- `np.random.distribution_name(parameters)`: normal, uniform, binomial, chi-square, etc.

\(X = \begin{pmatrix} X_1 \\ X_2 \\ \vdots \\ X_n \end{pmatrix}\)
# Import necessary packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Fix the seed so that every run draws exactly the same numbers
np.random.seed(151)

# Number of observations drawn from each distribution
n = 10000

# Draw n observations per distribution.
# The order of the three calls matters: they consume the same seeded stream.
vec_normal = np.random.normal(7, 5, n)      # mean 7, standard deviation 5
vec_chisqr = np.random.chisquare(1, n)      # 1 degree of freedom
vec_unif = np.random.uniform(-3, 5, n)      # between -3 and 5

# Report the sample mean of each simulated vector
print("Normal mean: " + str(vec_normal.mean()))
print("Chi-square mean: " + str(vec_chisqr.mean()))
print("Uniform mean: " + str(vec_unif.mean()))
Normal mean: 7.0273290325979465
Chi-square mean: 1.0051975023328465
Uniform mean: 1.0556049464433184
- `plt.subplots()` function
- `fig, ax = plt.subplots(nrows, ncols)`
- You can choose other names in place of `fig` or `ax` (e.g., `my_figures`, `axs` (preferred over `axes` to avoid confusion), `list_subfig`, etc)
- `nrows` is the number of rows of subplots
- `ncols` is the number of columns of subplots
- `plot()` function, as you would with a single plot
# Create a plot with 1 row, 2 columns
# Build a figure that holds a 1 x 2 grid of subplots.
# "list_subfig" collects the two subplot axes (any name works);
# "figsize" is the (width, height) of the whole figure in inches.
fig, list_subfig = plt.subplots(nrows = 1, ncols = 2, figsize = (10, 3))

# Keep the axes from overlapping, then set the horizontal gap between subplots
plt.tight_layout()
plt.subplots_adjust(wspace = 0.3)  # Adjust the value as needed

# Left panel: histogram of the normal draws
list_subfig[0].hist(x = vec_normal)
list_subfig[0].set(title = "Normal Distribution",
                   xlabel = "Value",
                   ylabel = "Frequency")

# Right panel: histogram of the uniform draws
list_subfig[1].hist(x = vec_unif)
list_subfig[1].set(title = "Uniform Distribution",
                   xlabel = "Value",
                   ylabel = "Frequency")

plt.show()
- `list(range(start, stop, step))` to create a sequence of numbers
- `start` is the first number in the sequence
- `stop` is the last number in the sequence (not included)
- `step` is the difference between each number in the sequence
- If `start` is not provided, it defaults to 0
- `list(range(1, 10, 2))` will create the list [1, 3, 5, 7, 9]
- `list[0:3]` will return the first three elements of the list
- `list(range())` to create the following lists:
- `range()` function in other ways soon
i = 0, j = 0
i = 0, j = 1
i = 1, j = 0
i = 1, j = 1
i = 2, j = 0
i = 2, j = 1
# One way is to write this with repeated code chunks.
# Each chunk starts the process of generating new data from scratch.
# The three chunks differ only in "sample_size" and the plot title.
num_simulations = 2000

# Simulate with sample size one
sample_size = 1
# Placeholder list with one slot per simulated sample mean
vec_xbar = [None] * num_simulations
for iteration in range(num_simulations):
    # Draw a fresh sample and store its mean in this iteration's slot
    vec_unif = np.random.uniform(low = -2, high = 2, size = sample_size)
    vec_xbar[iteration] = vec_unif.mean()
# Plot only after the loop, once every sample mean has been stored
plt.hist(vec_xbar)
plt.title("Distribution of Xbar with size 1")
plt.ylabel("Frequency")
plt.xlabel("Values of Xbar")
plt.show()

# Simulate with sample size 10
sample_size = 10
vec_xbar = [None] * num_simulations
for iteration in range(num_simulations):
    vec_unif = np.random.uniform(low = -2, high = 2, size = sample_size)
    vec_xbar[iteration] = vec_unif.mean()
plt.hist(vec_xbar)
plt.title("Distribution of Xbar with size 10")
plt.ylabel("Frequency")
plt.xlabel("Values of Xbar")
plt.show()

# Simulate with sample size 50
sample_size = 50
vec_xbar = [None] * num_simulations
for iteration in range(num_simulations):
    vec_unif = np.random.uniform(low = -2, high = 2, size = sample_size)
    vec_xbar[iteration] = vec_unif.mean()
plt.hist(vec_xbar)
plt.title("Distribution of Xbar with size 50")
plt.ylabel("Frequency")
plt.xlabel("Values of Xbar")
plt.show()
- Why `range(num_simulations)` here and not just `num_simulations`? 🤔
- Using `num_simulations` instead (error!)
# Simulate with sample size 100
sample_size = 100
vec_xbar = [None] * num_simulations
# NOTE: this loop is intentionally wrong. It iterates over the integer
# "num_simulations" itself instead of range(num_simulations), so Python
# raises "TypeError: 'int' object is not iterable" on the "for" line and
# nothing below it runs.
for iteration in num_simulations:
    vec_unif = np.random.uniform(low = -2, high = 2, size = sample_size)
    vec_xbar[iteration] = vec_unif.mean()
plt.hist(vec_xbar)
plt.title("Distribution of Xbar with size 100")
plt.ylabel("Frequency")
plt.xlabel("Values of Xbar")
plt.show()
An error occurred while executing the following cell:
------------------
# Simulate with sample size 100
sample_size = 100
vec_xbar = [None] * num_simulations
for iteration in num_simulations:
vec_unif = np.random.uniform(low = -2, high=2, size = sample_size)
vec_xbar[iteration] = vec_unif.mean()
plt.hist(vec_xbar)
plt.title("Distribution of Xbar with size 100")
plt.ylabel("Frequency")
plt.xlabel("Values of Xbar")
plt.show()
------------------
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[10], line 4
2 sample_size = 100
3 vec_xbar = [None] * num_simulations
----> 4 for iteration in num_simulations:
5 vec_unif = np.random.uniform(low = -2, high=2, size = sample_size)
6 vec_xbar[iteration] = vec_unif.mean()
TypeError: 'int' object is not iterable
# Nested loops: the outer loop runs once per sample size, the inner loop
# runs "num_simulations" times for that sample size.
num_simulations = 2000
sample_size_list = [1,10,50,100,200]

for sample_size in sample_size_list:

    # The following command creates a vector of null values, of length "num_simulations"
    vec_xbar = [None] * num_simulations

    for iteration in range(num_simulations):
        # Draw a fresh uniform sample and store its mean
        vec_unif = np.random.uniform(low = -2, high = 2, size = sample_size)
        vec_xbar[iteration] = vec_unif.mean()

    # Still inside the outer loop: one histogram per sample size
    plt.hist(vec_xbar)
    plt.title("Distribution of Xbar when n is " + str(sample_size))
    plt.ylabel("Frequency")
    plt.xlabel("Values of Xbar")
    plt.show()
# One row of three panels; the whole figure is 15 x 3 inches
fig, list_subfig = plt.subplots(nrows = 1, ncols = 3, figsize = (15, 3))

# Keep the axes from overlapping, then set the horizontal gap between subplots
plt.tight_layout()
plt.subplots_adjust(wspace = 0.3)  # Adjust the value as needed

# Data and title for each panel, listed left to right
panels = [
    (vec_normal, "Normal Distribution"),
    (vec_unif, "Uniform Distribution"),
    (vec_chisqr, "Chi-square Distribution"),
]

# Draw one histogram per panel; the axis labels are the same for all three
for subfig, (vec_values, panel_title) in zip(list_subfig, panels):
    subfig.hist(x = vec_values)
    subfig.set_title(panel_title)
    subfig.set_xlabel("Value")
    subfig.set_ylabel("Frequency")

plt.show()
# One row of three panels, 15 x 3 inches. No tight_layout() call here:
# only the horizontal gap between subplots is adjusted.
fig, list_subfig = plt.subplots(nrows = 1, ncols = 3, figsize = (15, 3))
plt.subplots_adjust(wspace = 0.3)  # Adjust the value as needed

# Left panel: normal draws
list_subfig[0].hist(x = vec_normal)
list_subfig[0].set(title = "Normal Distribution",
                   xlabel = "Value",
                   ylabel = "Frequency")

# Middle panel: uniform draws
list_subfig[1].hist(x = vec_unif)
list_subfig[1].set(title = "Uniform Distribution",
                   xlabel = "Value",
                   ylabel = "Frequency")

# Right panel: chi-square draws
list_subfig[2].hist(x = vec_chisqr)
list_subfig[2].set(title = "Chi-square Distribution",
                   xlabel = "Value",
                   ylabel = "Frequency")

plt.show()
# One row of three panels squeezed into a narrower 10 x 3 inch figure
fig, list_subfig = plt.subplots(nrows = 1, ncols = 3, figsize = (10, 3))

# Keep the axes from overlapping, then set the horizontal gap between subplots
plt.tight_layout()
plt.subplots_adjust(wspace = 0.3)  # Adjust the value as needed

# Give each panel a readable name (left to right)
subfig_normal, subfig_unif, subfig_chisqr = list_subfig

# Left panel: normal draws
subfig_normal.hist(x = vec_normal)
subfig_normal.set_title("Normal Distribution")
subfig_normal.set_xlabel("Value")
subfig_normal.set_ylabel("Frequency")

# Middle panel: uniform draws
subfig_unif.hist(x = vec_unif)
subfig_unif.set_title("Uniform Distribution")
subfig_unif.set_xlabel("Value")
subfig_unif.set_ylabel("Frequency")

# Right panel: chi-square draws
subfig_chisqr.hist(x = vec_chisqr)
subfig_chisqr.set_title("Chi-square Distribution")
subfig_chisqr.set_xlabel("Value")
subfig_chisqr.set_ylabel("Frequency")

plt.show()
- Why `range(num_simulations)` here?
- The reason for `range(num_simulations)` is that the `for` loop expects an iterable object
- If we use `num_simulations`, which is an integer (2000), we will get a `TypeError` because an integer is not iterable
- The `for` loop will not work as expected and we need to use `range(num_simulations)` to iterate over the numbers from 0 to 1999
num_simulations = 2000
# Sample sizes to compare; the outer loop runs once per entry
sample_size_list = [1,10,50,100,200]

for sample_size in sample_size_list:

    # The following command creates a vector of null values, of length "num_simulations"
    vec_xbar = [None] * num_simulations

    for iteration in range(num_simulations):
        # Draw a fresh chi-square sample (1 degree of freedom) and store its mean
        vec_chisqr = np.random.chisquare(df = 1, size = sample_size)
        vec_xbar[iteration] = vec_chisqr.mean()

    # Still inside the outer loop: one histogram per sample size
    plt.hist(vec_xbar)
    plt.title("Distribution of Xbar when n is " + str(sample_size))
    plt.ylabel("Frequency")
    plt.xlabel("Values of Xbar")
    plt.show()
# One row of five panels, one per sample size, in a 20 x 3 inch figure
fig, list_subfig = plt.subplots(1, 5, figsize = (20, 3))

# The tight layout option ensures that the axes are not overlapping
plt.tight_layout()

# Adjust the spacing between subplots
plt.subplots_adjust(wspace = 0.3)  # Adjust the value as needed

# Start the loop
num_simulations = 2000
sample_size_list = [1,10,50,100,200]

# "index" tracks which subplot the current sample size is drawn into
index = 0
for sample_size in sample_size_list:

    # The following command creates a vector of null values, of length "num_simulations"
    vec_xbar = [None] * num_simulations

    for iteration in range(num_simulations):
        # Draw a fresh chi-square sample (1 degree of freedom) and store its mean
        vec_chisqr = np.random.chisquare(df = 1, size = sample_size)
        vec_xbar[iteration] = vec_chisqr.mean()

    # Draw this sample size's histogram in its own panel
    list_subfig[index].hist(vec_xbar)
    list_subfig[index].set_title("Distribution of Xbar when n is " + str(sample_size))
    list_subfig[index].set_ylabel("Frequency")
    list_subfig[index].set_xlabel("Values of Xbar")

    # Move on to the next panel for the next sample size
    index = index + 1

# Show the figure once, after all five panels are filled
plt.show()