Plots


Ex. 5.1

Load the iris dataset (also used for day-4 exercises):

# Make the built-in iris data.frame available and preview its first six rows.
data(iris)
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

Extract a portion of the dataset corresponding to the Iris virginica species and save it into a new data.frame called iris.virginica.

# Keep only the rows whose species is virginica; all columns are retained.
iris.virginica <- iris[iris$Species == "virginica", ]

Using the iris.virginica data.frame, make a scatterplot of sepal length (y-axis) versus petal length (x-axis).

# Scatterplot: petal length on the x-axis, sepal length on the y-axis.
# No labels are given here, so the column expressions are left unchanged.
plot(x = iris.virginica[, "Petal.Length"],
     y = iris.virginica[, "Sepal.Length"])

Add title, x- and y-axis labels.

# Same scatterplot, now with a title and explicit axis labels.
plot(
  x = iris.virginica$Petal.Length,
  y = iris.virginica$Sepal.Length,
  xlab = "Petal length [cm]",
  ylab = "Sepal length [cm]",
  main = "Iris virginica"
)

Fix their magnification with cex.main, cex.axis, and cex.lab.

# Labelled scatterplot with title, axis labels and tick labels enlarged by 1.3.
plot(
  x = iris.virginica$Petal.Length,
  y = iris.virginica$Sepal.Length,
  # text
  xlab = "Petal length [cm]",
  ylab = "Sepal length [cm]",
  main = "Iris virginica",
  # magnification of tick labels, axis labels and title
  cex.axis = 1.3,
  cex.lab = 1.3,
  cex.main = 1.3
)

Change the orientation of all axis tick labels to horizontal using the las argument (it is a graphical parameter, so use the help page of par to understand its usage).

# Enlarged, labelled scatterplot; las = 1 draws all tick labels horizontally.
plot(
  x = iris.virginica$Petal.Length,
  y = iris.virginica$Sepal.Length,
  # text
  xlab = "Petal length [cm]",
  ylab = "Sepal length [cm]",
  main = "Iris virginica",
  # magnification of tick labels, axis labels and title
  cex.axis = 1.3,
  cex.lab = 1.3,
  cex.main = 1.3,
  # tick-label orientation
  las = 1
)

Change the plotting character (pch) so that the data points are shown as filled circles.

# Same figure as before, with the points drawn as filled circles (pch = 19).
plot(
  x = iris.virginica$Petal.Length,
  y = iris.virginica$Sepal.Length,
  # text
  xlab = "Petal length [cm]",
  ylab = "Sepal length [cm]",
  main = "Iris virginica",
  # magnification of tick labels, axis labels and title
  cex.axis = 1.3,
  cex.lab = 1.3,
  cex.main = 1.3,
  # tick-label orientation and point symbol
  las = 1,
  pch = 19
)

Use a “color-picker” or “color-palette” website (e.g., https://colorbrewer2.org) to select a color you like and obtain its HEX code (e.g., “#004643”), and use it to specify the color of the points.

# Same figure as before, with the filled circles colored by a HEX code.
plot(
  x = iris.virginica$Petal.Length,
  y = iris.virginica$Sepal.Length,
  # text
  xlab = "Petal length [cm]",
  ylab = "Sepal length [cm]",
  main = "Iris virginica",
  # magnification of tick labels, axis labels and title
  cex.axis = 1.3,
  cex.lab = 1.3,
  cex.main = 1.3,
  # tick-label orientation, point symbol and point color
  las = 1,
  pch = 19,
  col = "#e16162"
)

Compute the linear fit of sepal length vs. petal length and save it into a variable called vFit.

# Linear model with sepal length as response and petal length as predictor,
# fitted on the virginica flowers only.
vFit <- lm(formula = Sepal.Length ~ Petal.Length, data = iris.virginica)

Plot the linear fit using the abline function, using a thin (lwd = 0.8), dashed, dark-gray line.

# Styled scatterplot of the virginica flowers.
plot(
  x = iris.virginica$Petal.Length,
  y = iris.virginica$Sepal.Length,
  xlab = "Petal length [cm]",
  ylab = "Sepal length [cm]",
  main = "Iris virginica",
  cex.axis = 1.3,
  cex.lab = 1.3,
  cex.main = 1.3,
  las = 1,
  pch = 19,
  col = "#e16162"
)

# Overlay the line of the fitted model: thin (lwd = 0.8) and dashed (lty = 2).
abline(reg = vFit, lwd = 0.8, lty = 2, col = "gray4")

Compute the Spearman correlation of sepal length vs. petal length (rounded to two digits) and add the text label “R = ” followed by its value at position (x = 6.5, y = 5.5) in the plot, with a reasonable magnification (cex).

# Styled scatterplot of the virginica flowers.
plot(iris.virginica[, "Petal.Length"],
     iris.virginica[, "Sepal.Length"],
     main = "Iris virginica",
     xlab = "Petal length [cm]",
     ylab = "Sepal length [cm]",
     cex.main = 1.3,
     cex.lab = 1.3,
     cex.axis = 1.3,
     las = 1,
     pch = 19,
     col = "#e16162")

# Overlay the line of the fitted model: thin (lwd = 0.8) and dashed (lty = 2).
abline(vFit,
       col = "gray4",
       lty = 2,
       lwd = 0.8)

# Spearman (rank) correlation, rounded to two digits. cor() computes the
# Pearson correlation unless `method` is given, so it is set explicitly.
R <- round(cor(iris.virginica[, "Petal.Length"],
               iris.virginica[, "Sepal.Length"],
               method = "spearman"), 2)

# Write the correlation inside the plotting region at (x = 6.5, y = 5.5).
text(x = 6.5, y = 5.5,
     labels = paste0("R = ", R),
     cex = 1.3)


Ex. 5.2

From the iris dataset, extract the data corresponding to the Iris versicolor species and save it into a new data.frame called iris.versicolor.

# Keep only the rows whose species is versicolor; all columns are retained.
iris.versicolor <- iris[iris$Species == "versicolor", ]

Using the iris.versicolor data.frame you have just created, make a density histogram of petal length.

# Histogram of petal length on the density scale (freq = FALSE).
# No title or label is given here, so the column expression is left unchanged.
hist(x = iris.versicolor[, "Petal.Length"],
     freq = FALSE)

Add title and x-axis names with magnification 1.2.

# Density histogram with a title and an x-axis label, all text enlarged by 1.2.
hist(
  x = iris.versicolor$Petal.Length,
  freq = FALSE,
  xlab = "Petal length",
  main = "Iris versicolor",
  cex.axis = 1.2,
  cex.lab = 1.2,
  cex.main = 1.2
)

Change the color of the bars to lightblue.

# Labelled density histogram with the bars filled in light blue.
hist(
  x = iris.versicolor$Petal.Length,
  freq = FALSE,
  col = "lightblue",
  xlab = "Petal length",
  main = "Iris versicolor",
  cex.axis = 1.2,
  cex.lab = 1.2,
  cex.main = 1.2
)

Add a black line with a width of 2 representing the density (estimated with the density function).

# Labelled, light-blue density histogram of versicolor petal length.
hist(
  x = iris.versicolor$Petal.Length,
  freq = FALSE,
  col = "lightblue",
  xlab = "Petal length",
  main = "Iris versicolor",
  cex.axis = 1.2,
  cex.lab = 1.2,
  cex.main = 1.2
)

# Overlay the kernel density estimate as a black line of width 2.
lines(x = density(iris.versicolor$Petal.Length), lwd = 2, col = "black")


Ex. 5.3

From the iris dataset, extract the data corresponding to the Iris setosa species and save it into a new data.frame called iris.setosa.

# Keep only the rows whose species is setosa; all columns are retained.
iris.setosa <- iris[iris$Species == "setosa", ]

Using the three sub-datasets you have created, make a boxplot showing the distribution of petal length for the three species. To do so, put them in a named list to be passed to the boxplot function.

# Named list holding the petal-length vector of each species; the element
# names are used by boxplot() as group labels.
iris.list <- lapply(
  list(setosa = iris.setosa,
       versicolor = iris.versicolor,
       virginica = iris.virginica),
  function(species_df) species_df[, "Petal.Length"]
)
boxplot(iris.list)

Add title and axis names.

# Boxplot of petal length per species, with a title and axis labels.
boxplot(
  x = iris.list,
  xlab = "Iris species",
  ylab = "Length in cm",
  main = "Petal length"
)

Color the three boxes using a different color for each species.

# Labelled boxplot of petal length; one fill color per box, in list order.
boxplot(
  x = iris.list,
  col = c("#e16162", "#f9bc60", "#abd1c6"),
  xlab = "Iris species",
  ylab = "Length in cm",
  main = "Petal length"
)


Ex. 5.4

The boxplot function can also be applied to a data.frame by specifying a formula (e.g., Petal.Width~Species to consider petal width depending on the species) and data (i.e., the iris data.frame, in our case). Look for more information using the help function.

Starting from the original iris data.frame, compute a boxplot of petal width using a formula and color it by using different colors for different species.

# Boxplot of petal width grouped by species, built from a formula on the full
# iris data.frame; one fill color per species.
boxplot(Petal.Width ~ Species,
        data = iris,
        main = "Petal width",
        xlab = "Iris species",
        # The plotted variable is a width, so the y-axis label says so.
        ylab = "Width in cm",
        col = c("#e16162", "#f9bc60", "#abd1c6"))


Ex. 5.5

Starting from the original iris data.frame, add a column called color reporting the color assigned to each flower (i.e., row) depending on the species it belongs to.

# One HEX color per species, in the order setosa, versicolor, virginica.
palette <- c("#e16162", "#f9bc60", "#abd1c6")

# Look up each row's species position and pick the matching palette entry;
# a species not listed here would get NA.
iris[, "color"] <- palette[
  match(iris[, "Species"], c("setosa", "versicolor", "virginica"))
]

Make a scatterplot of sepal length (y-axis) versus petal length (x-axis) of all flowers, colored according to the information saved in the color column.

# Scatterplot of all flowers: petal length (x) versus sepal length (y), each
# point colored by the value stored in the color column of its row.
plot(iris[, "Petal.Length"],
     iris[, "Sepal.Length"],
     # Title matches the other plots of the full dataset.
     main = "Iris dataset",
     xlab = "Petal length [cm]",
     ylab = "Sepal length [cm]",
     cex.main = 1.3,
     cex.lab = 1.3,
     cex.axis = 1.3,
     las = 1,
     pch = 19,
     col = iris[, "color"])

Add a line depicting the linear fit and some textual information on the correlation between x and y variables.

# Scatterplot of all flowers, colored per row by the color column.
plot(
  x = iris$Petal.Length,
  y = iris$Sepal.Length,
  xlab = "Petal length [cm]",
  ylab = "Sepal length [cm]",
  main = "Iris dataset",
  cex.axis = 1.3,
  cex.lab = 1.3,
  cex.main = 1.3,
  las = 1,
  pch = 19,
  col = iris$color
)

# Linear fit over all species, drawn as a thin dashed line.
vFit2 <- lm(formula = Sepal.Length ~ Petal.Length, data = iris)
abline(reg = vFit2, lwd = 0.8, lty = 2, col = "gray4")

# Correlation (cor() default method) rounded to two digits, written at
# (x = 2.2, y = 7.2).
R <- round(cor(iris$Petal.Length, iris$Sepal.Length), digits = 2)
text(x = 2.2, y = 7.2, labels = paste0("R = ", R), cex = 1.3)

You can add a legend in the bottom-right corner of the figure using the legend function, as in the example below:

# Example legend: three entries with open-circle symbols inside a box
# (bty = "o"), placed in the bottom-right corner of the current plot.
legend(
  "bottomright",
  legend = c("classA", "classB", "classC"),
  col = c("red", "green", "blue"),
  pch = 1,
  bty = "o"
)

Modify the arguments of the example above to add an explanatory legend to your figure.

# Scatterplot of all flowers, colored per row by the color column.
plot(
  x = iris$Petal.Length,
  y = iris$Sepal.Length,
  xlab = "Petal length [cm]",
  ylab = "Sepal length [cm]",
  main = "Iris dataset",
  cex.axis = 1.3,
  cex.lab = 1.3,
  cex.main = 1.3,
  las = 1,
  pch = 19,
  col = iris$color
)

# Linear fit over all species, drawn as a thin dashed line.
vFit2 <- lm(formula = Sepal.Length ~ Petal.Length, data = iris)
abline(reg = vFit2, lwd = 0.8, lty = 2, col = "gray4")

# Correlation (cor() default method) rounded to two digits, written at
# (x = 2.2, y = 7.2).
R <- round(cor(iris$Petal.Length, iris$Sepal.Length), digits = 2)
text(x = 2.2, y = 7.2, labels = paste0("R = ", R), cex = 1.3)

# Legend without a box (bty = "n"): filled circles in the palette colors,
# labelled with the species names in the same order.
legend(
  "bottomright",
  legend = c("setosa", "versicolor", "virginica"),
  col = palette,
  pch = 19,
  bty = "n"
)


Ex. 5.6

Adapt the code of Ex. 5.5 to compare petal width (x-axis) with sepal width (y-axis) and save the resulting plot into a png file named Iris_width_plots.png.

# Open a PNG graphics device; everything drawn until dev.off() goes to the file.
png(filename = "Iris_width_plots.png")

# Scatterplot of all flowers: petal width (x) versus sepal width (y),
# colored per row by the color column.
plot(
  x = iris$Petal.Width,
  y = iris$Sepal.Width,
  xlab = "Petal width [cm]",
  ylab = "Sepal width [cm]",
  main = "Iris dataset",
  cex.axis = 1.3,
  cex.lab = 1.3,
  cex.main = 1.3,
  las = 1,
  pch = 19,
  col = iris$color
)

# Linear fit of sepal width on petal width, drawn as a thin dashed line.
vFit2 <- lm(formula = Sepal.Width ~ Petal.Width, data = iris)
abline(reg = vFit2, lwd = 0.8, lty = 2, col = "gray4")

# Correlation (cor() default method) rounded to two digits, written at
# (x = 2, y = 4.2).
R <- round(cor(iris$Petal.Width, iris$Sepal.Width), digits = 2)
text(x = 2, y = 4.2, labels = paste0("R = ", R), cex = 1.3)

# Legend without a box: filled circles in the palette colors, one per species.
legend(
  "bottomright",
  legend = c("setosa", "versicolor", "virginica"),
  col = palette,
  pch = 19,
  bty = "n"
)

# Close the device so the file is written to disk.
dev.off()