For many more graphs using R, see the R graph gallery.
# Load the cereal data set from the CSV file in the user's home directory.
Cereals <- read.csv(file = "~/Ch28_Cereals.csv")
Here's an idea of what the data looks like:
# Preview the first six rows (head()'s default) to check the column layout.
head(x = Cereals, n = 6L)
## name brand IsKellogs calories sugars carbo protein
## 1 100%_Bran Nabisco no 70 6 5.0 4
## 2 100%_Natural_Bran Quaker no 120 8 8.0 3
## 3 All-Bran Kellogs yes 70 5 7.0 4
## 4 All-Bran_with_Extra_Fiber Kellogs yes 50 0 8.0 4
## 5 Almond_Delight Ralston no 110 8 14.0 2
## 6 Apple_Cinnamon_Cheerios Gen-Mills no 110 10 10.5 2
## fat sodium fiber potass shelf
## 1 1 130 10.0 280 Lower
## 2 5 15 2.0 135 Lower
## 3 1 260 9.0 320 Lower
## 4 0 140 14.0 330 Lower
## 5 2 200 1.0 -1 Lower
## 6 2 180 1.5 70 Upper
# Install ggplot2 only when it is not already available, so that re-running
# this script does not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Attach ggplot2 for the ggplot() examples that follow.
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.6.3
# Basic base-R histogram of the sugars column, using hist()'s default settings.
hist(Cereals$sugars)
A fancier histogram, still without using ggplot:
# Density-scaled histogram (freq = FALSE) of sugar content, drawn with red
# shading lines (density = 50) and an explicit title and x-axis label.
# Every argument after the data is named, so none is matched by position.
hist(
  Cereals$sugars,
  breaks = 5,
  freq = FALSE,
  density = 50,
  col = "red",
  main = "Sugar content in cereals",
  xlab = "sugar content (g)"
)
Specifying break points every 10 units, from a minimum of 0 to a maximum of 120:
# Template: replace dataset$variable with your own data frame and column.
# seq(0,120,10) yields the break points 0, 10, 20, ..., 120.
hist(dataset$variable, breaks=seq(0,120,10))
Adding a normal curve:
# Template: draw a density-scaled histogram (freq=FALSE), then overlay a normal
# density curve in red on the same plot (add=TRUE).
# `number` is a placeholder: substitute the mean and standard deviation to use,
# e.g. mean(dataset$variable) and sd(dataset$variable).
hist(dataset$variable, freq=FALSE)
curve(dnorm(x, mean=number, sd=number), add=TRUE, col="red")
Using ggplot:
# ggplot2 histogram of sugar content: five bins, black-and-white theme, titled.
ggplot(data = Cereals, mapping = aes(x = sugars)) +
  geom_histogram(bins = 5) +
  theme_bw() +
  ggtitle("Histogram of sugar content in cereals")
# Base-R boxplot of sugar content with a labelled y axis.
boxplot(x = Cereals[["sugars"]], ylab = "sugar content (g)")
Using ggplot:
# ggplot2 boxplot of sugar content (single box), black-and-white theme, titled.
ggplot(data = Cereals, mapping = aes(y = sugars)) +
  geom_boxplot() +
  theme_bw() +
  ggtitle("Boxplot of sugar content in cereals")
Each box is a category of some categorical variable:
# One box of sugar content per brand.  Supplying the data frame through `data`
# keeps any labels derived from the formula as plain column names ("sugars",
# "brand") rather than "Cereals$sugars" / "Cereals$brand".
boxplot(sugars ~ brand, data = Cereals)
Each box is a different quantitative variable:
# One box per quantitative variable, drawn on a shared axis.  `names` labels
# each box with the column it shows, so the boxes are identifiable.
boxplot(
  Cereals$calories, Cereals$carbo, Cereals$sodium,
  names = c("calories", "carbo", "sodium")
)
Using ggplot:
# ggplot2 side-by-side boxplots: one box per brand, filled by brand.
ggplot(
  data = Cereals,
  mapping = aes(x = factor(brand), y = sugars, fill = factor(brand))
) +
  geom_boxplot() +
  theme_bw() +
  ggtitle("Side-by-Side Boxplot of Sugar Content by Brand")
Using ggplot:
# ggplot2 dot plot of sugar content using geom_dotplot()'s default binning.
ggplot(data = Cereals, mapping = aes(x = sugars)) +
  geom_dotplot() +
  theme_bw() +
  ggtitle("Dot Plot of sugars in cereal brands")
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
Using ggplot:
# ggplot2 density estimate of the carbo (carbohydrates) column.
ggplot(data = Cereals, mapping = aes(x = carbo)) +
  geom_density() +
  theme_bw() +
  ggtitle("Density Plot of carbohydrates")
# Stem-and-leaf display of the potass column.
# NOTE(review): potass contains negative entries (e.g. -1 in row 5 of the
# preview above), which look like missing-value codes -- confirm before
# interpreting the lowest stem.
stem(x = Cereals[["potass"]])
##
## The decimal point is 2 digit(s) to the right of the |
##
## -0 | 00
## 0 | 2233333333444444444
## 0 | 55555666666778999999
## 1 | 0000001111111222233444
## 1 | 667799
## 2 | 034
## 2 | 68
## 3 | 23
# Scatter plot of sugars (y axis) against calories (x axis).
plot(Cereals$calories, Cereals$sugars)
# Same plot with jitter() applied to both variables, which adds a small amount
# of random noise so that coincident points no longer overlap exactly.
plot(jitter(Cereals$calories), jitter(Cereals$sugars))
Comparing multiple variables in a scatterplot matrix:
# Template: scatterplot matrix of three columns of `dataset` (replace the
# placeholder names with your own).  Each term of the one-sided formula
# becomes one variable in the matrix; `data` says where to look the names up.
pairs(~ variable1 + variable2 + variable3, data = dataset)
# Pie chart of the number of cereals per brand.
pie(x = table(Cereals[["brand"]]))
# Same chart restricted to cereals on the upper shelf ...
pie(x = table(Cereals[["brand"]][which(Cereals[["shelf"]] == "Upper")]))
# ... and to cereals on the lower shelf.
pie(x = table(Cereals[["brand"]][which(Cereals[["shelf"]] == "Lower")]))
Bar chart
# Bar chart of the number of cereals per brand.
barplot(height = table(Cereals[["brand"]]))
With already summarized data:
# Bar chart built from counts that are already summarized: one bar height per
# category, labelled through names.arg.
barplot(
  height = c(5, 4, 3, 5),
  names.arg = c("category1", "category2", "category3", "category4")
)
Using ggplot:
# ggplot2 bar chart of cereal counts per brand, one fill colour per brand.
ggplot(data = Cereals, mapping = aes(x = brand, fill = brand)) +
  geom_bar() +
  ggtitle("Bar chart of cereal brand")
Find color choices here (the names of R's built-in colors can also be listed by running colors()):
# Stacked bar chart: one bar per brand, split by shelf (rows of the table),
# with one colour per shelf level.
barplot(
  height = table(Cereals[["shelf"]], Cereals[["brand"]]),
  col = c("darkslategray3", "sandybrown")
)
Using ggplot:
# ggplot2 stacked bar chart: brand on the x axis, bars filled by shelf.
ggplot(data = Cereals, mapping = aes(x = brand, fill = shelf)) +
  geom_bar() +
  ggtitle("Bar Chart of Cereal brand by shelf location")
# Grouped (side-by-side) bar chart: one group per shelf, one bar per brand
# within each group (beside = TRUE), with one colour per brand.
barplot(
  height = table(Cereals[["brand"]], Cereals[["shelf"]]),
  col = c(
    "paleturquoise1", "darkslategray3", "paleturquoise4",
    "darkseagreen3", "sandybrown", "chocolate"
  ),
  beside = TRUE
)
Using ggplot:
# ggplot2 grouped bar chart: shelf on the x axis, one dodged bar per brand.
ggplot(data = Cereals, mapping = aes(x = shelf, fill = brand)) +
  geom_bar(position = "dodge") +
  ggtitle("Bar Chart of Shelf Distribution by Brand")
With summarized data in a table:
# Build a small summary table inline.  The header names three columns while
# each data row has four fields, so read.table() uses the leading field of
# every row as the row name.
titanic <- read.table(
  text = 'Survival Class People
1 Alive First 202
2 Dead First 123
3 Alive Second 118
4 Dead Second 167
5 Alive Third 178
6 Dead Third 528
7 Alive Crew 212
8 Dead Crew 673',
  header = TRUE
)
# Grouped bar chart from pre-computed counts: stat = "identity" plots the
# People values as bar heights instead of counting rows.
ggplot(
  data = titanic,
  mapping = aes(x = Survival, y = People, fill = Class)
) +
  geom_bar(stat = "identity", position = "dodge")
# Frequency table: number of cereals per brand.
table(Cereals[["brand"]])
##
## Gen-Mills Kellogs Nabisco Post Quaker Ralston
## 22 23 6 9 8 8
As percentages or proportions instead of counts:
# Convert the brand counts to proportions of the total.
prop.table(x = table(Cereals[["brand"]]))
##
## Gen-Mills Kellogs Nabisco Post Quaker Ralston
## 0.28947368 0.30263158 0.07894737 0.11842105 0.10526316 0.10526316
# Two-way contingency table: brands in rows, shelf positions in columns.
table(Cereals[["brand"]], Cereals[["shelf"]])
##
## Lower Upper
## Gen-Mills 16 6
## Kellogs 19 4
## Nabisco 3 3
## Post 7 2
## Quaker 7 1
## Ralston 4 4