# Read the cereals data set into a data frame.
# NOTE(review): the "~/" path assumes the CSV sits in the user's home
# directory -- adjust the path if the file lives elsewhere.
Cereals <- read.csv("~/Ch28_Cereals.csv")
Here's an idea of what the data looks like:
# Preview the first rows (head() defaults to n = 6).
head(Cereals)
## name brand IsKellogs calories sugars carbo protein
## 1 100%_Bran Nabisco no 70 6 5.0 4
## 2 100%_Natural_Bran Quaker no 120 8 8.0 3
## 3 All-Bran Kellogs yes 70 5 7.0 4
## 4 All-Bran_with_Extra_Fiber Kellogs yes 50 0 8.0 4
## 5 Almond_Delight Ralston no 110 8 14.0 2
## 6 Apple_Cinnamon_Cheerios Gen-Mills no 110 10 10.5 2
## fat sodium fiber potass shelf
## 1 1 130 10.0 280 Lower
## 2 5 15 2.0 135 Lower
## 3 1 260 9.0 320 Lower
## 4 0 140 14.0 330 Lower
## 5 2 200 1.0 -1 Lower
## 6 2 180 1.5 70 Upper
Input the number of successes followed by the sample size.
WARNING: Results for all one-proportion and two-proportion inference are computed using Wilson's score interval, so they will not exactly match hand calculations based on the usual normal (Wald) approximation.
# One-sample proportion test/CI: 20 successes out of n = 100 against the
# default null p = 0.5; correct = FALSE disables the continuity correction.
prop.test(20,100, conf.level = 0.95, correct=FALSE)
##
## 1-sample proportions test without continuity correction
##
## data: 20 out of 100, null probability 0.5
## X-squared = 36, df = 1, p-value = 1.973e-09
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
## 0.1333669 0.2888292
## sample estimates:
## p
## 0.2
# A two-level factor table supplies the success/failure counts directly;
# prop.test treats the first table entry as "success". Null here is p = 0.2.
prop.test(table(Cereals$shelf), p=.2, correct=FALSE)
##
## 1-sample proportions test without continuity correction
##
## data: table(Cereals$shelf), null probability 0.2
## X-squared = 136.89, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.2
## 95 percent confidence interval:
## 0.6281876 0.8227060
## sample estimates:
## p
## 0.7368421
Singling out one of the categories as "success":
# Build the success count (brand == "Kellogs") and the sample size by hand,
# dropping missing values from both, then test against null p = 0.2.
prop.test(sum(Cereals$brand=="Kellogs", na.rm=TRUE), sum(complete.cases(Cereals$brand)), p=.2, correct=FALSE)
##
## 1-sample proportions test without continuity correction
##
## data: sum(Cereals$brand == "Kellogs", na.rm = TRUE) out of sum(complete.cases(Cereals$brand)), null probability 0.2
## X-squared = 5.0033, df = 1, p-value = 0.0253
## alternative hypothesis: true p is not equal to 0.2
## 95 percent confidence interval:
## 0.2109133 0.4133420
## sample estimates:
## p
## 0.3026316
# Two-sample proportion test: successes c(20, 30) out of sizes c(120, 130);
# the CI is for the difference prop 1 - prop 2.
prop.test(c(20,30), c(120,130), conf.level = 0.95, correct=FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: c(20, 30) out of c(120, 130)
## X-squared = 1.6026, df = 1, p-value = 0.2055
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.16254869 0.03434356
## sample estimates:
## prop 1 prop 2
## 0.1666667 0.2307692
# Two-sample test straight from a 2x2 table: rows of the table are the two
# groups (IsKellogs no/yes); the first column is counted as "success".
prop.test(table(Cereals$IsKellogs, Cereals$shelf), correct=FALSE)
##
## 2-sample test for equality of proportions without continuity
## correction
##
## data: table(Cereals$IsKellogs, Cereals$shelf)
## X-squared = 1.3547, df = 1, p-value = 0.2445
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.32614192 0.07019442
## sample estimates:
## prop 1 prop 2
## 0.6981132 0.8260870
Input the mean of the sample, then the standard deviation, then the sample size.
You first need to load the BSDA package.
# Install BSDA only when it is not already available, then attach it.
# (An unconditional install.packages() re-downloads and re-installs the
# package on every run of the script.)
if (!requireNamespace("BSDA", quietly = TRUE)) {
  install.packages("BSDA")
}
library("BSDA")
## Loading required package: lattice
##
## Attaching package: 'BSDA'
## The following object is masked from 'package:datasets':
##
## Orange
# Summary-statistics t test (BSDA::tsum.test): sample mean 10.5, sample
# sd 3.5, n = 30, one-sided alternative mu > 8. The warning about
# 'var.equal' is harmless for a one-sample test.
tsum.test(10.5, 3.5, 30, alternative = "greater", mu = 8)
## Warning in tsum.test(10.5, 3.5, 30, alternative = "greater", mu = 8): argument
## 'var.equal' ignored for one-sample test.
##
## One-sample t-Test
##
## data: Summarized x
## t = 3.9123, df = 29, p-value = 0.0002537
## alternative hypothesis: true mean is greater than 8
## 95 percent confidence interval:
## 9.414241 NA
## sample estimates:
## mean of x
## 10.5
# One-sample t test of mean calories against the null value mu = 100.
t.test(Cereals$calories, mu=100)
##
## One Sample t-test
##
## data: Cereals$calories
## t = 3.1022, df = 75, p-value = 0.002707
## alternative hypothesis: true mean is not equal to 100
## 95 percent confidence interval:
## 102.4955 111.4519
## sample estimates:
## mean of x
## 106.9737
For one-sided alternatives, use the options alternative="less" or alternative="greater".
Samples in two columns:
# Welch (unequal-variance) two-sample t test comparing two columns directly.
# (Sugars vs. calories is only meant to illustrate the syntax.)
t.test(Cereals$sugars, Cereals$calories)
##
## Welch Two Sample t-test
##
## data: Cereals$sugars and Cereals$calories
## t = -43.385, df = 82.64, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -104.5710 -95.4027
## sample estimates:
## mean of x mean of y
## 6.986842 106.973684
Samples in 1 column divided by values of a categorical variable in a different column:
# Two-sample t test with stacked data: subset() extracts calories for each
# level of the grouping variable shelf.
t.test(subset(Cereals$calories, Cereals$shelf=="Upper"), subset(Cereals$calories, Cereals$shelf=="Lower"))
##
## Welch Two Sample t-test
##
## data: subset(Cereals$calories, Cereals$shelf == "Upper") and subset(Cereals$calories, Cereals$shelf == "Lower")
## t = -1.6964, df = 72.308, p-value = 0.09411
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -13.205429 1.062572
## sample estimates:
## mean of x mean of y
## 102.5000 108.5714
# Paired t test on the row-wise differences calories - sugars.
t.test(Cereals$calories, Cereals$sugars, paired=TRUE)
##
## Paired t-test
##
## data: Cereals$calories and Cereals$sugars
## t = 49.84, df = 75, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 95.99034 103.98335
## sample estimates:
## mean of the differences
## 99.98684
# Chi-squared test of independence between brand and shelf, built from the
# two-way table. The warning flags small expected cell counts.
chisq.test(table(Cereals$brand, Cereals$shelf))
## Warning in chisq.test(table(Cereals$brand, Cereals$shelf)): Chi-squared
## approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: table(Cereals$brand, Cereals$shelf)
## X-squared = 5.8704, df = 5, p-value = 0.319
# Chi-squared test on a 2x2 table entered by hand (byrow = TRUE fills row
# by row). Without correct = FALSE the default Yates correction applies.
chisq.test(matrix(c(30,40,50,100),ncol=2,byrow=TRUE))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: matrix(c(30, 40, 50, 100), ncol = 2, byrow = TRUE)
## X-squared = 1.4818, df = 1, p-value = 0.2235
# Goodness-of-fit test: observed brand counts against hypothesized
# probabilities (p must sum to 1 and match the number of categories).
chisq.test(table(Cereals$brand), p= c(.25, .25, .2, .1, .1, .1))
##
## Chi-squared test for given probabilities
##
## data: table(Cereals$brand)
## X-squared = 7.1842, df = 5, p-value = 0.2073
# Goodness-of-fit test with the observed counts entered by hand.
chisq.test(matrix(c(30,40,50,100,200)), p= c(.25, .25, .2, .2, .1))
##
## Chi-squared test for given probabilities
##
## data: matrix(c(30, 40, 50, 100, 200))
## X-squared = 705, df = 4, p-value < 2.2e-16
# Simple linear regression of carbo on sugars; summary() reports the
# coefficient estimates with t tests, R-squared, and the overall F test.
summary(lm(Cereals$carbo ~ Cereals$sugars))
##
## Call:
## lm(formula = Cereals$carbo ~ Cereals$sugars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.2463 -1.2562 -0.2463 1.8692 6.1766
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.78411 0.74684 23.813 < 2e-16 ***
## Cereals$sugars -0.42296 0.09045 -4.676 1.29e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.469 on 74 degrees of freedom
## Multiple R-squared: 0.2281, Adjusted R-squared: 0.2176
## F-statistic: 21.86 on 1 and 74 DF, p-value: 1.286e-05
Confidence interval for the estimated slope (and intercept)
# 95% confidence intervals for the fitted intercept and slope.
confint(lm(Cereals$carbo ~ Cereals$sugars), level=.95)
## 2.5 % 97.5 %
## (Intercept) 16.2960066 19.2722210
## Cereals$sugars -0.6031946 -0.2427287
Confidence intervals for predicted values
# predict() needs the predictor under the same name in the fit and in
# newdata, so copy the predictor column into a temporary variable first.
tempvariable <- Cereals$carbo
# Confidence interval for the MEAN response at carbo = 19.
predict(lm(Cereals$sugars~tempvariable), newdata= data.frame(tempvariable = 19), interval="confidence")
## fit lwr upr
## 1 4.737612 3.426047 6.049177
# Prediction interval for an INDIVIDUAL response at carbo = 19 (wider than
# the confidence interval for the mean). Spell out interval = "prediction"
# rather than relying on partial matching of "predict" inside match.arg().
predict(lm(Cereals$sugars~tempvariable), newdata= data.frame(tempvariable = 19), interval="prediction")
## fit lwr upr
## 1 4.737612 -3.176984 12.65221
Comparing a quantitative and categorical variable (stacked data):
# One-way ANOVA of sugars by brand via lm() + anova().
anova(lm(Cereals$sugars ~ Cereals$brand))
## Analysis of Variance Table
##
## Response: Cereals$sugars
## Df Sum Sq Mean Sq F value Pr(>F)
## Cereals$brand 5 243.24 48.648 2.7737 0.02416 *
## Residuals 70 1227.75 17.539
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The same one-way ANOVA via aov(); summary() prints an equivalent table.
summary(aov(Cereals$sugars ~ Cereals$brand))
## Df Sum Sq Mean Sq F value Pr(>F)
## Cereals$brand 5 243.2 48.65 2.774 0.0242 *
## Residuals 70 1227.7 17.54
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Comparing several quantitative variables (unstacked data):
# Unstacked data: each artist's ratings sit in their own column.
artsurvey <- read.csv("~/artsurvey.csv")
# stack() gathers the selected columns into a two-column data frame with
# the values in $values and the source column name in $ind, which the
# lm()/anova() formula interface can use directly.
tempvariable<-stack(artsurvey, select=c("artist.1", "artist.2", "artist.3", "artist.4", "artist.5"))
anova(lm(tempvariable))
## Analysis of Variance Table
##
## Response: values
## Df Sum Sq Mean Sq F value Pr(>F)
## ind 4 72.04 18.0100 4.3558 0.002809 **
## Residuals 95 392.80 4.1347
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Bonferroni pairwise comparisons:
# Pairwise t tests between all group pairs with Bonferroni-adjusted
# p-values. Use the full argument name p.adjust.method: the abbreviated
# "p.adjust =" only works through R's partial argument matching.
pairwise.t.test(tempvariable$values,tempvariable$ind, p.adjust.method = "bonferroni")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: tempvariable$values and tempvariable$ind
##
## artist.1 artist.2 artist.3 artist.4
## artist.2 1.0000 - - -
## artist.3 0.0152 0.3194 - -
## artist.4 0.0071 0.1786 1.0000 -
## artist.5 0.3194 1.0000 1.0000 1.0000
##
## P value adjustment method: bonferroni