# Load the cereals data set from a CSV file in the user's home directory.
Cereals <- read.csv("~/Ch28_Cereals.csv")
Here's an idea of what the data looks like:
# Preview the first six rows of the data frame.
head(Cereals)
##                        name     brand IsKellogs calories sugars carbo protein
## 1                 100%_Bran   Nabisco        no       70      6   5.0       4
## 2         100%_Natural_Bran    Quaker        no      120      8   8.0       3
## 3                  All-Bran   Kellogs       yes       70      5   7.0       4
## 4 All-Bran_with_Extra_Fiber   Kellogs       yes       50      0   8.0       4
## 5            Almond_Delight   Ralston        no      110      8  14.0       2
## 6   Apple_Cinnamon_Cheerios Gen-Mills        no      110     10  10.5       2
##   fat sodium fiber potass shelf
## 1   1    130  10.0    280 Lower
## 2   5     15   2.0    135 Lower
## 3   1    260   9.0    320 Lower
## 4   0    140  14.0    330 Lower
## 5   2    200   1.0     -1 Lower
## 6   2    180   1.5     70 Upper
Input the number of successes followed by the sample size.
 WARNING: Results for all one-proportion and two-proportion inference are computed using Wilson's score interval, so they won't exactly match results computed by hand with the usual normal (Wald) approximation.
# One-sample proportion test: 20 successes out of n = 100, against the
# default null H0: p = 0.5. correct = FALSE turns off the Yates continuity
# correction, so the interval reported is the Wilson score interval.
prop.test(20,100, conf.level = 0.95, correct=FALSE)
## 
##  1-sample proportions test without continuity correction
## 
## data:  20 out of 100, null probability 0.5
## X-squared = 36, df = 1, p-value = 1.973e-09
## alternative hypothesis: true p is not equal to 0.5
## 95 percent confidence interval:
##  0.1333669 0.2888292
## sample estimates:
## p 
## 0.2
# Passing a one-way table of a two-category variable: the count of the
# table's first category is treated as the number of successes; here the
# null hypothesis is H0: p = 0.2.
prop.test(table(Cereals$shelf), p=.2, correct=FALSE)
## 
##  1-sample proportions test without continuity correction
## 
## data:  table(Cereals$shelf), null probability 0.2
## X-squared = 136.89, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.2
## 95 percent confidence interval:
##  0.6281876 0.8227060
## sample estimates:
##         p 
## 0.7368421
Singling out one of the categories as "success":
# Count the "Kellogs" successes directly (na.rm = TRUE and complete.cases
# both drop missing values so numerator and denominator stay consistent),
# then test against H0: p = 0.2.
prop.test(sum(Cereals$brand=="Kellogs", na.rm=TRUE), sum(complete.cases(Cereals$brand)), p=.2, correct=FALSE)
## 
##  1-sample proportions test without continuity correction
## 
## data:  sum(Cereals$brand == "Kellogs", na.rm = TRUE) out of sum(complete.cases(Cereals$brand)), null probability 0.2
## X-squared = 5.0033, df = 1, p-value = 0.0253
## alternative hypothesis: true p is not equal to 0.2
## 95 percent confidence interval:
##  0.2109133 0.4133420
## sample estimates:
##         p 
## 0.3026316
# Two-sample proportion test from raw counts: successes c(20, 30) out of
# sample sizes c(120, 130).
prop.test(c(20,30), c(120,130), conf.level = 0.95, correct=FALSE)
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  c(20, 30) out of c(120, 130)
## X-squared = 1.6026, df = 1, p-value = 0.2055
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.16254869  0.03434356
## sample estimates:
##    prop 1    prop 2 
## 0.1666667 0.2307692
# Two-sample proportion test from a 2x2 table
# (rows = IsKellogs groups, columns = shelf categories).
prop.test(table(Cereals$IsKellogs, Cereals$shelf), correct=FALSE)
## 
##  2-sample test for equality of proportions without continuity
##  correction
## 
## data:  table(Cereals$IsKellogs, Cereals$shelf)
## X-squared = 1.3547, df = 1, p-value = 0.2445
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.32614192  0.07019442
## sample estimates:
##    prop 1    prop 2 
## 0.6981132 0.8260870
Input the mean of the sample, then the standard deviation, then the sample size. 
 You first need to load the BSDA package. If it is not already installed, install it once with 
 install.packages("BSDA") 
# BSDA provides tsum.test() for t-tests computed from summary statistics.
library("BSDA")
## Loading required package: lattice
## 
## Attaching package: 'BSDA'
## The following object is masked from 'package:datasets':
## 
##     Orange
# One-sample t-test from summaries: sample mean 10.5, sd 3.5, n = 30,
# testing H0: mu = 8 against the one-sided alternative mu > 8.
# (The warning about 'var.equal' is harmless for a one-sample test.)
tsum.test(10.5, 3.5, 30, alternative = "greater", mu = 8)
## Warning in tsum.test(10.5, 3.5, 30, alternative = "greater", mu = 8): argument
## 'var.equal' ignored for one-sample test.
## 
##  One-sample t-Test
## 
## data:  Summarized x
## t = 3.9123, df = 29, p-value = 0.0002537
## alternative hypothesis: true mean is greater than 8
## 95 percent confidence interval:
##  9.414241       NA
## sample estimates:
## mean of x 
##      10.5
# One-sample t-test on raw data: H0: mean calories = 100 (two-sided).
t.test(Cereals$calories, mu=100)
## 
##  One Sample t-test
## 
## data:  Cereals$calories
## t = 3.1022, df = 75, p-value = 0.002707
## alternative hypothesis: true mean is not equal to 100
## 95 percent confidence interval:
##  102.4955 111.4519
## sample estimates:
## mean of x 
##  106.9737
For one-sided alternatives, use the options alternative="less" or alternative="greater".
Samples in two columns:
# Two-sample t-test comparing two numeric columns; by default R performs
# the Welch test, which does not assume equal variances.
t.test(Cereals$sugars, Cereals$calories)
## 
##  Welch Two Sample t-test
## 
## data:  Cereals$sugars and Cereals$calories
## t = -43.385, df = 82.64, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -104.5710  -95.4027
## sample estimates:
##  mean of x  mean of y 
##   6.986842 106.973684
Samples in 1 column divided by values of a categorical variable in a different column:
# Split one numeric column into two groups using a categorical column,
# then compare the groups with a Welch two-sample t-test.
t.test(subset(Cereals$calories, Cereals$shelf=="Upper"), subset(Cereals$calories, Cereals$shelf=="Lower"))
## 
##  Welch Two Sample t-test
## 
## data:  subset(Cereals$calories, Cereals$shelf == "Upper") and subset(Cereals$calories, Cereals$shelf == "Lower")
## t = -1.6964, df = 72.308, p-value = 0.09411
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -13.205429   1.062572
## sample estimates:
## mean of x mean of y 
##  102.5000  108.5714
# Paired t-test: calories and sugars are measured on the same cereals,
# so paired = TRUE tests the mean of the row-wise differences.
t.test(Cereals$calories, Cereals$sugars, paired=TRUE)
## 
##  Paired t-test
## 
## data:  Cereals$calories and Cereals$sugars
## t = 49.84, df = 75, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   95.99034 103.98335
## sample estimates:
## mean of the differences 
##                99.98684
# Chi-squared test of independence between brand and shelf.
# (R warns because some expected cell counts are small, which can make
# the chi-squared approximation unreliable.)
chisq.test(table(Cereals$brand, Cereals$shelf))
## Warning in chisq.test(table(Cereals$brand, Cereals$shelf)): Chi-squared
## approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  table(Cereals$brand, Cereals$shelf)
## X-squared = 5.8704, df = 5, p-value = 0.319
# Test of independence on a 2x2 table entered as raw counts; for 2x2
# tables R applies Yates' continuity correction by default.
chisq.test(matrix(c(30,40,50,100),ncol=2,byrow=TRUE))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  matrix(c(30, 40, 50, 100), ncol = 2, byrow = TRUE)
## X-squared = 1.4818, df = 1, p-value = 0.2235
# Goodness-of-fit test: observed brand counts against the hypothesized
# probabilities in p (one per category, summing to 1).
chisq.test(table(Cereals$brand), p= c(.25, .25, .2, .1, .1, .1))
## 
##  Chi-squared test for given probabilities
## 
## data:  table(Cereals$brand)
## X-squared = 7.1842, df = 5, p-value = 0.2073
# Goodness-of-fit test with observed counts entered directly.
chisq.test(matrix(c(30,40,50,100,200)), p= c(.25, .25, .2, .2, .1))
## 
##  Chi-squared test for given probabilities
## 
## data:  matrix(c(30, 40, 50, 100, 200))
## X-squared = 705, df = 4, p-value < 2.2e-16
# Simple linear regression of carbo on sugars; summary() reports the
# coefficient estimates with t-tests, R-squared, and the overall F-test.
summary(lm(Cereals$carbo ~ Cereals$sugars))
## 
## Call:
## lm(formula = Cereals$carbo ~ Cereals$sugars)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2463  -1.2562  -0.2463   1.8692   6.1766 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    17.78411    0.74684  23.813  < 2e-16 ***
## Cereals$sugars -0.42296    0.09045  -4.676 1.29e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.469 on 74 degrees of freedom
## Multiple R-squared:  0.2281, Adjusted R-squared:  0.2176 
## F-statistic: 21.86 on 1 and 74 DF,  p-value: 1.286e-05
Confidence intervals for the estimated intercept and slope:
# 95% confidence intervals for the regression coefficients
# (intercept and slope).
confint(lm(Cereals$carbo ~ Cereals$sugars), level=.95)
##                     2.5 %     97.5 %
## (Intercept)    16.2960066 19.2722210
## Cereals$sugars -0.6031946 -0.2427287
Confidence and prediction intervals for predicted values:
# Copy the predictor into a temporary variable so the name used in the
# model formula matches the name supplied in newdata.
tempvariable <- Cereals$carbo
# Confidence interval for the MEAN response at carbo = 19.
predict(lm(Cereals$sugars~tempvariable), newdata= data.frame(tempvariable = 19), interval="confidence")
##        fit      lwr      upr
## 1 4.737612 3.426047 6.049177
# Prediction interval for a SINGLE new observation at carbo = 19.
# NOTE: the value is spelled out as "prediction"; the original "predict"
# only worked through R's partial matching of the interval argument.
predict(lm(Cereals$sugars~tempvariable), newdata= data.frame(tempvariable = 19), interval="prediction")
##        fit       lwr      upr
## 1 4.737612 -3.176984 12.65221
Comparing a quantitative and categorical variable (stacked data):
# One-way ANOVA of sugars by brand, via lm() followed by anova().
anova(lm(Cereals$sugars ~ Cereals$brand))
## Analysis of Variance Table
## 
## Response: Cereals$sugars
##               Df  Sum Sq Mean Sq F value  Pr(>F)  
## Cereals$brand  5  243.24  48.648  2.7737 0.02416 *
## Residuals     70 1227.75  17.539                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The same ANOVA via aov(); summary() prints an equivalent table
# (values differ only in rounding).
summary(aov(Cereals$sugars ~ Cereals$brand))
##               Df Sum Sq Mean Sq F value Pr(>F)  
## Cereals$brand  5  243.2   48.65   2.774 0.0242 *
## Residuals     70 1227.7   17.54                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Comparing several quantitative variables (unstacked data):
# Read unstacked data: one column of ratings per artist.
artsurvey <- read.csv("~/artsurvey.csv")
# stack() reshapes the selected columns into long format: a "values"
# column holding the data and an "ind" column naming the source column.
tempvariable<-stack(artsurvey, select=c("artist.1", "artist.2", "artist.3", "artist.4", "artist.5"))
# One-way ANOVA of values by group (lm() uses the default values ~ ind
# formula implied by the stacked data frame).
anova(lm(tempvariable))
## Analysis of Variance Table
## 
## Response: values
##           Df Sum Sq Mean Sq F value   Pr(>F)   
## ind        4  72.04 18.0100  4.3558 0.002809 **
## Residuals 95 392.80  4.1347                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Bonferroni pairwise comparisons:
# Pairwise t-tests between every pair of groups, using a pooled SD and
# Bonferroni-adjusted p-values.
# NOTE: the argument is spelled out in full as p.adjust.method; the
# original "p.adjust =" only worked through R's partial argument-name
# matching.
pairwise.t.test(tempvariable$values,tempvariable$ind, p.adjust.method = "bonferroni")
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  tempvariable$values and tempvariable$ind 
## 
##          artist.1 artist.2 artist.3 artist.4
## artist.2 1.0000   -        -        -       
## artist.3 0.0152   0.3194   -        -       
## artist.4 0.0071   0.1786   1.0000   -       
## artist.5 0.3194   1.0000   1.0000   1.0000  
## 
## P value adjustment method: bonferroni