There are a few simple things we need to set up before we 
use R. They make data input easier and keep our computations 
organized. 

1. Create a folder on your disk, call it, say,  22S101.
2. Go to the site:

         www.stat.uiowa.edu/~jian
   then go to 22S:101 Biostatistics, and then click on Data Sets

3. Download the following data sets:

   Percent low birth weight data (unicef)     Ch.2  Ex. 17
   Nursing home data (nurshome)               Ch.2  Ex. 18
   Canadian cigarettes data (cigarett)        Ch.2  Ex. 19

   Save the above data sets as .txt files in the folder 22S101 
   (on the A drive). Use the names given in the parentheses above. 
   After saving the data, you will have the following files in the folder:

   unicef.txt
   nurshome.txt
   cigarett.txt

4. Open R (Click the Start button, click All programs, and click R)

5. Change the working directory of R to your desired directory.
   For our class, we use the directory 22S101 on the 
   A (disk) drive as our working directory.
   This is done as follows:

   Click File (in the upper-left corner of the R window)
   Click Change directory (a small window will pop up)
   Click Browse
   Find the A drive, and click on it
   Click 22S101 
   Click save
   Click OK
Now we are ready to work in R.                 
#######################################################################
# Note: in R, everything from a # to the end of the line is
# regarded as a comment and is ignored when the code is run
######################################################################
# Basic arithmetic with single numbers
# Store one number under the name x and another under the name y
x <- 10
y <- 5
# Entering a name by itself displays the value stored under it
x
y
# Sum, difference, product and quotient of the two numbers
x + y
x - y
x * y
x / y
###############################
# Vectors: c() combines several numbers into a single vector
x <- c(4, 2, 6)
y <- c(2, 1, 3)
# Display both vectors
x
y
# Arithmetic on vectors is carried out element by element
x + y
x - y
x * y
x / y
#################################
# Store the results of vector expressions under new names
z1 <- x + y
z2 <- 2 * x + 3 * y + 10
z3 <- x * y
z4 <- x^2
# Display the four new vectors
z1
z2
z3
z4
###################################
# Summary statistics
# Measures of centre for z1: the mean and the median
mean(z1)
median(z1)
# Measures of spread: the variance and the standard deviation.
# z1 is first replaced by a vector whose entries are all equal,
# then z2 by a vector whose entries differ, to compare the results.
z1 <- c(1, 1, 1)
var(z1)
sd(z1)
z2 <- c(0.4, 2, 0.6)
var(z2)
sd(z2)
########################
# FEV1 data: forced expiratory volume in 1 second, measured on
# 13 adolescents suffering from asthma.
fev <- c(
  2.30, 2.15, 3.50, 2.60, 2.75, 2.82, 4.05,
  2.25, 2.68, 3.00, 4.02, 2.85, 3.38
)
# Centre and spread of the sample
mean(fev)
median(fev)
sd(fev)
# Smallest and largest observation
min(fev)
max(fev)
# Several summary numbers at once
summary(fev)
quantile(fev)

##################################################
# basic graphs
##################################################
# Chapter 2, Exercise 17
# Input data
# unicef.txt is read from the current working directory. It is treated as a
# tab-separated file with a header row; quote = "" turns off quote handling
# and row.names = NULL numbers the rows instead of taking names from a column.
unicef <- read.table("unicef.txt", header = TRUE, sep = "\t", quote = "",
                     row.names = NULL)
lowbwt <- unicef[[2]]   # The second `column' is lowbwt
lowbwt                  # take a look at lowbwt

# (a) Boxplot
boxplot(lowbwt, ylab = "Percentage of low birth weight infants")

# Histogram
# Absolute frequency histogram (bar heights are counts)
hist(lowbwt)

# Probability histogram (bar heights are densities instead of counts)
hist(lowbwt, probability = TRUE)

# Put two histograms on a single page.
# par() returns the previous values of the settings it changes; they are kept
# in old_par so the layout and margins can be restored after this exercise.
old_par <- par(mfrow = c(2, 1), mar = c(4, 8, 4, 8))
hist(lowbwt, main = "Absolute frequency histogram of lowbwt")
hist(lowbwt, probability = TRUE, main = "Probability histogram of lowbwt")

# Put three plots on a single page (2 x 2 grid; the fourth cell stays empty)
par(mfrow = c(2, 2), mar = c(4, 4, 4, 3))
boxplot(lowbwt, ylab = "Percentage of low birth weight infants",
        main = "Boxplot of lowbwt")
hist(lowbwt, main = "Absolute frequency histogram of lowbwt")
hist(lowbwt, probability = TRUE, main = "Probability histogram of lowbwt")

# Restore the earlier layout and margins. Without this the next plot drawn
# would land in the empty fourth cell of the 2 x 2 grid.
par(old_par)

############################################
# Chapter 2, Exercise 18
# Input data
# nurshome.txt is read from the current working directory as a tab-separated
# file with a header row; quote handling is turned off and rows are numbered.
nurshome <- read.table("nurshome.txt", header = TRUE, quote = "", sep = "\t",
                       row.names = NULL)

# Name the variables
state <- as.character(nurshome[[1]])  # first column, stored as text
resident <- nurshome[[2]]             # second column
state          # take a look at state
resident       # take a look at resident

# (a) Which state has the smallest number of nursing home residents, and
#     which state has the largest number of nursing home residents?
# The logical index keeps every state that attains the extreme value,
# so tied states are all listed.
state[resident == min(resident)]
state[resident == max(resident)]

# (b) Boxplot
boxplot(resident, ylab = "# of home residents per 1000 population")

# Histogram
# Absolute frequency histogram
hist(resident, main = "Absolute frequency histogram")

# Probability histogram
hist(resident, probability = TRUE, main = "Probability histogram")

# Boxplot and probability histogram stacked on one page.
# par() returns the previous values of the settings it changes; they are kept
# in old_par so the layout and margins can be restored afterwards.
old_par <- par(mfrow = c(2, 1), mar = c(4, 8, 4, 8))
boxplot(resident, ylab = "# of home residents per 1000 population",
        main = "Boxplot")
hist(resident, probability = TRUE,
     main = "Histogram: # of home residents per 1000 population ")

# Restore the earlier layout and margins so that plots drawn later are not
# squeezed into the two-row layout with wide margins.
par(old_par)

################################################
# Chapter 2, Exercise 19
# Input data
# cigarett.txt is read from the current working directory as a tab-separated
# file with a header row; quote handling is turned off and rows are numbered.
cigarett <- read.table("cigarett.txt", header = TRUE, quote = "", sep = "\t",
                       row.names = NULL)
tar <- cigarett[[1]]   # first column
nic <- cigarett[[2]]   # second column

# (a) One-way scatter plot
#par(mfrow=c(1,1), mar=c(8,8,8,6))
stripchart(tar, pch = 1)
title(main = "One-way scatter plot of tar", xlab = "tar")

# (c) Two-way scatter plot of tar versus nicotine
#     (nicotine on the horizontal axis, tar on the vertical axis)
#par(mfrow=c(1,1), mar=c(6,8,8,4))
plot(nic, tar, xlab = "nicotine", ylab = "tar", col = "blue",
     main = "Canadian cigarette data: tar vs. nicotine")

# Put the plots on one page (3 rows by 2 columns).
# par() returns the previous values of the settings it changes; they are kept
# in old_par so the layout and margins can be restored afterwards.
old_par <- par(mfrow = c(3, 2), mar = c(4, 6, 5, 4))
stripchart(tar, pch = 1, col = "red")
title(main = "One-way scatter plot of tar", xlab = "tar")

stripchart(nic, pch = 1, col = "blue")
title(main = "One-way scatter plot of nicotine", xlab = "nicotine")

stripchart(tar, vertical = TRUE, pch = 2, col = "red")
title(main = "Vertical one-way scatter plot of tar", ylab = "tar")

stripchart(nic, vertical = TRUE, pch = 2, col = "blue")
title(main = "Vertical one-way scatter plot of nicotine", ylab = "nicotine")

boxplot(tar, main = "Boxplot of tar", col = "red")
boxplot(nic, main = "Boxplot of nicotine", col = "blue")

# Restore the earlier layout and margins so the six-panel layout does not
# carry over to plots drawn later in the session.
par(old_par)

# Numerical summaries of tar
mean(tar)
sd(tar)
summary(tar)