#INTRO TO R
#My favourite resources:
#https://www.zoology.ubc.ca/~schluter/R/ -> link to a graduate-level course on using R for statistics at UBC (that I drew heavily from in putting this together)
#stackoverflow
#google
# R works as an interactive calculator: type an expression and its value is
# printed.
5 + 4
log(5)
(log(5)^2 * 10) / 4.567
# Assignment: the left arrow `<-` binds a value to a name.
a <- 5
b <- 4
x <- a + b
# To inspect an object (including a vector), either type its name on a line by
# itself or pass it to print().
x        # typing the name prints the value of x
print(x) # the explicit equivalent
# STORING ITEMS IN VECTORS
# Combine the assignment arrow `<-` with c() ("concatenate") to build a vector
# holding a set of measurements.
x <- c(11, 42, -3, 14, 5)              # five numbers stored in x
x <- 1:10                              # the integers 1 through 10
x <- c("Watson", "Crick", "Wilkins")   # character data must be quoted
x <- c("Watson", 1)                    # mixed types are not allowed; everything
                                       # is coerced to character
# Names are case sensitive: X and x are two different objects.
X <- "Crick"
x <- c(11, 42, -3, 14, 5)
y <- c(x, X)                           # numbers become character when combined
                                       # with a string
# seq() generates a regular sequence of numbers.
x <- seq(0, 10, by = 0.1)              # 0, 0.1, 0.2, ... 9.9, 10
# rep() repeats values a given number of times.
x <- rep(c(1, 2, 3), times = 2)           # 1 2 3 1 2 3
x <- rep(c(1, 2, 3), each = 2)            # 1 1 2 2 3 3
x <- rep(c(1, 2, 3), times = c(2, 1, 4))  # 1 1 2 3 3 3 3
# ACCESS ELEMENTS OF A VECTOR
# Square brackets select elements by position (positions start at 1).
x[5]            # the 5th value of x
x[2:6]          # elements 2 through 6
x[2:length(x)]  # from the 2nd element to the last, i.e. all but the first
x[-1]           # a negative index drops that position: all but the first
x[5] <- 4.2     # overwrite the 5th element with 4.2
# MATH WITH VECTORS
# Arithmetic is applied to every element of the vector in turn.
x + 1    # add 1 to each element
x^2      # square each element
x / 2    # halve each element
10 * x   # multiply each element by 10
x %% 2   # remainder after dividing each element by 2
# None of the lines above change x. To keep a result, assign it to a new name
# or assign it back over the existing vector.
y <- 10 * x   # stored in a new vector
x <- 10 * x   # overwrites x
# USEFUL VECTOR FUNCTIONS
# A selection of handy functions for data vectors. Many also accept other
# objects such as data frames, possibly with different effects.
#
# Transform numerical data
# Common transformations, shown on the single variable x.
sqrt(x)         # square root
sqrt(x + 0.5)   # modified square-root transformation
log(x)          # natural logarithm
log10(x)        # base-10 logarithm
exp(x)          # exponential ("antilog")
abs(x)          # absolute value
x <- 2.45667
round(x, digits = 2)   # round x to two decimal places
# Summary Statistics
# Basic statistical functions for a numeric vector x. Most of them need the
# option na.rm = TRUE when the vector contains missing values.
x <- rep(c(1, 2, 3), times = c(2, 1, 4))   # 1 1 2 3 3 3 3
sum(x)        # total of the values
length(x)     # number of elements (missing values are counted too)
mean(x)       # sample mean
sd(x)         # sample standard deviation
min(x)        # smallest element
max(x)        # largest element
range(x)      # smallest and largest elements, in that order
range(x)[1]   # first entry of the range, i.e. just the smallest element
median(x)     # median
unique(x)     # the distinct values only
sort(x)       # values ordered from smallest to largest
# What am I?
# Each of these returns a single TRUE or FALSE describing the structure or
# data type of x.
is.vector(x)      # is x a vector?
is.character(x)   # does x hold character (text) data?
is.numeric(x)     # does x hold numbers (integer or decimal)?
is.integer(x)     # is x stored specifically as integers?
is.factor(x)      # is x a factor (categorical variable)?
# Functions for character data
x <- "I am a sentence."
casefold(x)                 # convert to lower case
casefold(x, upper = TRUE)   # convert to upper case
substr(x, 2, 4)             # extract 2nd to 4th characters of each element of x
# paste0() joins its arguments with no separator; paste() inserts `sep`
# between them.
paste0(x, " a fragment")            # "I am a sentence. a fragment"
paste(x, "a fragment", sep = " -")  # "I am a sentence. -a fragment"
nchar(x)                    # no. of characters in each element of x
x <- c("I", "am", "a", "sentence")
grep("a", x)       # positions of the elements of x that contain "a": 2 3
strsplit(x, "a")   # split each element wherever "a" occurs; returns a list
x <- c("TLO2", "TLO3", "TLO4")
gsub("TLO", "", x) # replace "TLO" with "" wherever it occurs: "2" "3" "4"
# TRUE and FALSE (logical) data
# A vector can hold logical values, either assigned directly or produced by a
# comparison. Direct assignment looks like this:
z <- c(TRUE, TRUE, FALSE)   # three logical values stored in z
# Comparisons pick out the elements for which a condition is TRUE. The
# comparison operators include
#   ==  equal to
#   !=  not equal to
#   <   less than
#   <=  less than or equal to
# and so on.
z <- c(2, -1, 3, 99, 8)
z <= 3          # TRUE TRUE TRUE FALSE FALSE (one result per element of z)
!(z < 3)        # FALSE FALSE TRUE TRUE TRUE
z[z != 3]       # 2 -1 99 8, the elements for which the condition is TRUE
which(z >= 4)   # 4 5, the positions of the elements meeting the condition
is.vector(z)    # TRUE
is.character(z) # FALSE
is.numeric(z)   # TRUE
is.na(z)        # FALSE FALSE FALSE FALSE FALSE
any(z < 0)      # TRUE
all(z > 0)      # FALSE
# The operators & and | mean AND and OR, applied element by element.
# For example, with these numbers in z:
z <- c(-10, -5, -1, 0, 3, 92)
z < 0 & abs(z) > 5      # TRUE FALSE FALSE FALSE FALSE FALSE
z[z < 0 | abs(z) > 5]   # -10 -5 -1 92
# HOW TO DEAL WITH MISSING VALUES
# Missing values in R are indicated with NA.
# Start from a numeric vector in which -99 was used as a code for "missing".
x <- c(11, 42, -99, 14, 5, -99)
x[5] <- NA          # assign "missing" to the 5th element of x
x[x == -99] <- NA   # change all instances of -99 in x to missing
which(is.na(x))     # identify which element(s) is missing: 3 5 6
# Some functions treat NA as a valid entry. For example, the length of a
# vector (number of elements) includes missing values in the count.
length(x)
# If you only want to include non-missing values, use either of these:
x1 <- na.omit(x)      # put the non-missing values of x into new vector x1;
                      # also used for data frames (later)
x1 <- x[!is.na(x)]    # same values, selected with a logical index
length(x1)            # count the number of non-missing values
# Some functions won't work on variables with missing values unless default
# options are modified. For example, the mean of a vector that contains
# missing values is NA. Most functions have an option "na.rm" that drops the
# missing values before calculating.
x <- c(1, 2, 3, 4, 5, NA)   # a vector with one missing value
mean(x)                     # result is NA
mean(x, na.rm = TRUE)       # result is the mean of non-missing values of x