# Introductory R lab script, sections 1-9 (up to the "Factors" heading).
# Intended to be run statement by statement so that each value is printed.

# 1 - Getting started: download and run R in RStudio ----
# Go to the R home page (http://cran.stat.sfu.ca/) and install R for your OS.
# Download and install RStudio for your OS from:
# http://www.rstudio.com/products/rstudio/download/

# 2 - Install and use R packages from http://www.r-project.org ----
# General form: install.packages("<package name>")
# Example: tm is the text mining package for R. Install it only when it is
# missing, so that re-running the script does not reinstall it every time.
if (!requireNamespace("tm", quietly = TRUE)) {
  install.packages("tm")
}
library(tm)  # load the package so its functions can be used

# 3 - Use functions and help in R ----
# Predefined functions:
sqrt(9)
sum(1:5)

# User-defined functions follow this template:
# f <- function() {
#   ## Do something interesting
# }

# Example: f returns the square of `a` plus `b`.
f <- function(a, b) {
  a^2 + b
}
f(2, 3)

# 4 - Objects, operations and classes in R ----
# Everything in R is an object.
# Every object has a class.
# Classes define what objects contain.
# Objects are named by users.
1 + 1
result <- 1 + 1
1:5
power.of.3 <- 3^(1:5)
power.of.3
class("power.of.3")  # class of the quoted name, i.e. a string: "character"
class(power.of.3)    # class of the object itself: "numeric"
ls()                 # names of the objects created so far

# 5 - Setting and changing the workspace in R ----
# R stores your objects in the workspace.
# The workspace is located in your computer's memory.
# Current working directory (the folder file paths are resolved against):
getwd()
# To set the directory: setwd(any_directory)
# The path below is a placeholder; replace it with a real folder on your
# computer before running this line.
setwd("*set wd folder here*")

# 6 - Loading datasets in R ----
# R datasets:
data(iris)
iris
head(iris)

# 7 - Load 20newsgroup/twitter dataset in R ----
# 8 - View the loaded dataset ----
library(tm)
# The folder is given relative to the working directory set in section 5.
corpus <- VCorpus(
  DirSource("LabSep15newsgroup/comp.hardware/", encoding = "UTF-8")
)
corpus
corpus[1:10]
inspect(corpus[1])
inspect(corpus[66:68])

# 9 - Basic data structures in R ----
# Vectors
# A collection of values that all have the same data type. The elements of a
# vector are all numbers, giving a numeric vector, or all character values,
# giving a character vector.
vect.numerical <- c(1, 2, 99, 6, 8, 9)
is(vect.numerical)
vect.character <- c("austria", "spain", "france", "uk", "belgium", "poland")
is(vect.character)
vect.logical <- c(TRUE, TRUE, FALSE, TRUE)
is(vect.logical)

# Factors
# A collection of values that all come from a fixed set of possible values.
# A factor is similar to a vector, except that the values within a factor are
# limited to a fixed set of possible values.
citizen <- factor(c("uk", "us", "no", "au", "uk", "us", "us"))
citizen
unclass(citizen)  # the underlying integer codes plus the levels attribute

# Matrices
# A two-dimensional collection of values that all have the same type. The
# values are arranged in rows and columns.
mat <- matrix(1:10, nrow = 2, ncol = 5)
class(mat)
dim(mat)

# Data frames
# A collection of vectors that all have the same length. This is like a
# matrix, except that each column can contain a different data type.
data(iris)
iris[1:3, ]
names(iris)

# Lists
# A collection of data structures. The components of a list can be simply
# vectors--similar to a data frame, but with each column allowed to have a
# different length. However, a list can also be a much more complicated
# structure.
empl.list <- list(
  employee = "Anna",
  spouse = "Fred",
  children = 3,
  child.ages = c(4, 7, 9)
)
class(empl.list)
empl.list$child.ages[2]

# 10 - Missing and special values in R ----
# Missing value: NA
a <- c(3, 5, 6)
a[2] <- NA
a
is.na(a)

# Special values: -Inf NaN Inf
d <- c(-1, 0, 1)
d / 0

# Read data
# read.table can be used to read data from a text file such as a csv.
# read.table creates a data frame from the values and tries to guess the type
# of each variable.
# "file.csv" is only a stand-in name (no example file accompanies this
# script; one will appear in future exercises), so the read is skipped when
# the file is absent instead of stopping the script with an error.
if (file.exists("file.csv")) {
  mydata <- read.table("file.csv", sep = ",")
} else {
  message("file.csv not found; skipping the read.table example")
}