# By Stefan Zeugner, 2007-02-12
# R commands used in TA session 1 on R
# QUICK INTRO INTO R FOR PROGRAMMING NEWBIES
#
# This script is meant to be executed line by line in an interactive session.
#
#____ Contents: ______________________
#  o BASIC INTRODUCTION
#  o GENERATE VECTORS, MATRICES
#  o WORKING WITH LISTS
#  o CLEANING UP OBJECTS
#  o RANDOM NUMBERS AND MATRIX ALGEBRA
#  o CLASSES (variable types)
#  o READING FILES AND DATA.FRAME
#  o ESTIMATING
#  o PREPARE DATAFRAME FOR SAVING
#  o SAVE TO FILE
#______________________________________

#------- BASIC INTRODUCTION -----------

# define variables (here, money equation example);
# the first three lines show the three assignment forms R accepts
# ("<-" is the conventional one):
M<-50000   # money supply
100->P     # price level
Y=1000     # real GDP
v <- P * Y / M   # calculate velocity
# (scalar operators: * / - + %% %/% ^ )
print(v)   # display the value of v

foo <- "Milton"   # define string / character variable

ls()       # list the workspace: all the variables (and other objects) defined so far
ls         # ls is a function; executing it requires brackets (); without the
           # brackets, the object / function code is displayed, but not executed
rm(M)      # removing variables
ls()       # now M is not in the workspace anymore

help(ls)   # show help on ls function
?ls        # equivalent command
help.search("ls")   # search the help system for "ls"

# create basic vector
test <- 1
test[3] <- 2   # add element (the skipped second element becomes NA)
test           # display

# MODE TYPES: numeric (double and integer), character, logical,
#             (raw, complex), list, function
mode(test)
mode(foo)

# logical operators == < >= !=, & | !
3 == 3   # returns TRUE (3 equals 3)
3 != 3   # returns FALSE (it is false that "3 is not equal to 3")
# 3 = 3  # returns an error, because single "=" is used for assignment
#        # (equivalent to "<-"); kept commented out so the script can be sourced

# playing with modes
bool <- 4 > 3
bool2 <- FALSE
mode(bool)
bool[2] <- "dsf"   # adding a string turns the whole vector into character
mode(bool)

#------- GENERATE VECTORS, MATRICES -----------

#____ operators c(), rbind, cbind ______
# c() combines stuff in one dimension (vector)
c(4, 3)
c(1:3)
c(10:-1)
cumsum(c(1:10))   # cumsum does the cumulative sum of a vector

# cbind/rbind combine stuff along the column/row dimension in 2 dimensions
cbind(1:2, 3:4)
cbind(c(1, 2), c(3, 4))
mymatrix <- rbind(c(1, 2), c(3, 4))
print(mymatrix)

#___ other stuff _____
# construct vectors:
seq(from = 10, to = 2, by = -2)
# equivalent to:
seq(10, 2, -2)
rep(1, 10)
rep("test", 10)   # rep() has no 'sep' argument; it only repeats its input

# construct matrix
matrix(1, 3, 3)
matrix(1:9, 3)
testmat <- matrix(1:12, 3, 4)

# accessing elements
mymatrix[2, 1]
mymatrix[, 1]

# reshape
matrix(testmat, 2, 6)

# some operations on matrices
mymat <- matrix(1:9, 3, 3)
length(mymat)   # no. of elements
dim(mymat)
max(mymat)
sum(mymat)
det(mymat)
eigen(mymat)

# array(): alternative to matrix()
# (1:4 is recycled to fill the 9 cells; matrix() warns about the length mismatch)
matrix(1:4, 3, 3)
array(1:4, dim = c(3, 3))

mymat[, 2] <- c(1, 2, 3)   # overwrite the second column
pmax(10:1, 1:10)           # element-wise maximum of two vectors

# comparing vectors
a <- 1:10
b <- rep(5, 10)
h <- a == b
print(h)

#------- WORKING WITH LISTS -----------

# create a list (object)
mylist <- list(bool, bool2)
# a list is an object that can hold all kinds of other objects:
# vectors, other lists, functions, etc.
mode(mylist)

# assess modes
mode(mylist[[1]])
mode(mylist[[2]])
ls(pattern = "bool")   # search for objects with name 'bool'

# manipulating lists (P is the price level defined in the basic introduction)
newlist <- c(mylist, P)
newlist
newlist <- list(mylist, P)
newlist
c(newlist, recursive = TRUE)
mode(unlist(newlist))

# accessing elements
otherlist <- list(hihi = 2, huhu = 2:3, haha = matrix(1, 3, 3))
names(otherlist)
otherlist$haha
# the same as:
otherlist[[3]]

# list and names!
names(newlist) <- c("a", "b")
newlist$a
newlist$b

#------- CLEANING UP -----------
ls(pattern = "mat")
# "|" means "or" in a regular expression; it must not be escaped with a
# backslash inside an R string ("\|" is an invalid escape and does not parse)
ls(pattern = "list|mat")             # list everything that is called with 'list' or 'mat'
rm(list = ls(pattern = "mat|list"))  # delete everything that is called with 'list' or 'mat'
rm(list = ls())                      # remove everything (use with care: wipes the whole workspace)

#------- RANDOM NUMBERS AND MATRIX ALGEBRA -----------
# simulate data and do OLS on it:
# matrix operators: t: transpose, solve: inverse, %*%: matrix mult
N <- 100   # no. obs.

# draw X matrix (normally distributed, 3 columns):
X <- matrix(rnorm(N * 3), N, 3)

# draw error terms:
e <- rnorm(N, 0, 2)

# make dep. variable:
y <- 10 * X[, 1] - 3 * X[, 2] + 5 * X[, 3] - 10 + e

# add a constant to the design matrix:
design <- cbind(rep(1, N), X)

# calc coefs. b = (X'X)^(-1) X'y
b <- solve(t(design) %*% design) %*% t(design) %*% y

# do ehat = [I - X(X'X)^(-1)X'] y
ehat <- (diag(N) - design %*% solve(t(design) %*% design) %*% t(design)) %*% y
print(b)

#------- CLASSES (variable types) -----------
# class can be "numeric", "logical", "character" or "list",
# but also "matrix", "array", "factor" and "data.frame"
# "mode" are general types, "class" is a finer specification
mymat <- matrix(1:9, 3, 3)
class(mymat)
mode(mymat)

#------- READING FILES AND DATA.FRAME -----------
# set working directory to the directory where you have your files stored
# (adapt this path to your own machine)
setwd("C:/Dokumente und Einstellungen/zeugner/Eigene Dateien/IHS")
# IMPORTANT: in your path, use forward slashes "/" instead of backward
# slashes "\" - also under Windows!
# read data from file
usdata <- read.csv("u_cpi_usa.csv")
# consider also read.table("u_cpi_usa.csv", sep = ",")
class(usdata)   # data.frame: a restricted list

# accessing elements of data.frame
names(usdata)
usdata$usa_cpi

# use 'attach' for easy handling
# NOTE: attach() exposes the columns as they are at the moment of attaching;
# later changes to usdata are not visible until you detach and attach again.
# Explicit access via usdata$column is the less error-prone alternative.
attach(usdata)   # tell the workspace to directly access usdata list elements
usa_cpi
# if this does not work use the following
detach(usdata)   # delink usdata list elements from workspace
attach(usdata)   # link usdata list elements from workspace

# summarize info
summary(usdata)

# calculate inflation (first entry is NA because diff() loses one observation)
infl <- c(NA, diff(log(usdata$usa_cpi)))

# add inflation to data frame
usdata <- cbind(usdata, infl)
names(usdata)[4] <- "usa_infl"

# assign the rownames
# (assumes the file has a column named 'date' - confirm against your data)
rownames(usdata) <- usdata$date

# drop 1st and 3rd column of usdata
usdata <- usdata[, -c(1, 3)]

# you have to do the attaching again to update information
detach(usdata)
attach(usdata)
# usa_cpi was dropped above, so only these two columns are left
usa_unemp
usa_infl
summary(usa_infl)

#------- ESTIMATING -----------
# lm() does OLS; 'data = usdata' makes explicit where the variables come from
lm(usa_unemp ~ usa_infl, data = usdata)
# glm(usa_unemp ~ usa_infl, family = "quasipoisson", data = usdata)

# assign results to object "model"
model <- lm(usa_unemp ~ usa_infl, data = usdata)
class(model)
names(model)
summary(model)

# access attributes of lm object
model$residuals

#------- PREPARE DATAFRAME FOR SAVING -----------
# we have to skip the first row name, as there is an NA in the first entry of
# usa_infl (that observation is not part of the estimation)
hats <- data.frame(
  ehat = model$residuals,
  yhat = model$fitted.values,
  row.names = row.names(usdata)[-1]
)

#------- SAVE TO FILE -----------
write.table(hats, "hatsout.txt")
# write to file with comma separator:
write.table(hats, "hatsout.csv", sep = ",")

# exit from R:
q()   # or q(save = "no")

# by Stefan Zeugner, 2007