## Class survey walkthrough: load the cleaned survey CSV, rename its columns,
## inspect the `math` and `OS` answers, then draw histograms of the built-in
## `faithful` dataset with different `breaks` settings.
##
## Lines starting with ## are comments; R will ignore them.
## Note the autocomplete ability (use tab).

## Load dataset, change the path for your case.
## Examples of possible paths:
##   C:/Users/survey-cleaned-2022-09-23.csv
##   /Users/survey-cleaned-2022-09-23.csv
classSurv <- read.csv("/home/apua/Desktop/survey-cleaned-2022-09-23.csv")

## Column header variable names
names(classSurv)

## Rename header variable names
surveyNames <- c(
  "math", "math.log", "metrics", "metrics.tf", "stats", "stats.tf",
  "height", "height.mom", "height.pop", "OS", "junk", "stylus", "stylus.use"
)
## Fail early if the file does not have one column per new name; assigning
## too few names would otherwise silently leave NA column names behind.
stopifnot(length(surveyNames) == ncol(classSurv))
names(classSurv) <- surveyNames

## Gives the variable name of the 5th column.
## Maybe useful when trying to change one of the variable names only.
names(classSurv)[5]

## Seeing the values of the math column
classSurv$math

## Distribution of values of the math column
table(classSurv$math)

## Shows which of the observations correspond to the correct answer,
## but does not "save" the results.
classSurv$math == 2

## These two commands both work, but the second one might be better given the
## situation with the math variable (presumably it is not read in as purely
## numeric -- check with str(classSurv)).
## na.rm = TRUE keeps a missing answer from turning the whole count into NA.
sum(classSurv$math == 2, na.rm = TRUE)
sum(classSurv$math == "2", na.rm = TRUE)

## Shows how many students are Windows users
sum(classSurv$OS == "Windows", na.rm = TRUE)

## Old Faithful dataset, available in R
## Default settings
hist(faithful$eruptions)

## Change the breaks. A single number is only a suggestion for the number of
## cells, so the bins actually drawn may differ from the value given.
hist(faithful$eruptions, breaks = 2)
hist(faithful$eruptions, breaks = 200)