In this lab you can use the interactive console to explore or Knit the document. Remember anything you type here can be “sent” to the console with Cmd-Enter (OS-X) or Ctrl-Enter (Windows/Linux) in an R code chunk.
Read in the data with `read_csv()` from the `readr` package and call the output `youth`.
library(readr)
youth <- read_csv("http://jhudatascience.org/intro_to_R_class/data/Youth_Tobacco_Survey_YTS_Data.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_character(),
## YEAR = col_double(),
## Data_Value = col_double(),
## Data_Value_Std_Err = col_double(),
## Low_Confidence_Limit = col_double(),
## High_Confidence_Limit = col_double(),
## Sample_Size = col_double(),
## DisplayOrder = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
Use `head()` to display the first few rows of the data frame.
head(youth)
## # A tibble: 6 x 31
## YEAR LocationAbbr LocationDesc TopicType TopicDesc MeasureDesc DataSource
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 2015 AZ Arizona Tobacco U… Cessation… Percent of C… YTS
## 2 2015 AZ Arizona Tobacco U… Cessation… Percent of C… YTS
## 3 2015 AZ Arizona Tobacco U… Cessation… Percent of C… YTS
## 4 2015 AZ Arizona Tobacco U… Cessation… Quit Attempt… YTS
## 5 2015 AZ Arizona Tobacco U… Cessation… Quit Attempt… YTS
## 6 2015 AZ Arizona Tobacco U… Cessation… Quit Attempt… YTS
## # … with 24 more variables: Response <chr>, Data_Value_Unit <chr>,
## # Data_Value_Type <chr>, Data_Value <dbl>, Data_Value_Footnote_Symbol <chr>,
## # Data_Value_Footnote <chr>, Data_Value_Std_Err <dbl>,
## # Low_Confidence_Limit <dbl>, High_Confidence_Limit <dbl>, Sample_Size <dbl>,
## # Gender <chr>, Race <chr>, Age <chr>, Education <chr>, GeoLocation <chr>,
## # TopicTypeId <chr>, TopicId <chr>, MeasureId <chr>, StratificationID1 <chr>,
## # StratificationID2 <chr>, StratificationID3 <chr>, StratificationID4 <chr>,
## # SubMeasureID <chr>, DisplayOrder <dbl>
Use `spec()` to display the list of column names and their type specification.
spec(youth)
## cols(
## YEAR = col_double(),
## LocationAbbr = col_character(),
## LocationDesc = col_character(),
## TopicType = col_character(),
## TopicDesc = col_character(),
## MeasureDesc = col_character(),
## DataSource = col_character(),
## Response = col_character(),
## Data_Value_Unit = col_character(),
## Data_Value_Type = col_character(),
## Data_Value = col_double(),
## Data_Value_Footnote_Symbol = col_character(),
## Data_Value_Footnote = col_character(),
## Data_Value_Std_Err = col_double(),
## Low_Confidence_Limit = col_double(),
## High_Confidence_Limit = col_double(),
## Sample_Size = col_double(),
## Gender = col_character(),
## Race = col_character(),
## Age = col_character(),
## Education = col_character(),
## GeoLocation = col_character(),
## TopicTypeId = col_character(),
## TopicId = col_character(),
## MeasureId = col_character(),
## StratificationID1 = col_character(),
## StratificationID2 = col_character(),
## StratificationID3 = col_character(),
## StratificationID4 = col_character(),
## SubMeasureID = col_character(),
## DisplayOrder = col_double()
## )
Load the `readxl` package with the `library()` command. If it is not installed, install it via: RStudio --> Tools --> Install Packages. You can also try `install.packages("readxl")`.
library(readxl)
Download the Excel file `Monuments.xlsx`.
# Fetch the workbook into the current working directory.
# `mode = "wb"` requests a binary transfer, which Windows needs so the .xlsx
# file is not corrupted. `overwrite` is not an argument of download.file()
# (it was silently absorbed by `...`), so it is dropped here; an existing
# destfile is replaced by the download either way.
download.file(
  "http://jhudatascience.org/intro_to_R_class/data/Monuments.xlsx",
  destfile = "Monuments.xlsx",
  mode = "wb"
)
Use the `read_excel()` function in the `readxl` package to read the `Monuments.xlsx` file and call the output `mon`.
mon <- read_excel("Monuments.xlsx")
Check the current working directory with `getwd()`.
getwd()
## [1] "/Users/avahoffman/Dropbox/JHSPH/intro_to_r/Data_IO/lab"
Write out the `mon` object as a CSV file calling it “monuments.csv”, using `write_csv()`. Where is the file now?
write_csv(mon, "monuments.csv")
Save the vector `y` below in `rds` format. Call the file `my_variable.rds`.
y <- c(10, 20, 30, 40, 50, 60)
write_rds(y, "my_variable.rds")