Skip to contents

The rSDP package provides a simple interface for discovering, querying, and subsetting data products that are incorporated into the RMBL Spatial Data Platform. The RMBL SDP provides a set of curated, high-resolution, and high-fidelity geospatial datasets for a set of domains in Western Colorado (USA) in the vicinity of Rocky Mountain Biological Laboratory. For more information about the RMBL SDP see here.

SDP data products are provided as geospatial raster datasets in cloud-optimized Geotiff (COG) format. The rSDP package provides functions to access these datasets in cloud storage (Amazon S3) without downloading.

Installation

You can install the latest version of rSDP from GitHub with:

# install.packages("remotes")
remotes::install_github("rmbl-sdp/rSDP")

Installing the development version of the leaflet package is currently required for web maps.

remotes::install_github("rstudio/leaflet")

Discovering SDP Data and Metadata

The package provides functions sdp_get_catalog(), and sdp_get_metadata() that download information about what datasets are currently available and what their spatial attributes are.

library(rSDP)

## Gets entries for vegetation data products in the Upper Gunnison (UG) domain.
sdp_cat <- sdp_get_catalog(domains="UG", 
                           types="Vegetation",
                           deprecated=FALSE,
                           return_stac=FALSE)
sdp_cat[,1:5]
#>    CatalogID  Release       Type                       Product Domain
#> 54    R3D013 Release3 Vegetation              Understory Cover     UG
#> 55    R3D014 Release3 Vegetation       Vegetation Canopy Cover     UG
#> 56    R3D015 Release3 Vegetation      Vegetation Canopy Height     UG
#> 57    R3D016 Release3 Vegetation 20th Percentile Canopy Height     UG
#> 58    R3D017 Release3 Vegetation 80th Percentile Canopy Height     UG
#> 59    R3D018 Release3 Vegetation               Basic Landcover     UG
#> 60    R3D019 Release3 Vegetation        October 2017 NAIP NDVI     UG
#> 61    R3D020 Release3 Vegetation       Septober 2019 NAIP NDVI     UG
#> 73     BM012 Basemaps Vegetation      Canopy Structure Basemap     UG
#> 74     BM013 Basemaps Vegetation             Landcover Basemap     UG
## Grabs detailed metadata for a specific dataset.
item_meta <- sdp_get_metadata(catalog_id="R1D001",return_list=TRUE)

## Prints the detailed description.
item_description <- item_meta$qgis$abstract[[1]]
print(item_description)
#> [1] "This map represents estimated stream flowlines from a hydrologically corrected digital elevation model. The lines were derived in GRASS GIS using a multi-direction algorithm that allows channel braiding. Each stream segment is identified by a unique integer. Stream lines were delineated for drainage areas greater than 512000 square meters.\n"

Accessing SDP data in the cloud.

The function sdp_get_raster(), creates R representations of cloud-based datasets that can be used for further processing, returning a SpatRaster which can be further manipulated using functions in the terra package.

## Creates a `SpatRaster` object for a dataset.
dem <- sdp_get_raster(catalog_id="R3D009")
terra::plot(dem)

Alternatively, you can plot these data on a web map:

terra::plet(dem,tiles="Esri.WorldImagery")

Downloading SDP data locally.

By default, sdp_get_raster() connects to cloud-based datasets without downloading them locally, but specifying download_files=TRUE and providing a local file path will download the raster data to disk. This can sometimes speed up operations that would be prohibitively slow with cloud-based data sources:

## Creates a local `SpatRaster`
dem_local <- sdp_get_raster(catalog_id="R3D009",
                            download_files=TRUE,
                            download_path="~/Downloads", 
                            overwrite=FALSE)

Extracting samples of SDP data.

The function sdp_extract_data() extracts samples from datasets at locations represented by points, lines, or polygons.

## Extracts values of an SDP dataset.
elev <- sdp_get_raster(catalog_id="R3D009")
slope <- sdp_get_raster(catalog_id="R3D012")

location_df <- data.frame(SiteName=c("Roaring Judy","Gothic","Galena Lake"),
                          Lat=c(38.716995,38.958446,39.021644),
                          Lon=c(-106.853186,-106.988934,-107.072569))
location_sv <- terra::vect(location_df,geom=c("Lon","Lat"),crs="EPSG:4327")

dem_sample <- sdp_extract_data(raster=elev,locations=location_sv)
#> [1] "Re-projecting locations to coordinate system of the raster."
#> [1] "Extracting data at 3 locations for 1 raster layers."
#> [1] "Extraction complete."
slope_sample <- sdp_extract_data(raster=slope,locations=dem_sample)
#> [1] "Extracting data at 3 locations for 1 raster layers."
#> [1] "Extraction complete."
plot(slope_sample$UG_dem_3m_v1,slope_sample$UG_dem_slope_1m_v1,xlab="Elevation (m).",
     ylab="Slope (degrees)")

With line or polygon locations sdp_extract_data() summarizes raster values by line or polygon. The default method computes the mean value for each polygon, but you can also specify other summary functions using the sum_fun argument.

slope <- sdp_get_raster(catalog_id="R3D012")

location_poly <- data.frame(SiteName=c("Wet","Conifer","Rocky"),
                            WKT=c("POLYGON ((327651 4313638,327620 4313727,327693 4313759, 327651 4313638))",
                                   "POLYGON ((327340 4314059,327450 4314026,327418 4313970,327340 4314059))",
                                   "POLYGON ((328193 4314314,328285 4314274,328244 4314223, 328193 4314314))"))
location_poly_sv <- terra::vect(location_poly,geom="WKT",crs="EPSG:32613")

slope_site_mean <- sdp_extract_data(raster=slope,locations=location_poly_sv)
#> [1] "Extracting data at 3 locations for 1 raster layers."
#> [1] "Extraction complete."
slope_site_sd <- sdp_extract_data(raster=slope,locations=slope_site_mean,
                                  sum_fun=sd,bind=TRUE)
#> [1] "Extracting data at 3 locations for 1 raster layers."
#> [1] "Extraction complete."
names(slope_site_sd) <- c("SiteName","ID","Slope_mean","ID2","Slope_sd")

plot(slope_site_sd$Slope_mean,slope_site_sd$Slope_sd,xlab="Slope Mean (deg.)",
     ylab="Slope Standard Deviation",pch="")
text(slope_site_sd$Slope_mean,slope_site_sd$Slope_sd,labels=slope_site_sd$SiteName)

You can also return all the cell values intersecting each line or polygon by specifying sum_fun=NULL. Passing the argument exact=TRUE with polygon features returns the proportion of each raster cell included in the polygon (useful for computing area-weighted means.)

slope_allcells <- sdp_extract_data(raster=slope,locations=slope_site_mean,
                                  sum_fun=NULL,exact=TRUE,bind=FALSE)
#> [1] "Extracting data at 3 locations for 1 raster layers."
#> [1] "Extraction complete."
head(slope_allcells)
#>   ID UG_dem_slope_1m_v1   fraction
#> 1  1           4.609455 0.01733949
#> 2  1           6.018766 0.34246161
#> 3  1           9.753592 0.60725906
#> 4  1           3.104769 0.06934668
#> 5  1           6.362072 0.46575703
#> 6  1           6.617933 0.88677505

Working with raster time-series

The sdp_get_raster() and sdp_extract_data() functions also provide some convenience features for subsetting time-series datasets by day or year.

## Connects to rasters from a temporal subset of daily data.
tmax <- sdp_get_raster("R4D004",date_start=as.Date("2011-12-01"),date_end=as.Date("2011-12-30"))
#> [1] "Returning daily dataset with 30 layers..."

## Further subsets when extracting data
tmax_sample <- sdp_extract_data(tmax,location_sv,date_start=as.Date("2011-12-01"),date_end=as.Date("2011-12-20"))
#> [1] "Re-projecting locations to coordinate system of the raster."
#> [1] "Extracting data at 3 locations for 20 raster layers."
#> [1] "Extraction complete."
tmax_df <- as.data.frame(tmax_sample)
dates <- as.Date(names(tmax_df)[3:ncol(tmax_sample)])
sites <- tmax_df$SiteName

##Plots the result
plot(dates,tmax_df[1,3:ncol(tmax_sample)],type="l",ylab="Tmax (C)",ylim=c(-15,7))
points(dates,tmax_df[2,3:ncol(tmax_sample)],type="l",col=3)
points(dates,tmax_df[3,3:ncol(tmax_sample)],type="l",col=4)
legend("bottomright", legend=sites,col=c(1,3,4),bty="n",lty=1)


##Retrieving rasters from a subset of years.
snow_yearly <- sdp_get_raster("R4D001",years=c(2012,2019))
#> [1] "Returning yearly dataset with 2 layers..."
terra::plot(snow_yearly,range=c(60,230))

Extracting data from large time-series datasets.

For extracting subsets of large datasets, it’s sometimes a good idea to loop over small subsets rather than extracting from a single large raster object with many (sometimes hundreds) of layers.

## Extracts with a single call.
start1 <- Sys.time()
tmax1 <- sdp_get_raster("R4D004",date_start=as.Date("2004-10-01"),date_end=as.Date("2004-10-31"))
#> [1] "Returning daily dataset with 31 layers..."

tmax_extr1 <- sdp_extract_data(tmax1,location_sv,verbose=FALSE)
elapsed1 <- Sys.time() - start1

## Loops over layers (different subset to avoid cacheing).
start2 <- Sys.time()
tmax2 <- sdp_get_raster("R4D004",date_start=as.Date("2005-10-01"),date_end=as.Date("2005-10-31"),
                        verbose=FALSE)
locations_proj <- terra:::project(location_sv,"EPSG:32613")

extr_list <- list()
for(i in 1:terra::nlyr(tmax2)){
  extr_dat <- sdp_extract_data(tmax2[[i]],locations_proj,verbose=FALSE)[,3]
  extr_list[[i]] <- extr_dat
}
tmax_extr2 <- do.call(cbind,extr_list)
elapsed2 <- Sys.time() - start2

## Loops over creating the raster object itself. 
## This is slower single threaded, but can be more easily made parallel.
start3 <- Sys.time()
days <- seq(as.Date("2006-10-01"),as.Date("2006-10-31"),by="day")
extr_list3 <- list()
for(i in 1:length(days)){
  tmax3 <- sdp_get_raster("R4D004",date_start=days[i],date_end=days[i],verbose=FALSE)
  extr_dat <- sdp_extract_data(tmax3,locations_proj,verbose=FALSE)[,3]
  extr_list3[[i]] <- extr_dat
}
tmax_extr3 <- do.call(cbind,extr_list3)
elapsed3 <- Sys.time() - start3

## Parallel extraction via foreach.
library(foreach)
library(doParallel)
#> Loading required package: iterators
#> Loading required package: parallel
library(sf)
#> Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE

## Can't pass SpatVector or SpatRaster objects via Foreach, so convert to sf.
locations_sf <- st_as_sf(location_sv)

start4 <- Sys.time()
cl <- parallel::makeCluster(4)
doParallel::registerDoParallel(cl)
days <- seq(as.Date("2007-10-01"),as.Date("2007-10-31"),by="day")

extr_list4 <- foreach::foreach(i=1:length(days),.packages=c("sf","terra","rSDP")) %dopar% {
  tmax4 <- sdp_get_raster("R4D007",date_start=days[i],
                         date_end=days[i],verbose=FALSE)
   locations_sv <- vect(locations_sf)
   extr_dat <- sdp_extract_data(tmax4,locations=locations_sv,
                                verbose=FALSE,return_type="sf")[,4]
   (st_drop_geometry(extr_dat))
}
parallel::stopCluster(cl)
tmax_extr4 <- do.call(cbind,extr_list4)
elapsed4 <- Sys.time() - start4

##Collects timings.
timings <- data.frame(approach=c("Single Call","Looping sdp_extract_data()","Looping over sdp_get_raster()","Foreach"),
                      timing=c(elapsed1,elapsed2,elapsed3,elapsed4))
timings
#>                        approach        timing
#> 1                   Single Call 19.74518 secs
#> 2    Looping sdp_extract_data() 27.44984 secs
#> 3 Looping over sdp_get_raster() 29.54908 secs
#> 4                       Foreach 35.03244 secs