In lesson 07 we will make the jump to mult-dimensional data and begin working with BlueSky netcdf output. To work with netcdf files we will have to use a netCDF package. R has several including ‘ncdf’, ‘RNetCDF’ and ‘ncdf4’. We recommend ‘ncdf4’. This package has a very straightforward interface for anyone familiar with netcdf and uses sensible defaults for _FillValue, etc.

Before we begin, however, we need to download the desired file as the ncdf4 package only works on local files, not URLs.

downloadBSOutput()

The following function will download a BlueSky netcdf file to your local directory for further analysis:

downloadBSOutput <- function(model="PNW-1.33km", date="2014090100") {
  
  # Build the URL
  URLBase <- "http://smoke.airfire.org/bluesky-daily/output/standard"
  fileURL <- paste0(URLBase, "/", model, "/", date, "/data/smoke_dispersion.nc")
  
  # Create a descriptive file name
  fileName <- paste0(model,"_",date,".nc")
  
  # Download the data to your working directory if it's not already there
  if (!file.exists(fileName)) download.file(url=fileURL, destfile=fileName)
  
  # return file name
  return(fileName)
  
}

# Download a netcdf file to work with
fileName <- downloadBSOutput("PNW-1.33km", "2014092500")

Now we’re ready to get started. Let’s begin by loading the ncdf4 package and then do some initial exploration of the BlueSky output file. Have a look at the package documentation help(package="ncdf4") to see what commands are available to you and to learn more about the arguments accepted by each command. We will start by:

library(ncdf4)   # for netcdf capabilities

nc <- nc_open(fileName)
class(nc)
## [1] "ncdf4"
names(nc)
##  [1] "filename"    "writable"    "id"          "safemode"    "format"     
##  [6] "is_GMT"      "groups"      "fqgn2Rindex" "ndims"       "natts"      
## [11] "dim"         "unlimdimid"  "nvars"       "var"
for (name in names(nc)) print(paste0(name,': ',class(nc[[name]])))
## [1] "filename: character"
## [1] "writable: logical"
## [1] "id: integer"
## [1] "safemode: logical"
## [1] "format: character"
## [1] "is_GMT: logical"
## [1] "groups: list"
## [1] "fqgn2Rindex: list"
## [1] "ndims: numeric"
## [1] "natts: numeric"
## [1] "dim: list"
## [1] "unlimdimid: numeric"
## [1] "nvars: numeric"
## [1] "var: list"
names(nc$dim)
## [1] "TSTEP"     "DATE-TIME" "LAY"       "VAR"       "ROW"       "COL"
class(nc$dim$COL)
## [1] "ncdim4"
names(nc$var)
## [1] "TFLAG" "PM25"
class(nc$var$PM25)
## [1] "ncvar4"
names(nc$var$PM25)
##  [1] "id"                 "name"               "ndims"             
##  [4] "natts"              "size"               "dimids"            
##  [7] "prec"               "units"              "longname"          
## [10] "group_index"        "chunksizes"         "storage"           
## [13] "shuffle"            "compression"        "dims"              
## [16] "dim"                "varsize"            "unlim"             
## [19] "make_missing_value" "missval"            "hasAddOffset"      
## [22] "hasScaleFact"
print(nc)
## File PNW-1.33km_2014092500.nc (NC_FORMAT_CLASSIC):
## 
##      2 variables (excluding dimension variables):
##         int TFLAG[DATE-TIME,VAR,TSTEP]   
##             units: <YYYYDDD,HHMMSS>
##             long_name: TFLAG           
##             var_desc: Timestep-valid flags:  (1) YYYYDDD or (2) HHMMSS                                
##         float PM25[COL,ROW,LAY,TSTEP]   
##             long_name: PM25            
##             units: ug/m^3          
##             var_desc: PM25                                                                            
## 
##      6 dimensions:
##         TSTEP  Size:47   *** is unlimited ***
##         DATE-TIME  Size:2
##         LAY  Size:1
##         VAR  Size:1
##         ROW  Size:421
##         COL  Size:1201
## 
##     33 global attributes:
##         IOAPI_VERSION: $Id: @(#) ioapi library version 3.0 $                                           
##         EXEC_ID: ????????????????                                                                
##         FTYPE: 1
##         CDATE: 2014268
##         CTIME: 170416
##         WDATE: 2014268
##         WTIME: 170416
##         SDATE: 2014268
##         STIME: 10000
##         TSTEP: 10000
##         NTHIK: 1
##         NCOLS: 1201
##         NROWS: 421
##         NLAYS: 1
##         NVARS: 1
##         GDTYP: 1
##         P_ALP: 0
##         P_BET: 0
##         P_GAM: 0
##         XCENT: -120.080001831055
##         YCENT: 46.8699989318848
##         XORIG: -126.080001831055
##         YORIG: 44.7700004577637
##         XCELL: 0.00999999977648258
##         YCELL: 0.00999999977648258
##         VGTYP: 5
##         VGTOP: -9999
##         VGLVLS: 100
##          VGLVLS: 0
##         GDNAM: HYSPLIT CONC    
##         UPNAM: hysplit2netCDF  
##         VAR-LIST: PM25            
##         FILEDESC: Hysplit Concentration Model Output                                              lat-lon coordinate system                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
##         HISTORY:

From this output we see that this file contains a ‘TFLAG’ variable as well as a ‘PM2.5’ variable that is defined on a grid consisting of the following dimension: ‘[COL,ROW,LAY,TSTEP]’. (The LAY grid variable has only a single element and is degenerate.)

Unfortunately, this netcdf file is incomplete because it does not define variables for the grid dimensions. If we print out the values of TSTEP, ROW and COL we see only indices and not the floating point values of time, latitude and longitude as we might expect:

nc$dim$TSTEP$vals[1:20]
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
nc$dim$COL$vals[1:20]
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
nc$dim$ROW$vals[1:20]
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20

As it turns out, values for the time are stored in the 2-D ‘TFLAG’ variable with ASCII representations of Year-Julian day and Hour-Minute-Second stored in separate rows. We can see this by using ncvar_get() to extract this variable from the file. (See ?ncvar_get to read about specifying vectors of starting indices and counts to specify what block of a multi-dimensional array to access.)

Note: The TFLAG is stored as a 3-D variable on a [DATE-TIME,VAR,TSTEP] grid but VAR is degenerate.

ncvar_get(nc, 'TFLAG', start=c(1,1,1), count=c(-1,-1,-1))
##         [,1]    [,2]    [,3]    [,4]    [,5]    [,6]    [,7]    [,8]
## [1,] 2014268 2014268 2014268 2014268 2014268 2014268 2014268 2014268
## [2,]   10000   20000   30000   40000   50000   60000   70000   80000
##         [,9]   [,10]   [,11]   [,12]   [,13]   [,14]   [,15]   [,16]
## [1,] 2014268 2014268 2014268 2014268 2014268 2014268 2014268 2014268
## [2,]   90000  100000  110000  120000  130000  140000  150000  160000
##        [,17]   [,18]   [,19]   [,20]   [,21]   [,22]   [,23]   [,24]
## [1,] 2014268 2014268 2014268 2014268 2014268 2014268 2014268 2014269
## [2,]  170000  180000  190000  200000  210000  220000  230000       0
##        [,25]   [,26]   [,27]   [,28]   [,29]   [,30]   [,31]   [,32]
## [1,] 2014269 2014269 2014269 2014269 2014269 2014269 2014269 2014269
## [2,]   10000   20000   30000   40000   50000   60000   70000   80000
##        [,33]   [,34]   [,35]   [,36]   [,37]   [,38]   [,39]   [,40]
## [1,] 2014269 2014269 2014269 2014269 2014269 2014269 2014269 2014269
## [2,]   90000  100000  110000  120000  130000  140000  150000  160000
##        [,41]   [,42]   [,43]   [,44]   [,45]   [,46]   [,47]
## [1,] 2014269 2014269 2014269 2014269 2014269 2014269 2014269
## [2,]  170000  180000  190000  200000  210000  220000  230000

There is a lot that can be improved in this file and the following function demonstrates the actions required

bs2v2()

It opens an old-style BlueSky NetCDF file and creates a new, more sensible version.

library(stringr) # for str_replace()
library(ncdf4)   # for netcdf capabilities

bs2v2 <- function(fileName) {

  # open nc file
  old_nc <- nc_open(fileName)
  
  # ----- Create latitude and longitude axes -----------------------------------

  # Current (index) values
  row <- old_nc$dim$ROW$vals
  col <- old_nc$dim$COL$vals
  
  # Useful information is found in the global attributes
  globalAttributes <- ncatt_get(old_nc, varid=0) # varid=0 means 'global'
  
  # Use names(globalAttributes) to see the names of the elements contained in this list
  
  # NOTE:  globalAttributes is of class 'list'
  # NOTE:  Access list elements with either 'listName[[objectName]]' or 'listName$objectName' notation
  
  XORIG <- globalAttributes[["XORIG"]] # x origin
  YORIG <- globalAttributes[["YORIG"]] # y origin
  XCENT <- globalAttributes[["XCENT"]] # x center
  YCENT <- globalAttributes[["YCENT"]] # y center
  
  # Now we have enough information about the domain to figure out the n, e, s, w corners
  w <- XORIG
  e <- XORIG + 2 * abs(XCENT - XORIG)
  s <- YORIG
  n <- YORIG + 2 * (YCENT - YORIG)  
  
  # Knowing the grid dimensions and the true corners we can define legitimate lat/lon dimensions
  lat <- seq(s, n, length.out=length(row))
  lon <- seq(w, e, length.out=length(col))
  
  # ----- Create time axis -----------------------------------------------------
  
  # Temporal information is stored in the 'TFLAG' variable
  tflag <- ncvar_get(old_nc, "TFLAG")
  
  # NOTE:  'TFLAG' is a matrix object with two rows, one containing the year and Julian day, 
  # NOTE:  the other containing time in HHMMSS format. We will paste matrix elements together
  # NOTE:  with 'paste()'.  The 'sprintf()' function is useful for C-style string formatting.
  # NOTE:  Here we use it to add leading 0s to create a string that is six characters long.
  time_str <- paste0(tflag[1,], sprintf(fmt="%06d", tflag[2,]))
  
  # We use 'strptime()' to convert our character index to a "POSIXct" value.
  time <- strptime(x=time_str, format="%Y%j%H%M%S", tz="GMT")
  
  # ----- Create new ncdf4 object ----------------------------------------------
  
  # Get PM25 values
  # NOTE:  The degenerate 'LAY' dimension disppears so that 'pm25' is now 3D, not 4D. 
  pm25 <- ncvar_get(old_nc, "PM25")
  
  # Convert time to numeric value for storing purposes
  numericTime <- as.numeric(time)
  
  # Define dimensions
  latDim <- ncdim_def("lat", "Degrees North", lat) 
  lonDim <- ncdim_def("lon", "Degrees East", lon)  
  timeDim <- ncdim_def("time", "seconds from 1970-1-1", numericTime)  
  
  # Define variables
  pm25Var <- ncvar_def(name="PM25", units="ug/m^3", dim=list(lonDim, latDim, timeDim), missval=-1e30)
  
  # Create a new netcdf file 
  fileName_v2 <- str_replace(fileName, ".nc", "_v2.nc")
  new_nc <- nc_create(fileName_v2, pm25Var)
  
  # Put data into the newly defined variable 
  ncvar_put(new_nc, pm25Var, pm25)
  
  # Close the file
  nc_close(new_nc)
  
}

# Now run this function on the file we just downloaded
bs2v2(fileName)
list.files(pattern='*.nc')
## [1] "PNW-1.33km_2014092500_v2.nc" "PNW-1.33km_2014092500.nc"

At this point, our environment is pretty cluttered up with things – we have a lot of in-memory objects as you can see with ls(). Sometimes it is a good idea to close up open files and clear out the environment before reading in new data.

# Close original nc file
nc_close(nc)

# Remove everything except the object named 'fileName'
fileNameMask <- ls() %in% 'fileName'
rm( list=ls()[!fileNameMask] )

# Reload libraries
library(stringr) # for str_replace()
library(ncdf4)   # for netcdf capabilities

# Load newly created nc file and take a look
fileName_v2 <- str_replace(fileName, ".nc", "_v2.nc")
nc <- nc_open(fileName_v2)
nc
## File PNW-1.33km_2014092500_v2.nc (NC_FORMAT_CLASSIC):
## 
##      1 variables (excluding dimension variables):
##         float PM25[lon,lat,time]   
##             units: ug/m^3
##             _FillValue: -1.00000001504747e+30
## 
##      3 dimensions:
##         lon  Size:1201
##             units: Degrees East
##             long_name: lon
##         lat  Size:421
##             units: Degrees North
##             long_name: lat
##         time  Size:47
##             units: seconds from 1970-1-1
##             long_name: time
# What about our dimensions?
nc$dim$time$vals[1:20]
##  [1] 1411606800 1411610400 1411614000 1411617600 1411621200 1411624800
##  [7] 1411628400 1411632000 1411635600 1411639200 1411642800 1411646400
## [13] 1411650000 1411653600 1411657200 1411660800 1411664400 1411668000
## [19] 1411671600 1411675200
nc$dim$lon$vals[1:20]
##  [1] -126.08 -126.07 -126.06 -126.05 -126.04 -126.03 -126.02 -126.01
##  [9] -126.00 -125.99 -125.98 -125.97 -125.96 -125.95 -125.94 -125.93
## [17] -125.92 -125.91 -125.90 -125.89
nc$dim$lat$vals[1:20]
##  [1] 44.77 44.78 44.79 44.80 44.81 44.82 44.83 44.84 44.85 44.86 44.87
## [12] 44.88 44.89 44.90 44.91 44.92 44.93 44.94 44.95 44.96
# With this improved datafile we can use ncvar_get() to read in all of the data
pm25 <- ncvar_get(nc, "PM25") 
time <- ncvar_get(nc, "time")
time <- as.POSIXct(time, origin="1970-1-1", tz="GMT")
lat <- ncvar_get(nc, "lat")
lon <- ncvar_get(nc, "lon")

# Close the file now that everything is in memory
nc_close(nc)

Plotting BlueSky data

Let’s finish up with a couple of plots to show how easy it is to work with this data

# Example map
library(maps)
image(lon,lat,log(pm25[,,10]))
map('state',add=TRUE)
map('county',col='gray80',add=TRUE)

titleText <- paste0("BlueSky output for ",time[10])
title(titleText)

# Example timeseries
plot(pm25[910,199,] ~ time, ylim=c(0,500), col='white')
for (i in 908:912) {
  for (j in 197:201) {
    points(pm25[i,j,] ~ time, pch=16, col=adjustcolor('black',0.25))
  }
}
tickmarkHours <- (time[1] - 3600) + c(6,12,18,30,36,42)*3600
axis.POSIXct(1,at=tickmarkHours, format="%H")

titleText <- paste0('BlueSky timeseries centered on ',round(lon[910],2),'° E, ',round(lat[199],2),'° N')
title(titleText)

Several new techniques were presented in this lesson without explanation, including:

Lesson 08 along with will cover multi-dimensional arrays and the use of the apply() function. Lesson 09 will focus on working with dates and times.