This vignette shows how to filter WATLAS data based on spatial boundaries, temporal specifications, error estimates and speed.
Spatial filtering
Depending on the analysis, can make sense to use a spatial filter to exclude large outliers or subset only data from an area of interest. In this example we subset all data around Griend, Richel and Ballastplaat. Note that in this example all data are within the specified bounding box, so no data will be filtered out.
# location east of Griend
griend_east <- st_sfc(st_point(c(5.275, 53.2523)), crs = st_crs(4326)) |>
st_transform(crs = st_crs(32631))
# rectangle including Griend, Richel and Ballastplaat
bbox <- atl_bbox(griend_east, asp = "16:9", buffer = 8000)
bbox_sf <- st_as_sfc(bbox)
# create a base map
bm <- atl_create_bm(buffer = 10000)
# Round data to 200 m grid cells
data_heatmap <- copy(data)
data_heatmap <- data_heatmap[, c("x_round", "y_round") := list(
plyr::round_any(x, 200),
plyr::round_any(y, 200)
)]
data_heatmap <- data_heatmap[, .N, by = c("x_round", "y_round")]
# plot data with bounding box
bm +
geom_tile(
data = data_heatmap, aes(x_round, y_round, fill = N),
linewidth = 0.1, show.legend = FALSE
) +
geom_sf(data = bbox_sf, color = "firebrick", fill = NA) +
scale_fill_viridis(
option = "A", discrete = FALSE, trans = "log10", name = "N positions",
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(10^.x)),
direction = -1
) +
coord_sf(expand = FALSE)
# filter data with bounding box
# note: for large tables use range and not sf_polygon
data <- atl_filter_bounds(
data = data,
x = "x",
y = "y",
x_range = c(bbox["xmin"], bbox["xmax"]),
y_range = c(bbox["ymin"], bbox["ymax"]),
remove_inside = FALSE
)
Temporal filtering
We now want to filter all positions of the tag before the bird was released. Since it usually takes a bit of time until birds are adjusted from the catching and getting used to the new tag, we also exclude 24 hours after release. Sometimes the tag still gives data after it fell off or the bird died, in this case we can exclude these positions too. To identify such circumstances, it can help to plot for example the last 1000 positions of each tag (see vignette: plotting data).
Some birds might have also directly left the area after release, so depending on the project it might be useful to already exclude birds with few data at this point (e.g. here with less than 100 positions - none of the birds is filtered out in this case using the example data).
# Load meta data
all_tags_path <- system.file(
"extdata", "tags_watlas_subset.xlsx", package = "tools4watlas"
)
all_tags <- readxl::read_excel(all_tags_path, sheet = "tags_watlas_all") |>
data.table()
# correct time zone to CET and change to UTC
all_tags[, release_ts := force_tz(as_datetime(release_ts), tzone = "CET")]
all_tags[, release_ts := with_tz(release_ts, tzone = "UTC")]
# join release_ts with data
all_tags[, tag := as.character(tag)]
data[all_tags, on = "tag", `:=`(release_ts = i.release_ts)]
# exclude positions before the release and 24h after
data <- data[datetime > release_ts + 24 * 3600]
# exclude positions after this date (bird died):
data <- data[!(tag == "3103" &
datetime > as.POSIXct("2023-09-25 15:00:00", tz = "UTC"))]
# exclude tags with less than 100 positions
data[, N := .N, tag]
data[N < 100] |> unique(by = "tag")
data <- data[N > 100]
# remove unneeded columns
data[, release_ts := NULL]
data[, N := NULL]
Filtering location errors
Based on WATLAS error estimate
5000 is typically used by Allert.
# filter based on variance in the Easting and Northing
var_max <- 5000 # in meters squared
data <- atl_filter_covariates(
data = data,
filters = c(
sprintf("varx < %s", var_max),
sprintf("vary < %s", var_max)
)
)
## Note: 0.54% of the dataset was filtered out, corresponding to 470 positions.
Based on speed
This will filter more unrealistic positions.
# calculate speed
data <- atl_get_speed(data,
type = c("in", "out")
)
# plot speed (subset relevant range)
ggplot(data = data[!is.na(speed_in) & speed_in > 5 & speed_in < 100]) +
geom_histogram(aes(x = speed_in), bins = 50) +
labs(x = "Speed in (m/s)") +
theme_bw()
# filter by speed
speed_max <- 40 # m/s (144 km/h)
data <- atl_filter_covariates(
data = data,
filters = c(
sprintf("speed_in < %s | is.na(speed_in)", speed_max),
sprintf("speed_out < %s | is.na(speed_out)", speed_max)
)
)
## Note: 0.23% of the dataset was filtered out, corresponding to 200 positions.
# recalculate speed
data <- atl_get_speed(data,
type = c("in", "out")
)
Save data
# Save data
fwrite(data, file = "../inst/extdata/watlas_data_filtered.csv", yaml = TRUE)