This function retrieves the time-lagged values of a variable, using the time variable defined in `.t` in the function or by `as_pibble()`. `tlag()` is highly unusual among time-lag functions in that it is usable even if observations are not uniquely identified by `.t` (and `.i`, if defined).
tlag( .var, .df = get(".", envir = parent.frame()), .n = 1, .default = NA, .quick = FALSE, .resolve = "error", .group_i = TRUE, .i = NULL, .t = NULL, .d = NA, .uniqcheck = FALSE )
.var | Unquoted variable from |
---|---|
.df | Data frame, pibble, or tibble (usually the object that contains |
.n | Number of periods to lag by. 1 by default. Note that this is automatically scaled by |
.default | Fill-in value used when lagged observation is not present. Defaults to NA. |
.quick | If |
.resolve | If there is more than one observation per individual/period, and the value of |
.group_i | By default, if |
.i | Quoted or unquoted variable(s) that identify the individual cases. Note that setting any one of |
.t | Quoted or unquoted variable indicating the time. |
.d | Number indicating the gap in |
.uniqcheck | Logical parameter. Set to TRUE to always check whether |
```r
data(Scorecard)

# The Scorecard data is uniquely identified by unitid and year.
# However, there are sometimes gaps between years.
# In cases like this, using dplyr::lag() will still use the row before,
# whereas tlag() will respect the gap and give a NA, much like plm::lag()
# (although tlag is slower than either, sorry)
Scorecard <- Scorecard %>%
  dplyr::mutate(pmdplyr_tlag = tlag(earnings_med,
    .i = unitid,
    .t = year
  ))
Scorecard <- Scorecard %>%
  dplyr::arrange(year) %>%
  dplyr::group_by(unitid) %>%
  dplyr::mutate(dplyr_lag = dplyr::lag(earnings_med)) %>%
  dplyr::ungroup()

# more NAs in the pmdplyr version - observations with a gap and thus no real lag present in data
sum(is.na(Scorecard$pmdplyr_tlag))
#> [1] 26987
sum(is.na(Scorecard$dplyr_lag))
#> [1] 16950

# If we want to ignore gaps, or have .d = 0, and .i and .t uniquely identify observations,
# we can use the .quick option to match dplyr::lag()
Scorecard <- Scorecard %>%
  dplyr::mutate(pmdplyr_quick_tlag = tlag(earnings_med,
    .i = unitid,
    .t = year,
    .d = 0,
    .quick = TRUE
  ))
sum(Scorecard$dplyr_lag != Scorecard$pmdplyr_quick_tlag, na.rm = TRUE)
#> [1] 0

# Where tlag shines is when you have multiple observations per .i/.t
# If the value of .var is constant within .i/.t, it will work just as you expect.
# If it's not, it will throw an error, or you can set
# .resolve to tell tlag how to select a single value from the many

# Maybe we want to get the lagged average earnings within degree award type
Scorecard <- Scorecard %>%
  dplyr::mutate(
    last_year_earnings_by_category = tlag(earnings_med,
      .i = pred_degree_awarded_ipeds,
      .t = year,
      .resolve = function(x) mean(x, na.rm = TRUE)
    )
  )

# Or maybe I want the lagged earnings across all types - .i isn't necessary!
Scorecard <- Scorecard %>%
  dplyr::mutate(last_year_earnings_all = tlag(earnings_med,
    .t = "year",
    .resolve = function(x) mean(x, na.rm = TRUE)
  ))

# Curious why the first nonmissing obs show up in 2012?
# It's because there's no 2008 or 2010 in the data, so when 2009 or 2011 look back
# a year, they find nothing!
# We could get around this by setting .d = 0 to ignore gap length
# Note this can be a little slow.
Scorecard <- Scorecard %>%
  dplyr::mutate(last_year_earnings_all = tlag(earnings_med,
    .t = year,
    .d = 0,
    .resolve = function(x) mean(x, na.rm = TRUE)
  ))
```