#==============================================================================================================
Data and code from “Mutual mate guarding and limited sexual conflict
in a sex-role reversed shorebird” Contributor: Johannes Krietsch
📍 This script runs relative to the project’s root directory and
describes how I subset the breeding pairs and bind them with them
metadata.
#==============================================================================================================
# Packages
sapply( c('data.table', 'magrittr', 'sdb', 'ggplot2', 'viridis', 'auksRuak', 'sf', 'foreach', 'knitr',
'stringr', 'ggnewscale'),
require, character.only = TRUE)
## data.table magrittr sdb ggplot2 viridis auksRuak sf foreach knitr stringr ggnewscale
## TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Functions
source('./R/0_functions.R')
# Lines to run to create html output
opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
# rmarkdown::render('./R/6_subset_breeders.R', output_dir = './OUTPUTS/R_COMPILED')
# Projection
PROJ = '+proj=laea +lat_0=90 +lon_0=-156.653428 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 '
# Data
d = fread('./DATA/NANO_TAGS.txt', sep = '\t', header = TRUE, nThread = 20) %>% data.table
d = d[filtered == TRUE]
st_transform_DT(d)
dp = fread('./DATA/PRODUCTS/PAIR_WISE_INTERACTIONS.txt', sep = '\t', header = TRUE, nThread = 20) %>% data.table
dp[, year_ := year(datetime_1)]
dn = fread('./DATA/NESTS.txt', sep = '\t', header = TRUE, nThread = 20) %>% data.table
# change projection
st_transform_DT(dn)
# start and end of the data
d[, start := min(datetime_), by = ID]
d[, end := max(datetime_), by = ID]
dID = unique(d, by = c('ID', 'year_'))
# check if data overlap
dn = merge(dn, dID[, .(year_, male_id = ID, start_m = start, end_m = end)], by = c('male_id', 'year_'), all.x = TRUE)
dn = merge(dn, dID[, .(year_, female_id = ID, start_f = start, end_f = end)], by = c('female_id', 'year_'), all.x = TRUE)
# subset nests with both IDs tagged
dn[, both_tagged := !is.na(start_m) & !is.na(start_f), by = nestID]
# subset nests with both IDs tagged and overlapping time intervals
dn[, overlap := DescTools::Overlap(c(start_m, end_m), c(start_f, end_f)), by = nestID]
dn[, both_tagged_overlapping := overlap > 0, by = nestID]
dn[is.na(both_tagged_overlapping), both_tagged_overlapping := FALSE]
# check overlap with initiation date
dn[, overlap_initiation_m := DescTools::Overlap(c(start_m, end_m), c(initiation - 86400, initiation + 86400)),
by = nestID]
dn[, overlap_initiation_f := DescTools::Overlap(c(start_f, end_f), c(initiation - 86400, initiation + 86400)),
by = nestID]
dn[, both_tagged_at_initiation := overlap_initiation_m > 0 & overlap_initiation_f > 0, by = nestID]
dn[is.na(both_tagged_at_initiation), both_tagged_at_initiation := FALSE]
# any EPY as logic
dn[, anyEPY := as.logical(anyEPY)]
# nest data
dnID = dn[, .(year_, nestID, male_id, female_id, initiation, nest_state_date, anyEPY, m_tagged, f_tagged,
lat_n = lat, lon_n = lon, female_clutch, male_clutch, clutch_together, overlap, both_tagged_overlapping)]
dnID = unique(dnID, by = 'nestID')
# as integer
dnID[, male_id := as.integer(male_id)]
dnID[, female_id := as.integer(female_id)]
# data available relative to clutch initiation for each ID
dnIDu = rbind(dnID[!is.na(male_id), .(year_, ID = male_id, nestID, initiation, both_tagged_overlapping)],
dnID[!is.na(female_id), .(year_, ID = female_id, nestID, initiation, both_tagged_overlapping)])
# nests of tagged birds
ds = unique(dnID[m_tagged == TRUE | f_tagged == TRUE], by = c('male_id', 'female_id', 'nestID'))
ds[, .N, by = year_]
## year_ N
## 1: 2018 15
## 2: 2019 86
## year_ N
## 1: 2018 13
## 2: 2019 76
## year_ N
## 1: 2018 13
## 2: 2019 66
## year_ N
## 1: 2018 11
## 2: 2019 56
## year_ N
## 1: 2018 10
## 2: 2019 58
# by ID
ds = unique(dnID[m_tagged == TRUE | f_tagged == TRUE], by = c('male_id', 'female_id'))
ds[both_tagged_overlapping == TRUE, .N, by = year_] # four replacement clutches of same pair
## year_ N
## 1: 2018 10
## 2: 2019 54
# merge all with nest data
dID = merge(d, dnIDu, by = c('year_', 'ID'), all.x = TRUE, allow.cartesian = TRUE)
dID = dID[!is.na(nestID)]
# relative timing
dID[, initiation_day := as.Date(initiation)]
dID[, date_ := as.Date(datetime_)]
dID[, date_rel_pair := difftime(date_, initiation_day, units = 'days') %>% as.numeric()]
# unique by date_rel_pair
dID = dID[, .(N = .N), by = c('year_', 'nestID', 'ID', 'sex', 'date_rel_pair', 'both_tagged_overlapping')]
# check max
dID[, N] |> max()
## [1] 142
## [1] 142
ds = dnID[!is.na(male_id) & !is.na(female_id) & year_ > 2017 & !is.na(overlap) & overlap > 0] %>%
unique(., by = c('male_id', 'female_id', 'nestID'))
ds[, N := .N, by = .(male_id, female_id)]
ds[N > 1]
## year_ nestID male_id female_id initiation nest_state_date anyEPY m_tagged f_tagged lat_n lon_n female_clutch male_clutch
## 1: 2019 R406_19 270170938 270170935 2019-06-18 11:11:51 2019-06-21 17:55:00 FALSE TRUE TRUE -2076229 -652.0853 2 1
## 2: 2019 R320_19 270170938 270170935 2019-06-24 14:48:42 2019-07-07 12:06:00 FALSE TRUE TRUE -2076798 1260.7423 3 2
## 3: 2019 R502_19 273145126 270170942 2019-06-11 12:00:00 2019-06-14 00:02:36 FALSE TRUE TRUE -2076098 -329.0839 1 1
## 4: 2019 R218_19 273145126 270170942 2019-06-17 12:18:36 2019-06-21 15:52:00 FALSE TRUE TRUE -2076359 -270.4901 2 2
## 5: 2019 R905_19 273145139 270170970 2019-06-13 11:34:00 2019-06-16 15:33:59 FALSE TRUE TRUE -2077227 -94.4791 1 1
## 6: 2019 R913_19 273145139 270170970 2019-06-21 14:19:56 2019-07-08 04:14:00 FALSE TRUE TRUE -2076978 -404.3699 2 2
## 7: 2019 R201_19 270170620 273145050 2019-06-08 12:12:00 2019-06-18 10:37:00 FALSE TRUE TRUE -2076047 737.3589 1 1
## 8: 2019 R232_19 270170620 273145050 2019-06-22 14:01:25 2019-07-19 17:27:00 FALSE TRUE TRUE -2075940 1801.6723 2 2
## clutch_together overlap both_tagged_overlapping N
## 1: 1 975057 TRUE 2
## 2: 2 975057 TRUE 2
## 3: 1 950209 TRUE 2
## 4: 2 950209 TRUE 2
## 5: 1 783930 TRUE 2
## 6: 2 783930 TRUE 2
## 7: 1 1395702 TRUE 2
## 8: 2 1395702 TRUE 2
# number of pairs with data for both at the same time
ds %>% unique(., by = c('male_id', 'female_id')) %>% nrow
## [1] 64
## [1] 10
## [1] 54
## [1] 68
# look at pairs with two clutches
ggplot(data = dp[ID1 == 270170620 & ID2 == 273145050]) +
geom_tile(aes(datetime_1, pairID, fill = interaction), width = 900, show.legend = FALSE) +
scale_fill_manual(values = c('TRUE' = 'green4', 'FALSE' = 'firebrick3', 'NA' = 'grey50')) +
geom_vline(aes(xintercept = as.POSIXct('2019-06-18 10:37:00')), color = 'black', size = 3, alpha = 0.5) +
xlab('Date') + ylab('Nest') +
theme_classic()
ggplot(data = dp[ID1 == 270170938 & ID2 == 270170935]) +
geom_tile(aes(datetime_1, pairID, fill = interaction), width = 900, show.legend = FALSE) +
scale_fill_manual(values = c('TRUE' = 'green4', 'FALSE' = 'firebrick3', 'NA' = 'grey50')) +
geom_vline(aes(xintercept = as.POSIXct('2019-06-21 17:55:00')), color = 'black', size = 3, alpha = 0.5) +
xlab('Date') + ylab('Nest') +
theme_classic()
ggplot(data = dp[ID1 == 273145126 & ID2 == 270170942]) +
geom_tile(aes(datetime_1, pairID, fill = interaction), width = 900, show.legend = FALSE) +
scale_fill_manual(values = c('TRUE' = 'green4', 'FALSE' = 'firebrick3', 'NA' = 'grey50')) +
geom_vline(aes(xintercept = as.POSIXct('2019-06-14 00:02:36')), color = 'black', size = 3, alpha = 0.5) +
xlab('Date') + ylab('Nest') +
theme_classic()
ggplot(data = dp[ID1 == 273145139 & ID2 == 270170970]) +
geom_tile(aes(datetime_1, pairID, fill = interaction), width = 900, show.legend = FALSE) +
scale_fill_manual(values = c('TRUE' = 'green4', 'FALSE' = 'firebrick3', 'NA' = 'grey50')) +
geom_vline(aes(xintercept = as.POSIXct('2019-06-16 15:33:59')), color = 'black', size = 3, alpha = 0.5) +
xlab('Date') + ylab('Nest') +
theme_classic()
# merge with first nest number
dp = merge(dp, dnID[clutch_together == 1, .(male_id, female_id, year_, clutch_together)],
by.x = c('ID1', 'ID2', 'year_'), by.y = c('male_id', 'female_id', 'year_'), all.x = TRUE)
# assign second clutches
dp[ID1 == 270170620 & ID2 == 273145050 & datetime_1 > as.POSIXct('2019-06-18 10:37:00'), clutch_together := 2]
dp[ID1 == 270170938 & ID2 == 270170935 & datetime_1 > as.POSIXct('2019-06-21 17:55:00'), clutch_together := 2]
dp[ID1 == 273145126 & ID2 == 270170942 & datetime_1 > as.POSIXct('2019-06-14 00:02:36'), clutch_together := 2]
dp[ID1 == 273145139 & ID2 == 270170970 & datetime_1 > as.POSIXct('2019-06-16 15:33:59'), clutch_together := 2]
# merge with nests
dp = merge(dp, dnID, by.x = c('ID1', 'ID2', 'year_', 'clutch_together'),
by.y = c('male_id', 'female_id', 'year_', 'clutch_together'), all.x = TRUE)
unique(dp[!is.na(nestID)], by = 'nestID') |> nrow()
## [1] 68
# datetime relative to nest initiation date
dp[, datetime_rel_pair := difftime(datetime_1, initiation, units = 'days') %>% as.numeric()]
# datetime relative to nest initiation date
dp[, date_ := as.Date(datetime_1)]
dp[, initiation_day := as.Date(initiation)]
dp[, date_rel_pair := difftime(date_, initiation_day, units = 'days') %>% as.numeric()]
# same sex interaction?
dp[, same_sex := sex1 == sex2]
# breeding pair
dp[, breeding_pair := !is.na(nestID)]
# Assign polyandry
# polyandrous females
xf = dn[polyandrous == TRUE]$female_id |> unique()
dp[ID2 %in% xf, f_polyandrous := TRUE]
dp[is.na(f_polyandrous), f_polyandrous := FALSE]
# first nests
x1 = dn[polyandrous == TRUE & female_clutch == 1]$nestID |> unique()
dp[nestID %in% x1, f_polyandrous_first := TRUE]
dp[is.na(f_polyandrous_first), f_polyandrous_first := FALSE]
# second nest
x2 = dn[polyandrous == TRUE & female_clutch == 2]$nestID |> unique()
dp[nestID %in% x2, f_polyandrous_second := TRUE]
dp[is.na(f_polyandrous_second), f_polyandrous_second := FALSE]
# renesting females
# first nests of renesting females
x1 = dn[polyandrous == FALSE & female_clutch == 1 & N_female_clutch > 1 |
polyandrous == TRUE & female_clutch == 2 & N_female_clutch > 2]$nestID |> unique()
dp[nestID %in% x1, f_renesting_first := TRUE]
# second nests of renesting females
x2 = dn[polyandrous == FALSE & female_clutch == 2 & N_female_clutch > 1 |
polyandrous == TRUE & female_clutch == 3 & N_female_clutch > 2]$nestID |> unique()
dp[nestID %in% x2, f_renesting_second := TRUE]
# relative timing
dn[, initiation_day := as.Date(initiation)]
di = dn[!is.na(year_) & data_type == 'study_site', .(initiation_mean = mean(initiation_day, na.rm = TRUE)),
by = year_]
di[, initiation_mean := as.character(initiation_mean) |> as.Date()]
dp = merge(dp, di, by = 'year_', all.x = TRUE)
# relative initiation date
dp[, initiation_rel := difftime(initiation_day, initiation_mean, units = 'days') %>% as.numeric]
# distance to nest
dps = dp[!is.na(nestID)]
dps[, distance_nest_1 := sqrt(sum((c(lon1, lat1) - c(lon_n, lat_n))^2)), by = 1:nrow(dps)]
dps[, at_nest1 := distance_nest_1 < 15]
dps[, distance_nest_2 := sqrt(sum((c(lon2, lat2) - c(lon_n, lat_n))^2)), by = 1:nrow(dps)]
dps[, at_nest2 := distance_nest_2 < 15]
# merge back with data
dp = merge(dp, dps[, .(ID1, ID2, datetime_1, nestID, at_nest1, at_nest2)],
by = c('ID1', 'ID2', 'datetime_1', 'nestID'), all.x = TRUE)
# N pairwise data per day
dpn = dp[!is.na(date_rel_pair), .N, by = .(pairID, nestID, date_rel_pair)]
# check max
dpn[, N] |> max()
## [1] 142
# proportion of locations send
dpn[, Np := N / 142]
# merge with dp
dp = merge(dp, dpn, by = c('pairID', 'nestID', 'date_rel_pair'), all.x = TRUE)
# subset
dps =
dp[breeding_pair == TRUE & sex1 == 'M',
.(pairID, year_, ID1, ID2, sex1, sex2, datetime_1, datetime_2, N, Np, date_, date_rel_pair,
datetime_rel_pair, distance_pair, interaction, split, merge, IDsplitting, IDmerging, distance1_before,
distance2_before, nestID, initiation, initiation_day, initiation_rel, at_nest1,
at_nest2, female_clutch, anyEPY, breeding_pair, f_polyandrous, f_polyandrous_first,
f_polyandrous_second, f_renesting_first, f_renesting_second, type = 'breeding pair')]
# save data
# fwrite(dps, './DATA/PAIR_WISE_INTERACTIONS_BREEDING_PAIRS.txt', quote = TRUE, sep = '\t', row.names = FALSE)
# unique data
du = unique(dp, by = c('pairID'))
ds = unique(dp, by = c('nestID', 'date_rel_pair'))
ds = ds[!is.na(date_rel_pair)]
# nests to exclude (second clutches)
n2 = c('R201_19', 'R231_19', 'R905_19', 'R502_19')
ds = ds[!(nestID %in% n2)]
# data needed for null model
d0 = ds[, .(date_rel_pair, date_)] %>% unique
# subset non-breeders pairs within breeders
breeding_males = du[breeding_pair == TRUE]$ID1
breeding_females = du[breeding_pair == TRUE]$ID2
dps = dp[breeding_pair == FALSE & ID1 %in% breeding_males & ID2 %in% breeding_females]
# subset all interactions on days with relative initiation date for breeders
x = d0$date_rel_pair %>% unique
d0a = foreach(i = x, .combine = 'rbind') %do% {
# subset relative day
dss = d0[date_rel_pair == i]
# subset all pairwise interactions from this date
dsms = dps[date_ %in% dss$date_]
# assign type
dsms[, date_rel_pair := i]
dsms
}
d0a[, date_rel_pair_type := 'null_model']
# subset pairwise data with at least one interaction per day
# pairwise positions non-breeder pairs
dpn = d0a[, .N, by = .(pairID, date_, date_rel_pair)]
setnames(dpn, 'N', 'Nnb')
# proportion of locations send
dpn[, Np_nb := Nnb / 142]
d0a = merge(d0a, dpn, by = c('pairID', 'date_', 'date_rel_pair'), all.x = TRUE)
# Proportion of time together
dps = d0a[interaction == TRUE, .(N_int_nb = .N), by = .(pairID, date_, date_rel_pair)]
du = unique(d0a, by = c('pairID', 'date_', 'date_rel_pair'))
du = merge(du, dps, by = c('pairID', 'date_', 'date_rel_pair'), all.x = TRUE)
du[is.na(N_int_nb), N_int_nb := 0]
du[, int_prop_nb := N_int_nb / Nnb]
# merge back with full table
d0a = merge(d0a, du[, .(pairID, date_, date_rel_pair, N_int_nb, int_prop_nb)],
by = c('pairID', 'date_', 'date_rel_pair'), all.x = TRUE)
# subset only pairs with at least one interaction
d0a = d0a[int_prop_nb > 0]
# subset pairs with at least 50% data
d0a = d0a[Np_nb >= 0.5]
# select random pairs for each day
ds = unique(d0a, by = c('pairID', 'date_', 'date_rel_pair'))
# shuffle data
set.seed(210191)
ds = ds[sample(dim(ds)[1])]
# subset pairs
ds[, date_rel_pair_id := seq_len(.N), by = date_rel_pair]
ds = ds[date_rel_pair_id <= 50]
ds[, sub := paste0(pairID, '_', date_, '_', date_rel_pair)]
d0a[, sub := paste0(pairID, '_', date_, '_', date_rel_pair)]
d0as = d0a[sub %in% ds$sub]
ds[date_rel_pair >= -10 & date_rel_pair <= 10, .N, date_rel_pair]
## date_rel_pair N
## 1: 2 50
## 2: 6 50
## 3: -2 50
## 4: -4 50
## 5: 9 50
## 6: 8 50
## 7: 1 50
## 8: 4 50
## 9: 3 50
## 10: -3 50
## 11: -5 50
## 12: 0 50
## 13: -8 50
## 14: -7 50
## 15: -1 50
## 16: 10 50
## 17: -6 50
## 18: 7 50
## 19: 5 50
## 20: -9 50
## 21: -10 50
## date_rel_pair N
# merge with nest data from female
duf = unique(dp[!is.na(nestID)], by = c('ID2', 'year_'))
# delete columns in d0as
d0as[, nestID := NULL]
d0as[, initiation := NULL]
d0as[, initiation_rel := NULL]
d0as = merge(d0as, duf[, .(year_, ID2, nestID, initiation, initiation_rel)],
by = c('year_', 'ID2'), all.x = TRUE)
d0as[is.na(nestID)]
## Empty data.table (0 rows and 78 cols): year_,ID2,pairID,date_,date_rel_pair,ID1...
# rename order and save
d0as = d0as[,
.(pairID, year_, ID1, ID2, sex1, sex2, datetime_1, datetime_2, N = Nnb, Np = Np_nb, date_,
date_rel_pair, datetime_rel_pair, distance_pair, interaction, split, merge, nestID, initiation,
initiation_day, initiation_rel, type = 'randomization')]
# save data
# fwrite(d0as, './DATA/PAIR_WISE_INTERACTIONS_BREEDING_PAIRS_RANDOM.txt', quote = TRUE, sep = '\t', row.names = FALSE)
# version information
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=German_Germany.utf8 LC_CTYPE=German_Germany.utf8 LC_MONETARY=German_Germany.utf8 LC_NUMERIC=C
## [5] LC_TIME=German_Germany.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggnewscale_0.4.8 stringr_1.5.0 knitr_1.42 foreach_1.5.2 sf_1.0-13 auksRuak_0.1.0 viridis_0.6.2 viridisLite_0.4.1
## [9] ggplot2_3.4.0 sdb_2019.12 magrittr_2.0.3 data.table_1.14.8
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.4 sass_0.4.5 RMySQL_0.10.25 jsonlite_1.8.4 bit64_4.0.5 bslib_0.4.2 expm_0.999-7
## [8] askpass_1.1 highr_0.10 gld_2.6.6 lmom_2.9 cellranger_1.1.0 yaml_2.3.7 pillar_1.8.1
## [15] lattice_0.20-45 glue_1.6.2 filenamr_0.1.0 digest_0.6.31 cliExtras_0.1.0 sfext_0.1.0 snakecase_0.11.0
## [22] colorspace_2.1-0 htmltools_0.5.4 Matrix_1.5-1 pkgconfig_2.0.3 ggspatial_1.1.7 purrr_1.0.1 mvtnorm_1.1-3
## [29] scales_1.2.1 rootSolve_1.8.2.3 timechange_0.2.0 tibble_3.1.8 proxy_0.4-27 farver_2.1.1 generics_0.1.3
## [36] cachem_1.0.6 withr_2.5.0 janitor_2.1.0 cli_3.6.0 readxl_1.4.1 evaluate_0.20 fansi_1.0.4
## [43] MASS_7.3-58.1 anytime_0.3.9 class_7.3-20 tools_4.2.2 RMariaDB_1.2.2 hms_1.1.3 lifecycle_1.0.3
## [50] Exact_3.2 munsell_0.5.0 jquerylib_0.1.4 compiler_4.2.2 e1071_1.7-13 rlang_1.0.6 classInt_0.4-9
## [57] units_0.8-2 grid_4.2.2 iterators_1.0.14 rstudioapi_0.14 rmarkdown_2.20 boot_1.3-28 DescTools_0.99.47
## [64] gtable_0.3.1 codetools_0.2-18 DBI_1.1.3 R6_2.5.1 gridExtra_2.3 lubridate_1.9.2 dplyr_1.1.0
## [71] fastmap_1.1.0 bit_4.0.5 utf8_1.2.3 rprojroot_2.0.3 KernSmooth_2.23-20 stringi_1.7.12 Rcpp_1.0.10
## [78] vctrs_0.5.2 tidyselect_1.2.0 xfun_0.40