Define breeding pairs
How many breeders with overlap? Divide first and second clutch nest data
Merge dp with nest data
Nest attendance
Subset relevant data
Randomization for interaction base line

#==============================================================================================================

Data and code from “Mutual mate guarding and limited sexual conflict in a sex-role reversed shorebird” Contributor: Johannes Krietsch
📍 This script runs relative to the project’s root directory and describes how I subset the breeding pairs and bind them with them metadata.

#==============================================================================================================

# Packages
sapply( c('data.table', 'magrittr', 'sdb', 'ggplot2', 'viridis', 'auksRuak', 'sf', 'foreach', 'knitr', 
          'stringr', 'ggnewscale'), 
        require, character.only = TRUE)

## data.table   magrittr        sdb    ggplot2    viridis   auksRuak         sf    foreach      knitr    stringr ggnewscale 
##       TRUE       TRUE       TRUE       TRUE       TRUE       TRUE       TRUE       TRUE       TRUE       TRUE       TRUE

# Functions
source('./R/0_functions.R')

# Lines to run to create html output
opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())
# rmarkdown::render('./R/6_subset_breeders.R', output_dir = './OUTPUTS/R_COMPILED')

# Projection
PROJ = '+proj=laea +lat_0=90 +lon_0=-156.653428 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 '

# Data
d = fread('./DATA/NANO_TAGS.txt', sep = '\t', header = TRUE, nThread = 20) %>% data.table
d = d[filtered == TRUE]
st_transform_DT(d)

dp = fread('./DATA/PRODUCTS/PAIR_WISE_INTERACTIONS.txt', sep = '\t', header = TRUE, nThread = 20) %>% data.table
dp[, year_ := year(datetime_1)]

dn = fread('./DATA/NESTS.txt', sep = '\t', header = TRUE, nThread = 20) %>% data.table

# change projection
st_transform_DT(dn)

Define breeding pairs

# start and end of the data
d[, start := min(datetime_), by = ID]
d[, end   := max(datetime_), by = ID]
dID = unique(d, by = c('ID', 'year_'))

# check if data overlap
dn = merge(dn, dID[, .(year_, male_id = ID, start_m = start, end_m = end)], by = c('male_id', 'year_'), all.x = TRUE)
dn = merge(dn, dID[, .(year_, female_id = ID, start_f = start, end_f = end)], by = c('female_id', 'year_'), all.x = TRUE)

# subset nests with both IDs tagged
dn[, both_tagged := !is.na(start_m) & !is.na(start_f), by = nestID]

# subset nests with both IDs tagged and overlapping time intervals
dn[, overlap := DescTools::Overlap(c(start_m, end_m), c(start_f, end_f)), by = nestID]
dn[, both_tagged_overlapping := overlap > 0, by = nestID]
dn[is.na(both_tagged_overlapping), both_tagged_overlapping := FALSE]

# check overlap with initiation date
dn[, overlap_initiation_m := DescTools::Overlap(c(start_m, end_m), c(initiation - 86400, initiation + 86400)), 
   by = nestID]
dn[, overlap_initiation_f := DescTools::Overlap(c(start_f, end_f), c(initiation - 86400, initiation + 86400)), 
   by = nestID]
dn[, both_tagged_at_initiation := overlap_initiation_m > 0 & overlap_initiation_f > 0, by = nestID]
dn[is.na(both_tagged_at_initiation), both_tagged_at_initiation := FALSE]

# any EPY as logic 
dn[, anyEPY := as.logical(anyEPY)]

# nest data
dnID = dn[, .(year_, nestID, male_id, female_id, initiation, nest_state_date, anyEPY, m_tagged, f_tagged,
              lat_n = lat, lon_n = lon, female_clutch, male_clutch, clutch_together, overlap, both_tagged_overlapping)]
dnID = unique(dnID, by = 'nestID')

# as integer
dnID[, male_id := as.integer(male_id)]
dnID[, female_id := as.integer(female_id)]

# data available relative to clutch initiation for each ID
dnIDu = rbind(dnID[!is.na(male_id), .(year_, ID = male_id, nestID, initiation, both_tagged_overlapping)],
              dnID[!is.na(female_id), .(year_, ID = female_id, nestID, initiation, both_tagged_overlapping)])


# nests of tagged birds
ds = unique(dnID[m_tagged == TRUE | f_tagged == TRUE], by = c('male_id', 'female_id', 'nestID'))
ds[, .N, by = year_]

##    year_  N
## 1:  2018 15
## 2:  2019 86

ds[m_tagged == TRUE, .N, by = year_]

##    year_  N
## 1:  2018 13
## 2:  2019 76

ds[f_tagged == TRUE, .N, by = year_]

##    year_  N
## 1:  2018 13
## 2:  2019 66

ds[f_tagged == TRUE & m_tagged == TRUE, .N, by = year_]

##    year_  N
## 1:  2018 11
## 2:  2019 56

ds[both_tagged_overlapping == TRUE, .N, by = year_]

##    year_  N
## 1:  2018 10
## 2:  2019 58

# by ID
ds = unique(dnID[m_tagged == TRUE | f_tagged == TRUE], by = c('male_id', 'female_id'))
ds[both_tagged_overlapping == TRUE, .N, by = year_] # four replacement clutches of same pair

##    year_  N
## 1:  2018 10
## 2:  2019 54

# merge all with nest data
dID = merge(d, dnIDu, by = c('year_', 'ID'), all.x = TRUE, allow.cartesian = TRUE)
dID = dID[!is.na(nestID)]

# relative timing
dID[, initiation_day := as.Date(initiation)]
dID[, date_ := as.Date(datetime_)]
dID[, date_rel_pair := difftime(date_, initiation_day, units = 'days') %>% as.numeric()]

# unique by date_rel_pair
dID = dID[, .(N = .N), by = c('year_', 'nestID', 'ID', 'sex', 'date_rel_pair', 'both_tagged_overlapping')]

# check max
dID[, N] |> max()

## [1] 142

142 # max possible

## [1] 142

# proportion of day with data
dID[, Np := N / 142]

# save data
# fwrite(dID, './DATA/NANO_TAGS_UNIQUE_BY_DAY.txt', quote = TRUE, sep = '\t', row.names = FALSE)

How many breeders with overlap? Divide first and second clutch nest data

ds = dnID[!is.na(male_id) & !is.na(female_id) & year_ > 2017 & !is.na(overlap) & overlap > 0] %>% 
  unique(., by = c('male_id', 'female_id', 'nestID'))

ds[, N := .N, by = .(male_id, female_id)]
ds[N > 1]

##    year_  nestID   male_id female_id          initiation     nest_state_date anyEPY m_tagged f_tagged    lat_n     lon_n female_clutch male_clutch
## 1:  2019 R406_19 270170938 270170935 2019-06-18 11:11:51 2019-06-21 17:55:00  FALSE     TRUE     TRUE -2076229 -652.0853             2           1
## 2:  2019 R320_19 270170938 270170935 2019-06-24 14:48:42 2019-07-07 12:06:00  FALSE     TRUE     TRUE -2076798 1260.7423             3           2
## 3:  2019 R502_19 273145126 270170942 2019-06-11 12:00:00 2019-06-14 00:02:36  FALSE     TRUE     TRUE -2076098 -329.0839             1           1
## 4:  2019 R218_19 273145126 270170942 2019-06-17 12:18:36 2019-06-21 15:52:00  FALSE     TRUE     TRUE -2076359 -270.4901             2           2
## 5:  2019 R905_19 273145139 270170970 2019-06-13 11:34:00 2019-06-16 15:33:59  FALSE     TRUE     TRUE -2077227  -94.4791             1           1
## 6:  2019 R913_19 273145139 270170970 2019-06-21 14:19:56 2019-07-08 04:14:00  FALSE     TRUE     TRUE -2076978 -404.3699             2           2
## 7:  2019 R201_19 270170620 273145050 2019-06-08 12:12:00 2019-06-18 10:37:00  FALSE     TRUE     TRUE -2076047  737.3589             1           1
## 8:  2019 R232_19 270170620 273145050 2019-06-22 14:01:25 2019-07-19 17:27:00  FALSE     TRUE     TRUE -2075940 1801.6723             2           2
##    clutch_together overlap both_tagged_overlapping N
## 1:               1  975057                    TRUE 2
## 2:               2  975057                    TRUE 2
## 3:               1  950209                    TRUE 2
## 4:               2  950209                    TRUE 2
## 5:               1  783930                    TRUE 2
## 6:               2  783930                    TRUE 2
## 7:               1 1395702                    TRUE 2
## 8:               2 1395702                    TRUE 2

# number of pairs with data for both at the same time
ds %>% unique(., by = c('male_id', 'female_id')) %>% nrow

## [1] 64

ds[year_ == 2018] %>% unique(., by = c('male_id', 'female_id')) %>% nrow

## [1] 10

ds[year_ == 2019] %>% unique(., by = c('male_id', 'female_id')) %>% nrow

## [1] 54

# by nests 
ds %>% unique(., by = c('male_id', 'female_id', 'nestID')) %>% nrow

## [1] 68

# look at pairs with two clutches 
ggplot(data = dp[ID1 == 270170620 & ID2 == 273145050]) +
  geom_tile(aes(datetime_1, pairID, fill = interaction), width = 900, show.legend = FALSE) +
  scale_fill_manual(values = c('TRUE' = 'green4', 'FALSE' = 'firebrick3', 'NA' = 'grey50')) +
  geom_vline(aes(xintercept = as.POSIXct('2019-06-18 10:37:00')), color = 'black', size = 3, alpha = 0.5) +
  xlab('Date') + ylab('Nest') +
  theme_classic()

ggplot(data = dp[ID1 == 270170938 & ID2 == 270170935]) +
  geom_tile(aes(datetime_1, pairID, fill = interaction), width = 900, show.legend = FALSE) +
  scale_fill_manual(values = c('TRUE' = 'green4', 'FALSE' = 'firebrick3', 'NA' = 'grey50')) +
  geom_vline(aes(xintercept = as.POSIXct('2019-06-21 17:55:00')), color = 'black', size = 3, alpha = 0.5) +
  xlab('Date') + ylab('Nest') +
  theme_classic()

ggplot(data = dp[ID1 == 273145126 & ID2 == 270170942]) +
  geom_tile(aes(datetime_1, pairID, fill = interaction), width = 900, show.legend = FALSE) +
  scale_fill_manual(values = c('TRUE' = 'green4', 'FALSE' = 'firebrick3', 'NA' = 'grey50')) +
  geom_vline(aes(xintercept = as.POSIXct('2019-06-14 00:02:36')), color = 'black', size = 3, alpha = 0.5) +
  xlab('Date') + ylab('Nest') +
  theme_classic()

ggplot(data = dp[ID1 == 273145139 & ID2 == 270170970]) +
  geom_tile(aes(datetime_1, pairID, fill = interaction), width = 900, show.legend = FALSE) +
  scale_fill_manual(values = c('TRUE' = 'green4', 'FALSE' = 'firebrick3', 'NA' = 'grey50')) +
  geom_vline(aes(xintercept = as.POSIXct('2019-06-16 15:33:59')), color = 'black', size = 3, alpha = 0.5) +
  xlab('Date') + ylab('Nest') +
  theme_classic()

# decided to split based on date first clutch failed

# delete column 
dnID[, overlap := NULL]

Merge dp with nest data

# merge with first nest number
dp = merge(dp, dnID[clutch_together == 1, .(male_id, female_id, year_, clutch_together)], 
           by.x = c('ID1', 'ID2', 'year_'), by.y = c('male_id', 'female_id', 'year_'), all.x = TRUE)

# assign second clutches
dp[ID1 == 270170620 & ID2 == 273145050 & datetime_1 > as.POSIXct('2019-06-18 10:37:00'), clutch_together := 2]
dp[ID1 == 270170938 & ID2 == 270170935 & datetime_1 > as.POSIXct('2019-06-21 17:55:00'), clutch_together := 2]
dp[ID1 == 273145126 & ID2 == 270170942 & datetime_1 > as.POSIXct('2019-06-14 00:02:36'), clutch_together := 2]
dp[ID1 == 273145139 & ID2 == 270170970 & datetime_1 > as.POSIXct('2019-06-16 15:33:59'), clutch_together := 2]

# merge with nests
dp = merge(dp, dnID, by.x = c('ID1', 'ID2', 'year_', 'clutch_together'), 
           by.y = c('male_id', 'female_id', 'year_', 'clutch_together'), all.x = TRUE)

unique(dp[!is.na(nestID)], by = 'nestID') |> nrow()

## [1] 68

# datetime relative to nest initiation date
dp[, datetime_rel_pair := difftime(datetime_1, initiation, units = 'days') %>% as.numeric()]

# datetime relative to nest initiation date
dp[, date_ := as.Date(datetime_1)]
dp[, initiation_day := as.Date(initiation)]
dp[, date_rel_pair := difftime(date_, initiation_day, units = 'days') %>% as.numeric()]

# same sex interaction?
dp[, same_sex := sex1 == sex2]

# breeding pair
dp[, breeding_pair := !is.na(nestID)]

# Assign polyandry
# polyandrous females
xf = dn[polyandrous == TRUE]$female_id |> unique()
dp[ID2 %in% xf, f_polyandrous := TRUE]
dp[is.na(f_polyandrous), f_polyandrous := FALSE]

# first nests
x1 = dn[polyandrous == TRUE & female_clutch == 1]$nestID |> unique()
dp[nestID %in% x1, f_polyandrous_first := TRUE]
dp[is.na(f_polyandrous_first), f_polyandrous_first := FALSE]

# second nest
x2 = dn[polyandrous == TRUE & female_clutch == 2]$nestID |> unique()
dp[nestID %in% x2, f_polyandrous_second := TRUE]
dp[is.na(f_polyandrous_second), f_polyandrous_second := FALSE]

# renesting females
# first nests of renesting females
x1 = dn[polyandrous == FALSE & female_clutch == 1 & N_female_clutch > 1 |
          polyandrous == TRUE & female_clutch == 2 & N_female_clutch > 2]$nestID |> unique()
dp[nestID %in% x1, f_renesting_first := TRUE]

# second nests of renesting females
x2 = dn[polyandrous == FALSE & female_clutch == 2 & N_female_clutch > 1 |
          polyandrous == TRUE & female_clutch == 3 & N_female_clutch > 2]$nestID |> unique()
dp[nestID %in% x2, f_renesting_second := TRUE]

# relative timing 
dn[, initiation_day := as.Date(initiation)]
di = dn[!is.na(year_) & data_type == 'study_site', .(initiation_mean = mean(initiation_day, na.rm = TRUE)), 
        by = year_]
di[, initiation_mean := as.character(initiation_mean) |> as.Date()]

dp = merge(dp, di, by = 'year_', all.x = TRUE)

# relative initiation date
dp[, initiation_rel := difftime(initiation_day, initiation_mean, units = 'days') %>% as.numeric]

Nest attendance

# distance to nest
dps = dp[!is.na(nestID)]
dps[, distance_nest_1 := sqrt(sum((c(lon1, lat1) - c(lon_n, lat_n))^2)), by = 1:nrow(dps)]
dps[, at_nest1 := distance_nest_1 < 15]

dps[, distance_nest_2 := sqrt(sum((c(lon2, lat2) - c(lon_n, lat_n))^2)), by = 1:nrow(dps)]
dps[, at_nest2 := distance_nest_2 < 15]

# merge back with data
dp = merge(dp, dps[, .(ID1, ID2, datetime_1, nestID, at_nest1, at_nest2)], 
           by = c('ID1', 'ID2', 'datetime_1', 'nestID'), all.x = TRUE)

Subset relevant data

# N pairwise data per day
dpn = dp[!is.na(date_rel_pair), .N, by = .(pairID, nestID, date_rel_pair)]

# check max
dpn[, N] |> max()

## [1] 142

# proportion of locations send
dpn[, Np := N / 142]

# merge with dp
dp = merge(dp, dpn, by = c('pairID', 'nestID', 'date_rel_pair'), all.x = TRUE)

# subset 
dps = 
dp[breeding_pair == TRUE & sex1 == 'M', 
   .(pairID, year_, ID1, ID2, sex1, sex2, datetime_1, datetime_2, N, Np, date_, date_rel_pair, 
     datetime_rel_pair, distance_pair, interaction, split, merge, IDsplitting, IDmerging, distance1_before, 
     distance2_before, nestID, initiation, initiation_day, initiation_rel, at_nest1, 
     at_nest2, female_clutch, anyEPY, breeding_pair, f_polyandrous, f_polyandrous_first,
     f_polyandrous_second, f_renesting_first, f_renesting_second, type = 'breeding pair')]

# save data
# fwrite(dps, './DATA/PAIR_WISE_INTERACTIONS_BREEDING_PAIRS.txt', quote = TRUE, sep = '\t', row.names = FALSE)

Randomization for interaction base line

# unique data
du = unique(dp, by = c('pairID'))
ds = unique(dp, by = c('nestID', 'date_rel_pair'))
ds = ds[!is.na(date_rel_pair)]

# nests to exclude (second clutches)
n2 = c('R201_19', 'R231_19', 'R905_19', 'R502_19')
ds = ds[!(nestID %in% n2)]

# data needed for null model
d0 = ds[, .(date_rel_pair, date_)] %>% unique

# subset non-breeders pairs within breeders
breeding_males = du[breeding_pair == TRUE]$ID1
breeding_females = du[breeding_pair == TRUE]$ID2

dps = dp[breeding_pair == FALSE & ID1 %in% breeding_males & ID2 %in% breeding_females]

# subset all interactions on days with relative initiation date for breeders
x = d0$date_rel_pair %>% unique

d0a = foreach(i = x, .combine = 'rbind') %do% {

  # subset relative day
  dss = d0[date_rel_pair == i]

  # subset all pairwise interactions from this date
  dsms = dps[date_ %in% dss$date_]

  # assign type
  dsms[, date_rel_pair := i]

  dsms

}

d0a[, date_rel_pair_type := 'null_model']


# subset pairwise data with at least one interaction per day

# pairwise positions non-breeder pairs
dpn = d0a[, .N, by = .(pairID, date_, date_rel_pair)]
setnames(dpn, 'N', 'Nnb')

# proportion of locations send
dpn[, Np_nb := Nnb / 142]

d0a = merge(d0a, dpn, by = c('pairID', 'date_', 'date_rel_pair'), all.x = TRUE)

# Proportion of time together
dps = d0a[interaction == TRUE, .(N_int_nb = .N), by = .(pairID, date_, date_rel_pair)]
du = unique(d0a, by = c('pairID', 'date_', 'date_rel_pair'))
du = merge(du, dps, by = c('pairID', 'date_', 'date_rel_pair'), all.x = TRUE)
du[is.na(N_int_nb), N_int_nb := 0]
du[, int_prop_nb := N_int_nb / Nnb]

# merge back with full table
d0a = merge(d0a, du[, .(pairID, date_, date_rel_pair, N_int_nb, int_prop_nb)],
            by = c('pairID', 'date_', 'date_rel_pair'), all.x = TRUE)

# subset only pairs with at least one interaction
d0a = d0a[int_prop_nb > 0]

# subset pairs with at least 50% data
d0a = d0a[Np_nb >= 0.5]

# select random pairs for each day
ds = unique(d0a, by = c('pairID', 'date_', 'date_rel_pair'))

# shuffle data
set.seed(210191)
ds = ds[sample(dim(ds)[1])]

# subset pairs
ds[, date_rel_pair_id := seq_len(.N), by = date_rel_pair]
ds = ds[date_rel_pair_id <= 50]
ds[, sub := paste0(pairID, '_', date_, '_', date_rel_pair)]
d0a[, sub := paste0(pairID, '_', date_, '_', date_rel_pair)]

d0as = d0a[sub %in% ds$sub]

ds[date_rel_pair >= -10 & date_rel_pair <= 10, .N, date_rel_pair]

##     date_rel_pair  N
##  1:             2 50
##  2:             6 50
##  3:            -2 50
##  4:            -4 50
##  5:             9 50
##  6:             8 50
##  7:             1 50
##  8:             4 50
##  9:             3 50
## 10:            -3 50
## 11:            -5 50
## 12:             0 50
## 13:            -8 50
## 14:            -7 50
## 15:            -1 50
## 16:            10 50
## 17:            -6 50
## 18:             7 50
## 19:             5 50
## 20:            -9 50
## 21:           -10 50
##     date_rel_pair  N

# merge with nest data from female
duf = unique(dp[!is.na(nestID)], by = c('ID2', 'year_'))

# delete columns in d0as
d0as[, nestID := NULL]
d0as[, initiation := NULL]
d0as[, initiation_rel := NULL]

d0as = merge(d0as, duf[, .(year_, ID2, nestID, initiation, initiation_rel)],
             by = c('year_', 'ID2'), all.x = TRUE)

d0as[is.na(nestID)]

## Empty data.table (0 rows and 78 cols): year_,ID2,pairID,date_,date_rel_pair,ID1...

# rename order and save
d0as = d0as[,
        .(pairID, year_, ID1, ID2, sex1, sex2, datetime_1, datetime_2, N = Nnb, Np = Np_nb, date_, 
          date_rel_pair, datetime_rel_pair, distance_pair, interaction, split, merge, nestID, initiation, 
          initiation_day, initiation_rel, type = 'randomization')]

# save data
# fwrite(d0as, './DATA/PAIR_WISE_INTERACTIONS_BREEDING_PAIRS_RANDOM.txt', quote = TRUE, sep = '\t', row.names = FALSE)

# version information
sessionInfo()

## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=German_Germany.utf8  LC_CTYPE=German_Germany.utf8    LC_MONETARY=German_Germany.utf8 LC_NUMERIC=C                   
## [5] LC_TIME=German_Germany.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggnewscale_0.4.8  stringr_1.5.0     knitr_1.42        foreach_1.5.2     sf_1.0-13         auksRuak_0.1.0    viridis_0.6.2     viridisLite_0.4.1
##  [9] ggplot2_3.4.0     sdb_2019.12       magrittr_2.0.3    data.table_1.14.8
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4         sass_0.4.5         RMySQL_0.10.25     jsonlite_1.8.4     bit64_4.0.5        bslib_0.4.2        expm_0.999-7      
##  [8] askpass_1.1        highr_0.10         gld_2.6.6          lmom_2.9           cellranger_1.1.0   yaml_2.3.7         pillar_1.8.1      
## [15] lattice_0.20-45    glue_1.6.2         filenamr_0.1.0     digest_0.6.31      cliExtras_0.1.0    sfext_0.1.0        snakecase_0.11.0  
## [22] colorspace_2.1-0   htmltools_0.5.4    Matrix_1.5-1       pkgconfig_2.0.3    ggspatial_1.1.7    purrr_1.0.1        mvtnorm_1.1-3     
## [29] scales_1.2.1       rootSolve_1.8.2.3  timechange_0.2.0   tibble_3.1.8       proxy_0.4-27       farver_2.1.1       generics_0.1.3    
## [36] cachem_1.0.6       withr_2.5.0        janitor_2.1.0      cli_3.6.0          readxl_1.4.1       evaluate_0.20      fansi_1.0.4       
## [43] MASS_7.3-58.1      anytime_0.3.9      class_7.3-20       tools_4.2.2        RMariaDB_1.2.2     hms_1.1.3          lifecycle_1.0.3   
## [50] Exact_3.2          munsell_0.5.0      jquerylib_0.1.4    compiler_4.2.2     e1071_1.7-13       rlang_1.0.6        classInt_0.4-9    
## [57] units_0.8-2        grid_4.2.2         iterators_1.0.14   rstudioapi_0.14    rmarkdown_2.20     boot_1.3-28        DescTools_0.99.47 
## [64] gtable_0.3.1       codetools_0.2-18   DBI_1.1.3          R6_2.5.1           gridExtra_2.3      lubridate_1.9.2    dplyr_1.1.0       
## [71] fastmap_1.1.0      bit_4.0.5          utf8_1.2.3         rprojroot_2.0.3    KernSmooth_2.23-20 stringi_1.7.12     Rcpp_1.0.10       
## [78] vctrs_0.5.2        tidyselect_1.2.0   xfun_0.40

Subset data from breeding pairs and bind with metadata

Johannes Krietsch

2023-08-15