class: center, middle, inverse, title-slide

# R-Ladies NYC: An Analysis
## NYR 2020
### Ludmila Janda
### Amplify
### 2020/08/14

---

# WHAT IS R-LADIES?

[R-Ladies Global](https://benubah.github.io/r-community-explorer/rladies.html)

<img src="/Users/ljanda/Documents/misc/nyr_talk_2020/images/rladies_global.png" width="100%" />

---

# WHAT IS THIS TALK?

--

- An exploration of the R-Ladies NYC meetup data

--

- A look at the code used to do this

<img src="/Users/ljanda/Documents/misc/nyr_talk_2020/images/rladies_hex.png" width="60%" style="display: block; margin: auto;" />

---

# Getting data using meetupr

[meetupr](https://github.com/rladies/meetupr) github link

```r
urlname <- "rladies-newyork"
events <- meetupr::get_events(urlname, "past")
```

```r
colnames(events)
```

```
##  [1] "id"              "name"            "created"         "status"
##  [5] "time"            "local_date"      "local_time"      "waitlist_count"
##  [9] "yes_rsvp_count"  "venue_id"        "venue_name"      "venue_lat"
## [13] "venue_lon"       "venue_address_1" "venue_city"      "venue_state"
## [17] "venue_zip"       "venue_country"   "description"     "link"
## [21] "resource"
```

```r
nrow(events)
```

```
## [1] 47
```

---

# Are our RSVP counts trending upwards?

```r
summary(events$yes_rsvp_count)
```

```
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##   20.00   42.00   55.00   56.68   71.50  103.00
```

```r
ggplot(events, aes(local_date, yes_rsvp_count)) +
  geom_line(color = "#88398a") +
  geom_point(color = "#88398a", size = 2) +
  labs(x = "Date", y = "RSVP Count") +
  theme_rladies()
```

---

# Are our RSVP counts trending upwards?

<img src="nyr_talk_2020_slides_files/figure-html/unnamed-chunk-11-1.png" width="75%" />

---

# What are those low RSVP count events?
```r
# Keep a label only for events with 35 RSVPs or fewer, then remove the first
# occurrence of each "R-Ladies" spelling variant to shorten those labels
events_low_labels <- events %>%
  mutate(
    name = ifelse(yes_rsvp_count > 35, "", name),
    name = name %>%
      str_remove("R-ladies") %>%
      str_remove("R-Ladies") %>%
      str_remove("R Ladies")
  )

# RSVP count over time; only the low-count events carry a (repelled) label
events_low_labels %>%
  ggplot(aes(x = local_date, y = yes_rsvp_count, label = name)) +
  geom_line(color = "#88398a") +
  geom_point(color = "#88398a", size = 2) +
  geom_label_repel(
    size = 2.5,
    force = 5,
    nudge_y = 5,
    segment.color = "grey60"
  ) +
  labs(x = NULL, y = "RSVP Count") +
  theme_rladies()
```

---

# What are those low RSVP count events?

<img src="nyr_talk_2020_slides_files/figure-html/unnamed-chunk-13-1.png" width="75%" />

---

# Filter out expectedly small events

```r
# Drop every event whose name mentions a book club, social or workshop
events_normal <- events %>%
  filter(
    !(str_detect(name, "Book Club") |
        str_detect(name, "book club") |
        str_detect(name, "Social") |
        str_detect(name, "Workshop"))
  )
```

```r
summary(events_normal$yes_rsvp_count)
```

```
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##   38.00   55.00   66.00   68.72   82.00  103.00
```

---

# Trending Up?

```r
# Same time series as before, restricted to the remaining events
events_normal %>%
  ggplot(aes(x = local_date, y = yes_rsvp_count)) +
  geom_line(color = "#88398a") +
  geom_point(color = "#88398a", size = 2) +
  labs(x = NULL, y = "RSVP Count") +
  theme_rladies()
```

---

# Trending Up?

<img src="nyr_talk_2020_slides_files/figure-html/unnamed-chunk-17-1.png" width="75%" />

---

# What were those "popular" events?

```r
# Label only the events with at least 80 RSVPs
events_normal_labels <- mutate(
  events_normal,
  name = ifelse(yes_rsvp_count < 80, "", name)
)

events_normal_labels %>%
  ggplot(aes(x = local_date, y = yes_rsvp_count, label = name)) +
  geom_col(fill = "#88398a") +
  geom_label_repel(
    size = 4,
    force = 5,
    nudge_y = -7,
    segment.color = "grey60"
  ) +
  labs(x = NULL, y = "RSVP Count") +
  theme_rladies()
```

---

# What were those "popular" events?
<img src="nyr_talk_2020_slides_files/figure-html/unnamed-chunk-19-1.png" width="75%" />

---

# Look at members data

```r
members <- meetupr::get_members(urlname)
```

```r
table(members$status)
```

```
##
## active
##   1856
```

---

# Count of new members by month

```r
# truncate each join timestamp to the first of its month and count per month
members_month <- members %>%
  mutate(year_month = as.Date(floor_date(created, "month"))) %>%
  group_by(year_month) %>%
  summarise(count_new_members = n())

ggplot(members_month, aes(year_month, count_new_members)) +
  geom_col(fill = "#88398a") +
  scale_x_date(date_breaks = "6 months", date_labels = "%Y-%m") +
  labs(x = NULL, y = "Number of New Members") +
  theme_rladies()
```

---

# Count of new members by month

<img src="nyr_talk_2020_slides_files/figure-html/unnamed-chunk-23-1.png" width="75%" />

---

# New members by nearest upcoming event

```r
# group by day and get count of new accounts
# use complete() to get all the days
# (days with no new member get an NA count)
new_members <- members %>%
  mutate(date = as.Date(created)) %>%
  group_by(date) %>%
  summarise(count_new_members = n()) %>%
  tidyr::complete(date = seq.Date(min(date), max(date), by = "day"))

# make event dataset to merge with
events_dates <- events %>%
  select(date = local_date, name)
```

---

# New members by nearest upcoming event

```r
# merge members with complete dates and events (done once, reused below)
# event_group is the number of events strictly before each row, so every day
# after one event up to and including the next event day shares a group
# NOTE: cumsum() runs down the rows, so this assumes rows are in date order
members_events_days <- left_join(new_members, events_dates, by = "date") %>%
  mutate(event_flag = ifelse(!is.na(name), 1, 0),
         event_group = cumsum(event_flag) - event_flag)

# get counts by event
members_events_count <- members_events_days %>%
  group_by(event_group) %>%
  summarise(new_members = sum(count_new_members, na.rm = TRUE))

# get event dates for event groups
members_events_groups <- members_events_days %>%
  filter(!is.na(name)) %>%
  distinct(event_group, name, date)

# final join; only label events that had 50 or more new members
members_events <- left_join(members_events_groups,
                            members_events_count,
                            by = "event_group") %>%
  mutate(name = ifelse(new_members < 50, "", name))
```
---

# New members by nearest upcoming event

```r
# shorten the labels by dropping the lecture-series prefix, then plot new
# members per event group
members_events %>%
  mutate(name = str_remove(name, "R-Ladies New York Lecture Series: ")) %>%
  ggplot(aes(date, new_members, label = name)) +
  geom_col(color = "black") +
  geom_label_repel(size = 4, force = 40, nudge_y = 40,
                   segment.color = "grey60") +
  labs(x = NULL, y = "New Member Count") +
  theme_rladies()
```

---

# New members by nearest upcoming event

<img src="nyr_talk_2020_slides_files/figure-html/unnamed-chunk-27-1.png" width="75%" />

---

# Hadley Wickham Bump?

```r
# rebuild members_events, this time labelling only the event whose name
# contains "Tidy evaluation"
members_events <- left_join(members_events_groups,
                            members_events_count,
                            by = "event_group") %>%
  mutate(name = ifelse(str_detect(name, "Tidy evaluation"), name, ""))

ggplot(members_events, aes(date, new_members, label = name)) +
  geom_col(color = "black") +
  geom_label_repel(size = 4, force = 7, nudge_y = 10,
                   segment.color = "grey60") +
  labs(x = NULL, y = "New Member Count") +
  theme_rladies()
```

---

# Hadley Wickham Bump?

<img src="nyr_talk_2020_slides_files/figure-html/unnamed-chunk-29-1.png" width="75%" />

---

# Member counts with events

```r
# one row per day: flag event days and turn missing counts into zeros
members_events_all_days <- left_join(new_members, events_dates, by = "date") %>%
  mutate(event_flag = factor(ifelse(!is.na(name), "Yes", "No")),
         count_new_members = ifelse(is.na(count_new_members),
                                    0,
                                    count_new_members),
         year = year(date))

# guide = "none" hides the duplicate colour legend (guide = FALSE is
# deprecated in ggplot2)
ggplot(members_events_all_days,
       aes(date, count_new_members, fill = event_flag, color = event_flag)) +
  geom_col() +
  facet_wrap(~year, scales = "free") +
  scale_fill_manual(values = c("#D3D3D3", "#88398a")) +
  scale_color_manual(values = c("#D3D3D3", "#88398a"), guide = "none") +
  scale_x_date(date_breaks = "2 months", date_labels = "%m") +
  labs(x = "Month", y = "New Member Count", fill = "Event Day") +
  theme_rladies()
```

---

# Member counts with events

<img src="nyr_talk_2020_slides_files/figure-html/unnamed-chunk-31-1.png" width="75%" />

---

# Loyalty

```r
# all attendees for one event
attendees <- meetupr::get_event_attendees(urlname, event_id = "235073330")

# get all event ids
event_ids <- events$id

# get attendees for all the events
# slowly() rate-limits the calls; results are row-bound into one data frame
all_event_attendees <- event_ids %>%
  map_dfr(slowly(~ meetupr::get_event_attendees(urlname, event_id = .x)))
```

---

# Loyalty

```r
# count "yes" RSVPs per attendee id
# NOTE(review): id 0 is presumably a placeholder id - confirm
# rows with no match in members have an NA created and are dropped by the
# final filter along with members created on or after 2020-01-01
attendee_rsvp_counts <- all_event_attendees %>%
  filter(id != 0, rsvp_response == "yes") %>%
  group_by(id) %>%
  summarise(count_events_rsvp = n()) %>%
  left_join(members, by = "id") %>%
  filter(created < "2020-01-01")

ggplot(attendee_rsvp_counts, aes(count_events_rsvp)) +
  geom_histogram(binwidth = 1, fill = "#88398a") +
  labs(x = "Number of Events Attended", y = "Count") +
  theme_rladies()
```

---

# Loyalty

<img src="nyr_talk_2020_slides_files/figure-html/unnamed-chunk-34-1.png" width="75%" />

---

# R-Ladies NYC Number One Fan

```r
subset(attendee_rsvp_counts, id == 103401142, select = count_events_rsvp)
```

```
## # A tibble: 1 x 1
##   count_events_rsvp
##               <int>
## 1                40
```

---

# Map of Event Locations

```r
shps <- here::here("nyc")

# read the borough shapefile and keep the Manhattan and Brooklyn geometries
boroughs <- st_read(shps, "nyc") %>%
  filter(boro_name %in% c("Manhattan", "Brooklyn")) %>%
  pull(geometry)
```

```
## Reading layer `nyc' from data source `/Users/ljanda/Documents/misc/nyr_talk_2020/nyc' using driver `ESRI Shapefile'
## Simple feature collection with 4 features and 4 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -74.04773 ymin: 40.54183 xmax: -73.70001 ymax: 40.91553
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
```

```r
# reproject boroughs and venues from EPSG:4326 to EPSG:2263 so they overlay
nyc_boroughs <- st_transform(boroughs, 2263)

# venues with a known latitude, as points
events_spatial <- events %>%
  select(latitude = venue_lat, longitude = venue_lon) %>%
  filter(!is.na(latitude)) %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326) %>%
  st_transform(2263)
```

---

# Map of Event Locations

```r
# borough outlines with semi-transparent venue points; all axis decoration
# is removed
ggplot() +
  geom_sf(data = nyc_boroughs, fill = "white", color = "darkgrey") +
  geom_sf(data = events_spatial, alpha = 0.25, size = 2, color = "#88398a") +
  labs(x = "", y = "") +
  coord_sf() +
  theme_minimal() +
  theme(panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "white"),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank())
```

---
background-image: url("images/event_locations.png")
background-size: 125%
background-position: center top

# Event Locations

---

# Further Work

- flexdashboard

--

- automate website event posts

--

- consider who our speakers are/if we are involving the community

---

# Word Cloud - Comments

<img src="nyr_talk_2020_slides_files/figure-html/unnamed-chunk-40-1.png" width="85%" />

---

# Thank You

- Sebastian Teran Hidalgo

--

- Laura Janda

--

- Samuel Crane

--

- Ayanthi Gunawardana

--

- R-Ladies NYC!!!