library(DBI)
library(RPostgreSQL)
# CONNECT
conn <- dbConnect(RPostgreSQL::PostgreSQL(), host=config$pg_conn$host, dbname=config$pg_conn$name,
user=config$pg_conn$user, password=config$pg_conn$pwd, port=config$pg_conn$port)
# R AGGREGATION (MULTIPLE FUNCS)
# do.call(data.frame,
# aggregate(rides ~ route + routename, agg_csv,
# function(x) c(count=length(x), sum=sum(x), mean=mean(x),
# median=median(x), min=min(x), max=max(x))))
# POSTGRES AGGREGATION
sql <- 'SELECT rt.route_name,
COUNT(rd.rides) AS "count",
SUM(rd.rides) AS "sum",
AVG(rd.rides) AS "mean",
MEDIAN(rd.rides) AS "median",
R_MEDIAN(rd.rides) AS "r_median",
MIN(rd.rides) AS "min",
MAX(rd.rides) AS "max"
FROM bus_routes rt
INNER JOIN bus_rides rd ON rt.route_id = rd.route_id
GROUP BY rt.route_name
ORDER BY SUM(rd.rides) DESC
LIMIT 10'
agg_sql <- dbGetQuery(conn, sql)
kable_styling(kable(agg_sql),
bootstrap_options = c("striped", "hover", "condensed"))
| route_name | count | sum | mean | median | r_median | min | max |
|---|---|---|---|---|---|---|---|
| 79th | 6513 | 176238493 | 27059.50 | 27672 | 27672 | 7896 | 43081 |
| Ashland | 6513 | 156178845 | 23979.56 | 23676 | 23676 | 5570 | 45177 |
| Chicago | 6513 | 130620291 | 20055.32 | 21767 | 21767 | 4448 | 35893 |
| Western | 6513 | 129322319 | 19856.03 | 19553 | 19553 | 4985 | 37202 |
| Cottage Grove | 6513 | 128999766 | 19806.50 | 21181 | 21181 | 5489 | 31187 |
| Belmont | 6513 | 123065617 | 18895.38 | 20732 | 20732 | 3451 | 30025 |
| Clark | 6513 | 119459342 | 18341.68 | 19171 | 19171 | 3310 | 26896 |
| King Drive | 6513 | 119070638 | 18282.00 | 19402 | 19402 | 3993 | 29896 |
| Halsted | 6513 | 118284951 | 18161.36 | 19305 | 19305 | 2915 | 30605 |
| Sheridan | 6513 | 118178375 | 18145.00 | 18812 | 18812 | 2666 | 33236 |
ggplot(agg_sql, aes(route_name, sum, fill=route_name)) + geom_col(position = "dodge") +
labs(title="CTA Top 10 Bus Routes by Ridership", x="Year", y="Rides") +
scale_y_continuous(expand = c(0, 0), label=comma, limit=c(0,2E8)) + guides(fill=FALSE) +
scale_fill_manual(values=seaborn_palette) +
theme(legend.position="bottom",
plot.title = element_text(hjust=0.5, size=18),
axis.text.x = element_text(angle=0, hjust=0.5))
sql <- 'WITH station_agg AS
(SELECT DATE_PART(\'year\', r.ride_date)::integer AS "year",
r.station_id,
r.station_name,
COUNT(r.rides)::numeric(20,5) AS "count",
SUM(r.rides)::numeric(20,5) AS "sum",
AVG(r.rides)::numeric(20,5) AS "mean",
MEDIAN(r.rides)::numeric(20,5) AS "median",
MIN(r.rides)::numeric(20,5) AS "min",
MAX(r.rides)::numeric(20,5) AS "max"
FROM rail_rides r
GROUP BY DATE_PART(\'year\', r.ride_date),
r.station_id,
r.station_name
),
merge_rail AS
(SELECT s.*,
r.rail_line,
(s."sum" / COUNT(*) OVER(PARTITION BY s.station_id, "year")) AS rail_total
FROM station_agg s
INNER JOIN rail_stations r ON s.station_id = r.station_id
)
SELECT m."year", m.rail_line, SUM(m.rail_total) AS rail_total
FROM merge_rail m
GROUP BY m."year", m.rail_line
ORDER BY m.rail_line, m."year"'
agg_sql <- dbGetQuery(conn, sql)
kable_styling(kable(agg_sql[sample(1:nrow(agg_sql), 20),]),
bootstrap_options = c("striped", "hover", "condensed"))
| year | rail_line | rail_total | |
|---|---|---|---|
| 115 | 2007 | purple_exp | 11685317 |
| 132 | 2006 | red | 56267405 |
| 25 | 2007 | brown | 15158712 |
| 24 | 2006 | brown | 15900764 |
| 85 | 2013 | pink | 10076391 |
| 91 | 2001 | purple | 2025669 |
| 74 | 2002 | pink | 5668260 |
| 112 | 2004 | purple_exp | 10812035 |
| 161 | 2017 | yellow | 1373701 |
| 107 | 2017 | purple | 2054181 |
| 38 | 2002 | green | 11275092 |
| 13 | 2013 | blue | 45304180 |
| 145 | 2001 | yellow | 1175412 |
| 51 | 2015 | green | 15502181 |
| 62 | 2008 | orange | 12915598 |
| 93 | 2003 | purple | 2016380 |
| 117 | 2009 | purple_exp | 12493455 |
| 128 | 2002 | red | 55489388 |
| 8 | 2008 | blue | 37294109 |
| 109 | 2001 | purple_exp | 11168731 |
cta_palette <- c(blue="#00A1DE", brown="#62361B", green="#009B3A", orange="#F9461C", pink="#E27EA6",
purple="#522398", purple_exp="#8059BA", red="#C60C30", yellow="#F9E300")
ggplot(agg_sql, aes(year, rail_total, color=rail_line)) +
geom_line(stat="identity") + geom_point(stat="identity") +
labs(title="CTA Rail Ridership By Year", x="Year", y="Rides") +
scale_x_continuous("year", breaks=unique(agg_sql$year)) +
scale_y_continuous(expand = c(0, 0), label=comma) +
scale_color_manual(values = cta_palette) +
theme(legend.position="bottom",
plot.title = element_text(hjust=0.5, size=18),
axis.text.x = element_text(angle=0, hjust=0.5))
CREATE MATERIALIZED VIEW Rail_Model_Data AS
SELECT r.id, r.station_id, r.station_name, r.ride_date, r.day_type, r.rides AS raw,
(r.rides / COUNT(*) OVER(PARTITION BY r.station_id, r.ride_date)) AS rides,
CASE
WHEN r.normalized_date BETWEEN '2099-01-01' AND '2099-03-19' THEN 'winter'
WHEN r.normalized_date BETWEEN '2099-03-20' AND '2099-06-19' THEN 'spring'
WHEN r.normalized_date BETWEEN '2099-06-20' AND '2099-09-19' THEN 'summer'
WHEN r.normalized_date BETWEEN '2099-09-20' AND '2099-12-19' THEN 'fall'
WHEN r.normalized_date BETWEEN '2099-12-20' AND '2099-12-31' THEN 'winter'
ELSE NULL
END As season,
REPLACE(REPLACE((regexp_split_to_array(s.location, '\s+'))[1], ',', ''), '(', '')::numeric AS latitude,
REPLACE((regexp_split_to_array(s.location, '\s+'))[2], ')', '')::numeric AS longitude,
s.rail_line, s.ada, s.direction,
ue.ue_rate, g.gas_price, w.avg_temp, w.precipitation, w.snow_depth
FROM
(
SELECT id, station_id, station_name, day_type, rides, ride_date,
ride_date + (2099 - date_part('year', ride_date) ||' year')::interval as normalized_date
FROM rail_rides
)r
INNER JOIN rail_stations s ON s.station_id = r.station_id
INNER JOIN unemployment_rates ue ON ue.ue_date = r.ride_date
INNER JOIN gas_prices g ON g.gas_date = r.ride_date
INNER JOIN weather_data w ON w.weather_date = r.ride_date
ORDER BY r.ride_date, r.station_id;
REFRESH MATERIALIZED VIEW Rail_Model_Data;
rail_model_data <- dbGetQuery(conn, "SELECT * FROM rail_model_data")
scroll_box(kable_styling(kable(head(rail_model_data)),
bootstrap_options = c("striped", "hover", "condensed")), width = "100%")
| id | station_id | station_name | ride_date | day_type | raw | rides | season | latitude | longitude | rail_line | ada | direction | ue_rate | gas_price | avg_temp | precipitation | snow_depth |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 62 | 40010 | Austin-Forest Park | 2001-01-01 | U | 290 | 290 | winter | 41.87085 | -87.77681 | blue | 0 | W | E | 5.3 | 1.487 | 14.5 | 0 | 17 |
| 61 | 40020 | Harlem-Lake | 2001-01-01 | U | 633 | 633 | winter | 41.88685 | -87.80318 | green | 1 | E | W | 5.3 | 1.487 | 14.5 | 0 | 17 |
| 50 | 40030 | Pulaski-Lake | 2001-01-01 | U | 483 | 483 | winter | 41.88541 | -87.72540 | green | 1 | W | E | 5.3 | 1.487 | 14.5 | 0 | 17 |
| 109 | 40040 | Quincy/Wells | 2001-01-01 | U | 374 | 93 | winter | 41.87872 | -87.63374 | pink | 0 | N | 5.3 | 1.487 | 14.5 | 0 | 17 |
| 109 | 40040 | Quincy/Wells | 2001-01-01 | U | 374 | 93 | winter | 41.87872 | -87.63374 | brown | 0 | S | 5.3 | 1.487 | 14.5 | 0 | 17 |
| 109 | 40040 | Quincy/Wells | 2001-01-01 | U | 374 | 93 | winter | 41.87872 | -87.63374 | orange | 0 | N | 5.3 | 1.487 | 14.5 | 0 | 17 |
model <- lm(rides ~ day_type + season + latitude + longitude + rail_line +
ue_rate + gas_price + avg_temp + precipitation + snow_depth,
data = rail_model_data)
# ANOVA
kable_styling(kable(xtable(anova(model))),
bootstrap_options = c("striped", "hover", "condensed"))
| Df | Sum Sq | Mean Sq | F value | Pr(>F) | |
|---|---|---|---|---|---|
| day_type | 2 | 445663599687 | 222831799844 | 64679.229601 | 0.0000000 |
| season | 3 | 12065643417 | 4021881139 | 1167.392508 | 0.0000000 |
| latitude | 1 | 86867071786 | 86867071786 | 25214.064082 | 0.0000000 |
| longitude | 1 | 319342686 | 319342686 | 92.692510 | 0.0000000 |
| rail_line | 8 | 1832581552014 | 229072694002 | 66490.713539 | 0.0000000 |
| ue_rate | 1 | 1655357962 | 1655357962 | 480.484732 | 0.0000000 |
| gas_price | 1 | 22101747909 | 22101747909 | 6415.260428 | 0.0000000 |
| avg_temp | 1 | 3204630785 | 3204630785 | 930.177158 | 0.0000000 |
| precipitation | 1 | 878964701 | 878964701 | 255.128576 | 0.0000000 |
| snow_depth | 1 | 8106972 | 8106972 | 2.353132 | 0.1250319 |
| Residuals | 1038214 | 3576837505172 | 3445183 | NA | NA |
# POINT ESTIMATES
kable_styling(kable(xtable(summary(model))),
bootstrap_options = c("striped", "hover", "condensed"))
| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| (Intercept) | -67333.240996 | 3539.2418169 | -19.024764 | 0.0000000 |
| day_typeU | -454.935197 | 6.6213391 | -68.707430 | 0.0000000 |
| day_typeW | 1159.154756 | 5.2660141 | 220.119950 | 0.0000000 |
| seasonspring | -180.266785 | 5.3603614 | -33.629595 | 0.0000000 |
| seasonsummer | -179.982024 | 6.5944919 | -27.292781 | 0.0000000 |
| seasonwinter | -160.120620 | 6.1885199 | -25.873815 | 0.0000000 |
| latitude | -3954.668735 | 36.5219229 | -108.282051 | 0.0000000 |
| longitude | -2679.792012 | 43.6084183 | -61.451254 | 0.0000000 |
| rail_linebrown | -1187.766815 | 7.2025790 | -164.908543 | 0.0000000 |
| rail_linegreen | -2076.775862 | 6.8343576 | -303.872871 | 0.0000000 |
| rail_lineorange | -1082.768528 | 8.2804788 | -130.761584 | 0.0000000 |
| rail_linepink | -2215.792821 | 7.3581389 | -301.134954 | 0.0000000 |
| rail_linepurple | -2043.636304 | 11.1396088 | -183.456738 | 0.0000000 |
| rail_linepurple_exp | -1558.043094 | 7.6564149 | -203.495122 | 0.0000000 |
| rail_linered | 1636.661666 | 6.9037116 | 237.069820 | 0.0000000 |
| rail_lineyellow | -1418.136925 | 17.5558352 | -80.778665 | 0.0000000 |
| ue_rate | -7.639672 | 0.9665412 | -7.904135 | 0.0000000 |
| gas_price | 188.159773 | 2.4375323 | 77.192729 | 0.0000000 |
| avg_temp | 5.141775 | 0.1658352 | 31.005329 | 0.0000000 |
| precipitation | -84.092077 | 5.2552649 | -16.001492 | 0.0000000 |
| snow_depth | 1.848654 | 1.2051261 | 1.533992 | 0.1250319 |
graph_data <- data.frame(param = names(model$coefficients[-1]),
value = model$coefficients[-1],
row.names = NULL)
ggplot(graph_data) + geom_col(aes(x=param, y=value, fill=param), position = "dodge") +
labs(title="CTA System Rail Regression Point Estimates", x="Parameters", y="Value") +
guides(fill=FALSE) + ylim(-4000, 2000) +
scale_fill_manual(values = seaborn_palette) +
theme(legend.position="bottom",
plot.title = element_text(hjust=0.5, size=18),
axis.text.x = element_text(angle=45, vjust=0.95, hjust=0.95))
# DISCONNECT FROM DATABASE
dbDisconnect(conn)
## [1] TRUE
options(connectionObserver = NULL)
library(odbc)
conn <- dbConnect(odbc::odbc(),
instance = config$db2_conn$instance,
driver = config$db2_conn$driver,
database = config$db2_conn$database,
server = config$db2_conn$server,
hostname = config$db2_conn$hostname,
port = config$db2_conn$port,
uid = config$db2_conn$uid,
pwd = config$db2_conn$pwd,
LongDataCompat = 1)
day_df <- dbGetQuery(conn, "SELECT t.START_TIME, t.STOP_TIME,
t.TRIP_DURATION
FROM TRIPS t
WHERE DATE(t.START_TIME) = TO_DATE('2015-09-23', 'YYYY-MM-DD')
ORDER BY t.START_TIME")
kable_styling(kable(head(day_df, 20)),
bootstrap_options = c("striped", "hover", "condensed"))
| START_TIME | STOP_TIME | TRIP_DURATION |
|---|---|---|
| 2015-09-23 00:00:00 | 2015-09-23 00:07:00 | 414 |
| 2015-09-23 00:00:00 | 2015-09-23 00:03:00 | 183 |
| 2015-09-23 00:01:00 | 2015-09-23 00:09:00 | 450 |
| 2015-09-23 00:01:00 | 2015-09-23 00:24:00 | 1350 |
| 2015-09-23 00:01:00 | 2015-09-23 00:07:00 | 402 |
| 2015-09-23 00:02:00 | 2015-09-23 00:15:00 | 793 |
| 2015-09-23 00:03:00 | 2015-09-23 00:27:00 | 1435 |
| 2015-09-23 00:04:00 | 2015-09-23 00:06:00 | 134 |
| 2015-09-23 00:05:00 | 2015-09-23 00:19:00 | 820 |
| 2015-09-23 00:06:00 | 2015-09-23 00:15:00 | 526 |
| 2015-09-23 00:07:00 | 2015-09-23 00:15:00 | 506 |
| 2015-09-23 00:08:00 | 2015-09-23 00:15:00 | 425 |
| 2015-09-23 00:09:00 | 2015-09-23 00:32:00 | 1372 |
| 2015-09-23 00:10:00 | 2015-09-23 00:24:00 | 881 |
| 2015-09-23 00:10:00 | 2015-09-23 00:35:00 | 1445 |
| 2015-09-23 00:10:00 | 2015-09-23 00:26:00 | 935 |
| 2015-09-23 00:11:00 | 2015-09-23 00:23:00 | 717 |
| 2015-09-23 00:13:00 | 2015-09-23 00:19:00 | 382 |
| 2015-09-23 00:14:00 | 2015-09-23 00:15:00 | 66 |
| 2015-09-23 00:16:00 | 2015-09-23 00:38:00 | 1316 |
ggplot(day_df, aes(START_TIME, TRIP_DURATION)) + geom_line(color=seaborn_palette[1]) +
xlab("Start Time") + ylab("Trip Duration")
agg_sql <- dbGetQuery(conn, "WITH sub AS
(SELECT t.FROM_STATION_ID, t.FROM_STATION_NAME,
t.FROM_LATITUDE, t.FROM_LONGITUDE,
ROUND(CASE
WHEN t.TO_LATITUDE IS NOT NULL AND t.FROM_LATITUDE IS NOT NULL
THEN
3963.1 * (2 * ATAN(1) -
ASIN(SIN(RADIANS(t.FROM_LATITUDE)) * SIN(RADIANS(t.TO_LONGITUDE)) +
COS(RADIANS(t.FROM_LATITUDE)) * COS(RADIANS(t.TO_LONGITUDE)) *
COS(RADIANS(t.TO_LONGITUDE - t.FROM_LONGITUDE))
)
)
ELSE NULL
END, 4) AS TRIP_DISTANCE
FROM TRIPS t
WHERE DATE(t.START_TIME) = TO_DATE('2015-09-23', 'YYYY-MM-DD'))
SELECT FROM_STATION_ID, FROM_STATION_NAME,
FROM_LATITUDE, FROM_LONGITUDE,
MIN(TRIP_DISTANCE) AS MIN_DISTANCE,
MAX(TRIP_DISTANCE) AS MAX_DISTANCE
FROM sub
GROUP BY FROM_STATION_ID, FROM_STATION_NAME,
FROM_LATITUDE, FROM_LONGITUDE
ORDER BY MAX(TRIP_DISTANCE) DESC
FETCH FIRST 10 ROWS ONLY;")
kable_styling(kable(agg_sql), bootstrap_options = c("striped", "hover", "condensed"))
| FROM_STATION_ID | FROM_STATION_NAME | FROM_LATITUDE | FROM_LONGITUDE | MIN_DISTANCE | MAX_DISTANCE |
|---|---|---|---|---|---|
| 467 | Western Ave & Lunt Ave | 42.00859 | -87.69049 | 8969.148 | 8971.314 |
| 494 | Kedzie Ave & Bryn Mawr Ave | 41.98240 | -87.70892 | 8969.882 | 8971.135 |
| 447 | Glenwood Ave & Morse Ave | 42.00797 | -87.66550 | 8967.513 | 8971.132 |
| 466 | Ridge Blvd & Touhy Ave | 42.01213 | -87.68291 | 8969.393 | 8970.895 |
| 294 | Broadway & Berwyn Ave | 41.97835 | -87.65975 | 8963.826 | 8970.855 |
| 479 | Drake Ave & Montrose Ave | 41.96115 | -87.71657 | 8969.369 | 8970.804 |
| 469 | St. Louis Ave & Balmoral Ave | 41.98039 | -87.71611 | 8967.694 | 8970.700 |
| 450 | Warren Park West | 42.00201 | -87.68973 | 8970.580 | 8970.580 |
| 451 | Sheridan Rd & Loyola Ave | 42.00104 | -87.66120 | 8966.526 | 8970.579 |
| 325 | Clark St & Winnemac Ave | 41.97339 | -87.66836 | 8964.613 | 8970.512 |
divvy_from <- dbGetQuery(conn, "SELECT t.FROM_STATION_NAME,
t.FROM_LATITUDE,
t.FROM_LONGITUDE,
SUM(t.TRIP_DURATION) AS Total_Duration,
MIN(t.TRIP_DURATION) AS Min_Duration,
AVG(t.TRIP_DURATION) AS Avg_Duration,
MEDIAN(t.TRIP_DURATION) AS Median_Duration,
MAX(t.TRIP_DURATION) AS Max_Duration
FROM TRIPS t
GROUP BY t.FROM_STATION_NAME,
t.FROM_LATITUDE,
t.FROM_LONGITUDE
ORDER BY SUM(t.TRIP_DURATION) DESC
FETCH FIRST 10 ROWS ONLY;")
kable_styling(kable(divvy_from), bootstrap_options = c("striped", "hover", "condensed"))
| FROM_STATION_NAME | FROM_LATITUDE | FROM_LONGITUDE | TOTAL_DURATION | MIN_DURATION | AVG_DURATION | MEDIAN_DURATION | MAX_DURATION |
|---|---|---|---|---|---|---|---|
| Lake Shore Dr & Monroe St | 41.88096 | -87.61674 | 444455108 | 60 | 1803 | 1347 | 6403880 |
| Streeter Dr & Grand Ave | 41.89228 | -87.61204 | 431638473 | 60 | 1956 | 1417 | 5444590 |
| Michigan Ave & Oak St | 41.90096 | -87.62378 | 351053592 | 60 | 1815 | 1300 | 2549130 |
| Millennium Park | 41.88103 | -87.62408 | 347331136 | 60 | 1820 | 1241 | 6785180 |
| Theater on the Lake | 41.92628 | -87.63083 | 330660276 | 60 | 1524 | 1285 | 740561 |
| Lake Shore Dr & North Blvd | 41.91172 | -87.62680 | 286215561 | 60 | 1452 | 1173 | 336578 |
| Streeter Dr & Illinois St | 41.89107 | -87.61220 | 222454638 | 60 | 1615 | 1320 | 84073 |
| Canal St & Adams St | 41.87926 | -87.63990 | 184661822 | 60 | 837 | 614 | 9961130 |
| Michigan Ave & Washington St | 41.88389 | -87.62465 | 179427697 | 60 | 1160 | 649 | 1876990 |
| Shedd Aquarium | 41.86722 | -87.61535 | 178996820 | 60 | 1675 | 1307 | 1389890 |
ggplot(transform(divvy_from, FROM_STATION_NAME=gsub("&", "\n&", FROM_STATION_NAME)),
aes(FROM_STATION_NAME, TOTAL_DURATION, fill=FROM_STATION_NAME)) +
geom_col(position = "dodge") +
labs(title="Divvy Top 10 Origination Stations", x="Station", y="Trip Duration (seconds)") +
scale_y_continuous(expand = c(0, 0), label=comma) + guides(fill=FALSE) +
scale_fill_manual(values=seaborn_palette) +
theme(legend.position="bottom",
plot.title = element_text(hjust=0.5, size=18),
axis.text.x = element_text(angle=0, hjust=0.5))
divvy_to <- dbGetQuery(conn, "SELECT t.TO_STATION_NAME,
t.TO_LATITUDE,
t.TO_LONGITUDE,
SUM(t.TRIP_DURATION) AS Total_Duration,
MIN(t.TRIP_DURATION) AS Min_Duration,
AVG(t.TRIP_DURATION) AS Avg_Duration,
MEDIAN(t.TRIP_DURATION) AS Median_Duration,
MAX(t.TRIP_DURATION) AS Max_Duration
FROM TRIPS t
WHERE TO_STATION_NAME <> 'DIVVY Map Frame B/C Station'
GROUP BY t.TO_STATION_NAME,
t.TO_LATITUDE,
t.TO_LONGITUDE
ORDER BY SUM(t.TRIP_DURATION) DESC
FETCH FIRST 10 ROWS ONLY;")
kable_styling(kable(divvy_to), bootstrap_options = c("striped", "hover", "condensed"))
| TO_STATION_NAME | TO_LATITUDE | TO_LONGITUDE | TOTAL_DURATION | MIN_DURATION | AVG_DURATION | MEDIAN_DURATION | MAX_DURATION |
|---|---|---|---|---|---|---|---|
| Streeter Dr & Grand Ave | 41.89228 | -87.61204 | 469992965 | 60 | 1914 | 1432 | 297905 |
| Lake Shore Dr & Monroe St | 41.88096 | -87.61674 | 396754905 | 60 | 1702 | 1318 | 731920 |
| Theater on the Lake | 41.92628 | -87.63083 | 370722559 | 60 | 1598 | 1374 | 383963 |
| Michigan Ave & Oak St | 41.90096 | -87.62378 | 369016876 | 60 | 1782 | 1306 | 2549130 |
| Millennium Park | 41.88103 | -87.62408 | 344723608 | 60 | 1637 | 1157 | 566401 |
| Lake Shore Dr & North Blvd | 41.91172 | -87.62680 | 337381528 | 60 | 1524 | 1295 | 269506 |
| Streeter Dr & Illinois St | 41.89107 | -87.61220 | 264397622 | 60 | 1611 | 1325 | 85368 |
| Michigan Ave & Washington St | 41.88389 | -87.62465 | 158901379 | 60 | 1003 | 572 | 3054250 |
| Shedd Aquarium | 41.86722 | -87.61535 | 158356779 | 60 | 1563 | 1307 | 184015 |
| Adler Planetarium | 41.86610 | -87.60727 | 153361126 | 60 | 1635 | 1357 | 524315 |
ggplot(transform(divvy_to, TO_STATION_NAME=gsub("&", "\n&", TO_STATION_NAME)),
aes(TO_STATION_NAME, TOTAL_DURATION, fill=TO_STATION_NAME)) +
geom_col(position = "dodge") +
labs(title="Divvy Top 10 Destination Stations", x="Station", y="Trip Duration (seconds)") +
scale_y_continuous(expand = c(0, 0), label=comma) + guides(fill=FALSE) +
scale_fill_manual(values=seaborn_palette) +
theme(legend.position="bottom",
plot.title = element_text(hjust=0.5, size=18),
axis.text.x = element_text(angle=0, hjust=0.5))
library(jsonlite)
x <- toJSON(divvy_from[c("FROM_STATION_NAME", "FROM_LATITUDE", "FROM_LONGITUDE")], pretty=TRUE)
# EXPORT TO FILE
fileConn <- file("Divvy_From_Coords.json")
writeLines(x, fileConn)
close(fileConn)
x
## [
## {
## "FROM_STATION_NAME": "Lake Shore Dr & Monroe St",
## "FROM_LATITUDE": 41.881,
## "FROM_LONGITUDE": -87.6167
## },
## {
## "FROM_STATION_NAME": "Streeter Dr & Grand Ave",
## "FROM_LATITUDE": 41.8923,
## "FROM_LONGITUDE": -87.612
## },
## {
## "FROM_STATION_NAME": "Michigan Ave & Oak St",
## "FROM_LATITUDE": 41.901,
## "FROM_LONGITUDE": -87.6238
## },
## {
## "FROM_STATION_NAME": "Millennium Park",
## "FROM_LATITUDE": 41.881,
## "FROM_LONGITUDE": -87.6241
## },
## {
## "FROM_STATION_NAME": "Theater on the Lake",
## "FROM_LATITUDE": 41.9263,
## "FROM_LONGITUDE": -87.6308
## },
## {
## "FROM_STATION_NAME": "Lake Shore Dr & North Blvd",
## "FROM_LATITUDE": 41.9117,
## "FROM_LONGITUDE": -87.6268
## },
## {
## "FROM_STATION_NAME": "Streeter Dr & Illinois St",
## "FROM_LATITUDE": 41.8911,
## "FROM_LONGITUDE": -87.6122
## },
## {
## "FROM_STATION_NAME": "Canal St & Adams St",
## "FROM_LATITUDE": 41.8793,
## "FROM_LONGITUDE": -87.6399
## },
## {
## "FROM_STATION_NAME": "Michigan Ave & Washington St",
## "FROM_LATITUDE": 41.8839,
## "FROM_LONGITUDE": -87.6246
## },
## {
## "FROM_STATION_NAME": "Shedd Aquarium",
## "FROM_LATITUDE": 41.8672,
## "FROM_LONGITUDE": -87.6154
## }
## ]
x <- toJSON(divvy_to[c("TO_STATION_NAME", "TO_LATITUDE", "TO_LONGITUDE")], pretty=TRUE)
# EXPORT TO FILE
fileConn <- file("Divvy_To_Coords.json")
writeLines(x, fileConn)
close(fileConn)
x
## [
## {
## "TO_STATION_NAME": "Streeter Dr & Grand Ave",
## "TO_LATITUDE": 41.8923,
## "TO_LONGITUDE": -87.612
## },
## {
## "TO_STATION_NAME": "Lake Shore Dr & Monroe St",
## "TO_LATITUDE": 41.881,
## "TO_LONGITUDE": -87.6167
## },
## {
## "TO_STATION_NAME": "Theater on the Lake",
## "TO_LATITUDE": 41.9263,
## "TO_LONGITUDE": -87.6308
## },
## {
## "TO_STATION_NAME": "Michigan Ave & Oak St",
## "TO_LATITUDE": 41.901,
## "TO_LONGITUDE": -87.6238
## },
## {
## "TO_STATION_NAME": "Millennium Park",
## "TO_LATITUDE": 41.881,
## "TO_LONGITUDE": -87.6241
## },
## {
## "TO_STATION_NAME": "Lake Shore Dr & North Blvd",
## "TO_LATITUDE": 41.9117,
## "TO_LONGITUDE": -87.6268
## },
## {
## "TO_STATION_NAME": "Streeter Dr & Illinois St",
## "TO_LATITUDE": 41.8911,
## "TO_LONGITUDE": -87.6122
## },
## {
## "TO_STATION_NAME": "Michigan Ave & Washington St",
## "TO_LATITUDE": 41.8839,
## "TO_LONGITUDE": -87.6246
## },
## {
## "TO_STATION_NAME": "Shedd Aquarium",
## "TO_LATITUDE": 41.8672,
## "TO_LONGITUDE": -87.6154
## },
## {
## "TO_STATION_NAME": "Adler Planetarium",
## "TO_LATITUDE": 41.8661,
## "TO_LONGITUDE": -87.6073
## }
## ]
# DISCONNECT FROM DATABASE
dbDisconnect(conn)
library(RSQLite)
conn <- dbConnect(RSQLite::SQLite(), dbname=config$sqlite_conn$database)
sql <- "SELECT l.Line,
t.Line AS Short,
CAST(strftime('%Y', Report_Month) AS INT) as Year,
SUM(t.Late_Trains) AS Total_Late_Trains,
AVG(t.Late_Trains) AS Avg_Late_Trains
FROM Trains t
INNER JOIN Lines l ON t.LineID = l.ID
WHERE l.Line <> 'SYSTEM'
GROUP BY l.Line,
t.Line,
strftime('%Y', t.Report_Month)"
agg_sql <- dbGetQuery(conn, sql)
kable_styling(kable(agg_sql[sample(1:nrow(agg_sql), 20),]),
bootstrap_options = c("striped", "hover", "condensed"))
| Line | Short | Year | Total_Late_Trains | Avg_Late_Trains | |
|---|---|---|---|---|---|
| 111 | Union Pacific / Northwest | UP-NW | 2009 | 2053 | 28.513889 |
| 99 | SouthWest Service | SWS | 2017 | 1205 | 20.083333 |
| 21 | Metra Electric Blue Island | Elec-SC | 2009 | 705 | 9.791667 |
| 29 | Metra Electric Blue Island | Elec-SC | 2017 | 729 | 10.125000 |
| 84 | Rock Island District | RI | 2012 | 2594 | 36.027778 |
| 36 | Metra Electric Main Line | Elec-ML | 2014 | 2032 | 28.222222 |
| 6 | BNSF Railway | BNSF | 2014 | 7955 | 110.486111 |
| 14 | Heritage Corridor | Heritage | 2012 | 204 | 5.368421 |
| 71 | North Central Service | NCS | 2009 | 807 | 16.812500 |
| 108 | Union Pacific / North | UP-N | 2016 | 1252 | 17.388889 |
| 113 | Union Pacific / Northwest | UP-NW | 2011 | 2641 | 36.680556 |
| 23 | Metra Electric Blue Island | Elec-SC | 2011 | 1245 | 17.291667 |
| 30 | Metra Electric Blue Island | Elec-SC | 2018 | 565 | 7.847222 |
| 92 | SouthWest Service | SWS | 2010 | 1385 | 23.083333 |
| 126 | Union Pacific / West | UP-W | 2014 | 2773 | 38.513889 |
| 89 | Rock Island District | RI | 2017 | 2736 | 38.000000 |
| 54 | Milwaukee District / North | Milw-N | 2012 | 3064 | 42.555556 |
| 31 | Metra Electric Main Line | Elec-ML | 2009 | 2027 | 28.152778 |
| 124 | Union Pacific / West | UP-W | 2012 | 2265 | 31.458333 |
| 101 | Union Pacific / North | UP-N | 2009 | 2868 | 39.833333 |
ggplot(agg_sql, aes(Year, Total_Late_Trains, color=Line)) +
geom_line(stat="identity") +
labs(title="Metra Late Trains by Line", x="Year", y="Late Trains") +
scale_x_continuous("year", breaks=unique(agg_sql$Year)) +
scale_y_continuous(expand = c(0, 0), label=comma) + guides(fill=FALSE) +
scale_color_manual(values=seaborn_palette) +
theme(legend.position="bottom",
plot.title = element_text(hjust=0.5, size=18),
axis.text.x = element_text(angle=0, hjust=0.5))
lines_list <- split(agg_sql, agg_sql$Short)
nms <- lapply(combn(unique(agg_sql$Short), 2, simplify=FALSE), function(i) paste(i, collapse="_"))
res <- t(sapply(combn(unique(agg_sql$Short), 2, simplify=FALSE), function(i) {
t <- t.test(lines_list[[i[1]]]$Total_Late_Trains, lines_list[[i[2]]]$Total_Late_Trains)
c(statistic = t$statistic, p_value = t$p.value)
}))
row.names(res) <- nms
scroll_box(kable_styling(kable(res), bootstrap_options = c("striped", "hover", "condensed")),
height="800px")
| statistic.t | p_value | |
|---|---|---|
| BNSF_Heritage | 9.8494756 | 0.0000034 |
| BNSF_Elec-SC | 8.7158417 | 0.0000076 |
| BNSF_Elec-ML | 6.1374028 | 0.0000957 |
| BNSF_Elec-BI | 9.3920454 | 0.0000054 |
| BNSF_Milw-N | 2.8910097 | 0.0111757 |
| BNSF_Milw-W | 4.9595192 | 0.0002727 |
| BNSF_NCS | 7.9789334 | 0.0000143 |
| BNSF_RI | 4.5415218 | 0.0006351 |
| BNSF_SWS | 7.5383993 | 0.0000207 |
| BNSF_UP-N | 4.9702928 | 0.0001637 |
| BNSF_UP-NW | 5.4107001 | 0.0002478 |
| BNSF_UP-W | 3.7401929 | 0.0021030 |
| Heritage_Elec-SC | -5.2621722 | 0.0000918 |
| Heritage_Elec-ML | -12.0629835 | 0.0000001 |
| Heritage_Elec-BI | -3.4987332 | 0.0027396 |
| Heritage_Milw-N | -10.3277589 | 0.0000017 |
| Heritage_Milw-W | -9.1513465 | 0.0000037 |
| Heritage_NCS | -7.7996898 | 0.0000020 |
| Heritage_RI | -11.0240760 | 0.0000006 |
| Heritage_SWS | -8.6861313 | 0.0000010 |
| Heritage_UP-N | -6.3532760 | 0.0001028 |
| Heritage_UP-NW | -13.6677426 | 0.0000000 |
| Heritage_UP-W | -9.5307864 | 0.0000033 |
| Elec-SC_Elec-ML | -7.6930971 | 0.0000013 |
| Elec-SC_Elec-BI | 3.0553272 | 0.0090725 |
| Elec-SC_Milw-N | -8.4524134 | 0.0000058 |
| Elec-SC_Milw-W | -6.7021299 | 0.0000291 |
| Elec-SC_NCS | -2.6856024 | 0.0153183 |
| Elec-SC_RI | -8.3005633 | 0.0000030 |
| Elec-SC_SWS | -3.9255453 | 0.0011167 |
| Elec-SC_UP-N | -4.5812945 | 0.0009268 |
| Elec-SC_UP-NW | -9.4434609 | 0.0000001 |
| Elec-SC_UP-W | -7.5259529 | 0.0000146 |
| Elec-ML_Elec-BI | 10.7051161 | 0.0000005 |
| Elec-ML_Milw-N | -4.3660738 | 0.0008571 |
| Elec-ML_Milw-W | -1.6151844 | 0.1279065 |
| Elec-ML_NCS | 5.2419772 | 0.0000733 |
| Elec-ML_RI | -2.7359672 | 0.0152246 |
| Elec-ML_SWS | 3.8518590 | 0.0012327 |
| Elec-ML_UP-N | -0.7261232 | 0.4813509 |
| Elec-ML_UP-NW | -1.8073859 | 0.0875269 |
| Elec-ML_UP-W | -3.2060881 | 0.0069770 |
| Elec-BI_Milw-N | -9.6182342 | 0.0000038 |
| Elec-BI_Milw-W | -8.2352811 | 0.0000119 |
| Elec-BI_NCS | -5.9428602 | 0.0000643 |
| Elec-BI_RI | -10.0649428 | 0.0000019 |
| Elec-BI_SWS | -7.0205384 | 0.0000176 |
| Elec-BI_UP-N | -5.6282342 | 0.0002825 |
| Elec-BI_UP-NW | -12.4136552 | 0.0000001 |
| Elec-BI_UP-W | -8.7676538 | 0.0000080 |
| Milw-N_Milw-W | 2.6700024 | 0.0162767 |
| Milw-N_NCS | 7.2630725 | 0.0000175 |
| Milw-N_RI | 2.0467572 | 0.0573187 |
| Milw-N_SWS | 6.5511394 | 0.0000355 |
| Milw-N_UP-N | 2.7933483 | 0.0120074 |
| Milw-N_UP-NW | 3.2584554 | 0.0063262 |
| Milw-N_UP-W | 1.0801680 | 0.2944043 |
| Milw-W_NCS | 5.1854922 | 0.0002195 |
| Milw-W_RI | -0.8225248 | 0.4216052 |
| Milw-W_SWS | 4.2973444 | 0.0008763 |
| Milw-W_UP-N | 0.4888439 | 0.6313060 |
| Milw-W_UP-NW | 0.2804970 | 0.7829139 |
| Milw-W_UP-W | -1.5653195 | 0.1356000 |
| NCS_RI | -6.6347574 | 0.0000189 |
| NCS_SWS | -1.3501689 | 0.1939515 |
| NCS_UP-N | -3.4515401 | 0.0055512 |
| NCS_UP-NW | -7.0452631 | 0.0000029 |
| NCS_UP-W | -6.2606259 | 0.0000586 |
| RI_SWS | 5.6476491 | 0.0000687 |
| RI_UP-N | 1.1936645 | 0.2499458 |
| RI_UP-NW | 1.3014342 | 0.2118233 |
| RI_UP-W | -0.8818209 | 0.3903462 |
| SWS_UP-N | -2.7957833 | 0.0169999 |
| SWS_UP-NW | -5.6470712 | 0.0000295 |
| SWS_UP-W | -5.5094534 | 0.0001466 |
| UP-N_UP-NW | -0.3290665 | 0.7474460 |
| UP-N_UP-W | -1.8187654 | 0.0857421 |
| UP-NW_UP-W | -2.0429687 | 0.0613004 |
sql <- "SELECT l.ID,
l.Line,
t.Line AS Short,
CAST(strftime('%m', Report_Month) AS INT) as Month_Num,
SUM(t.Late_Trains) AS Total_Late_Trains,
AVG(t.Late_Trains) AS Avg_Late_Trains
FROM Trains t
INNER JOIN Lines l ON t.LineID = l.ID
WHERE l.Line <> 'SYSTEM'
GROUP BY l.ID,
l.Line,
t.Line,
CAST(strftime('%m', Report_Month) AS INT)
ORDER BY l.ID,
CAST(strftime('%m', Report_Month) AS INT)"
agg_sql <- transform(dbGetQuery(conn, sql), Month = factor(month.abb[Month_Num], levels=month.abb))
kable_styling(kable(agg_sql[sample(1:nrow(agg_sql), 20),]),
bootstrap_options = c("striped", "hover", "condensed"))
| ID | Line | Short | Month_Num | Total_Late_Trains | Avg_Late_Trains | Month | |
|---|---|---|---|---|---|---|---|
| 36 | 3 | Metra Electric Main Line | Elec-ML | 12 | 1599 | 29.611111 | Dec |
| 13 | 2 | Heritage Corridor | Heritage | 1 | 336 | 10.500000 | Jan |
| 86 | 8 | North Central Service | NCS | 2 | 1368 | 34.200000 | Feb |
| 106 | 9 | Rock Island District | RI | 10 | 1605 | 26.750000 | Oct |
| 27 | 3 | Metra Electric Main Line | Elec-ML | 3 | 1202 | 20.033333 | Mar |
| 16 | 2 | Heritage Corridor | Heritage | 4 | 189 | 5.727273 | Apr |
| 100 | 9 | Rock Island District | RI | 4 | 1661 | 27.683333 | Apr |
| 76 | 7 | Milwaukee District / West | Milw-W | 4 | 1208 | 20.133333 | Apr |
| 152 | 13 | Union Pacific / West | UP-W | 8 | 2792 | 46.533333 | Aug |
| 17 | 2 | Heritage Corridor | Heritage | 5 | 354 | 10.727273 | May |
| 37 | 4 | Metra Electric South Chicago | Elec-BI | 1 | 780 | 15.600000 | Jan |
| 153 | 13 | Union Pacific / West | UP-W | 9 | 1857 | 30.950000 | Sep |
| 64 | 6 | Milwaukee District / North | Milw-N | 4 | 1932 | 32.200000 | Apr |
| 79 | 7 | Milwaukee District / West | Milw-W | 7 | 2300 | 38.333333 | Jul |
| 62 | 6 | Milwaukee District / North | Milw-N | 2 | 3327 | 55.450000 | Feb |
| 115 | 10 | SouthWest Service | SWS | 7 | 1186 | 23.254902 | Jul |
| 19 | 2 | Heritage Corridor | Heritage | 7 | 378 | 11.454546 | Jul |
| 45 | 4 | Metra Electric South Chicago | Elec-BI | 9 | 470 | 9.215686 | Sep |
| 114 | 10 | SouthWest Service | SWS | 6 | 1608 | 32.160000 | Jun |
| 125 | 11 | Union Pacific / North | UP-N | 5 | 1623 | 27.050000 | May |
ggplot(agg_sql, aes(Month, Total_Late_Trains, fill=Line)) +
geom_col(position = "dodge") +
labs(title="Metra Late Trains by Month", x="Year", y="Late Trains") +
scale_x_discrete("Month", breaks=unique(agg_sql$Month)) +
scale_y_continuous(expand = c(0, 0), label=comma) + guides(fill=FALSE) +
scale_fill_manual(values=seaborn_palette) +
facet_wrap(~Line, ncol=3, scales="free_y") +
theme(legend.position="bottom",
plot.title = element_text(hjust=0.5, size=18),
axis.text.x = element_text(angle=0, hjust=0.5))
sql <- "SELECT CAST(strftime('%Y', c.Report_Month) AS INT) AS Year,
SUM(CASE WHEN c.Late_Cause = 'Accident' THEN c.Late_Trains ELSE NULL END) AS [Accident],
SUM(CASE WHEN c.Late_Cause = 'Catenary Failure' THEN c.Late_Trains ELSE NULL END) AS [Catenary],
SUM(CASE WHEN c.Late_Cause = 'Freight Interference - Peak' THEN c.Late_Trains ELSE NULL END) AS [Freight Interference Peak],
SUM(CASE WHEN c.Late_Cause = 'Freight Interference - Off-Peak' THEN c.Late_Trains ELSE NULL END) AS [Freight Interference Off Peak],
SUM(CASE WHEN c.Late_Cause = 'Human Error' THEN c.Late_Trains ELSE NULL END) AS [Human Error],
SUM(CASE WHEN c.Late_Cause = 'Lift Deployment' THEN c.Late_Trains ELSE NULL END) AS [Lift Deployment],
SUM(CASE WHEN c.Late_Cause = 'Locomotive Failure' THEN c.Late_Trains ELSE NULL END) AS [Locomotive Failure],
SUM(CASE WHEN c.Late_Cause = 'Non-Locomotive Equipment Failure' THEN c.Late_Trains ELSE NULL END) AS [Non-Locomotive Equipment Failure],
SUM(CASE WHEN c.Late_Cause = 'Obstruction/Debris' THEN c.Late_Trains ELSE NULL END) AS [Obstruction Debris],
SUM(CASE WHEN c.Late_Cause = 'Other' THEN c.Late_Trains ELSE NULL END) AS [Other],
SUM(CASE WHEN c.Late_Cause = 'Passenger Loading' THEN c.Late_Trains ELSE NULL END) AS [Passenger Loading],
SUM(CASE WHEN c.Late_Cause = 'Passenger Train Interference' THEN c.Late_Trains ELSE NULL END) AS [Passenger Train Interference],
SUM(CASE WHEN c.Late_Cause = 'Sick, Injured, Unruly Passenger' THEN c.Late_Trains ELSE NULL END) AS [Sick Injured Unruly Passenger],
SUM(CASE WHEN c.Late_Cause = 'Signal/Switch Failure' THEN c.Late_Trains ELSE NULL END) AS [Signal Switch Failure],
SUM(CASE WHEN c.Late_Cause = 'Track Work' THEN c.Late_Trains ELSE NULL END) AS [Track Work],
SUM(CASE WHEN c.Late_Cause = 'Weather' THEN c.Late_Trains ELSE NULL END) AS [Weather] FROM Causes c
GROUP BY CAST(strftime('%Y', c.Report_Month) AS INT);"
wide_sql <- dbGetQuery(conn, sql)
scroll_box(kable_styling(knitr::kable(wide_sql),
bootstrap_options = c("striped", "hover", "condensed")), width = "100%")
| Year | Accident | Catenary | Freight Interference Peak | Freight Interference Off Peak | Human Error | Lift Deployment | Locomotive Failure | Non-Locomotive Equipment Failure | Obstruction Debris | Other | Passenger Loading | Passenger Train Interference | Sick Injured Unruly Passenger | Signal Switch Failure | Track Work | Weather |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2009 | 522 | 116 | 688 | 1040 | 1058 | 510 | 1202 | 402 | 798 | 538 | 2736 | 608 | 788 | 2798 | 1616 | 2150 |
| 2010 | 1600 | 196 | 1754 | 3054 | 2212 | 1082 | 2476 | 1144 | 1468 | 1158 | 4130 | 1484 | 1670 | 5290 | 2746 | 2946 |
| 2011 | 1338 | 80 | 990 | 2272 | 1740 | 902 | 1320 | 486 | 802 | 792 | 4290 | 988 | 1000 | 3296 | 2758 | 3094 |
| 2012 | 932 | 162 | 496 | 1474 | 1294 | 500 | 1086 | 326 | 848 | 638 | 2364 | 440 | 874 | 2506 | 1806 | 1262 |
| 2013 | 1150 | 338 | 642 | 1706 | 1508 | 410 | 1202 | 468 | 782 | 526 | 2276 | 404 | 768 | 3274 | 1328 | 2194 |
| 2014 | 1370 | 144 | 1336 | 2334 | 1470 | 428 | 1658 | 828 | 1100 | 568 | 1509 | 490 | 732 | 2536 | 1964 | 3322 |
| 2015 | 898 | 302 | 726 | 1360 | 1318 | 330 | 1130 | 390 | 870 | 488 | 1084 | 260 | 574 | 1770 | 1188 | 1950 |
| 2016 | 1088 | 200 | 618 | 1066 | 1270 | 290 | 1704 | 382 | 892 | 484 | 1082 | 308 | 814 | 2782 | 2000 | 1108 |
| 2017 | 1268 | 66 | 718 | 1294 | 1796 | 496 | 1596 | 398 | 1090 | 640 | 1162 | 266 | 712 | 2446 | 1896 | 1194 |
| 2018 | 814 | 154 | 1112 | 1916 | 1990 | 640 | 1732 | 552 | 1230 | 562 | 1324 | 444 | 776 | 3478 | 1570 | 2108 |
scroll_box(kable_styling(kable(cor(wide_sql[-1], use = "complete.obs", method="pearson")),
bootstrap_options = c("striped", "hover", "condensed")), width = "100%")
| Accident | Catenary | Freight Interference Peak | Freight Interference Off Peak | Human Error | Lift Deployment | Locomotive Failure | Non-Locomotive Equipment Failure | Obstruction Debris | Other | Passenger Loading | Passenger Train Interference | Sick Injured Unruly Passenger | Signal Switch Failure | Track Work | Weather | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Accident | 1.0000000 | -0.0571909 | 0.6196663 | 0.7373892 | 0.6300398 | 0.4951796 | 0.6159002 | 0.6666842 | 0.4974521 | 0.6643722 | 0.3652908 | 0.5278519 | 0.5798711 | 0.4952077 | 0.6901787 | 0.4391694 |
| Catenary | -0.0571909 | 1.0000000 | -0.1357559 | -0.0438564 | -0.1705789 | -0.3446550 | -0.1504044 | 0.0015760 | -0.1653271 | -0.2195482 | -0.2041612 | -0.1828021 | -0.0959763 | -0.0188764 | -0.5307573 | -0.0582482 |
| Freight Interference Peak | 0.6196663 | -0.1357559 | 1.0000000 | 0.9055098 | 0.7302826 | 0.7151507 | 0.8259202 | 0.9622278 | 0.8339600 | 0.7363075 | 0.4311431 | 0.7540010 | 0.7063508 | 0.7282518 | 0.5910244 | 0.7508904 |
| Freight Interference Off Peak | 0.7373892 | -0.0438564 | 0.9055098 | 1.0000000 | 0.7529271 | 0.7904312 | 0.6569837 | 0.8892313 | 0.6659303 | 0.7972720 | 0.6107565 | 0.8029986 | 0.7435435 | 0.7480295 | 0.6527215 | 0.7935347 |
| Human Error | 0.6300398 | -0.1705789 | 0.7302826 | 0.7529271 | 1.0000000 | 0.7518244 | 0.7422892 | 0.6521274 | 0.8059425 | 0.7214470 | 0.3432964 | 0.5811082 | 0.6256107 | 0.7313770 | 0.5249417 | 0.3568060 |
| Lift Deployment | 0.4951796 | -0.3446550 | 0.7151507 | 0.7904312 | 0.7518244 | 1.0000000 | 0.5793972 | 0.6491509 | 0.5546727 | 0.9197200 | 0.8436129 | 0.9302116 | 0.8559500 | 0.8286376 | 0.7889907 | 0.5610627 |
| Locomotive Failure | 0.6159002 | -0.1504044 | 0.8259202 | 0.6569837 | 0.7422892 | 0.5793972 | 1.0000000 | 0.8334991 | 0.9082875 | 0.6965826 | 0.2240091 | 0.6151823 | 0.7431315 | 0.7710464 | 0.5922982 | 0.3001438 |
| Non-Locomotive Equipment Failure | 0.6666842 | 0.0015760 | 0.9622278 | 0.8892313 | 0.6521274 | 0.6491509 | 0.8334991 | 1.0000000 | 0.8091589 | 0.7532443 | 0.4373013 | 0.7563912 | 0.7624001 | 0.7610562 | 0.5534812 | 0.6992606 |
| Obstruction Debris | 0.4974521 | -0.1653271 | 0.8339600 | 0.6659303 | 0.8059425 | 0.5546727 | 0.9082875 | 0.8091589 | 1.0000000 | 0.6446795 | 0.0968578 | 0.5028209 | 0.6174117 | 0.6557449 | 0.4004296 | 0.2865157 |
| Other | 0.6643722 | -0.2195482 | 0.7363075 | 0.7972720 | 0.7214470 | 0.9197200 | 0.6965826 | 0.7532443 | 0.6446795 | 1.0000000 | 0.7718366 | 0.9286207 | 0.9502399 | 0.8323155 | 0.8042321 | 0.4541852 |
| Passenger Loading | 0.3652908 | -0.2041612 | 0.4311431 | 0.6107565 | 0.3432964 | 0.8436129 | 0.2240091 | 0.4373013 | 0.0968578 | 0.7718366 | 1.0000000 | 0.8907353 | 0.7599024 | 0.6769158 | 0.7213697 | 0.5713612 |
| Passenger Train Interference | 0.5278519 | -0.1828021 | 0.7540010 | 0.8029986 | 0.5811082 | 0.9302116 | 0.6151823 | 0.7563912 | 0.5028209 | 0.9286207 | 0.8907353 | 1.0000000 | 0.9342033 | 0.8602439 | 0.7997392 | 0.6390272 |
| Sick Injured Unruly Passenger | 0.5798711 | -0.0959763 | 0.7063508 | 0.7435435 | 0.6256107 | 0.8559500 | 0.7431315 | 0.7624001 | 0.6174117 | 0.9502399 | 0.7599024 | 0.9342033 | 1.0000000 | 0.9149180 | 0.7682354 | 0.3985369 |
| Signal Switch Failure | 0.4952077 | -0.0188764 | 0.7282518 | 0.7480295 | 0.7313770 | 0.8286376 | 0.7710464 | 0.7610562 | 0.6557449 | 0.8323155 | 0.6769158 | 0.8602439 | 0.9149180 | 1.0000000 | 0.6286025 | 0.4424591 |
| Track Work | 0.6901787 | -0.5307573 | 0.5910244 | 0.6527215 | 0.5249417 | 0.7889907 | 0.5922982 | 0.5534812 | 0.4004296 | 0.8042321 | 0.7213697 | 0.7997392 | 0.7682354 | 0.6286025 | 1.0000000 | 0.4417094 |
| Weather | 0.4391694 | -0.0582482 | 0.7508904 | 0.7935347 | 0.3568060 | 0.5610627 | 0.3001438 | 0.6992606 | 0.2865157 | 0.4541852 | 0.5713612 | 0.6390272 | 0.3985369 | 0.4424591 | 0.4417094 | 1.0000000 |
sql <- "SELECT CAST(strftime('%Y', c.Report_Month) AS INT) AS Year,
c.Late_Cause,
SUM(c.Late_Trains) AS Total_Late_Trains
FROM Causes c
INNER JOIN Lines l ON c.LineID = l.ID
WHERE c.Late_Cause IN
(SELECT DISTINCT sub_c.Late_Cause
FROM Causes AS sub_c
WHERE strftime('%Y', sub_c.Report_Month) = '2012')
AND c.Late_Cause <> 'TOTAL TRAINS DELAYED'
GROUP BY strftime('%Y', c.Report_Month),
c.Late_Cause"
agg_sql <- dbGetQuery(conn, sql)
kable_styling(knitr::kable(agg_sql[sample(1:nrow(agg_sql), 20),]),
bootstrap_options = c("striped", "hover", "condensed"))
| Year | Late_Cause | Total_Late_Trains | |
|---|---|---|---|
| 120 | 2016 | Accident | 1088 |
| 108 | 2015 | Human Error | 1318 |
| 7 | 2009 | Lift Deployment | 510 |
| 12 | 2009 | Passenger Loading | 2736 |
| 156 | 2018 | Freight Interference - Off-Peak | 1916 |
| 85 | 2013 | Weather | 2194 |
| 75 | 2013 | Lift Deployment | 410 |
| 125 | 2016 | Human Error | 1270 |
| 27 | 2010 | Obstruction/Debris | 1468 |
| 89 | 2014 | Freight Interference - Peak | 1336 |
| 139 | 2017 | Freight Interference - Off-Peak | 1294 |
| 57 | 2012 | Human Error | 1294 |
| 87 | 2014 | Catenary Failure | 144 |
| 33 | 2010 | Track Work | 2746 |
| 47 | 2011 | Passenger Train Interference | 988 |
| 65 | 2012 | Sick, Injured, Unruly Passenger | 874 |
| 82 | 2013 | Sick, Injured, Unruly Passenger | 768 |
| 1 | 2009 | Accident | 522 |
| 106 | 2015 | Freight Interference - Peak | 726 |
| 105 | 2015 | Freight Interference - Off-Peak | 1360 |
ggplot(subset(agg_sql, Late_Cause != 'Freight Interference - Total'),
aes(Year, Total_Late_Trains, fill=Late_Cause)) +
geom_bar(position = "fill", stat = "identity") +
labs(title="Metra Late Trains by Cause", x="Year", y="Late Trains") +
guides(fill=guide_legend(title="Cause", ncol=4)) +
scale_x_continuous("year", breaks=unique(agg_sql$Year)) +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_manual(values=seaborn_palette) +
theme(legend.position="bottom",
plot.title = element_text(hjust=0.5, size=18),
axis.text.x = element_text(angle=0, hjust=0.5))
# DISCONNECT FROM DATABASE
dbDisconnect(conn)