Hemos elegido hacer nuestro análisis con datos sobre los diferentes delitos que hay en Europa. Para ello hemos descargado de Eurostat un data frame que recoge los delitos registrados por la policía en función de la categoría de delito (número de delincuentes); aquí lo tenéis.
library(tidyverse)
library(eurostat)
library(rio)
library(gganimate)
library(viridis)
library(summarytools)
library(kableExtra)
library(gridExtra)
Los datos cubren 39 países europeos, desde 1993 hasta 2007.
# Download the Eurostat "crim_gen" table, leaving the time column in its raw
# form, then attach labels while keeping the codes of `geo` and `iccs`.
df <- get_eurostat("crim_gen", time_format = "raw")
df <- label_eurostat(df, code = c("geo", "iccs"))

# Keep the six working columns under the project's own names and store the
# year as a number.
df <- df %>%
  select(
    code_pais = geo_code,
    pais = geo,
    iccs_code,
    infraccion = iccs,
    anyo = time,
    numinf = values
  ) %>%
  mutate(anyo = as.numeric(anyo))
Los datos tienen 6 variables y recogen en total 3532 valores.
# Remove the "FX" and "TR" (Turkey) country codes and the "TOTAL" offence
# aggregate, then replace the long offence labels with short Spanish names
# and shorten two country names. Unmatched labels are kept as they are.
df <- df %>%
  filter(code_pais != "FX", iccs_code != "TOTAL", code_pais != "TR") %>%
  mutate(
    infraccion = case_when(
      infraccion == "Intentional homicide" ~ "Homicidios",
      infraccion == "Acts causing harm or intending to cause harm to the person, injurious acts of a sexual nature and acts against property involving violence or threat against a person" ~ "Actos violentos",
      infraccion == "Robbery" ~ "Robos",
      infraccion == "Burglary of private residential premises" ~ "Robo en residencias privadas",
      infraccion == "Theft of a motorized land vehicle" ~ "Robo de vehiculos",
      infraccion == "Unlawful acts involving controlled drugs or precursors" ~ "Actos ilicitos con drogas",
      TRUE ~ as.character(infraccion)
    ),
    pais = case_when(
      pais == "Germany (until 1990 former territory of the FRG)" ~ "Germany",
      pais == "England and Wales" ~ "RU",
      TRUE ~ as.character(pais)
    )
  )
Ahora nos quedan 6 variables y recogen 2830 valores.
Para no volver a empezar desde cero, hemos exportado nuestro df limpio a la carpeta de datos del proyecto.
# Save the cleaned data frame as an RDS file in the project's data folder so
# the download and cleaning steps do not have to be repeated.
rio::export(df, "./datos/df_crim_gen.rds")
## Classes 'tbl_df', 'tbl' and 'data.frame': 2830 obs. of 6 variables:
## $ code_pais : Factor w/ 39 levels "AT","BE","BG",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ pais : chr "Austria" "Belgium" "Bulgaria" "Switzerland" ...
## $ iccs_code : Factor w/ 7 levels "ICCS0101","ICCS02-04",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ infraccion: chr "Homicidios" "Homicidios" "Homicidios" "Homicidios" ...
## $ anyo : num 2007 2007 2007 2007 2007 ...
## $ numinf : num 45 211 169 51 11 126 757 76 93 128 ...
## Data Frame Summary
## df
## Dimensions: 2830 x 6
## Duplicates: 0
##
## ----------------------------------------------------------------------------------------------------------------------------------
## No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing
## ---- ------------- -------------------------------- ---------------------- ------------------------------------ -------- ---------
## 1 code_pais 1. AT 74 ( 2.6%) 2830 0
## [factor] 2. BE 48 ( 1.7%) (100%) (0%)
## 3. BG 70 ( 2.5%)
## 4. CH 74 ( 2.6%)
## 5. CY 90 ( 3.2%)
## 6. CZ 81 ( 2.9%)
## 7. DE 86 ( 3.0%)
## 8. DK 83 ( 2.9%)
## 9. EE 83 ( 2.9%)
## 10. EL 87 ( 3.1%)
## [ 29 others ] 2054 (72.6%) IIIIIIIIIIIIII
##
## 2 pais 1. Cyprus 90 ( 3.2%) 2830 0
## [character] 2. Hungary 90 ( 3.2%) (100%) (0%)
## 3. Italy 90 ( 3.2%)
## 4. Lithuania 90 ( 3.2%)
## 5. Norway 90 ( 3.2%)
## 6. Netherlands 89 ( 3.1%)
## 7. Poland 89 ( 3.1%)
## 8. Scotland 89 ( 3.1%)
## 9. Northern Ireland (UK) 88 ( 3.1%)
## 10. Romania 88 ( 3.1%)
## [ 27 others ] 1937 (68.5%) IIIIIIIIIIIII
##
## 3 iccs_code 1. ICCS0101 499 (17.6%) III 2830 0
## [factor] 2. ICCS02-04 460 (16.2%) III (100%) (0%)
## 3. ICCS0401 469 (16.6%) III
## 4. ICCS05012 478 (16.9%) III
## 5. ICCS050211 467 (16.5%) III
## 6. ICCS0601 457 (16.2%) III
## 7. TOTAL 0 ( 0.0%)
##
## 4 infraccion 1. Actos ilicitos con drogas 457 (16.2%) III 2830 0
## [character] 2. Actos violentos 460 (16.2%) III (100%) (0%)
## 3. Homicidios 499 (17.6%) III
## 4. Robo de vehiculos 467 (16.5%) III
## 5. Robo en residencias priva 478 (16.9%) III
## 6. Robos 469 (16.6%) III
##
## 5 anyo Mean (sd) : 2000.7 (4.2) 15 distinct values . : : 2830 0
## [numeric] min < med < max: : : : : (100%) (0%)
## 1993 < 2001 < 2007 : : : . . : . :
## IQR (CV) : 7 (0) : : : : : : : : : :
## : : : : : : : : : :
##
## 6 numinf Mean (sd) : 31370.1 (81961.6) 2387 distinct values : 2830 0
## [numeric] min < med < max: : (100%) (0%)
## 0 < 4645.5 < 997607 :
## IQR (CV) : 21996 (2.6) :
## : .
## ----------------------------------------------------------------------------------------------------------------------------------
# Homicide records (ICCS0101) sorted by count, with each count assigned to
# one of six classes for the choropleth legend.
# The previous group_by(anyo, numinf)/ungroup() pair around arrange() had no
# effect (arrange() ignores groups by default), and grouping by iccs_code was
# redundant because only one code remains after the filter.
df1a <- df %>%
  filter(iccs_code == "ICCS0101") %>%
  arrange(numinf) %>%
  mutate(cat_inf = cut_to_classes(numinf, n = 6))
# Country-level (NUTS 0) geometries at resolution "20", joined to the
# homicide data by country code. A full join keeps unmatched rows from both
# sides.
geometrias <- get_eurostat_geospatial(resolution = "20", nuts_level = "0")
mapdata <- df1a %>%
  full_join(geometrias, by = c("code_pais" = "id"))

# Rows drawn on the map: homicides in the three selected years.
mapdata_1a <- mapdata %>%
  filter(iccs_code == "ICCS0101") %>%
  filter(anyo %in% c(1994, 2000, 2007))
# Choropleth of the homicide classes, one panel per selected year, cropped to
# the longitude/latitude window given in coord_sf().
p <- ggplot(data = mapdata_1a) +
  geom_sf(
    mapping = aes(geometry = geometry, fill = cat_inf),
    color = "black",
    size = .1
  ) +
  facet_wrap(~ anyo) +
  scale_fill_brewer(palette = "RdYlBu", direction = -1) +
  labs(
    title = "Número de homicidios en Europa en 1994, 2000 y 2007",
    subtitle = "(Para Europa)",
    fill = "Intencional homicide",
    caption = "Datos Eurostat"
  ) +
  coord_sf(xlim = c(-10, 29), ylim = c(35, 70)) +
  theme_linedraw() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title.x = element_text(size = 8),
    axis.title.y = element_text(size = 8)
  )
p
# Total number of offences per year, country and offence type.
# The summary column is named directly instead of being renamed by position
# afterwards, so the result does not depend on column order.
df2 <- df %>%
  group_by(anyo, pais, infraccion) %>%
  summarise(numinf_total = sum(numinf)) %>%
  ungroup()
# The five countries with the most robberies ("Robos") in 2007, largest
# first. The summary column is named directly rather than renamed by
# position.
df2a <- df2 %>%
  filter(infraccion == "Robos", anyo == 2007) %>%
  group_by(pais) %>%
  summarise(numinf_robos = sum(numinf_total)) %>%
  arrange(desc(numinf_robos)) %>%
  slice(1:5)
# Bar chart of the five countries with the most robberies in 2007.
# The robbery count stays numeric so bar heights are proportional to it.
# Converting it to a factor (as before) made the y axis discrete, so the bars
# showed factor level positions instead of the counts. The ordering is
# applied to the country instead, so bars run from largest to smallest.
df2a <- df2a %>%
  mutate(pais = forcats::fct_reorder(pais, numinf_robos, .desc = TRUE))

p <- ggplot(df2a, aes(x = pais, y = numinf_robos, fill = pais)) +
  geom_col() +
  scale_fill_brewer(palette = "Spectral", direction = -1) +
  labs(
    title = "5 Países con más robos en 2007",
    fill = "Paises",
    caption = "Datos Eurostat"
  ) +
  ylab(label = "Número de robos") +
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title.x = element_text(size = 8),
    axis.title.y = element_text(size = 8)
  )
p
# France in 2007: each offence's share (in percent) of the sum of all listed
# offences, sorted from the smallest share to the largest.
df3a <- df %>%
  filter(pais == "France", anyo == 2007) %>%
  select(anyo, pais, infraccion, numinf) %>%
  mutate(Prop_infraccion = numinf / sum(numinf) * 100) %>%
  arrange(Prop_infraccion) %>%
  ungroup()

# Same table with the share stored as a factor whose levels are reordered by
# frequency.
df3b <- df3a %>%
  mutate(
    Prop_infraccion = forcats::fct_infreq(forcats::as_factor(Prop_infraccion))
  )
Para ello hemos reescalado las infracciones para poder mostrarlas en el mismo gráfico y, sobre todo, poder comparar su evolución en términos relativos.
# Total number of offences per year and offence type, summed over all
# countries. The summary column is named directly instead of by position.
# NOTE(review): the per-offence row counts differ, so the set of reporting
# countries may vary between years -- confirm before comparing totals.
df4a <- df %>%
  group_by(anyo, infraccion) %>%
  summarise(numinf_total = sum(numinf)) %>%
  ungroup()

# Rescale each offence series into the `rescaled_inf` column so that its
# earliest year equals 100. The baseline is picked by year rather than by row
# position, so it no longer depends on how the rows happen to be sorted. The
# result is ungrouped so later steps do not inherit the grouping.
df4a <- df4a %>%
  group_by(infraccion) %>%
  mutate(rescaled_inf = 100 * numinf_total / numinf_total[which.min(anyo)]) %>%
  ungroup()
Hemos conseguido contestar a esta pregunta con la función case_when().
* El código es :
# Label each country "Bad" when its mean offence count is above the mean of
# all rows, and "Good" otherwise. A country whose mean equals the overall
# mean is now labelled "Good"; before, the tie matched no branch and gave NA.
df_1 <- df %>%
  select(anyo, code_pais, pais, infraccion, numinf) %>%
  group_by(pais) %>%
  mutate(mean_pais = mean(numinf)) %>%
  ungroup() %>%
  mutate(
    mean_europ = mean(numinf),
    GOOD_or_BAD = case_when(
      mean_pais > mean_europ ~ "Bad",
      mean_pais <= mean_europ ~ "Good"
    )
  )

# Country-level (NUTS 0) geometries joined to the labelled data by country
# code. A full join keeps unmatched rows from both sides.
geometrias <- get_eurostat_geospatial(resolution = "20", nuts_level = "0")
mapdata <- full_join(df_1, geometrias, by = c("code_pais" = "id"))
Como ya hemos contestado un poco a esta pregunta en los apartados anteriores ahora lo vamos a hacer pero de manera dinámica con el paquete gganimate.
# Homicide records for Spain and France only.
df_2a <- df %>%
  filter(pais %in% c("Spain", "France"), infraccion == "Homicidios")

# Animated line chart: one line per country, revealed progressively along the
# year axis by transition_reveal().
p <- ggplot(
  data = df_2a,
  mapping = aes(x = anyo, y = numinf, group = pais, colour = pais)
) +
  geom_line() +
  geom_point() +
  scale_colour_manual(values = c(France = "blue", Spain = "red")) +
  scale_x_continuous(breaks = seq(1993, 2007, 2), limits = c(1993, 2007)) +
  labs(
    title = "Evolución de los homicidios en Francia y España",
    subtitle = "(Desde 1993 hasta 2007)",
    caption = "Datos de Eurostat",
    y = "Número de homicidios"
  ) +
  transition_reveal(anyo) +
  theme_linedraw() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12)
  )
p
La tabla recoge, para cada país, el porcentaje que representa cada tipo de infracción sobre el total de infracciones del país.
# For every country, the percentage that each offence type represents of the
# country's total offences over all years, one row per country and one column
# per offence type. A type with no records for a country shows as NA.
# Columns are referred to by name, and the shares are computed with a
# summarise instead of the previous row-number/slice/filter workaround.
df_3 <- df %>%
  group_by(pais, infraccion) %>%
  summarise(numinf_by_inf_and_country = sum(numinf)) %>%
  ungroup() %>%
  group_by(pais) %>%
  mutate(
    percent = (numinf_by_inf_and_country / sum(numinf_by_inf_and_country)) * 100
  ) %>%
  ungroup() %>%
  select(pais, infraccion, percent) %>%
  # Sorting first keeps both the rows and the new columns in alphabetical order.
  arrange(pais, infraccion) %>%
  pivot_wider(names_from = infraccion, values_from = percent) %>%
  # Fixed label column; it is not computed from the shares.
  mutate(Total = "100%")
# Render the table as HTML with striped/hover rows and a fixed light-grey
# header. TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
knitr::kable(df_3, format = "html") %>%
  kableExtra::kable_styling(
    bootstrap_options = c("striped", "hover"),
    fixed_thead = list(enabled = TRUE, background = "lightgrey")
  )
pais | Actos ilicitos con drogas | Actos violentos | Homicidios | Robo de vehiculos | Robo en residencias privadas | Robos | Total |
---|---|---|---|---|---|---|---|
Austria | 4.1408903 | 49.084041 | 0.1298585 | 12.650911 | 28.525119 | 5.4691801 | 100% |
Belgium | 4.8101086 | 43.273095 | 0.0975377 | 14.550799 | 26.991683 | 10.2767762 | 100% |
Bulgaria | 2.3132794 | 13.362142 | 0.5215756 | 2.948007 | 73.032978 | 7.8220177 | 100% |
Croatia | 29.5185755 | 42.131377 | 0.8129488 | 8.150941 | 13.008093 | 6.3780645 | 100% |
Cyprus | 10.0023282 | 6.207353 | 0.2632663 | 34.822788 | 47.559862 | 1.1444025 | 100% |
Czechia | 2.6535607 | 33.012907 | 0.2052390 | 38.850147 | 18.302592 | 6.9755544 | 100% |
Denmark | 1.5520067 | 18.094992 | 0.0760375 | 40.509286 | 34.376261 | 5.3914171 | 100% |
Estonia | 3.2033462 | 14.633031 | 1.1691045 | 16.086877 | 50.197709 | 14.7099322 | 100% |
Finland | 10.4016105 | 45.379758 | 0.1840150 | 29.332181 | 12.631397 | 2.0710390 | 100% |
France | 0.7229428 | 27.123103 | 0.1106335 | 38.896781 | 23.741546 | 9.4049941 | 100% |
Germany | 10.3484691 | 30.865878 | 0.1696239 | 25.830197 | 25.023695 | 7.7621368 | 100% |
Greece | 11.2088522 | 14.643157 | 0.2212833 | 12.095246 | 58.761444 | 3.0700174 | 100% |
Hungary | 4.4185446 | 39.067228 | 0.3233566 | 20.581861 | 31.494514 | 4.1144956 | 100% |
Iceland | 4.2412015 | NA | 0.1031159 | NA | 94.221027 | 1.4346559 | 100% |
Ireland | 3.3131156 | 17.074680 | 0.1219067 | 32.507252 | 44.226405 | 2.7566397 | 100% |
Italy | 5.3276338 | 16.806000 | 0.1219987 | 40.155057 | 27.276840 | 10.3124695 | 100% |
Latvia | 6.8459986 | 20.174485 | 1.9050779 | 23.618307 | 31.679772 | 15.7763598 | 100% |
Liechtenstein | 57.9767775 | 16.987434 | 0.0318117 | 1.670113 | 22.634007 | 0.6998568 | 100% |
Lithuania | 1.3356886 | 22.480755 | 1.6854910 | 21.690364 | 34.842473 | 17.9652286 | 100% |
Luxembourg | 18.5048875 | 21.483515 | 0.0769213 | 11.928712 | 42.493669 | 5.5122956 | 100% |
Malta | 3.5130719 | 8.277165 | 0.2246732 | 41.753472 | 35.355392 | 10.8762255 | 100% |
Montenegro | 7.7861587 | 45.987000 | 0.4101637 | 2.182905 | NA | 43.6337725 | 100% |
Netherlands | 4.0475033 | 36.153266 | 0.0705004 | 15.151472 | 38.188296 | 6.3889621 | 100% |
North Macedonia | 10.4062267 | 24.345168 | 1.4962505 | 16.167324 | 32.963713 | 14.6213171 | 100% |
Northern Ireland (UK) | 0.6833994 | 55.941477 | 0.0990863 | 18.744435 | 20.352201 | 4.1794022 | 100% |
Norway | 22.8944866 | 28.305697 | 0.0580880 | 28.114278 | 18.675133 | 1.9523177 | 100% |
Poland | 0.9514115 | 31.033885 | 0.3565053 | 23.839150 | 28.886794 | 14.9322543 | 100% |
Portugal | 4.1201614 | 21.968659 | 0.1414159 | 28.480730 | 25.663949 | 19.6250841 | 100% |
Romania | 3.3437008 | 23.379852 | 1.7942400 | 5.546310 | 55.178737 | 10.7571596 | 100% |
RU | 1.4355064 | 43.248512 | 0.0539820 | 24.388840 | 25.086532 | 5.7866271 | 100% |
Scotland | 9.0836018 | 27.153522 | 0.1211306 | 27.348229 | 31.548662 | 4.7448542 | 100% |
Serbia | 4.3418617 | 42.328235 | 0.2538511 | 5.882788 | 10.981496 | 36.2117689 | 100% |
Slovakia | 2.3237595 | 51.151398 | 0.4827373 | 27.337276 | 13.990279 | 4.7145505 | 100% |
Slovenia | 13.2688521 | 36.132420 | 0.3711793 | 17.387328 | 26.342970 | 6.4972511 | 100% |
Spain | 3.1026066 | 26.658567 | 0.1168904 | 30.504520 | 18.592730 | 21.0246859 | 100% |
Sweden | 2.7896268 | 44.434158 | 0.0530046 | 38.677594 | 9.695354 | 4.3502635 | 100% |
Switzerland | 8.6229240 | 10.234387 | 0.0812109 | NA | 76.403247 | 4.6582311 | 100% |
# Show the same per-country percentage table as a DT datatable widget.
DT::datatable(df_3)
Para la realización del trabajo hemos utilizado todos los tutoriales de clase. Además, hemos utilizado tutoriales de internet:
Para importar los datos : IMPORT
Para limpiar los datos : TIDY
Para transformar los datos : TRANSFORM
Para hacer las gráficas : VISUALISE
The R Graph Gallery. Visto en clase.
Otra Galería. No visto en clase.
Para comunicar los resultados : COMMUNICATE