Code
library(dplyr)
library(sf)
library(ClustGeo)
library(ggplot2)
library(tidyr)
library(patchwork)
library(readxl)
with ZAT- and calle-level indicators in Bogotá
Heli Xu
March 27, 2024
First, we’ll aggregate a few calle level indicators to ZAT level for clustering. Here, for the calles matching to multiple ZAT units, we are making replicates of the calles and distribute them to each matched ZAT unit. We take the area-weighted average of road width and lanes, and sum of the areas of calzada, separado and andenes. After aggregation, for areas in m2, we are converting the unit to km2 and adjusting by the ZAT area in km2 to keep it consistent with the ZAT-level data.
calle_zat_xwalk <- readRDS("calle_zat_xwalk.rds")
calle_clean_df <- readRDS("calles/calle_clean_df.rds")
calle_rename_df <- calle_clean_df %>%
rename_with(~ tolower(.), area:X9ceder_el) %>%
rename(
area_calle = area,
trees = arboles,
grade = ave_pendie,
road_width = p_ancho_cl,
area_roadway = a_calzada,
area_median = a_separado,
vehicle_bridge = puente_vh,
ped_bridge = puente_pt,
area_sidewalk = a_andenes,
brt_routes = rutas_trm,
bus_routes = rutas_sitp,
bus_stops = parad_sitp,
bus_lanes = caril_sitp,
bike_length = largo_cicl,
road_marks = sen_horizo,
warning_signs = se_hor_seg,
road_signs = sen_vert,
traffic_lights = semaforo,
road_segments = segme_via,
speed_limit = velcidad,
num_lanes_total = sum_carril,
num_lanes_avg = av_carrile,
# road_signs_inv = sen_v_inv,
stop_signs = s_pare_inv,
lturn_sign = x1_girar_iz,
bike_signs = x2_ciclov.,
bus_signs = x3bus_o_tra,
pedxwalk_signs = x4peatonale,
speed_bump_signs = x5policiasa,
stop_signs2 = x6pare,
parking_signs = x7estaciona,
school_zone_signs = x8zonas_esc,
yield_signs = x9ceder_el
)
rep_calle_zat <- calle_rename_df %>%
select(CodigoCL, area_calle, area_roadway, area_median, area_sidewalk,
road_width, road_marks, road_signs, pedxwalk_signs) %>%
left_join(calle_zat_xwalk, by = "CodigoCL") %>%
drop_na(ZAT) %>% #still need to remove NA here
uncount(match_n)
rep_calle_zat_agg <- rep_calle_zat %>%
group_by(ZAT) %>%
summarise(
across(road_width, ~weighted.mean(.x, w=area_calle), .names = "{.col}"),
across(-c(CodigoCL, road_width), ~sum(.x), .names = "{.col}")
)
zat_std2n <- readRDS("ZAT/zat_std2n.rds")
area <- zat_std2n %>%
select(ZAT, areakm2)
rep_calle_zat_agg2 <- rep_calle_zat_agg %>%
select(-area_calle) %>%
left_join(area, by = "ZAT") %>%
mutate(
across(c(area_roadway, area_median, area_sidewalk,
road_marks, road_signs, pedxwalk_signs), ~.x/areakm2),
across(c(area_roadway, area_median, area_sidewalk), ~.x/1000) #to km2
)
calle_2_zat_rep <- rep_calle_zat_agg2 %>%
select(-areakm2) %>% #we don't want this in clustering much
left_join(zat_std2n %>% select(-areakm2), by = "ZAT") #areakm2 already included
Using a similar approach to the previous post, we’ll first use only the aggregated calle data with the ZAT data (indicator matrix D0
) for the clustering process to determine the cluster number.
Keeping the cluster number as 4, we’ll calculate the neighborhood distance matrix (D1
) using the network distance. To choose a mixing parameter, we’ll take a look at the homogeneity plot:
Choosing 0.25 as the mixing parameter, we can perform the hierarchical clustering and visualize the result:
pal <- c("#225ea8","#41b6c4","#a1dab4","#fecb3e")
source("../../../../functions/cluster_plot.R")
map_clust <- ggplot()+
geom_sf(data = zat_shapefile %>% st_zm(), color = "grey70")+
geom_sf(data = calle2zat_geo, color = "grey70", fill = pal[calle2zat_geo$clus])
(cluster_plot(calle2zat_clust) | map_clust ) +
plot_annotation('Hierarchical Clustering with Indicators and Neighborhood Constraint',
subtitle = 'ZAT level and aggregated calle level, Bogotá',
theme=theme(plot.title=element_text(size=14, face = "bold", hjust=0.5),
plot.subtitle = element_text(size = 10, face = "bold", hjust = 0.5))) +
plot_layout(width = c(1, 1.5), height = unit(20, "cm"))
To visualize and compare the neighborhood profile with traffic flow (total walking trips and public transit trips) and population at the ZAT level:
#traffic flow
total_trips <- readRDS("../../../../data/zat_denom.rds")
walk_transit <- total_trips %>%
select(ZAT, total_walk, total_pubt) %>%
mutate(flow = total_walk+total_pubt) %>%
select(-total_walk, -total_pubt)
walk_transit_geo <- walk_transit %>%
left_join(zat_shapefile, by = "ZAT") %>%
filter(!st_is_empty(geometry)) %>%
st_as_sf() %>%
st_zm()
map_flow <- ggplot()+
geom_sf(data = zat_shapefile %>% st_zm(), color = "grey70")+
geom_sf(data = walk_transit_geo, aes(fill = flow), color = "grey70")+
scale_fill_viridis_c() +
labs(fill = "Traffic Flow")+
theme_minimal()+ # note the order of adjusting theme
theme(
legend.position = c(0.9, 0.2),
axis.text = element_blank(),
panel.grid = element_blank()
)
# population
pop <- read_excel("../../../../data/pop_zat.xlsx")
zat_pop2021 <- pop %>%
select(ZAT, POBD2021) %>%
left_join(zat_shapefile, by = "ZAT") %>%
filter(!st_is_empty(geometry)) %>%
st_as_sf() %>%
st_zm()
map_pop <- ggplot() +
geom_sf(data = zat_shapefile %>% st_zm(), color = "grey70") +
geom_sf(data = zat_pop2021, color = "grey70", aes(fill = POBD2021)) +
scale_fill_viridis_c() +
labs(fill = "Population")+
theme_minimal()+
theme(
legend.position = c(0.9, 0.2),
axis.text = element_blank(),
panel.grid = element_blank()
)
map_clust2 <- ggplot()+
geom_sf(data = zat_shapefile %>% st_zm(), color = "grey70")+
geom_sf(data = calle2zat_geo, color = "grey70", aes(fill = factor(clus)))+
scale_fill_manual(values = pal) +
labs(fill = "Cluster")+
theme_minimal()+
theme(
legend.position = c(0.9, 0.2),
axis.text = element_blank(),
panel.grid = element_blank()
)
(map_clust2 | map_flow | map_pop)+
plot_annotation('Neighborhood Profiles, Traffic Flows and Population',
subtitle = 'ZAT level, Bogotá',
theme=theme(plot.title=element_text(size=14, face = "bold", hjust=0.5),
plot.subtitle = element_text(size = 10, face = "bold", hjust = 0.5)))