Hierarchical Clustering with Spatial Constraint

with ZAT- and calle-level indicators in Bogotá

Author

Heli Xu

Published

March 27, 2024

Code
library(dplyr)
library(sf)
library(ClustGeo)
library(ggplot2)
library(tidyr)
library(patchwork)
library(readxl)

Aggregation from calle to ZAT

First, we’ll aggregate a few calle level indicators to ZAT level for clustering. Here, for the calles matching to multiple ZAT units, we are making replicates of the calles and distribute them to each matched ZAT unit. We take the area-weighted average of road width and lanes, and sum of the areas of calzada, separado and andenes. After aggregation, for areas in m2, we are converting the unit to km2 and adjusting by the ZAT area in km2 to keep it consistent with the ZAT-level data.

Code
calle_zat_xwalk <- readRDS("calle_zat_xwalk.rds")

calle_clean_df <- readRDS("calles/calle_clean_df.rds")

calle_rename_df <- calle_clean_df %>%   
  rename_with(~ tolower(.), area:X9ceder_el) %>% 
  rename(
    area_calle = area,
    trees = arboles,
    grade = ave_pendie,
    road_width = p_ancho_cl,
    area_roadway = a_calzada,
    area_median = a_separado,
    vehicle_bridge = puente_vh,
    ped_bridge = puente_pt,
    area_sidewalk = a_andenes,
    brt_routes = rutas_trm,
    bus_routes = rutas_sitp,
    bus_stops = parad_sitp,
    bus_lanes = caril_sitp,
    bike_length = largo_cicl,
    road_marks = sen_horizo,
    warning_signs = se_hor_seg,
    road_signs = sen_vert,
    traffic_lights = semaforo,
    road_segments = segme_via,
    speed_limit = velcidad,
    num_lanes_total = sum_carril,
    num_lanes_avg = av_carrile,
   # road_signs_inv = sen_v_inv,
    stop_signs = s_pare_inv,
    lturn_sign = x1_girar_iz,
    bike_signs = x2_ciclov.,
    bus_signs = x3bus_o_tra,
    pedxwalk_signs = x4peatonale,
    speed_bump_signs = x5policiasa,
    stop_signs2 = x6pare,
    parking_signs = x7estaciona,
    school_zone_signs = x8zonas_esc,
    yield_signs = x9ceder_el
  )

rep_calle_zat <- calle_rename_df %>%
  select(CodigoCL, area_calle, area_roadway, area_median, area_sidewalk,
    road_width, road_marks, road_signs, pedxwalk_signs) %>% 
  left_join(calle_zat_xwalk, by = "CodigoCL") %>% 
  drop_na(ZAT) %>% #still need to remove NA here
  uncount(match_n)

rep_calle_zat_agg <- rep_calle_zat %>% 
  group_by(ZAT) %>% 
  summarise(
    across(road_width, ~weighted.mean(.x, w=area_calle), .names = "{.col}"),
    across(-c(CodigoCL, road_width), ~sum(.x), .names = "{.col}")
  )

zat_std2n <- readRDS("ZAT/zat_std2n.rds")

area <- zat_std2n %>%
  select(ZAT, areakm2)

rep_calle_zat_agg2 <- rep_calle_zat_agg %>% 
  select(-area_calle) %>% 
  left_join(area, by = "ZAT") %>% 
  mutate(
    across(c(area_roadway, area_median, area_sidewalk, 
      road_marks, road_signs, pedxwalk_signs), ~.x/areakm2),
    across(c(area_roadway, area_median, area_sidewalk), ~.x/1000) #to km2
  )

calle_2_zat_rep <- rep_calle_zat_agg2 %>% 
  select(-areakm2) %>% #we don't want this in clustering much
  left_join(zat_std2n %>% select(-areakm2), by = "ZAT") #areakm2 already included

Hierarchical Clustering with Spatial Constraint

Using a similar approach to the previous post, we’ll first use only the aggregated calle data with the ZAT data (indicator matrix D0) for the clustering process to determine the cluster number.

Keeping the cluster number as 4, we’ll calculate the neighborhood distance matrix (D1) using the network distance. To choose a mixing parameter, we’ll take a look at the homogeneity plot:

Choosing 0.25 as the mixing parameter, we can perform the hierarchical clustering and visualize the result:

Code
pal <- c("#225ea8","#41b6c4","#a1dab4","#fecb3e")
source("../../../../functions/cluster_plot.R")

map_clust <- ggplot()+
  geom_sf(data = zat_shapefile %>% st_zm(), color = "grey70")+
  geom_sf(data = calle2zat_geo, color = "grey70", fill = pal[calle2zat_geo$clus])

(cluster_plot(calle2zat_clust) | map_clust ) + 
  plot_annotation('Hierarchical Clustering with Indicators and Neighborhood Constraint', 
    subtitle = 'ZAT level and aggregated calle level, Bogotá',
    theme=theme(plot.title=element_text(size=14, face = "bold", hjust=0.5),
                plot.subtitle = element_text(size = 10, face = "bold", hjust = 0.5))) +
  plot_layout(width = c(1, 1.5), height = unit(20, "cm"))

Traffic flow and Population

To visualize and compare the neighborhood profile with traffic flow (total walking trips and public transit trips) and population at the ZAT level:

Code
#traffic flow
total_trips <- readRDS("../../../../data/zat_denom.rds")

walk_transit <- total_trips %>% 
  select(ZAT, total_walk, total_pubt) %>% 
  mutate(flow = total_walk+total_pubt) %>% 
  select(-total_walk, -total_pubt)

walk_transit_geo <- walk_transit %>% 
  left_join(zat_shapefile, by = "ZAT") %>% 
  filter(!st_is_empty(geometry)) %>% 
  st_as_sf() %>% 
  st_zm()

map_flow <- ggplot()+
  geom_sf(data = zat_shapefile %>% st_zm(), color = "grey70")+
  geom_sf(data = walk_transit_geo, aes(fill = flow), color = "grey70")+
  scale_fill_viridis_c() +
  labs(fill = "Traffic Flow")+
  theme_minimal()+  # note the order of adjusting theme
  theme(
    legend.position = c(0.9, 0.2),
    axis.text = element_blank(),
    panel.grid = element_blank()
  )

# population 
pop <- read_excel("../../../../data/pop_zat.xlsx")

zat_pop2021 <- pop %>% 
  select(ZAT, POBD2021) %>% 
  left_join(zat_shapefile, by = "ZAT") %>% 
  filter(!st_is_empty(geometry)) %>%
  st_as_sf() %>% 
  st_zm()
  

map_pop <- ggplot() +
  geom_sf(data = zat_shapefile %>% st_zm(), color = "grey70") +
  geom_sf(data = zat_pop2021, color = "grey70", aes(fill = POBD2021)) +
  scale_fill_viridis_c() +
  labs(fill = "Population")+
  theme_minimal()+  
  theme(
    legend.position = c(0.9, 0.2),
    axis.text = element_blank(),
    panel.grid = element_blank()
  )

map_clust2 <- ggplot()+
  geom_sf(data = zat_shapefile %>% st_zm(), color = "grey70")+
  geom_sf(data = calle2zat_geo, color = "grey70", aes(fill = factor(clus)))+
  scale_fill_manual(values = pal) +
  labs(fill = "Cluster")+
  theme_minimal()+
  theme(
    legend.position = c(0.9, 0.2),
    axis.text = element_blank(),
    panel.grid = element_blank()
  )
  
(map_clust2 | map_flow | map_pop)+ 
  plot_annotation('Neighborhood Profiles, Traffic Flows and Population', 
    subtitle = 'ZAT level, Bogotá',
    theme=theme(plot.title=element_text(size=14, face = "bold", hjust=0.5),
                plot.subtitle = element_text(size = 10, face = "bold", hjust = 0.5)))