Skip to contents

Here, we’ll discuss the algorithm for binning data.

By passing the preprocessed 2D embedding data and the hexagonal grid configuration, you can obtain the hexagonal binning information, such as the centroid coordinates, the hexagonal polygon coordinates, and the standardised counts within each hexagon.

## Ratio of the UMAP2 range to the UMAP1 range of the embedding
r2 <- with(
  s_curve_noise_umap,
  diff(range(UMAP2)) / diff(range(UMAP1))
)

## Hexagonal binning of the preprocessed 2D embedding
## (bin1 is presumably the number of bins along the first axis -- confirm
## against the hex_binning() documentation)
hb_obj <- hex_binning(
  data = s_curve_noise_umap_scaled,
  bin1 = 6,
  r2 = r2
)

## Data set with all possible centroids in the hexagonal grid
all_centroids_df <- hb_obj$centroids
all_centroids_df |>
  glimpse()
#> Rows: 78
#> Columns: 3
#> $ hexID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1…
#> $ c_x   <dbl> -0.100000000, 0.113750068, 0.327500137, 0.541250205, 0.755000273…
#> $ c_y   <dbl> -0.20194144, -0.20194144, -0.20194144, -0.20194144, -0.20194144,…

## Polygon coordinates of the hexagons; rows that belong to the same
## hexagon share a hex_poly_id
hex_grid <- hb_obj$hex_poly
hex_grid |>
  glimpse()
#> Rows: 468
#> Columns: 3
#> $ hex_poly_id <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4…
#> $ x           <dbl> -0.100000000, -0.206875034, -0.206875034, -0.100000000, 0.…
#> $ y           <dbl> -0.07853278, -0.14023711, -0.26364577, -0.32535010, -0.263…

## To obtain the standardised counts within hexbins
counts_df <- hb_obj$std_cts

## Combine the centroids with their counts and keep only the rows whose
## drop_empty flag is FALSE (presumably the non-empty bins -- confirm
## against the extract_hexbin_centroids() documentation)
df_bin_centroids <- extract_hexbin_centroids(
  centroids_df = all_centroids_df,
  counts_df = counts_df
) |>
  filter(!drop_empty)
## Hexagonal grid outline with every centroid drawn in red
ggplot(hex_grid, aes(x = x, y = y)) +
  geom_polygon(
    aes(group = hex_poly_id),
    fill = "white",
    colour = "black"
  ) +
  geom_point(
    data = all_centroids_df,
    mapping = aes(x = c_x, y = c_y),
    colour = "red"
  ) +
  coord_fixed()

## Same grid, with the centroids kept in df_bin_centroids overplotted in
## purple on top of the full set of red centroids
ggplot(hex_grid, aes(x = x, y = y)) +
  geom_polygon(
    aes(group = hex_poly_id),
    fill = "white",
    colour = "black"
  ) +
  geom_point(
    data = all_centroids_df,
    mapping = aes(x = c_x, y = c_y),
    colour = "red"
  ) +
  geom_point(
    data = df_bin_centroids,
    mapping = aes(x = c_x, y = c_y),
    colour = "purple"
  ) +
  coord_fixed()

## Hexagonal grid outline with the embedding points overlaid in blue
ggplot(hex_grid, aes(x = x, y = y)) +
  geom_polygon(
    aes(group = hex_poly_id),
    fill = "white",
    colour = "black"
  ) +
  geom_point(
    data = s_curve_noise_umap_scaled,
    mapping = aes(x = UMAP1, y = UMAP2),
    colour = "blue"
  ) +
  coord_fixed()

## Attach the per-bin counts to the polygon coordinates, matching each
## hex_poly_id to the hb_id of the counts table
hex_grid_with_counts <- hex_grid |>
  left_join(counts_df, by = c("hex_poly_id" = "hb_id"))

## Hexagons filled by std_counts (hexagons with an NA fill are drawn in
## white) and labelled with their hexID at the centroid
ggplot(hex_grid_with_counts, aes(x = x, y = y)) +
  geom_polygon(
    aes(group = hex_poly_id, fill = std_counts),
    colour = "black"
  ) +
  geom_text(
    data = all_centroids_df,
    mapping = aes(x = c_x, y = c_y, label = hexID)
  ) +
  scale_fill_viridis_c(direction = -1, na.value = "#ffffff") +
  coord_fixed()

You can also use geom_hexgrid to visualise the hexagonal grid rather than geom_polygon.

## Draw the hexagonal grid directly from the centroids with geom_hexgrid()
ggplot(all_centroids_df, aes(x = c_x, y = c_y)) +
  geom_hexgrid() +
  coord_fixed() +
  labs(x = "x", y = "y") +
  theme(
    axis.title = element_text(size = 7),
    axis.text = element_text(size = 5)
  )