lim1 <- s_curve_obj$s_curve_umap_scaled_obj$lim1
lim2 <- s_curve_obj$s_curve_umap_scaled_obj$lim2
r2 <- diff(lim2)/diff(lim1)
umap_scaled <- s_curve_obj$s_curve_umap_scaled_obj$scaled_nldr
nldr1 <- umap_scaled |>
ggplot(aes(x = emb1,
y = emb2))+
geom_point(alpha=0.1, size=1, colour='#e41a1c') +
theme(aspect.ratio = 1)
## To initialize number of bins along the x-axis
bin1_vec <- 5:sqrt(NROW(s_curve_noise_training)/r2)
error_df_umap1 <- data.frame(matrix(nrow = 0, ncol = 0))
for (xbins in bin1_vec) {
hb_obj <- calc_bins_y(bin1 = xbins, r2 = r2, q = 0.1)
bin2 <- hb_obj$bin2
a1 <- hb_obj$a1
a2 <- hb_obj$a2
scurve_model <- fit_highd_model(
highd_data = s_curve_noise_training,
nldr_data = umap_scaled,
bin1 = xbins,
r2 = r2,
q = 0.1,
is_bin_centroid = TRUE
)
df_bin_centroids_scurve <- scurve_model$df_bin_centroids
df_bin_scurve <- scurve_model$df_bin
## Compute error
error_df <- glance(
model_2d = df_bin_centroids_scurve,
model_highd = df_bin_scurve,
highd_data = s_curve_noise_training) |>
mutate(bin1 = xbins,
bin2 = bin2,
b = bin1 * bin2,
b_non_empty = NROW(df_bin_centroids_scurve),
method = "UMAP",
a1 = round(a1, 2),
a2 = round(a2, 2))
error_df_umap1 <- bind_rows(error_df_umap1, error_df)
}
nldr_scaled_obj <- gen_scaled_data(
data = s_curve_noise_umap2)
umap_scaled <- nldr_scaled_obj$scaled_nldr
nldr2 <- umap_scaled |>
ggplot(aes(x = emb1,
y = emb2))+
geom_point(alpha=0.1, size=1, colour='#ff7f00') +
theme(aspect.ratio = 1)
lim1 <- nldr_scaled_obj$lim1
lim2 <- nldr_scaled_obj$lim2
r2 <- diff(lim2)/diff(lim1)
## To initialize number of bins along the x-axis
bin1_vec <- 5:sqrt(NROW(s_curve_noise_training)/r2)
error_df_umap2 <- data.frame(matrix(nrow = 0, ncol = 0))
for (xbins in bin1_vec) {
hb_obj <- calc_bins_y(bin1 = xbins, r2 = r2, q = 0.1)
bin2 <- hb_obj$bin2
a1 <- hb_obj$a1
a2 <- hb_obj$a2
scurve_model <- fit_highd_model(
highd_data = s_curve_noise_training,
nldr_data = umap_scaled,
bin1 = xbins,
r2 = r2,
q = 0.1,
is_bin_centroid = TRUE
)
df_bin_centroids_scurve <- scurve_model$df_bin_centroids
df_bin_scurve <- scurve_model$df_bin
## Compute error
error_df <- glance(
model_2d = df_bin_centroids_scurve,
model_highd = df_bin_scurve,
highd_data = s_curve_noise_training) |>
mutate(bin1 = xbins,
bin2 = bin2,
b = bin1 * bin2,
b_non_empty = NROW(df_bin_centroids_scurve),
method = "UMAP2",
a1 = round(a1, 2),
a2 = round(a2, 2))
error_df_umap2 <- bind_rows(error_df_umap2, error_df)
}
nldr_scaled_obj <- gen_scaled_data(
data = s_curve_noise_umap3)
umap_scaled <- nldr_scaled_obj$scaled_nldr
nldr3 <- umap_scaled |>
ggplot(aes(x = emb1,
y = emb2))+
geom_point(alpha=0.1, size=1, colour='#4daf4a') +
theme(aspect.ratio = 1)
lim1 <- nldr_scaled_obj$lim1
lim2 <- nldr_scaled_obj$lim2
r2 <- diff(lim2)/diff(lim1)
## To initialize number of bins along the x-axis
bin1_vec <- 5:sqrt(NROW(s_curve_noise_training)/r2)
error_df_umap3 <- data.frame(matrix(nrow = 0, ncol = 0))
for (xbins in bin1_vec) {
hb_obj <- calc_bins_y(bin1 = xbins, r2 = r2, q = 0.1)
bin2 <- hb_obj$bin2
a1 <- hb_obj$a1
a2 <- hb_obj$a2
scurve_model <- fit_highd_model(
highd_data = s_curve_noise_training,
nldr_data = umap_scaled,
bin1 = xbins,
r2 = r2,
q = 0.1,
is_bin_centroid = TRUE
)
df_bin_centroids_scurve <- scurve_model$df_bin_centroids
df_bin_scurve <- scurve_model$df_bin
## Compute error
error_df <- glance(
model_2d = df_bin_centroids_scurve,
model_highd = df_bin_scurve,
highd_data = s_curve_noise_training) |>
mutate(bin1 = xbins,
bin2 = bin2,
b = bin1 * bin2,
b_non_empty = NROW(df_bin_centroids_scurve),
method = "UMAP3",
a1 = round(a1, 2),
a2 = round(a2, 2))
error_df_umap3 <- bind_rows(error_df_umap3, error_df)
}
nldr_scaled_obj <- gen_scaled_data(
data = s_curve_noise_umap4)
umap_scaled <- nldr_scaled_obj$scaled_nldr
nldr4 <- umap_scaled |>
ggplot(aes(x = emb1,
y = emb2))+
geom_point(alpha=0.1, size=1, colour='#a65628') +
theme(aspect.ratio = 1)
lim1 <- nldr_scaled_obj$lim1
lim2 <- nldr_scaled_obj$lim2
r2 <- diff(lim2)/diff(lim1)
## To initialize number of bins along the x-axis
bin1_vec <- 5:sqrt(NROW(s_curve_noise_training)/r2)
error_df_umap4 <- data.frame(matrix(nrow = 0, ncol = 0))
for (xbins in bin1_vec) {
hb_obj <- calc_bins_y(bin1 = xbins, r2 = r2, q = 0.1)
bin2 <- hb_obj$bin2
a1 <- hb_obj$a1
a2 <- hb_obj$a2
scurve_model <- fit_highd_model(
highd_data = s_curve_noise_training,
nldr_data = umap_scaled,
bin1 = xbins,
r2 = r2,
q = 0.1,
is_bin_centroid = TRUE
)
df_bin_centroids_scurve <- scurve_model$df_bin_centroids
df_bin_scurve <- scurve_model$df_bin
## Compute error
error_df <- glance(
model_2d = df_bin_centroids_scurve,
model_highd = df_bin_scurve,
highd_data = s_curve_noise_training) |>
mutate(bin1 = xbins,
bin2 = bin2,
b = bin1 * bin2,
b_non_empty = NROW(df_bin_centroids_scurve),
method = "UMAP4",
a1 = round(a1, 2),
a2 = round(a2, 2))
error_df_umap4 <- bind_rows(error_df_umap4, error_df)
}
nldr_scaled_obj <- gen_scaled_data(
data = s_curve_noise_umap5)
umap_scaled <- nldr_scaled_obj$scaled_nldr
nldr5 <- umap_scaled |>
ggplot(aes(x = emb1,
y = emb2))+
geom_point(alpha=0.1, size=1, colour='#636363') +
theme(aspect.ratio = 1)
lim1 <- nldr_scaled_obj$lim1
lim2 <- nldr_scaled_obj$lim2
r2 <- diff(lim2)/diff(lim1)
## To initialize number of bins along the x-axis
bin1_vec <- 5:sqrt(NROW(s_curve_noise_training)/r2)
error_df_umap5 <- data.frame(matrix(nrow = 0, ncol = 0))
for (xbins in bin1_vec) {
hb_obj <- calc_bins_y(bin1 = xbins, r2 = r2, q = 0.1)
bin2 <- hb_obj$bin2
a1 <- hb_obj$a1
a2 <- hb_obj$a2
scurve_model <- fit_highd_model(
highd_data = s_curve_noise_training,
nldr_data = umap_scaled,
bin1 = xbins,
r2 = r2,
q = 0.1,
is_bin_centroid = TRUE
)
df_bin_centroids_scurve <- scurve_model$df_bin_centroids
df_bin_scurve <- scurve_model$df_bin
## Compute error
error_df <- glance(
model_2d = df_bin_centroids_scurve,
model_highd = df_bin_scurve,
highd_data = s_curve_noise_training) |>
mutate(bin1 = xbins,
bin2 = bin2,
b = bin1 * bin2,
b_non_empty = NROW(df_bin_centroids_scurve),
method = "UMAP5",
a1 = round(a1, 2),
a2 = round(a2, 2))
error_df_umap5 <- bind_rows(error_df_umap5, error_df)
}
nldr_scaled_obj <- gen_scaled_data(
data = s_curve_noise_umap6)
umap_scaled <- nldr_scaled_obj$scaled_nldr
nldr6 <- umap_scaled |>
ggplot(aes(x = emb1,
y = emb2))+
geom_point(alpha=0.1, size=1, colour='#984ea3') +
theme(aspect.ratio = 1)
lim1 <- nldr_scaled_obj$lim1
lim2 <- nldr_scaled_obj$lim2
r2 <- diff(lim2)/diff(lim1)
## To initialize number of bins along the x-axis
bin1_vec <- 5:sqrt(NROW(s_curve_noise_training)/r2)
error_df_umap6 <- data.frame(matrix(nrow = 0, ncol = 0))
for (xbins in bin1_vec) {
hb_obj <- calc_bins_y(bin1 = xbins, r2 = r2, q = 0.1)
bin2 <- hb_obj$bin2
a1 <- hb_obj$a1
a2 <- hb_obj$a2
scurve_model <- fit_highd_model(
highd_data = s_curve_noise_training,
nldr_data = umap_scaled,
bin1 = xbins,
r2 = r2,
q = 0.1,
is_bin_centroid = TRUE
)
df_bin_centroids_scurve <- scurve_model$df_bin_centroids
df_bin_scurve <- scurve_model$df_bin
## Compute error
error_df <- glance(
model_2d = df_bin_centroids_scurve,
model_highd = df_bin_scurve,
highd_data = s_curve_noise_training) |>
mutate(bin1 = xbins,
bin2 = bin2,
b = bin1 * bin2,
b_non_empty = NROW(df_bin_centroids_scurve),
method = "UMAP6",
a1 = round(a1, 2),
a2 = round(a2, 2))
error_df_umap6 <- bind_rows(error_df_umap6, error_df)
}
error_df_all <- dplyr::bind_rows(error_df_umap1,
error_df_umap2,
error_df_umap3,
error_df_umap4,
error_df_umap5,
error_df_umap6)
error_df_all <- error_df_all |>
mutate(a1 = round(a1, 2)) |>
filter(bin1 >= 5) |>
group_by(method, a1) |>
filter(MSE == min(MSE)) |>
ungroup()
error_df_all |>
arrange(-a1) |>
head(5)
#> # A tibble: 5 × 9
#> Error MSE bin1 bin2 b b_non_empty method a1 a2
#> <dbl> <dbl> <int> <dbl> <dbl> <int> <chr> <dbl> <dbl>
#> 1 2559. 0.202 5 7 35 19 UMAP3 0.3 0.26
#> 2 2991. 0.306 5 5 25 16 UMAP2 0.28 0.24
#> 3 2875. 0.279 5 5 25 17 UMAP5 0.28 0.25
#> 4 2403. 0.180 5 6 30 23 UMAP4 0.26 0.22
#> 5 2454. 0.189 5 6 30 20 UMAP6 0.26 0.23
error_plot <- ggplot(error_df_all,
aes(x = a1,
y = sqrt(MSE),
color = method)) +
geom_point(size = 0.8) +
geom_line(linewidth = 0.3) +
scale_x_continuous(breaks =
sort(unique(error_df_all$a1))[
seq(1, length(
unique(error_df_all$a1)),
by = 5)]) +
scale_color_manual(
values=c('#e41a1c','#ff7f00','#4daf4a',
"#a65628",'#636363', '#984ea3'))+
ylab("RMSE") +
xlab(expression(paste("binwidth (", a[1], ")"))) +
theme_minimal() +
theme(panel.border = element_rect(fill = 'transparent'),
plot.title = element_text(size = 12, hjust = 0.5, vjust = -0.5),
axis.ticks.x = element_line(),
axis.ticks.y = element_line(),
legend.position = "none",
axis.text.x = element_text(size = 7),
axis.text.y = element_text(size = 7),
axis.title.x = element_text(size = 7),
axis.title.y = element_text(size = 7),
aspect.ratio = 1.5)
error_plot + wrap_plots(nldr1, nldr2,
nldr3, nldr4,
nldr5,nldr6,
ncol = 2, widths = c(50, 50))
