To obtain the model summaries, first you need to obtain the 2D
hexagonal bin centroids which is the 2D model
(df_bin_centroids
) and the high-dimensional coordinates of
bin centroids which represents the lifted model in high-dimensions
(df_bin
).
lim1 <- s_curve_obj$s_curve_umap_scaled_obj$lim1
lim2 <- s_curve_obj$s_curve_umap_scaled_obj$lim2
r2 <- diff(lim2)/diff(lim1)
umap_scaled <- s_curve_obj$s_curve_umap_scaled_obj$scaled_nldr
model <- fit_highd_model(highd_data = s_curve_noise_training,
nldr_data = umap_scaled,
bin1 = 15, r2 = r2)
df_bin_centroids <- model$df_bin_centroids
df_bin <- model$df_bin
Let’s first compute the model summaries for training data. To do that, you need to predict 2D embedding for the training data.
pred_df_training <- predict_emb(highd_data = s_curve_noise_training,
model_2d = df_bin_centroids,
model_highd = df_bin)
glimpse(pred_df_training)
#> Rows: 3,750
#> Columns: 4
#> $ pred_emb_1 <dbl> 0.27185943, 0.80898973, 0.80898973, 0.27185943, 0.56108344,…
#> $ pred_emb_2 <dbl> 0.84184008, 0.48401817, 0.34088941, 0.55558255, 0.77027570,…
#> $ ID <int> 1, 2, 3, 5, 6, 7, 9, 10, 11, 12, 15, 16, 18, 19, 24, 25, 26…
#> $ pred_hb_id <int> 200, 132, 102, 140, 189, 79, 145, 101, 132, 35, 170, 95, 21…
s_curve_noise_umap_scaled |>
ggplot(aes(x = emb1,
y = emb2,
label = ID))+
geom_point(alpha=0.5) +
geom_point(data = pred_df_training, aes(x = pred_emb_1, y = pred_emb_2),
color = "red", alpha=0.5) +
coord_equal() +
theme(plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
axis.text = element_text(size = 5),
axis.title = element_text(size = 7))
Next, you can use the generated 2D embedding to compute model summaries.
glance(highd_data = s_curve_noise_training,
model_2d = df_bin_centroids,
model_highd = df_bin)
#> # A tibble: 1 × 2
#> Error MSE
#> <dbl> <dbl>
#> 1 1116. 0.0320
To obtain a dataframe with the high-dimensional data, the fitted model data, the predicted NLDR data, and summary information, you can use the following function.
augment(highd_data = s_curve_noise_training,
model_2d = df_bin_centroids, model_highd = df_bin) |>
head(5)
#> # A tibble: 5 × 32
#> ID x1 x2 x3 x4 x5 x6 x7 pred_hb_id
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
#> 1 1 -0.120 1.64 -1.99 0.0104 0.0125 0.0923 -0.00128 200
#> 2 2 -0.0492 1.51 0.00121 -0.0177 0.00726 -0.0362 -0.00535 132
#> 3 3 -0.774 1.30 0.367 -0.00173 0.0156 -0.0962 0.00335 102
#> 4 5 -0.478 0.0177 -1.88 0.00848 0.00533 0.0998 0.000677 140
#> 5 6 0.818 0.927 -1.58 -0.00318 -0.00980 0.0989 0.00696 189
#> # ℹ 23 more variables: model_high_d_x1 <dbl>, model_high_d_x2 <dbl>,
#> # model_high_d_x3 <dbl>, model_high_d_x4 <dbl>, model_high_d_x5 <dbl>,
#> # model_high_d_x6 <dbl>, model_high_d_x7 <dbl>, error_square_x1 <dbl>,
#> # error_square_x2 <dbl>, error_square_x3 <dbl>, error_square_x4 <dbl>,
#> # error_square_x5 <dbl>, error_square_x6 <dbl>, error_square_x7 <dbl>,
#> # row_wise_total_error <dbl>, abs_error_x1 <dbl>, abs_error_x2 <dbl>,
#> # abs_error_x3 <dbl>, abs_error_x4 <dbl>, abs_error_x5 <dbl>, …
The same workflow is followed for the test data as well.
pred_df_test <- predict_emb(highd_data = s_curve_noise_test,
model_2d = df_bin_centroids,
model_highd = df_bin)
s_curve_noise_umap_scaled |>
ggplot(aes(x = emb1,
y = emb2,
label = ID))+
geom_point(alpha=0.5) +
geom_point(data = pred_df_test, aes(x = pred_emb_1, y = pred_emb_2),
color = "red", alpha=0.5) +
coord_equal() +
theme(plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
axis.text = element_text(size = 5),
axis.title = element_text(size = 7))
glance(highd_data = s_curve_noise_test,
model_2d = df_bin_centroids,
model_highd = df_bin)
#> # A tibble: 1 × 2
#> Error MSE
#> <dbl> <dbl>
#> 1 386. 0.0341
augment(highd_data = s_curve_noise_test,
model_2d = df_bin_centroids, model_highd = df_bin) |>
head(5)
#> # A tibble: 5 × 32
#> ID x1 x2 x3 x4 x5 x6 x7 pred_hb_id
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
#> 1 4 -0.606 0.246 -1.80 -0.00897 -0.0187 -0.0716 0.00126 140
#> 2 8 -0.0691 1.59 0.00239 0.0127 -0.0130 0.0396 -0.000185 132
#> 3 13 0.513 1.02 1.86 0.0141 -0.0149 0.0619 -0.00309 66
#> 4 14 0.869 0.576 -0.505 -0.0196 0.00169 -0.0197 0.00597 177
#> 5 17 -0.737 1.94 -1.68 0.00601 -0.0113 0.0301 -0.00988 198
#> # ℹ 23 more variables: model_high_d_x1 <dbl>, model_high_d_x2 <dbl>,
#> # model_high_d_x3 <dbl>, model_high_d_x4 <dbl>, model_high_d_x5 <dbl>,
#> # model_high_d_x6 <dbl>, model_high_d_x7 <dbl>, error_square_x1 <dbl>,
#> # error_square_x2 <dbl>, error_square_x3 <dbl>, error_square_x4 <dbl>,
#> # error_square_x5 <dbl>, error_square_x6 <dbl>, error_square_x7 <dbl>,
#> # row_wise_total_error <dbl>, abs_error_x1 <dbl>, abs_error_x2 <dbl>,
#> # abs_error_x3 <dbl>, abs_error_x4 <dbl>, abs_error_x5 <dbl>, …