Skip to contents

To obtain the model summaries, first you need to obtain the 2D hexagonal bin centroids which is the 2D model (df_bin_centroids) and the high-dimensional coordinates of bin centroids which represents the lifted model in high-dimensions (df_bin).

lim1 <- s_curve_obj$s_curve_umap_scaled_obj$lim1
lim2 <- s_curve_obj$s_curve_umap_scaled_obj$lim2
r2 <- diff(lim2)/diff(lim1)

umap_scaled <- s_curve_obj$s_curve_umap_scaled_obj$scaled_nldr

model <- fit_highd_model(highd_data = s_curve_noise_training, 
                         nldr_data = umap_scaled,
                         bin1 = 15, r2 = r2)

df_bin_centroids <- model$df_bin_centroids
df_bin <- model$df_bin

Let’s first compute the model summaries for training data. To do that, you need to predict 2D embedding for the training data.

pred_df_training <- predict_emb(highd_data = s_curve_noise_training, 
                                model_2d = df_bin_centroids, 
                                model_highd = df_bin)
glimpse(pred_df_training)
#> Rows: 3,750
#> Columns: 4
#> $ pred_emb_1 <dbl> 0.27185943, 0.80898973, 0.80898973, 0.27185943, 0.56108344,
#> $ pred_emb_2 <dbl> 0.84184008, 0.48401817, 0.34088941, 0.55558255, 0.77027570,
#> $ ID         <int> 1, 2, 3, 5, 6, 7, 9, 10, 11, 12, 15, 16, 18, 19, 24, 25, 26…
#> $ pred_hb_id <int> 200, 132, 102, 140, 189, 79, 145, 101, 132, 35, 170, 95, 21…
s_curve_noise_umap_scaled |>
    ggplot(aes(x = emb1,
               y = emb2,
               label = ID))+
    geom_point(alpha=0.5) +
    geom_point(data = pred_df_training, aes(x = pred_emb_1, y = pred_emb_2), 
               color = "red", alpha=0.5) +
    coord_equal() +
    theme(plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
          axis.text = element_text(size = 5),
          axis.title = element_text(size = 7))

UMAP embedding of the S-curve training data with predictions in red.

Next, you can use the generated 2D embedding to compute model summaries.

glance(highd_data = s_curve_noise_training, 
       model_2d = df_bin_centroids, 
       model_highd = df_bin)
#> # A tibble: 1 × 2
#>   Error    MSE
#>   <dbl>  <dbl>
#> 1 1116. 0.0320

To obtain a dataframe with the high-dimensional data, the fitted model data, the predicted NLDR data, and summary information, you can use the following function.

augment(highd_data = s_curve_noise_training, 
        model_2d = df_bin_centroids, model_highd = df_bin) |>
  head(5)
#> # A tibble: 5 × 32
#>      ID      x1     x2       x3       x4       x5      x6        x7 pred_hb_id
#>   <int>   <dbl>  <dbl>    <dbl>    <dbl>    <dbl>   <dbl>     <dbl>      <int>
#> 1     1 -0.120  1.64   -1.99     0.0104   0.0125   0.0923 -0.00128         200
#> 2     2 -0.0492 1.51    0.00121 -0.0177   0.00726 -0.0362 -0.00535         132
#> 3     3 -0.774  1.30    0.367   -0.00173  0.0156  -0.0962  0.00335         102
#> 4     5 -0.478  0.0177 -1.88     0.00848  0.00533  0.0998  0.000677        140
#> 5     6  0.818  0.927  -1.58    -0.00318 -0.00980  0.0989  0.00696         189
#> # ℹ 23 more variables: model_high_d_x1 <dbl>, model_high_d_x2 <dbl>,
#> #   model_high_d_x3 <dbl>, model_high_d_x4 <dbl>, model_high_d_x5 <dbl>,
#> #   model_high_d_x6 <dbl>, model_high_d_x7 <dbl>, error_square_x1 <dbl>,
#> #   error_square_x2 <dbl>, error_square_x3 <dbl>, error_square_x4 <dbl>,
#> #   error_square_x5 <dbl>, error_square_x6 <dbl>, error_square_x7 <dbl>,
#> #   row_wise_total_error <dbl>, abs_error_x1 <dbl>, abs_error_x2 <dbl>,
#> #   abs_error_x3 <dbl>, abs_error_x4 <dbl>, abs_error_x5 <dbl>, …

The same workflow is followed for the test data as well.

pred_df_test <- predict_emb(highd_data = s_curve_noise_test, 
                            model_2d = df_bin_centroids, 
                            model_highd = df_bin)
s_curve_noise_umap_scaled |>
    ggplot(aes(x = emb1,
               y = emb2,
               label = ID))+
    geom_point(alpha=0.5) +
    geom_point(data = pred_df_test, aes(x = pred_emb_1, y = pred_emb_2), 
               color = "red", alpha=0.5) +
    coord_equal() +
    theme(plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
          axis.text = element_text(size = 5),
          axis.title = element_text(size = 7))

UMAP embedding of the S-curve test data with predictions in red.

glance(highd_data = s_curve_noise_test, 
       model_2d = df_bin_centroids, 
       model_highd = df_bin) 
#> # A tibble: 1 × 2
#>   Error    MSE
#>   <dbl>  <dbl>
#> 1  386. 0.0341
augment(highd_data = s_curve_noise_test, 
        model_2d = df_bin_centroids, model_highd = df_bin) |>
  head(5)
#> # A tibble: 5 × 32
#>      ID      x1    x2       x3       x4       x5      x6        x7 pred_hb_id
#>   <int>   <dbl> <dbl>    <dbl>    <dbl>    <dbl>   <dbl>     <dbl>      <int>
#> 1     4 -0.606  0.246 -1.80    -0.00897 -0.0187  -0.0716  0.00126         140
#> 2     8 -0.0691 1.59   0.00239  0.0127  -0.0130   0.0396 -0.000185        132
#> 3    13  0.513  1.02   1.86     0.0141  -0.0149   0.0619 -0.00309          66
#> 4    14  0.869  0.576 -0.505   -0.0196   0.00169 -0.0197  0.00597         177
#> 5    17 -0.737  1.94  -1.68     0.00601 -0.0113   0.0301 -0.00988         198
#> # ℹ 23 more variables: model_high_d_x1 <dbl>, model_high_d_x2 <dbl>,
#> #   model_high_d_x3 <dbl>, model_high_d_x4 <dbl>, model_high_d_x5 <dbl>,
#> #   model_high_d_x6 <dbl>, model_high_d_x7 <dbl>, error_square_x1 <dbl>,
#> #   error_square_x2 <dbl>, error_square_x3 <dbl>, error_square_x4 <dbl>,
#> #   error_square_x5 <dbl>, error_square_x6 <dbl>, error_square_x7 <dbl>,
#> #   row_wise_total_error <dbl>, abs_error_x1 <dbl>, abs_error_x2 <dbl>,
#> #   abs_error_x3 <dbl>, abs_error_x4 <dbl>, abs_error_x5 <dbl>, …