Skip to contents

To obtain the model summaries, first you need to obtain the 2D hexagonal bin centroids which is the 2D model (df_bin_centroids) and the high-dimensional coordinates of bin centroids which represents the lifted model in high-dimensions (df_bin).

r2 <- diff(range(s_curve_noise_umap$UMAP2))/diff(range(s_curve_noise_umap$UMAP1))
model <- fit_highd_model(training_data = s_curve_noise_training, 
                         emb_df = s_curve_noise_umap_scaled,
                         bin1 = 6, r2 = r2,
                         col_start_highd = "x")
df_bin_centroids <- model$df_bin_centroids
df_bin <- model$df_bin

Let’s first compute the model summaries for training data. To do that, you need to predict 2D embedding for the training data.

pred_df_training <- predict_emb(test_data = s_curve_noise_training, 
                             df_bin_centroids = df_bin_centroids, 
                             df_bin = df_bin, type_NLDR = "UMAP")
glimpse(pred_df_training)
#> Rows: 75
#> Columns: 4
#> $ pred_UMAP_1 <dbl> 0.01168629, 0.79349029, 0.90517658, 0.23505886, 0.23505886…
#> $ pred_UMAP_2 <dbl> 0.37839752, 0.95873649, 1.15218282, -0.00849512, 0.3783975…
#> $ ID          <int> 1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 14, 15, 16, 17, 19, 20, 21…
#> $ pred_hb_id  <int> 19, 41, 47, 8, 20, 77, 41, 34, 41, 71, 34, 19, 66, 14, 34,…
s_curve_noise_umap_scaled |>
    ggplot(aes(x = UMAP1,
               y = UMAP2,
               label = ID))+
    geom_point(alpha=0.5) +
    geom_point(data = pred_df_training, aes(x = pred_UMAP_1, y = pred_UMAP_2), 
               color = "red", alpha=0.5) +
    coord_equal() +
    theme(plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
          axis.text = element_text(size = 5),
          axis.title = element_text(size = 7))

Next, you can use the generated 2D embedding to compute model summaries.

glance(df_bin_centroids = df_bin_centroids, df_bin = df_bin, 
        training_data = s_curve_noise_training, newdata = NULL, 
        type_NLDR = "UMAP", col_start = "x")
#> # A tibble: 1 × 2
#>   Error   MSE
#>   <dbl> <dbl>
#> 1  39.4 0.127

To obtain a dataframe with the high-dimensional data, the fitted model data, the predicted NLDR data, and summary information, you can use the following function.

augment(df_bin_centroids = df_bin_centroids, df_bin = df_bin, 
        training_data = s_curve_noise_training, newdata = NULL, 
        type_NLDR = "UMAP", col_start = "x") |>
  head(5)
#> # A tibble: 5 × 32
#>      ID      x1     x2       x3       x4        x5       x6       x7 pred_hb_id
#>   <int>   <dbl>  <dbl>    <dbl>    <dbl>     <dbl>    <dbl>    <dbl>      <int>
#> 1     1 -0.120  0.114  -1.99    -0.00246 -0.0178   -0.0181  -0.00317         19
#> 2     2 -0.0492 0.822   0.00121  0.0161   0.00968  -0.0834   0.00230         41
#> 3     3 -0.774  0.243   0.367   -0.0198   0.00408  -0.0349  -0.00911         47
#> 4     4 -0.606  1.96   -1.80     0.0132  -0.000479 -0.00478 -0.00843          8
#> 5     6  0.818  0.0388 -1.58     0.00253  0.00167   0.0781  -0.00771         20
#> # ℹ 23 more variables: model_high_d_x1 <dbl>, model_high_d_x2 <dbl>,
#> #   model_high_d_x3 <dbl>, model_high_d_x4 <dbl>, model_high_d_x5 <dbl>,
#> #   model_high_d_x6 <dbl>, model_high_d_x7 <dbl>, error_square_x1 <dbl>,
#> #   error_square_x2 <dbl>, error_square_x3 <dbl>, error_square_x4 <dbl>,
#> #   error_square_x5 <dbl>, error_square_x6 <dbl>, error_square_x7 <dbl>,
#> #   row_wise_total_error <dbl>, abs_error_x1 <dbl>, abs_error_x2 <dbl>,
#> #   abs_error_x3 <dbl>, abs_error_x4 <dbl>, abs_error_x5 <dbl>, …

The same workflow is followed for the test data as well.

pred_df_test <- predict_emb(test_data = s_curve_noise_test, 
                             df_bin_centroids = df_bin_centroids, 
                             df_bin = df_bin, type_NLDR = "UMAP")
s_curve_noise_umap_scaled |>
    ggplot(aes(x = UMAP1,
               y = UMAP2,
               label = ID))+
    geom_point(alpha=0.5) +
    geom_point(data = pred_df_test, aes(x = pred_UMAP_1, y = pred_UMAP_2), 
               color = "red", alpha=0.5) +
    coord_equal() +
    theme(plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
          axis.text = element_text(size = 5),
          axis.title = element_text(size = 7))

glance(df_bin_centroids = df_bin_centroids, df_bin = df_bin, 
        training_data = s_curve_noise_training, newdata = s_curve_noise_test, 
        type_NLDR = "UMAP", col_start = "x")
#> # A tibble: 1 × 2
#>   Error   MSE
#>   <dbl> <dbl>
#> 1  14.0 0.136
augment(df_bin_centroids = df_bin_centroids, df_bin = df_bin, 
        training_data = s_curve_noise_training, newdata = s_curve_noise_test, 
        type_NLDR = "UMAP", col_start = "x") |>
  head(5)
#> # A tibble: 5 × 32
#>      ID      x1    x2     x3       x4       x5       x6       x7 pred_hb_id
#>   <int>   <dbl> <dbl>  <dbl>    <dbl>    <dbl>    <dbl>    <dbl>      <int>
#> 1     5 -0.478   1.61 -1.88   0.0101  -0.00746  0.0101   0.00972          8
#> 2    10 -0.727   1.66  0.314  0.00269  0.0196   0.0559  -0.00481         46
#> 3    13  0.513   1.86  1.86  -0.00648 -0.0127   0.00635 -0.00770         70
#> 4    18  0.0635  1.48 -2.00   0.00458  0.0164  -0.0627  -0.00371         15
#> 5    27  0.918   1.36 -1.40   0.0161   0.0160  -0.0190   0.00341         21
#> # ℹ 23 more variables: model_high_d_x1 <dbl>, model_high_d_x2 <dbl>,
#> #   model_high_d_x3 <dbl>, model_high_d_x4 <dbl>, model_high_d_x5 <dbl>,
#> #   model_high_d_x6 <dbl>, model_high_d_x7 <dbl>, error_square_x1 <dbl>,
#> #   error_square_x2 <dbl>, error_square_x3 <dbl>, error_square_x4 <dbl>,
#> #   error_square_x5 <dbl>, error_square_x6 <dbl>, error_square_x7 <dbl>,
#> #   row_wise_total_error <dbl>, abs_error_x1 <dbl>, abs_error_x2 <dbl>,
#> #   abs_error_x3 <dbl>, abs_error_x4 <dbl>, abs_error_x5 <dbl>, …