JWS C Library
C language utility library
regress.c
Go to the documentation of this file.
00001 /*
00002  * This work is licensed under a Creative Commons 
00003  * Attribution-Noncommercial-Share Alike 3.0 United States License.
00004  * 
00005  *    http://creativecommons.org/licenses/by-nc-sa/3.0/us/
00006  * 
00007  * You are free:
00008  * 
00009  *    to Share - to copy, distribute, display, and perform the work
00010  *    to Remix - to make derivative works
00011  * 
00012  * Under the following conditions:
00013  * 
00014  *    Attribution. You must attribute the work in the manner specified by the
00015  *    author or licensor (but not in any way that suggests that they endorse you
00016  *    or your use of the work).
00017  * 
00018  *    Noncommercial. You may not use this work for commercial purposes.
00019  * 
00020  *    Share Alike. If you alter, transform, or build upon this work, you may
00021  *    distribute the resulting work only under the same or similar license to
00022  *    this one.
00023  * 
00024  * For any reuse or distribution, you must make clear to others the license
00025  * terms of this work. The best way to do this is by including this header.
00026  * 
00027  * Any of the above conditions can be waived if you get permission from the
00028  * copyright holder.
00029  * 
00030  * Apart from the remix rights granted under this license, nothing in this
00031  * license impairs or restricts the author's moral rights.
00032  */
00033 
00034 
00046 #include <jwsc/config.h>
00047 
00048 #include <stdlib.h>
00049 #include <stdio.h>
00050 #include <assert.h>
00051 #include <math.h>
00052 #include <inttypes.h>
00053 
00054 #include "jwsc/base/error.h"
00055 #include "jwsc/vector/vector.h"
00056 #include "jwsc/vector/vector_math.h"
00057 #include "jwsc/matrix/matrix.h"
00058 #include "jwsc/matrix/matrix_math.h"
00059 #include "jwsc/prob/pdf.h"
00060 
00061 
00062 #if defined JWSC_HAVE_LAPACK
00063 
00092 Error* linear_least_squares_regress_f
00093 (
00094     Vector_f**      w_out,
00095     const Matrix_f* x,
00096     const Vector_f* y
00097 )
00098 {
00099     Matrix_f* x_inv = NULL;
00100     Error*    err;
00101 
00102     if ((err = pseudoinvert_matrix_f(&x_inv, x)) != NULL)
00103     {
00104         free_vector_f(*w_out); *w_out = NULL;
00105         return err;
00106     }
00107 
00108     assert(multiply_matrix_and_vector_f(w_out, x_inv, y) == NULL);
00109 
00110     free_matrix_f(x_inv);
00111 
00112     return NULL;
00113 }
00114 
00136 Error* linear_least_squares_regress_d
00137 (
00138     Vector_d**      w_out,
00139     const Matrix_d* x,
00140     const Vector_d* y
00141 )
00142 {
00143     Matrix_d* x_inv = NULL;
00144     Error*    err;
00145 
00146     if ((err = pseudoinvert_matrix_d(&x_inv, x)) != NULL)
00147     {
00148         free_vector_d(*w_out); *w_out = NULL;
00149         return err;
00150     }
00151 
00152     assert(multiply_matrix_and_vector_d(w_out, x_inv, y) == NULL);
00153 
00154     free_matrix_d(x_inv);
00155 
00156     return NULL;
00157 }
00158 
00176 static void split_ransac_points_f
00177 (
00178     Matrix_f**       x_1, 
00179     Vector_f**       y_1,
00180     Matrix_f**       x_2,
00181     Vector_f**       y_2,
00182     const Matrix_f*  x, 
00183     const Vector_f*  y,
00184     uint32_t         n_1
00185 )
00186 {
00187     uint32_t num_cols;
00188     uint32_t row, col;
00189     uint32_t row_1, row_2;
00190     uint32_t n, n_2;
00191     uint32_t i;
00192     uint32_t done;
00193 
00194     Vector_u32* decider = NULL;
00195 
00196     num_cols = x->num_cols;
00197     n_2 = x->num_rows - n_1;
00198 
00199     create_matrix_f(x_1, n_1, num_cols);
00200     create_vector_f(y_1, n_1);
00201 
00202     create_matrix_f(x_2, n_2, num_cols);
00203     create_vector_f(y_2, n_2);
00204 
00205     create_zero_vector_u32(&decider, x->num_rows);
00206     for (n = 0; n < n_1; n++)
00207     {
00208         done = 0;
00209         while (!done)
00210         {
00211             i = (uint32_t) floor(sample_uniform_pdf_f(0, 0.999*(x->num_rows)));
00212 
00213             if (!decider->elts[ i ])
00214             {
00215                 done = decider->elts[ i ] = 1;
00216             }
00217         }
00218     }
00219 
00220     row_1 = 0;
00221     row_2 = 0;
00222 
00223     for (row = 0; row < x->num_rows; row++)
00224     {
00225         if (decider->elts[ row ])
00226         {
00227             for (col = 0; col < num_cols; col++)
00228             {
00229                 (*x_1)->elts[ row_1 ][ col ] = x->elts[ row ][ col ];
00230             }
00231             (*y_1)->elts[ row_1 ] = y->elts[ row ];
00232 
00233             row_1++;
00234         }
00235         else
00236         {
00237             for (col = 0; col < num_cols; col++)
00238             {
00239                 (*x_2)->elts[ row_2 ][ col ] = x->elts[ row ][ col ];
00240             }
00241             (*y_2)->elts[ row_2 ] = y->elts[ row ];
00242 
00243             row_2++;
00244         }
00245     }
00246 
00247     free_vector_u32(decider);
00248 }
00249 
00254 static void split_ransac_points_d
00255 (
00256     Matrix_d**       x_1, 
00257     Vector_d**       y_1,
00258     Matrix_d**       x_2,
00259     Vector_d**       y_2,
00260     const Matrix_d*  x, 
00261     const Vector_d*  y,
00262     uint32_t         n_1
00263 )
00264 {
00265     uint32_t num_cols;
00266     uint32_t row, col;
00267     uint32_t row_1, row_2;
00268     uint32_t n, n_2;
00269     uint32_t i;
00270     uint32_t done;
00271 
00272     Vector_u32* decider = NULL;
00273 
00274     num_cols = x->num_cols;
00275     n_2 = x->num_rows - n_1;
00276 
00277     create_matrix_d(x_1, n_1, num_cols);
00278     create_vector_d(y_1, n_1);
00279 
00280     create_matrix_d(x_2, n_2, num_cols);
00281     create_vector_d(y_2, n_2);
00282 
00283     create_zero_vector_u32(&decider, x->num_rows);
00284     for (n = 0; n < n_1; n++)
00285     {
00286         done = 0;
00287         while (!done)
00288         {
00289             i = (uint32_t) floor(sample_uniform_pdf_d(0, 0.999*(x->num_rows)));
00290 
00291             if (!decider->elts[ i ])
00292             {
00293                 done = decider->elts[ i ] = 1;
00294             }
00295         }
00296     }
00297 
00298     row_1 = 0;
00299     row_2 = 0;
00300 
00301     for (row = 0; row < x->num_rows; row++)
00302     {
00303         if (decider->elts[ row ])
00304         {
00305             for (col = 0; col < num_cols; col++)
00306             {
00307                 (*x_1)->elts[ row_1 ][ col ] = x->elts[ row ][ col ];
00308             }
00309             (*y_1)->elts[ row_1 ] = y->elts[ row ];
00310 
00311             row_1++;
00312         }
00313         else
00314         {
00315             for (col = 0; col < num_cols; col++)
00316             {
00317                 (*x_2)->elts[ row_2 ][ col ] = x->elts[ row ][ col ];
00318             }
00319             (*y_2)->elts[ row_2 ] = y->elts[ row ];
00320 
00321             row_2++;
00322         }
00323     }
00324 
00325     free_vector_u32(decider);
00326 }
00327 
00344 static void append_ransac_fit_point_f
00345 (
00346     Matrix_f** x_fit,
00347     Vector_f** y_fit,
00348     Matrix_f*  x_nofit,
00349     Vector_f*  y_nofit,
00350     uint32_t   pt
00351 )
00352 {
00353     uint32_t num_rows, num_cols;
00354     uint32_t num_elts;
00355     uint32_t col;
00356     uint32_t elt;
00357 
00358     Matrix_f* new_x_fit = NULL;
00359     Vector_f* new_y_fit = NULL;
00360 
00361     num_rows = (*x_fit)->num_rows;
00362     num_cols = (*x_fit)->num_cols;
00363 
00364     create_matrix_f(&new_x_fit, num_rows+1, num_cols);
00365     copy_matrix_block_into_matrix_f(new_x_fit, 0, 0, *x_fit, 0, 0,
00366             num_rows, num_cols);
00367     free_matrix_f(*x_fit);
00368     *x_fit = new_x_fit;
00369 
00370     for (col = 0; col < num_cols; col++)
00371     {
00372         new_x_fit->elts[ num_rows ][ col ] = x_nofit->elts[ pt ][ col ];
00373     }
00374 
00375     num_elts = (*y_fit)->num_elts;
00376 
00377     create_vector_f(&new_y_fit, num_elts+1);
00378     for (elt = 0; elt < num_elts; elt++)
00379     {
00380         new_y_fit->elts[ elt ] = (*y_fit)->elts[ elt ];
00381     }
00382     free_vector_f(*y_fit);
00383     *y_fit = new_y_fit;
00384 
00385     new_y_fit->elts[ num_elts ] = y_nofit->elts[ pt ];
00386 }
00387 
00392 static void append_ransac_fit_point_d
00393 (
00394     Matrix_d** x_fit,
00395     Vector_d** y_fit,
00396     Matrix_d*  x_nofit,
00397     Vector_d*  y_nofit,
00398     uint32_t   pt
00399 )
00400 {
00401     uint32_t num_rows, num_cols;
00402     uint32_t num_elts;
00403     uint32_t col;
00404     uint32_t elt;
00405 
00406     Matrix_d* new_x_fit = NULL;
00407     Vector_d* new_y_fit = NULL;
00408 
00409     num_rows = (*x_fit)->num_rows;
00410     num_cols = (*x_fit)->num_cols;
00411 
00412     create_matrix_d(&new_x_fit, num_rows+1, num_cols);
00413     copy_matrix_block_into_matrix_d(new_x_fit, 0, 0, *x_fit, 0, 0,
00414             num_rows, num_cols);
00415     free_matrix_d(*x_fit);
00416     *x_fit = new_x_fit;
00417 
00418     for (col = 0; col < num_cols; col++)
00419     {
00420         new_x_fit->elts[ num_rows ][ col ] = x_nofit->elts[ pt ][ col ];
00421     }
00422 
00423     num_elts = (*y_fit)->num_elts;
00424 
00425     create_vector_d(&new_y_fit, num_elts+1);
00426     for (elt = 0; elt < num_elts; elt++)
00427     {
00428         new_y_fit->elts[ elt ] = (*y_fit)->elts[ elt ];
00429     }
00430     free_vector_d(*y_fit);
00431     *y_fit = new_y_fit;
00432 
00433     new_y_fit->elts[ num_elts ] = y_nofit->elts[ pt ];
00434 }
00435 
00477 Error* linear_least_squares_ransac_regress_f
00478 (
00479     Vector_f**      w_out,
00480     const Matrix_f* x,
00481     const Vector_f* y,
00482     uint32_t        n,
00483     uint32_t        k,
00484     float           t,
00485     uint32_t        d
00486 )
00487 {
00488     uint32_t i;
00489     uint32_t row;
00490     uint32_t elt;
00491     uint32_t N;
00492     uint32_t best_N;
00493 
00494     float r;
00495     float best_mse;
00496     float mse;
00497 
00498     Matrix_f* x_fit   = NULL;
00499     Matrix_f* x_nofit = NULL;
00500     Vector_f* y_fit   = NULL;
00501     Vector_f* y_nofit = NULL;
00502     Vector_f* r_fit   = NULL;
00503     Vector_f* r_nofit = NULL;
00504     Vector_f* best_w  = NULL;
00505     Error*    err;
00506 
00507     best_mse = -1;
00508     best_N = 0;
00509 
00510     for (i = 0; i < k; i++)
00511     {
00512         N = n;
00513 
00514         split_ransac_points_f(&x_fit, &y_fit, &x_nofit, &y_nofit, x, y, N);
00515 
00516         if ((err = linear_least_squares_regress_f(w_out, x_fit, y_fit)))
00517         {
00518             free_error(err);
00519             continue;
00520         }
00521 
00522         multiply_matrix_and_vector_f(&r_nofit, x_nofit, *w_out);
00523 
00524         for (row = 0; row < x_nofit->num_rows; row++)
00525         {
00526             r = fabs(y_nofit->elts[ row ] - r_nofit->elts[ row ]);
00527 
00528             if (r < t)
00529             {
00530                 append_ransac_fit_point_f(&x_fit, &y_fit, x_nofit, y_nofit,row);
00531 
00532                 N++;
00533             }
00534         }
00535 
00536         if (N >= d)
00537         {
00538             if ((err = linear_least_squares_regress_f(w_out, x_fit, y_fit)))
00539             {
00540                 free_error(err);
00541                 break;
00542             }
00543 
00544             mse = 0;
00545             multiply_matrix_and_vector_f(&r_fit, x_fit, *w_out);
00546             multiply_matrix_and_vector_f(&r_nofit, x_nofit, *w_out);
00547             for (elt = 0; elt < y_fit->num_elts; elt++)
00548             {
00549                 mse += (y_fit->elts[ elt ] - r_fit->elts[ elt ]) *
00550                     (y_fit->elts[ elt ] - r_fit->elts[ elt ]);
00551             }
00552             mse /= N;
00553 
00554             if (mse < best_mse || best_mse < 0)
00555             {
00556                 copy_vector_f(&best_w, *w_out);
00557                 best_mse = mse;
00558                 best_N = N;
00559 
00560                 /*
00561                 for (elt = 0; elt < r_nofit->num_elts; elt++)
00562                 {
00563                     r = fabs(y_nofit->elts[ elt ] - r_nofit->elts[ elt ]);
00564                     fprintf(stdout, "%3d  %8.3f  %8.3f\n", elt, r, 
00565                             y_nofit->elts[ elt ]);
00566                 }
00567                 fprintf(stdout, "--\n");
00568                 for (elt = 0; elt < r_fit->num_elts; elt++)
00569                 {
00570                     r = fabs(y_fit->elts[ elt ] - r_fit->elts[ elt ]);
00571                     fprintf(stdout, "%3d  %8.3f  %8.3f\n", elt, r, 
00572                             y_fit->elts[ elt ]);
00573                 }
00574 
00575                 fprintf(stdout, "%3d  %2d  %6.1f\n\n", i, best_N, best_mse);
00576                 */
00577             }
00578         }
00579     }
00580 
00581     free_vector_f(r_fit);
00582     free_vector_f(r_nofit);
00583 
00584     free_matrix_f(x_fit);
00585     free_vector_f(y_fit);
00586     free_matrix_f(x_nofit);
00587     free_vector_f(y_nofit);
00588 
00589     if (best_N < d || best_mse < 0)
00590     {
00591         free_vector_f(best_w);
00592         free_vector_f(*w_out); *w_out = NULL;
00593         return JWSC_ECALC("Could not fit with RANSAC criteria");
00594     }
00595 
00596     copy_vector_f(w_out, best_w);
00597     free_vector_f(best_w);
00598 
00599     return NULL;
00600 }
00601 
00631 Error* linear_least_squares_ransac_regress_d
00632 (
00633     Vector_d**      w_out,
00634     const Matrix_d* x,
00635     const Vector_d* y,
00636     uint32_t        n,
00637     uint32_t        k,
00638     double          t,
00639     uint32_t        d
00640 )
00641 {
00642     uint32_t i;
00643     uint32_t row;
00644     uint32_t elt;
00645     uint32_t N;
00646     uint32_t best_N;
00647 
00648     double r;
00649     double best_mse;
00650     double mse;
00651 
00652     Matrix_d* x_fit   = NULL;
00653     Matrix_d* x_nofit = NULL;
00654     Vector_d* y_fit   = NULL;
00655     Vector_d* y_nofit = NULL;
00656     Vector_d* r_fit   = NULL;
00657     Vector_d* r_nofit = NULL;
00658     Vector_d* best_w  = NULL;
00659     Error*    err;
00660 
00661     best_mse = -1;
00662     best_N = 0;
00663 
00664     for (i = 0; i < k; i++)
00665     {
00666         N = n;
00667 
00668         split_ransac_points_d(&x_fit, &y_fit, &x_nofit, &y_nofit, x, y, N);
00669 
00670         if ((err = linear_least_squares_regress_d(w_out, x_fit, y_fit)))
00671         {
00672             free_error(err);
00673             continue;
00674         }
00675 
00676         multiply_matrix_and_vector_d(&r_nofit, x_nofit, *w_out);
00677 
00678         for (row = 0; row < x_nofit->num_rows; row++)
00679         {
00680             r = fabs(y_nofit->elts[ row ] - r_nofit->elts[ row ]);
00681 
00682             if (r < t)
00683             {
00684                 append_ransac_fit_point_d(&x_fit, &y_fit, x_nofit, y_nofit,row);
00685 
00686                 N++;
00687             }
00688         }
00689 
00690         if (N >= d)
00691         {
00692             if ((err = linear_least_squares_regress_d(w_out, x_fit, y_fit)))
00693             {
00694                 free_error(err);
00695                 break;
00696             }
00697 
00698             mse = 0;
00699             multiply_matrix_and_vector_d(&r_fit, x_fit, *w_out);
00700             multiply_matrix_and_vector_d(&r_nofit, x_nofit, *w_out);
00701             for (elt = 0; elt < y_fit->num_elts; elt++)
00702             {
00703                 mse += (y_fit->elts[ elt ] - r_fit->elts[ elt ]) *
00704                        (y_fit->elts[ elt ] - r_fit->elts[ elt ]);
00705             }
00706             mse /= N;
00707 
00708             if (mse < best_mse || best_mse < 0)
00709             {
00710                 copy_vector_d(&best_w, *w_out);
00711                 best_mse = mse;
00712                 best_N = N;
00713 
00714                 /*
00715                 for (elt = 0; elt < r_nofit->num_elts; elt++)
00716                 {
00717                     r = fabs(y_nofit->elts[ elt ] - r_nofit->elts[ elt ]);
00718                     fprintf(stdout, "%3d  %8.3f  %8.3f\n", elt, r, 
00719                             y_nofit->elts[ elt ]);
00720                 }
00721                 fprintf(stdout, "--\n");
00722                 for (elt = 0; elt < r_fit->num_elts; elt++)
00723                 {
00724                     r = fabs(y_fit->elts[ elt ] - r_fit->elts[ elt ]);
00725                     fprintf(stdout, "%3d  %8.3f  %8.3f\n", elt, r, 
00726                             y_fit->elts[ elt ]);
00727                 }
00728 
00729                 fprintf(stdout, "%3d  %2d  %6.1f\n\n", i, best_N, best_mse);
00730                 */
00731             }
00732         }
00733     }
00734 
00735     free_vector_d(r_fit);
00736     free_vector_d(r_nofit);
00737 
00738     free_matrix_d(x_fit);
00739     free_vector_d(y_fit);
00740     free_matrix_d(x_nofit);
00741     free_vector_d(y_nofit);
00742 
00743     if (best_N < d || best_mse < 0)
00744     {
00745         free_vector_d(best_w);
00746         free_vector_d(*w_out); *w_out = NULL;
00747         return JWSC_ECALC("Could not fit with RANSAC criteria");
00748     }
00749 
00750     copy_vector_d(w_out, best_w);
00751     free_vector_d(best_w);
00752 
00753     return NULL;
00754 }
00755 
00758 #endif