JWS C Library
C language utility library
|
00001 /* 00002 * This work is licensed under a Creative Commons 00003 * Attribution-Noncommercial-Share Alike 3.0 United States License. 00004 * 00005 * http://creativecommons.org/licenses/by-nc-sa/3.0/us/ 00006 * 00007 * You are free: 00008 * 00009 * to Share - to copy, distribute, display, and perform the work 00010 * to Remix - to make derivative works 00011 * 00012 * Under the following conditions: 00013 * 00014 * Attribution. You must attribute the work in the manner specified by the 00015 * author or licensor (but not in any way that suggests that they endorse you 00016 * or your use of the work). 00017 * 00018 * Noncommercial. You may not use this work for commercial purposes. 00019 * 00020 * Share Alike. If you alter, transform, or build upon this work, you may 00021 * distribute the resulting work only under the same or similar license to 00022 * this one. 00023 * 00024 * For any reuse or distribution, you must make clear to others the license 00025 * terms of this work. The best way to do this is by including this header. 00026 * 00027 * Any of the above conditions can be waived if you get permission from the 00028 * copyright holder. 00029 * 00030 * Apart from the remix rights granted under this license, nothing in this 00031 * license impairs or restricts the author's moral rights. 00032 */ 00033 00034 00048 #include <jwsc/config.h> 00049 00050 #include <stdlib.h> 00051 #include <stdio.h> 00052 #include <assert.h> 00053 #include <math.h> 00054 #include <inttypes.h> 00055 00056 #include "jwsc/base/limits.h" 00057 #include "jwsc/vector/vector.h" 00058 #include "jwsc/vector/vector_math.h" 00059 #include "jwsc/matrix/matrix.h" 00060 #include "jwsc/matrix/matrix_math.h" 00061 #include "jwsc/matblock/matblock.h" 00062 #include "jwsc/prob/mv_pmf.h" 00063 #include "jwsc/stat/stat.h" 00064 #include "jwsc/stat/kmeans.h" 00065 #include "jwsc/stat/mmm.h" 00066 00067 00069 static void mmm_e_step_d 00070 ( 00071 const Matblock_d* mu, 00072 const Vector_d* pi, 00073 Matrix_d* gamma, 00074 const Matblock_u32* x 00075 ) 00076 { 00077 double p; 00078 double C_n; 00079 uint32_t k, K; 00080 uint32_t n, N; 00081 uint32_t d, D; 00082 uint32_t m, M; 00083 00084 Matrix_u32* x_n = 0; 00085 Matrix_d* mu_k = 0; 00086 00087 K = pi->num_elts; 00088 N = x->num_mats; 00089 D = x->num_rows; 00090 M = x->num_cols; 00091 00092 create_zero_matrix_d(&gamma, N, K); 00093 00094 for (n = 0; n < N; n++) 00095 { 00096 copy_matrix_from_matblock_u32(&x_n, x, MATBLOCK_MAT_MATRIX, n); 00097 C_n = 0; 00098 00099 for (k = 0; k < K; k++) 00100 { 00101 copy_matrix_from_matblock_d(&mu_k, mu, MATBLOCK_MAT_MATRIX, k); 00102 00103 p = 1.0; 00104 for (d = 0; d < D; d++) 00105 { 00106 for (m = 0; m < M; m++) 00107 { 00108 if (x_n->elts[ d ][ m ]) 00109 { 00110 p *= mu_k->elts[ d ][ m ]; 00111 } 00112 } 00113 } 00114 00115 gamma->elts[ n ][ k ] = pi->elts[ k ] * p; 00116 00117 C_n += gamma->elts[ n ][ k ]; 00118 } 00119 00120 assert(C_n > 0); 00121 00122 for (k = 0; k < K; k++) 00123 { 00124 gamma->elts[ n ][ k ] /= C_n; 00125 } 00126 } 00127 00128 free_matrix_u32(x_n); 00129 free_matrix_d(mu_k); 00130 } 00131 00132 00134 static void mmm_m_step_d 00135 ( 00136 Matblock_d* mu, 00137 Vector_d* pi, 00138 const Matrix_d* gamma, 00139 const Matblock_u32* x 00140 ) 00141 { 00142 uint32_t k, K; 00143 uint32_t n, N; 00144 uint32_t d, D; 00145 uint32_t m, M; 00146 00147 Matrix_d* mu_k = NULL; 00148 00149 K = pi->num_elts; 00150 N = x->num_mats; 00151 D = x->num_rows; 00152 M = x->num_cols; 00153 00154 create_zero_vector_d(&pi, K); 00155 00156 // mu 00157 create_matblock_d(&mu, K, D, M); 00158 for (k = 0; k < K; k++) 00159 { 00160 create_zero_matrix_d(&mu_k, D, M); 00161 00162 for (n = 0; n < N; n++) 00163 { 00164 for (d = 0; d < D; d++) 00165 { 00166 for (m = 0; m < M; m++) 00167 { 00168 mu_k->elts[ d ][ m ] += 00169 gamma->elts[ n ][ k ]*x->elts[ n ][ d ][ m ]; 00170 } 00171 } 00172 00173 pi->elts[ k ] += gamma->elts[ n ][ k ]; 00174 } 00175 00176 assert(pi->elts[ k ] > 0); 00177 00178 multiply_matrix_by_scalar_d(&mu_k, mu_k, 1.0 / pi->elts[ k ]); 00179 copy_matrix_into_matblock_d(&mu, mu, mu_k, MATBLOCK_MAT_MATRIX, k); 00180 } 00181 00182 // pi 00183 multiply_vector_by_scalar_d(&pi, pi, 1.0 / N); 00184 00185 free_matrix_d(mu_k); 00186 } 00187 00188 00194 double mmm_log_likelihood_d 00195 ( 00196 const Matblock_d* mu, 00197 const Vector_d* pi, 00198 const Matblock_u32* x 00199 ) 00200 { 00201 uint32_t k, K; 00202 uint32_t n, N; 00203 uint32_t d, D; 00204 uint32_t m, M; 00205 double p, q; 00206 double ll; 00207 00208 Matrix_u32* x_n = NULL; 00209 Matrix_d* mu_k = NULL; 00210 00211 K = pi->num_elts; 00212 N = x->num_mats; 00213 D = x->num_rows; 00214 M = x->num_cols; 00215 00216 ll = 0; 00217 00218 for (n = 0; n < N; n++) 00219 { 00220 p = 0; 00221 copy_matrix_from_matblock_u32(&x_n, x, MATBLOCK_MAT_MATRIX, n); 00222 00223 for (k = 0; k < K; k++) 00224 { 00225 copy_matrix_from_matblock_d(&mu_k, mu, MATBLOCK_MAT_MATRIX, k); 00226 00227 q = 1.0; 00228 for (d = 0; d < D; d++) 00229 { 00230 for (m = 0; m < M; m++) 00231 { 00232 if (x_n->elts[ d ][ m ]) 00233 { 00234 q *= mu_k->elts[ d ][ m ]; 00235 } 00236 } 00237 } 00238 00239 p += pi->elts[ k ] * q; 00240 00241 #if defined(JWSC_HAVE_ISNAN) && defined(JWSC_HAVE_ISINF) 00242 assert(!isnan(p) && !isinf(p)); 00243 #endif 00244 } 00245 00246 ll += log((p < JWSC_MIN_LOG_ARG) ? JWSC_MIN_LOG_ARG : p); 00247 } 00248 00249 free_matrix_u32(x_n); 00250 free_matrix_d(mu_k); 00251 00252 return ll; 00253 } 00254 00255 00267 void mmm_kmeans_init_d 00268 ( 00269 Matblock_d** mu_out, 00270 Vector_d** pi_out, 00271 const Matblock_u32* x, 00272 uint32_t K 00273 ) 00274 { 00275 uint32_t d, D; 00276 uint32_t m, M; 00277 uint32_t n, N; 00278 uint32_t k; 00279 double sum; 00280 00281 Vector_i32* members = NULL; 00282 Vector_u32* counts = NULL; 00283 Matrix_d* x_d = NULL; 00284 Matrix_d* mu_d = NULL; 00285 00286 N = x->num_mats; 00287 D = x->num_rows; 00288 M = x->num_cols; 00289 00290 create_matrix_d(&x_d, N, D*M); 00291 for (n = 0; n < N; n++) 00292 { 00293 for (d = 0; d < D; d++) 00294 { 00295 for (m = 0; m < M; m++) 00296 { 00297 x_d->elts[ n ][ d*M + m ] = x->elts[ n ][ d ][ m ]; 00298 } 00299 } 00300 } 00301 00302 init_kmeans_randomly_d(&mu_d, x_d, K); 00303 kmeans_d(&mu_d, &members, x_d, K, 2); 00304 00305 create_zero_vector_u32(&counts, K); 00306 for (n = 0; n < N; n++) 00307 { 00308 k = members->elts[ n ]; 00309 counts->elts[ k ]++; 00310 } 00311 00312 create_vector_d(pi_out, K); 00313 create_matblock_d(mu_out, K, D, M); 00314 00315 for (k = 0; k < K; k++) 00316 { 00317 for (d = 0; d < D; d++) 00318 { 00319 sum = 0; 00320 00321 for (m = 0; m < M; m++) 00322 { 00323 sum += mu_d->elts[ k ][ d*M + m ]; 00324 (*mu_out)->elts[ k ][ d ][ m ] = mu_d->elts[ k ][ d*M + m ]; 00325 } 00326 00327 assert(fabs(sum - 1.0) < 1.0e-8); 00328 } 00329 00330 (*pi_out)->elts[ k ] = counts->elts[ k ] / (double)N; 00331 } 00332 00333 free_vector_i32(members); 00334 free_vector_u32(counts); 00335 free_matrix_d(x_d); 00336 free_matrix_d(mu_d); 00337 } 00338 00339 00352 void train_mmm_d 00353 ( 00354 Matblock_d** mu_out, 00355 Vector_d** pi_out, 00356 Matrix_d** gamma_out, 00357 const Matblock_u32* x, 00358 uint32_t K 00359 ) 00360 { 00361 double ll, ll_prev; 00362 double ll_delta; 00363 00364 Matblock_d* mu; 00365 Vector_d* pi; 00366 Matrix_d* gamma = 0; 00367 00368 mmm_kmeans_init_d(mu_out, pi_out, x, K); 00369 mu = *mu_out; 00370 pi = *pi_out; 00371 00372 if (gamma_out) 00373 { 00374 create_matrix_d(gamma_out, x->num_rows, pi->num_elts); 00375 gamma = *gamma_out; 00376 } 00377 else 00378 { 00379 create_matrix_d(&gamma, x->num_rows, pi->num_elts); 00380 } 00381 00382 ll = log(0); 00383 ll_delta = -log(0); 00384 00385 while (ll_delta > 1.0e-10) 00386 { 00387 mmm_e_step_d(mu, pi, gamma, x); 00388 mmm_m_step_d(mu, pi, gamma, x); 00389 00390 ll_prev = ll; 00391 ll = mmm_log_likelihood_d(mu, pi, x); 00392 00393 ll_delta = ll - ll_prev; 00394 fprintf(stderr, "%e\n", ll_delta); 00395 } 00396 00397 if (!gamma_out) 00398 { 00399 free_matrix_d(gamma); 00400 } 00401 }