Haplo Prediction
predict haplogroups
haplo_test.c
Go to the documentation of this file.
00001 /*
00002  * This work is licensed under a Creative Commons 
00003  * Attribution-Noncommercial-Share Alike 3.0 United States License.
00004  * 
00005  *    http://creativecommons.org/licenses/by-nc-sa/3.0/us/
00006  * 
00007  * You are free:
00008  * 
00009  *    to Share - to copy, distribute, display, and perform the work
00010  *    to Remix - to make derivative works
00011  * 
00012  * Under the following conditions:
00013  * 
00014  *    Attribution. You must attribute the work in the manner specified by the
00015  *    author or licensor (but not in any way that suggests that they endorse you
00016  *    or your use of the work).
00017  * 
00018  *    Noncommercial. You may not use this work for commercial purposes.
00019  * 
00020  *    Share Alike. If you alter, transform, or build upon this work, you may
00021  *    distribute the resulting work only under the same or similar license to
00022  *    this one.
00023  * 
00024  * For any reuse or distribution, you must make clear to others the license
00025  * terms of this work. The best way to do this is by including this header.
00026  * 
00027  * Any of the above conditions can be waived if you get permission from the
00028  * copyright holder.
00029  * 
00030  * Apart from the remix rights granted under this license, nothing in this
00031  * license impairs or restricts the author's moral rights.
00032  */
00033 
00034 
00055 #include <config.h>
00056 
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <unistd.h>
00064 
00065 #include <libxml/tree.h>
00066 
00067 #ifdef HAPLO_HAVE_DMALLOC
00068 #include <dmalloc.h>
00069 #endif
00070 
00071 #include <jwsc/base/error.h>
00072 #include <jwsc/base/option.h>
00073 #include <jwsc/base/file_io.h>
00074 #include <jwsc/vector/vector.h>
00075 #include <jwsc/vector/vector_math.h>
00076 #include <jwsc/matrix/matrix.h>
00077 #include <jwsc/matrix/matrix_math.h>
00078 #include <jwsc/matblock/matblock.h>
00079 #include <jwsc/stat/stat.h>
00080 
00081 #include "haplo_groups.h"
00082 #include "options.h"
00083 #include "output.h"
00084 #include "input.h"
00085 #include "xml.h"
00086 #include "nb_freq.h"
00087 #include "nb_gauss.h"
00088 #include "nb_gmm.h"
00089 #include "mv_gmm.h"
00090 #ifdef HAPLO_ENABLE_SVM
00091 #include "svm_tree.h"
00092 #endif
00093 #ifdef HAPLO_ENABLE_WEKA
00094 #include "weka.h"
00095 #endif
00096 #include "nearest.h"
00097 
00098 
/* Option slots reserved for the SVM classifier; zero when it is compiled out. */
#ifdef HAPLO_ENABLE_SVM
#define NUM_SVM_OPTS 8
#else
#define NUM_SVM_OPTS 0
#endif

/* Option slots reserved for the WEKA classifiers; zero when compiled out. */
#ifdef HAPLO_ENABLE_WEKA
#define NUM_WEKA_OPTS 16
#else
#define NUM_WEKA_OPTS 0
#endif

/*
 * Sizes of the option tables: the options registered by this program plus
 * the shared ones. The expansions are parenthesized so the macros behave as
 * single values inside larger expressions (e.g. 2 * NUM_OPTS_NO_ARG).
 */
#define  NUM_OPTS_NO_ARG    (2 + NUM_SHARED_OPTS_NO_ARG)
#define  NUM_OPTS_WITH_ARG  (54 + NUM_SVM_OPTS + NUM_WEKA_OPTS + NUM_SHARED_OPTS_WITH_ARG)
00113 
00114 
/*
 * Compile-time defaults for the program options.
 */

/* General test configuration. */
#define  LABEL_COL     1
#define  TANDEM        0
#define  TEST_TYPE     HAPLO_TEST_LEAVE_ONE_OUT
#define  NUM_CV_FOLDS  5
#define  NUM_CV_ITERS  10
#define  TMP_DIRNAME   "/tmp"

/*
 * Default output file names. Every summary defaults to "/dev/stdout"; every
 * other output defaults to "/dev/null".
 */

/* nb-freq */
#define  NB_FREQ_SUMMARY_FNAME        "/dev/stdout"
#define  NB_FREQ_DETAILS_CNT_FNAME    "/dev/null"
#define  NB_FREQ_DETAILS_PCT_FNAME    "/dev/null"
#define  NB_FREQ_CONFUSION_CNT_FNAME  "/dev/null"
#define  NB_FREQ_CONFUSION_PCT_FNAME  "/dev/null"
#define  NB_FREQ_PREDS_FNAME          "/dev/null"

/* nb-gauss */
#define  NB_GAUSS_SUMMARY_FNAME        "/dev/stdout"
#define  NB_GAUSS_DETAILS_CNT_FNAME    "/dev/null"
#define  NB_GAUSS_DETAILS_PCT_FNAME    "/dev/null"
#define  NB_GAUSS_CONFUSION_CNT_FNAME  "/dev/null"
#define  NB_GAUSS_CONFUSION_PCT_FNAME  "/dev/null"
#define  NB_GAUSS_PREDS_FNAME          "/dev/null"

/* nb-gmm */
#define  NB_GMM_SUMMARY_FNAME        "/dev/stdout"
#define  NB_GMM_DETAILS_CNT_FNAME    "/dev/null"
#define  NB_GMM_DETAILS_PCT_FNAME    "/dev/null"
#define  NB_GMM_CONFUSION_CNT_FNAME  "/dev/null"
#define  NB_GMM_CONFUSION_PCT_FNAME  "/dev/null"
#define  NB_GMM_PREDS_FNAME          "/dev/null"

/* mv-gmm */
#define  MV_GMM_SUMMARY_FNAME        "/dev/stdout"
#define  MV_GMM_DETAILS_CNT_FNAME    "/dev/null"
#define  MV_GMM_DETAILS_PCT_FNAME    "/dev/null"
#define  MV_GMM_CONFUSION_CNT_FNAME  "/dev/null"
#define  MV_GMM_CONFUSION_PCT_FNAME  "/dev/null"
#define  MV_GMM_PREDS_FNAME          "/dev/null"

/* svm */
#define  SVM_SUMMARY_FNAME        "/dev/stdout"
#define  SVM_DETAILS_CNT_FNAME    "/dev/null"
#define  SVM_DETAILS_PCT_FNAME    "/dev/null"
#define  SVM_CONFUSION_CNT_FNAME  "/dev/null"
#define  SVM_CONFUSION_PCT_FNAME  "/dev/null"
#define  SVM_PREDS_FNAME          "/dev/null"

/* weka-j48 */
#define  WEKA_J48_SUMMARY_FNAME        "/dev/stdout"
#define  WEKA_J48_DETAILS_CNT_FNAME    "/dev/null"
#define  WEKA_J48_DETAILS_PCT_FNAME    "/dev/null"
#define  WEKA_J48_CONFUSION_CNT_FNAME  "/dev/null"
#define  WEKA_J48_CONFUSION_PCT_FNAME  "/dev/null"
#define  WEKA_J48_PREDS_FNAME          "/dev/null"

/* weka-part */
#define  WEKA_PART_SUMMARY_FNAME        "/dev/stdout"
#define  WEKA_PART_DETAILS_CNT_FNAME    "/dev/null"
#define  WEKA_PART_DETAILS_PCT_FNAME    "/dev/null"
#define  WEKA_PART_CONFUSION_CNT_FNAME  "/dev/null"
#define  WEKA_PART_CONFUSION_PCT_FNAME  "/dev/null"
#define  WEKA_PART_PREDS_FNAME          "/dev/null"

/* nearest */
#define  NEAREST_SUMMARY_FNAME        "/dev/stdout"
#define  NEAREST_DETAILS_CNT_FNAME    "/dev/null"
#define  NEAREST_DETAILS_PCT_FNAME    "/dev/null"
#define  NEAREST_CONFUSION_CNT_FNAME  "/dev/null"
#define  NEAREST_CONFUSION_PCT_FNAME  "/dev/null"
#define  NEAREST_PREDS_FNAME          "/dev/null"

/* tandem-agree */
#define  TANDEM_AGREE_SUMMARY_FNAME      "/dev/stdout"
#define  TANDEM_AGREE_DETAILS_CNT_FNAME  "/dev/null"
#define  TANDEM_AGREE_DETAILS_PCT_FNAME  "/dev/null"

/* tandem */
#define  TANDEM_SUMMARY_FNAME        "/dev/stdout"
#define  TANDEM_DETAILS_CNT_FNAME    "/dev/null"
#define  TANDEM_DETAILS_PCT_FNAME    "/dev/null"
#define  TANDEM_CONFUSION_CNT_FNAME  "/dev/null"
#define  TANDEM_CONFUSION_PCT_FNAME  "/dev/null"
#define  TANDEM_PREDS_FNAME          "/dev/null"
00178 
00179 
/** Testing strategy; chosen on the command line through 'test-type'. */
typedef enum
{
    HAPLO_TEST_LEAVE_ONE_OUT  = 0,  /**< Selected by the "loo" argument. */
    HAPLO_TEST_CROSS_VALIDATE = 1   /**< Selected by the "cv" argument. */
} Haplo_test_type;
00187 
00188 
00190 Option_no_arg opts_no_arg[NUM_OPTS_NO_ARG];
00191 
00193 Option_with_arg opts_with_arg[NUM_OPTS_WITH_ARG];
00194 
00196 static uint8_t tandem = TANDEM;
00197 
00199 static Haplo_test_type test_type = TEST_TYPE;
00200 
00202 static uint32_t num_cv_folds = NUM_CV_FOLDS;
00203 
00205 static uint32_t num_cv_iters = NUM_CV_ITERS;
00206 
00208 static const char* tmp_dirname = TMP_DIRNAME;
00209 
00214 static const char* nb_freq_summary_fname = NB_FREQ_SUMMARY_FNAME;
00215 
00220 static const char* nb_freq_details_cnt_fname = NB_FREQ_DETAILS_CNT_FNAME;
00221 
00226 static const char* nb_freq_details_pct_fname = NB_FREQ_DETAILS_PCT_FNAME;
00227 
00229 static const char* nb_freq_confusion_cnt_fname = NB_FREQ_CONFUSION_CNT_FNAME;
00230 
00232 static const char* nb_freq_confusion_pct_fname = NB_FREQ_CONFUSION_PCT_FNAME;
00233 
00235 static const char* nb_freq_preds_fname = NB_FREQ_PREDS_FNAME;
00236 
00241 static const char* nb_gauss_summary_fname = NB_GAUSS_SUMMARY_FNAME;
00242 
00247 static const char* nb_gauss_details_cnt_fname = NB_GAUSS_DETAILS_CNT_FNAME;
00248 
00253 static const char* nb_gauss_details_pct_fname = NB_GAUSS_DETAILS_PCT_FNAME;
00254 
00256 static const char* nb_gauss_confusion_cnt_fname = NB_GAUSS_CONFUSION_CNT_FNAME;
00257 
00259 static const char* nb_gauss_confusion_pct_fname = NB_GAUSS_CONFUSION_PCT_FNAME;
00260 
00262 static const char* nb_gauss_preds_fname = NB_GAUSS_PREDS_FNAME;
00263 
00268 static const char* nb_gmm_summary_fname = NB_GMM_SUMMARY_FNAME;
00269 
00274 static const char* nb_gmm_details_cnt_fname = NB_GMM_DETAILS_CNT_FNAME;
00275 
00280 static const char* nb_gmm_details_pct_fname = NB_GMM_DETAILS_PCT_FNAME;
00281 
00286 static const char* nb_gmm_confusion_cnt_fname = NB_GMM_CONFUSION_CNT_FNAME;
00287 
00292 static const char* nb_gmm_confusion_pct_fname = NB_GMM_CONFUSION_PCT_FNAME;
00293 
00297 static const char* nb_gmm_preds_fname = NB_GMM_PREDS_FNAME;
00298 
00303 static const char* mv_gmm_summary_fname = MV_GMM_SUMMARY_FNAME;
00304 
00309 static const char* mv_gmm_details_cnt_fname = MV_GMM_DETAILS_CNT_FNAME;
00310 
00315 static const char* mv_gmm_details_pct_fname = MV_GMM_DETAILS_PCT_FNAME;
00316 
00321 static const char* mv_gmm_confusion_cnt_fname = MV_GMM_CONFUSION_CNT_FNAME;
00322 
00327 static const char* mv_gmm_confusion_pct_fname = MV_GMM_CONFUSION_PCT_FNAME;
00328 
00332 static const char* mv_gmm_preds_fname = MV_GMM_PREDS_FNAME;
00333 
00335 static const char* svm_summary_fname = SVM_SUMMARY_FNAME;
00336 
00338 static const char* svm_details_cnt_fname = SVM_DETAILS_CNT_FNAME;
00339 
00341 static const char* svm_details_pct_fname = SVM_DETAILS_PCT_FNAME;
00342 
00344 static const char* svm_confusion_cnt_fname = SVM_CONFUSION_CNT_FNAME;
00345 
00347 static const char* svm_confusion_pct_fname = SVM_CONFUSION_PCT_FNAME;
00348 
00350 static const char* svm_preds_fname = SVM_PREDS_FNAME;
00351 
00353 static const char* weka_j48_summary_fname = WEKA_J48_SUMMARY_FNAME;
00354 
00356 static const char* weka_j48_details_cnt_fname = WEKA_J48_DETAILS_CNT_FNAME;
00357 
00362 static const char* weka_j48_details_pct_fname = WEKA_J48_DETAILS_PCT_FNAME;
00363 
00365 static const char* weka_j48_confusion_cnt_fname = WEKA_J48_CONFUSION_CNT_FNAME;
00366 
00368 static const char* weka_j48_confusion_pct_fname = WEKA_J48_CONFUSION_PCT_FNAME;
00369 
00371 static const char* weka_j48_preds_fname = WEKA_J48_PREDS_FNAME;
00372 
00374 static const char* weka_part_summary_fname = WEKA_PART_SUMMARY_FNAME;
00375 
00379 static const char* weka_part_details_cnt_fname = WEKA_PART_DETAILS_CNT_FNAME;
00380 
00385 static const char* weka_part_details_pct_fname = WEKA_PART_DETAILS_PCT_FNAME;
00386 
00388 static const char* weka_part_confusion_cnt_fname = WEKA_PART_CONFUSION_CNT_FNAME;
00389 
00391 static const char* weka_part_confusion_pct_fname = WEKA_PART_CONFUSION_PCT_FNAME;
00392 
00394 static const char* weka_part_preds_fname = WEKA_PART_PREDS_FNAME;
00395 
00397 static const char* nearest_summary_fname = NEAREST_SUMMARY_FNAME;
00398 
00402 static const char* nearest_details_cnt_fname = NEAREST_DETAILS_CNT_FNAME;
00403 
00408 static const char* nearest_details_pct_fname = NEAREST_DETAILS_PCT_FNAME;
00409 
00411 static const char* nearest_confusion_cnt_fname = NEAREST_CONFUSION_CNT_FNAME;
00412 
00414 static const char* nearest_confusion_pct_fname = NEAREST_CONFUSION_PCT_FNAME;
00415 
00417 static const char* nearest_preds_fname = NEAREST_PREDS_FNAME;
00418 
00420 static const char* tandem_agree_summary_fname = TANDEM_AGREE_SUMMARY_FNAME;
00421 
00426 static const char* tandem_agree_details_cnt_fname = TANDEM_AGREE_DETAILS_CNT_FNAME;
00427 
00432 static const char* tandem_agree_details_pct_fname = TANDEM_AGREE_DETAILS_PCT_FNAME;
00433 
00435 static const char* tandem_summary_fname = TANDEM_SUMMARY_FNAME;
00436 
00438 static const char* tandem_details_cnt_fname = TANDEM_DETAILS_CNT_FNAME;
00439 
00441 static const char* tandem_details_pct_fname = TANDEM_DETAILS_PCT_FNAME;
00442 
00444 static const char* tandem_confusion_cnt_fname = TANDEM_CONFUSION_CNT_FNAME;
00445 
00447 static const char* tandem_confusion_pct_fname = TANDEM_CONFUSION_PCT_FNAME;
00448 
00450 static const char* tandem_preds_fname = TANDEM_PREDS_FNAME;
00451 
00452 
00454 uint32_t get_num_opts_no_arg()
00455 {
00456     return NUM_OPTS_NO_ARG;
00457 }
00458 
00460 uint32_t get_num_opts_with_arg()
00461 {
00462     return NUM_OPTS_WITH_ARG;
00463 }
00464 
00466 void print_usage()
00467 {
00468     fprintf(stderr, "usage: haplo-test OPTIONS [data-fname | <stdin>]\n");
00469     print_options(stderr, 27, NUM_OPTS_NO_ARG, opts_no_arg, NUM_OPTS_WITH_ARG,
00470             opts_with_arg);
00471 }
00472 
00474 static Error* process_tandem_opt()
00475 {
00476     tandem = 1;
00477     return NULL;
00478 }
00479 
00481 static Error* process_test_type_opt(Option_arg arg)
00482 {
00483     if (arg == NULL)
00484     {
00485         return JWSC_EARG("Option 'test-type' requires an argument");
00486     }
00487     if (strncmp(arg, "loo", 13) == 0)
00488     {
00489         test_type = HAPLO_TEST_LEAVE_ONE_OUT;
00490     }
00491     else if (strncmp(arg, "cv", 2) == 0)
00492     {
00493         test_type = HAPLO_TEST_CROSS_VALIDATE;
00494     }
00495     else
00496     {
00497         return JWSC_EARG("Option 'test-type' must be one of {loo, cv}");
00498     }
00499     return NULL;
00500 }
00501 
00503 static Error* process_num_cv_folds_opt(Option_arg arg)
00504 {
00505     if (arg == NULL)
00506     {
00507         return JWSC_EARG("Option 'num-cv-folds' requires an argument");
00508     }
00509     if (sscanf(arg, "%u", &num_cv_folds) != 1 || num_cv_folds < 1)
00510     {
00511         return JWSC_EARG("Option 'num-cv-folds' must be > 0");
00512     }
00513 
00514     return NULL;
00515 }
00516 
00518 static Error* process_num_cv_iters_opt(Option_arg arg)
00519 {
00520     if (arg == NULL)
00521     {
00522         return JWSC_EARG("Option 'num-cv-iters' requires an argument");
00523     }
00524     if (sscanf(arg, "%u", &num_cv_iters) != 1 || num_cv_iters < 1)
00525     {
00526         return JWSC_EARG("Option 'num-cv-iters' must be > 0");
00527     }
00528 
00529     return NULL;
00530 }
00531 
00532 Error* process_tmp_dir_opt(Option_arg arg)
00533 {
00534     if (arg == NULL)
00535     {
00536         return JWSC_EARG("Option 'tmp-dir' requires an argument");
00537     }
00538     tmp_dirname = arg;
00539 
00540     return NULL;
00541 }
00542 
00544 Error* process_nb_freq_summary_out_opt(Option_arg arg)
00545 {
00546     if (arg == NULL)
00547     {
00548         return JWSC_EARG("Option 'nb-freq-summary-out' requires an argument");
00549     }
00550     nb_freq_summary_fname = arg;
00551     return NULL;
00552 }
00553 
00555 Error* process_nb_freq_details_pct_out_opt(Option_arg arg)
00556 {
00557     if (arg == NULL)
00558     {
00559         return JWSC_EARG("Option 'nb-freq-details-pct-out' requires an argument");
00560     }
00561     nb_freq_details_pct_fname = arg;
00562     return NULL;
00563 }
00564 
00566 Error* process_nb_freq_details_cnt_out_opt(Option_arg arg)
00567 {
00568     if (arg == NULL)
00569     {
00570         return JWSC_EARG("Option 'nb-freq-details-cnt-out' requires an argument");
00571     }
00572     nb_freq_details_cnt_fname = arg;
00573     return NULL;
00574 }
00575 
00577 Error* process_nb_freq_confusion_cnt_out_opt(Option_arg arg)
00578 {
00579     if (arg == NULL)
00580     {
00581         return JWSC_EARG("Option 'nb-freq-confusion-cnt-out' requires an argument");
00582     }
00583     nb_freq_confusion_cnt_fname = arg;
00584     return NULL;
00585 }
00586 
00588 Error* process_nb_freq_confusion_pct_out_opt(Option_arg arg)
00589 {
00590     if (arg == NULL)
00591     {
00592         return JWSC_EARG("Option 'nb-freq-confusion-pct-out' requires an argument");
00593     }
00594     nb_freq_confusion_pct_fname = arg;
00595     return NULL;
00596 }
00597 
00599 Error* process_nb_freq_preds_out_opt(Option_arg arg)
00600 {
00601     if (arg == NULL)
00602     {
00603         return JWSC_EARG("Option 'nb-freq-preds-out' requires an argument");
00604     }
00605     nb_freq_preds_fname = arg;
00606     return NULL;
00607 }
00608 
00610 Error* process_nb_gauss_summary_out_opt(Option_arg arg)
00611 {
00612     if (arg == NULL)
00613     {
00614         return JWSC_EARG("Option 'nb-gauss-summary-out' requires an argument");
00615     }
00616     nb_gauss_summary_fname = arg;
00617     return NULL;
00618 }
00619 
00621 Error* process_nb_gauss_details_pct_out_opt(Option_arg arg)
00622 {
00623     if (arg == NULL)
00624     {
00625         return JWSC_EARG("Option 'nb-gauss-details-pct-out' requires an argument");
00626     }
00627     nb_gauss_details_pct_fname = arg;
00628     return NULL;
00629 }
00630 
00632 Error* process_nb_gauss_details_cnt_out_opt(Option_arg arg)
00633 {
00634     if (arg == NULL)
00635     {
00636         return JWSC_EARG("Option 'nb-gauss-details-cnt-out' requires an argument");
00637     }
00638     nb_gauss_details_cnt_fname = arg;
00639     return NULL;
00640 }
00641 
00643 Error* process_nb_gauss_confusion_cnt_out_opt(Option_arg arg)
00644 {
00645     if (arg == NULL)
00646     {
00647         return JWSC_EARG("Option 'nb-gauss-confusion-cnt-out' requires an argument");
00648     }
00649     nb_gauss_confusion_cnt_fname = arg;
00650     return NULL;
00651 }
00652 
00654 Error* process_nb_gauss_confusion_pct_out_opt(Option_arg arg)
00655 {
00656     if (arg == NULL)
00657     {
00658         return JWSC_EARG("Option 'nb-gauss-confusion-pct-out' requires an argument");
00659     }
00660     nb_gauss_confusion_pct_fname = arg;
00661     return NULL;
00662 }
00663 
00665 Error* process_nb_gauss_preds_out_opt(Option_arg arg)
00666 {
00667     if (arg == NULL)
00668     {
00669         return JWSC_EARG("Option 'nb-gauss-preds-out' requires an argument");
00670     }
00671     nb_gauss_preds_fname = arg;
00672     return NULL;
00673 }
00674 
00676 Error* process_nb_gmm_summary_out_opt(Option_arg arg)
00677 {
00678     if (arg == NULL)
00679     {
00680         return JWSC_EARG("Option 'nb-gmm-summary-out' requires an argument");
00681     }
00682     nb_gmm_summary_fname = arg;
00683     return NULL;
00684 }
00685 
00687 Error* process_nb_gmm_details_pct_out_opt(Option_arg arg)
00688 {
00689     if (arg == NULL)
00690     {
00691         return JWSC_EARG("Option 'nb-gmm-details-pct-out' requires an argument");
00692     }
00693     nb_gmm_details_pct_fname = arg;
00694     return NULL;
00695 }
00696 
00698 Error* process_nb_gmm_details_cnt_out_opt(Option_arg arg)
00699 {
00700     if (arg == NULL)
00701     {
00702         return JWSC_EARG("Option 'nb-gmm-details-cnt-out' requires an argument");
00703     }
00704     nb_gmm_details_cnt_fname = arg;
00705     return NULL;
00706 }
00707 
00709 Error* process_nb_gmm_confusion_cnt_out_opt(Option_arg arg)
00710 {
00711     if (arg == NULL)
00712     {
00713         return JWSC_EARG("Option 'nb-gmm-confusion-cnt-out' requires an argument");
00714     }
00715     nb_gmm_confusion_cnt_fname = arg;
00716     return NULL;
00717 }
00718 
00720 Error* process_nb_gmm_confusion_pct_out_opt(Option_arg arg)
00721 {
00722     if (arg == NULL)
00723     {
00724         return JWSC_EARG("Option 'nb-gmm-confusion-pct-out' requires an argument");
00725     }
00726     nb_gmm_confusion_pct_fname = arg;
00727     return NULL;
00728 }
00729 
00731 Error* process_nb_gmm_preds_out_opt(Option_arg arg)
00732 {
00733     if (arg == NULL)
00734     {
00735         return JWSC_EARG("Option 'nb-gmm-preds-out' requires an argument");
00736     }
00737     nb_gmm_preds_fname = arg;
00738     return NULL;
00739 }
00740 
00742 Error* process_mv_gmm_summary_out_opt(Option_arg arg)
00743 {
00744     if (arg == NULL)
00745     {
00746         return JWSC_EARG("Option 'mv-gmm-summary-out' requires an argument");
00747     }
00748     mv_gmm_summary_fname = arg;
00749     return NULL;
00750 }
00751 
00753 Error* process_mv_gmm_details_pct_out_opt(Option_arg arg)
00754 {
00755     if (arg == NULL)
00756     {
00757         return JWSC_EARG("Option 'mv-gmm-details-pct-out' requires an argument");
00758     }
00759     mv_gmm_details_pct_fname = arg;
00760     return NULL;
00761 }
00762 
00764 Error* process_mv_gmm_details_cnt_out_opt(Option_arg arg)
00765 {
00766     if (arg == NULL)
00767     {
00768         return JWSC_EARG("Option 'mv-gmm-details-cnt-out' requires an argument");
00769     }
00770     mv_gmm_details_cnt_fname = arg;
00771     return NULL;
00772 }
00773 
00775 Error* process_mv_gmm_confusion_cnt_out_opt(Option_arg arg)
00776 {
00777     if (arg == NULL)
00778     {
00779         return JWSC_EARG("Option 'mv-gmm-confusion-cnt-out' requires an argument");
00780     }
00781     mv_gmm_confusion_cnt_fname = arg;
00782     return NULL;
00783 }
00784 
00786 Error* process_mv_gmm_confusion_pct_out_opt(Option_arg arg)
00787 {
00788     if (arg == NULL)
00789     {
00790         return JWSC_EARG("Option 'mv-gmm-confusion-pct-out' requires an argument");
00791     }
00792     mv_gmm_confusion_pct_fname = arg;
00793     return NULL;
00794 }
00795 
00797 Error* process_mv_gmm_preds_out_opt(Option_arg arg)
00798 {
00799     if (arg == NULL)
00800     {
00801         return JWSC_EARG("Option 'mv-gmm-preds-out' requires an argument");
00802     }
00803     mv_gmm_preds_fname = arg;
00804     return NULL;
00805 }
00806 
00808 Error* process_svm_summary_out_opt(Option_arg arg)
00809 {
00810     if (arg == NULL)
00811     {
00812         return JWSC_EARG("Option 'svm-summary-out' requires an argument");
00813     }
00814     svm_summary_fname = arg;
00815     return NULL;
00816 }
00817 
00819 Error* process_svm_details_pct_out_opt(Option_arg arg)
00820 {
00821     if (arg == NULL)
00822     {
00823         return JWSC_EARG("Option 'svm-details-pct-out' requires an argument");
00824     }
00825     svm_details_pct_fname = arg;
00826     return NULL;
00827 }
00828 
00830 Error* process_svm_details_cnt_out_opt(Option_arg arg)
00831 {
00832     if (arg == NULL)
00833     {
00834         return JWSC_EARG("Option 'svm-details-cnt-out' requires an argument");
00835     }
00836     svm_details_cnt_fname = arg;
00837     return NULL;
00838 }
00839 
00841 Error* process_svm_confusion_cnt_out_opt(Option_arg arg)
00842 {
00843     if (arg == NULL)
00844     {
00845         return JWSC_EARG("Option 'svm-confusion-cnt-out' requires an argument");
00846     }
00847     svm_confusion_cnt_fname = arg;
00848     return NULL;
00849 }
00850 
00852 Error* process_svm_confusion_pct_out_opt(Option_arg arg)
00853 {
00854     if (arg == NULL)
00855     {
00856         return JWSC_EARG("Option 'svm-confusion-pct-out' requires an argument");
00857     }
00858     svm_confusion_pct_fname = arg;
00859     return NULL;
00860 }
00861 
00863 Error* process_svm_preds_out_opt(Option_arg arg)
00864 {
00865     if (arg == NULL)
00866     {
00867         return JWSC_EARG("Option 'svm-preds-out' requires an argument");
00868     }
00869     svm_preds_fname = arg;
00870     return NULL;
00871 }
00872 
00874 Error* process_weka_j48_summary_out_opt(Option_arg arg)
00875 {
00876     if (arg == NULL)
00877     {
00878         return JWSC_EARG("Option 'weka-j48-summary-out' requires an argument");
00879     }
00880     weka_j48_summary_fname = arg;
00881     return NULL;
00882 }
00883 
00885 Error* process_weka_j48_details_pct_out_opt(Option_arg arg)
00886 {
00887     if (arg == NULL)
00888     {
00889         return JWSC_EARG("Option 'weka-j48-details-pct-out' requires an argument");
00890     }
00891     weka_j48_details_pct_fname = arg;
00892     return NULL;
00893 }
00894 
00896 Error* process_weka_j48_details_cnt_out_opt(Option_arg arg)
00897 {
00898     if (arg == NULL)
00899     {
00900         return JWSC_EARG("Option 'weka-j48-details-cnt-out' requires an argument");
00901     }
00902     weka_j48_details_cnt_fname = arg;
00903     return NULL;
00904 }
00905 
00907 Error* process_weka_j48_confusion_cnt_out_opt(Option_arg arg)
00908 {
00909     if (arg == NULL)
00910     {
00911         return JWSC_EARG("Option 'weka-j48-confusion-cnt-out' requires an argument");
00912     }
00913     weka_j48_confusion_cnt_fname = arg;
00914     return NULL;
00915 }
00916 
00918 Error* process_weka_j48_confusion_pct_out_opt(Option_arg arg)
00919 {
00920     if (arg == NULL)
00921     {
00922         return JWSC_EARG("Option 'weka-j48-confusion-pct-out' requires an argument");
00923     }
00924     weka_j48_confusion_pct_fname = arg;
00925     return NULL;
00926 }
00927 
00929 Error* process_weka_j48_preds_out_opt(Option_arg arg)
00930 {
00931     if (arg == NULL)
00932     {
00933         return JWSC_EARG("Option 'weka-j48-preds-out' requires an argument");
00934     }
00935     weka_j48_preds_fname = arg;
00936     return NULL;
00937 }
00938 
00940 Error* process_weka_part_summary_out_opt(Option_arg arg)
00941 {
00942     if (arg == NULL)
00943     {
00944         return JWSC_EARG("Option 'weka-part-summary-out' requires an argument");
00945     }
00946     weka_part_summary_fname = arg;
00947     return NULL;
00948 }
00949 
00951 Error* process_weka_part_details_pct_out_opt(Option_arg arg)
00952 {
00953     if (arg == NULL)
00954     {
00955         return JWSC_EARG("Option 'weka-part-details-pct-out' requires an argument");
00956     }
00957     weka_part_details_pct_fname = arg;
00958     return NULL;
00959 }
00960 
00962 Error* process_weka_part_details_cnt_out_opt(Option_arg arg)
00963 {
00964     if (arg == NULL)
00965     {
00966         return JWSC_EARG("Option 'weka-part-details-cnt-out' requires an argument");
00967     }
00968     weka_part_details_cnt_fname = arg;
00969     return NULL;
00970 }
00971 
00973 Error* process_weka_part_confusion_cnt_out_opt(Option_arg arg)
00974 {
00975     if (arg == NULL)
00976     {
00977         return JWSC_EARG("Option 'weka-part-confusion-cnt-out' requires an argument");
00978     }
00979     weka_part_confusion_cnt_fname = arg;
00980     return NULL;
00981 }
00982 
00984 Error* process_weka_part_confusion_pct_out_opt(Option_arg arg)
00985 {
00986     if (arg == NULL)
00987     {
00988         return JWSC_EARG("Option 'weka-part-confusion-pct-out' requires an argument");
00989     }
00990     weka_part_confusion_pct_fname = arg;
00991     return NULL;
00992 }
00993 
00995 Error* process_weka_part_preds_out_opt(Option_arg arg)
00996 {
00997     if (arg == NULL)
00998     {
00999         return JWSC_EARG("Option 'weka-part-preds-out' requires an argument");
01000     }
01001     weka_part_preds_fname = arg;
01002     return NULL;
01003 }
01004 
01006 Error* process_nearest_summary_out_opt(Option_arg arg)
01007 {
01008     if (arg == NULL)
01009     {
01010         return JWSC_EARG("Option 'nearest-summary-out' requires an argument");
01011     }
01012     nearest_summary_fname = arg;
01013     return NULL;
01014 }
01015 
01017 Error* process_nearest_details_pct_out_opt(Option_arg arg)
01018 {
01019     if (arg == NULL)
01020     {
01021         return JWSC_EARG("Option 'nearest-details-pct-out' requires an argument");
01022     }
01023     nearest_details_pct_fname = arg;
01024     return NULL;
01025 }
01026 
01028 Error* process_nearest_details_cnt_out_opt(Option_arg arg)
01029 {
01030     if (arg == NULL)
01031     {
01032         return JWSC_EARG("Option 'nearest-details-cnt-out' requires an argument");
01033     }
01034     nearest_details_cnt_fname = arg;
01035     return NULL;
01036 }
01037 
01039 Error* process_nearest_confusion_cnt_out_opt(Option_arg arg)
01040 {
01041     if (arg == NULL)
01042     {
01043         return JWSC_EARG("Option 'nearest-confusion-cnt-out' requires an argument");
01044     }
01045     nearest_confusion_cnt_fname = arg;
01046     return NULL;
01047 }
01048 
01050 Error* process_nearest_confusion_pct_out_opt(Option_arg arg)
01051 {
01052     if (arg == NULL)
01053     {
01054         return JWSC_EARG("Option 'nearest-confusion-pct-out' requires an argument");
01055     }
01056     nearest_confusion_pct_fname = arg;
01057     return NULL;
01058 }
01059 
01061 Error* process_nearest_preds_out_opt(Option_arg arg)
01062 {
01063     if (arg == NULL)
01064     {
01065         return JWSC_EARG("Option 'nearest-preds-out' requires an argument");
01066     }
01067     nearest_preds_fname = arg;
01068     return NULL;
01069 }
01070 
01072 Error* process_tandem_agree_summary_out_opt(Option_arg arg)
01073 {
01074     if (arg == NULL)
01075     {
01076         return JWSC_EARG("Option 'tandem-agree-summary-out' requires an argument");
01077     }
01078     tandem_agree_summary_fname = arg;
01079     return NULL;
01080 }
01081 
01083 Error* process_tandem_agree_details_pct_out_opt(Option_arg arg)
01084 {
01085     if (arg == NULL)
01086     {
01087         return JWSC_EARG("Option 'tandem-agree-details-pct-out' requires an argument");
01088     }
01089     tandem_agree_details_pct_fname = arg;
01090     return NULL;
01091 }
01092 
01094 Error* process_tandem_agree_details_cnt_out_opt(Option_arg arg)
01095 {
01096     if (arg == NULL)
01097     {
01098         return JWSC_EARG("Option 'tandem-agree-details-cnt-out' requires an argument");
01099     }
01100     tandem_agree_details_cnt_fname = arg;
01101     return NULL;
01102 }
01103 
01105 Error* process_tandem_summary_out_opt(Option_arg arg)
01106 {
01107     if (arg == NULL)
01108     {
01109         return JWSC_EARG("Option 'tandem-summary-out' requires an argument");
01110     }
01111     tandem_summary_fname = arg;
01112     return NULL;
01113 }
01114 
01116 Error* process_tandem_details_pct_out_opt(Option_arg arg)
01117 {
01118     if (arg == NULL)
01119     {
01120         return JWSC_EARG("Option 'tandem-details-pct-out' requires an argument");
01121     }
01122     tandem_details_pct_fname = arg;
01123     return NULL;
01124 }
01125 
01127 Error* process_tandem_details_cnt_out_opt(Option_arg arg)
01128 {
01129     if (arg == NULL)
01130     {
01131         return JWSC_EARG("Option 'tandem-details-cnt-out' requires an argument");
01132     }
01133     tandem_details_cnt_fname = arg;
01134     return NULL;
01135 }
01136 
01138 Error* process_tandem_confusion_cnt_out_opt(Option_arg arg)
01139 {
01140     if (arg == NULL)
01141     {
01142         return JWSC_EARG("Option 'tandem-confusion-cnt-out' requires an argument");
01143     }
01144     tandem_confusion_cnt_fname = arg;
01145     return NULL;
01146 }
01147 
01149 Error* process_tandem_confusion_pct_out_opt(Option_arg arg)
01150 {
01151     if (arg == NULL)
01152     {
01153         return JWSC_EARG("Option 'tandem-confusion-pct-out' requires an argument");
01154     }
01155     tandem_confusion_pct_fname = arg;
01156     return NULL;
01157 }
01158 
01160 Error* process_tandem_preds_out_opt(Option_arg arg)
01161 {
01162     if (arg == NULL)
01163     {
01164         return JWSC_EARG("Option 'tandem-preds-out' requires an argument");
01165     }
01166     tandem_preds_fname = arg;
01167     return NULL;
01168 }
01169 
01171 static void init_test_options(void)
01172 {
01173     uint32_t i;
01174 
01175     char s_name;
01176     const char* l_name;
01177     const char* desc;
01178 
01179     Error* (*fnoarg)();
01180     Error* (*farg)(const char*);
01181 
01182     init_options(opts_no_arg, opts_with_arg);
01183 
01184     opts.label_col = LABEL_COL;
01185 
01186     i = NUM_SHARED_OPTS_NO_ARG;
01187     l_name = "tandem";
01188     s_name = 0;
01189     desc   = "Perform tandem classifier decision analysis.";
01190     fnoarg = process_tandem_opt;
01191     init_option_no_arg(&(opts_no_arg[i++]), l_name, s_name, desc, fnoarg);
01192 
01193     l_name = "exclude-one";
01194     s_name = 0;
01195     desc   = "When performing tandem classifier analysis, exclude at most one prediction from the set of classification algorithms. There must be three or more algorithms in play for this to take effect.";
01196     fnoarg = process_exclude_one_opt;
01197     init_option_no_arg(&(opts_no_arg[i++]), l_name, s_name, desc, fnoarg);
01198     assert(i == NUM_OPTS_NO_ARG);
01199 
01200     i = NUM_SHARED_OPTS_WITH_ARG;
01201     l_name = "test-type";
01202     s_name = 0;
01203     desc   = "Type of testing to use. Must be one of leave-one-out or cross-validate. Use one of the abbreviations {loo, cv}.";
01204     farg   = process_test_type_opt;
01205     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01206 
01207     l_name = "tmp-dir";
01208     s_name = 0;
01209     desc   = "Directory for temporary files, including trained models.";
01210     farg   = process_tmp_dir_opt;
01211     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01212 
01213     l_name = "num-cv-folds";
01214     s_name = 0;
01215     desc   = "Number of data folds to use per cross-validation iteration.";
01216     farg   = process_num_cv_folds_opt;
01217     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01218 
01219     l_name = "num-cv-iters";
01220     s_name = 0;
01221     desc   = "Number of cross-validation iterations.";
01222     farg   = process_num_cv_iters_opt;
01223     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01224 
01225     l_name = "nb-freq";
01226     s_name = 0;
01227     desc   = "Naive Bayes non-parametric frequency model tree information.";
01228     farg   = process_nb_freq_opt;
01229     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01230 
01231     l_name = "nb-freq-dtd";
01232     s_name = 0;
01233     desc   = "Validate the naive Bayes non-parametric frequency model tree information XML file with this DTD.";
01234     farg   = process_nb_freq_dtd_opt;
01235     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01236 
01237     l_name = "nb-gauss";
01238     s_name = 0;
01239     desc   = "Naive Bayes Gaussian model tree information.";
01240     farg   = process_nb_gauss_opt;
01241     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01242 
01243     l_name = "nb-gauss-dtd";
01244     s_name = 0;
01245     desc   = "Validate the naive Bayes Gaussian model tree information XML file with this DTD.";
01246     farg   = process_nb_gauss_dtd_opt;
01247     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01248 
01249     l_name = "nb-gmm";
01250     s_name = 0;
01251     desc   = "Naive Bayes Gaussian mixture model tree information.";
01252     farg   = process_nb_gmm_opt;
01253     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01254 
01255     l_name = "nb-gmm-dtd";
01256     s_name = 0;
01257     desc   = "Validate the naive Bayes Gaussian mixture model tree information XML file with this DTD.";
01258     farg   = process_nb_gmm_dtd_opt;
01259     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01260 
01261     l_name = "mv-gmm";
01262     s_name = 0;
01263     desc   = "Multivariate Gaussian mixture model tree information.";
01264     farg   = process_mv_gmm_opt;
01265     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01266 
01267     l_name = "mv-gmm-dtd";
01268     s_name = 0;
01269     desc   = "Validate the multivariate Gaussian mixture model tree information XML file with this DTD.";
01270     farg   = process_mv_gmm_dtd_opt;
01271     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01272 
01273 #ifdef HAPLO_ENABLE_SVM
01274     l_name = "svm";
01275     s_name = 0;
01276     desc   = "SVM model tree information.";
01277     farg   = process_svm_opt;
01278     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01279 
01280     l_name = "svm-dtd";
01281     s_name = 0;
01282     desc   = "Validate the SVM model tree information XML file with this DTD.";
01283     farg   = process_svm_dtd_opt;
01284     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01285 #endif
01286 
01287 #ifdef HAPLO_ENABLE_WEKA
01288     l_name = "weka-j48";
01289     s_name = 0;
01290     desc   = "Weka J48 model tree information.";
01291     farg   = process_weka_j48_opt;
01292     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01293 
01294     l_name = "weka-part";
01295     s_name = 0;
01296     desc   = "Weka PART model tree information.";
01297     farg   = process_weka_part_opt;
01298     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01299 
01300     l_name = "weka-jar";
01301     s_name = 0;
01302     desc   = "Weka java archive file. Required for using the Weka algorithms.";
01303     farg   = process_weka_jar_opt;
01304     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01305 
01306     l_name = "weka-dtd";
01307     s_name = 0;
01308     desc   = "Validate the Weka model tree information XML files with this DTD.";
01309     farg   = process_weka_dtd_opt;
01310     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01311 #endif
01312 
01313     l_name = "nearest";
01314     s_name = 0;
01315     desc   = "Nearest neighbor model information.";
01316     farg   = process_nearest_opt;
01317     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01318 
01319     l_name = "nearest-dtd";
01320     s_name = 0;
01321     desc   = "Validate the nearest neighbor model information XML file with this DTD.";
01322     farg   = process_nearest_dtd_opt;
01323     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01324 
01325     l_name = "nearest-max-d";
01326     s_name = 0;
01327     desc   = "Maximum distance allowed for a nearest neighbor classification.";
01328     farg   = process_nearest_max_d_opt;
01329     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01330 
01331     l_name = "nb-freq-summary-out";
01332     s_name = 0;
01333     desc   = "File to output the Naive Bayes freqency model test performance summary to. The default is stdout.";
01334     farg   = process_nb_freq_summary_out_opt;
01335     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01336 
01337     l_name = "nb-freq-details-cnt-out";
01338     s_name = 0;
01339     desc   = "File to output the Naive Bayes freqency model test performance details (counts) to. The default is stdout.";
01340     farg   = process_nb_freq_details_cnt_out_opt;
01341     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01342 
01343     l_name = "nb-freq-details-pct-out";
01344     s_name = 0;
01345     desc   = "File to output the Naive Bayes freqency model test performance details (percents) to. The default is stdout.";
01346     farg   = process_nb_freq_details_pct_out_opt;
01347     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01348 
01349     l_name = "nb-freq-confusion-cnt-out";
01350     s_name = 0;
01351     desc   = "File to output the Naive Bayes freqency model test confusion matrix (counts) to. The default is stdout.";
01352     farg   = process_nb_freq_confusion_cnt_out_opt;
01353     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01354 
01355     l_name = "nb-freq-confusion-pct-out";
01356     s_name = 0;
01357     desc   = "File to output the Naive Bayes freqency model test confusion matrix (percents) to. The default is stdout.";
01358     farg   = process_nb_freq_confusion_pct_out_opt;
01359     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01360 
01361     l_name = "nb-freq-preds-out";
01362     s_name = 0;
01363     desc   = "File to output the Naive Bayes freqency model test predictions to. The default is no output.";
01364     farg   = process_nb_freq_preds_out_opt;
01365     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01366 
01367     l_name = "nb-gauss-summary-out";
01368     s_name = 0;
01369     desc   = "File to output the Naive Bayes Gaussian model test performance summary to. The default is stdout.";
01370     farg   = process_nb_gauss_summary_out_opt;
01371     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01372 
01373     l_name = "nb-gauss-details-cnt-out";
01374     s_name = 0;
01375     desc   = "File to output the Naive Bayes Gaussian model test performance details (counts) to. The default is stdout.";
01376     farg   = process_nb_gauss_details_cnt_out_opt;
01377     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01378 
01379     l_name = "nb-gauss-details-pct-out";
01380     s_name = 0;
01381     desc   = "File to output the Naive Bayes Gaussian model test performance details (percents) to. The default is stdout.";
01382     farg   = process_nb_gauss_details_pct_out_opt;
01383     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01384 
01385     l_name = "nb-gauss-confusion-cnt-out";
01386     s_name = 0;
01387     desc   = "File to output the Naive Bayes Gaussian model test confusion matrix (counts) to. The default is stdout.";
01388     farg   = process_nb_gauss_confusion_cnt_out_opt;
01389     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01390 
01391     l_name = "nb-gauss-confusion-pct-out";
01392     s_name = 0;
01393     desc   = "File to output the Naive Bayes Gaussian model test confusion matrix (percents) to. The default is stdout.";
01394     farg   = process_nb_gauss_confusion_pct_out_opt;
01395     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01396 
01397     l_name = "nb-gauss-preds-out";
01398     s_name = 0;
01399     desc   = "File to output the Naive Bayes Gaussian model test predictions to. The default is no output.";
01400     farg   = process_nb_gauss_preds_out_opt;
01401     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01402 
01403     l_name = "nb-gmm-summary-out";
01404     s_name = 0;
01405     desc   = "File to output the Naive Bayes Gaussian mixture model test performance summary to. The default is stdout.";
01406     farg   = process_nb_gmm_summary_out_opt;
01407     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01408 
01409     l_name = "nb-gmm-details-cnt-out";
01410     s_name = 0;
01411     desc   = "File to output the Naive Bayes Gaussian mixture model test performance details (counts) to. The default is stdout.";
01412     farg   = process_nb_gmm_details_cnt_out_opt;
01413     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01414 
01415     l_name = "nb-gmm-details-pct-out";
01416     s_name = 0;
01417     desc   = "File to output the Naive Bayes Gaussian mixture model test performance details (percents) to. The default is stdout.";
01418     farg   = process_nb_gmm_details_pct_out_opt;
01419     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01420 
01421     l_name = "nb-gmm-confusion-cnt-out";
01422     s_name = 0;
01423     desc   = "File to output the Naive Bayes Gaussian mixture model test confusion matrix (counts) to. The default is stdout.";
01424     farg   = process_nb_gmm_confusion_cnt_out_opt;
01425     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01426 
01427     l_name = "nb-gmm-confusion-pct-out";
01428     s_name = 0;
01429     desc   = "File to output the Naive Bayes Gaussian mixture model test confusion matrix (percents) to. The default is stdout.";
01430     farg   = process_nb_gmm_confusion_pct_out_opt;
01431     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01432 
01433     l_name = "nb-gmm-preds-out";
01434     s_name = 0;
01435     desc   = "File to output the Naive Bayes Gaussian mixture model test predictions to. The default is no output.";
01436     farg   = process_nb_gmm_preds_out_opt;
01437     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01438 
01439     l_name = "mv-gmm-summary-out";
01440     s_name = 0;
01441     desc   = "File to output the multivariate Gaussian mixture model test performance summary to. The default is stdout.";
01442     farg   = process_mv_gmm_summary_out_opt;
01443     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01444 
01445     l_name = "mv-gmm-details-cnt-out";
01446     s_name = 0;
01447     desc   = "File to output the multivariate Gaussian mixture model test performance details (counts) to. The default is stdout.";
01448     farg   = process_mv_gmm_details_cnt_out_opt;
01449     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01450 
01451     l_name = "mv-gmm-details-pct-out";
01452     s_name = 0;
01453     desc   = "File to output the multivariate Gaussian mixture model test performance details (percents) to. The default is stdout.";
01454     farg   = process_mv_gmm_details_pct_out_opt;
01455     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01456 
01457     l_name = "mv-gmm-confusion-cnt-out";
01458     s_name = 0;
01459     desc   = "File to output the multivariate Gaussian mixture model test confusion matrix (counts) to. The default is stdout.";
01460     farg   = process_mv_gmm_confusion_cnt_out_opt;
01461     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01462 
01463     l_name = "mv-gmm-confusion-pct-out";
01464     s_name = 0;
01465     desc   = "File to output the multivariate Gaussian mixture model test confusion matrix (percents) to. The default is stdout.";
01466     farg   = process_mv_gmm_confusion_pct_out_opt;
01467     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01468 
01469     l_name = "mv-gmm-preds-out";
01470     s_name = 0;
01471     desc   = "File to output the multivariate Gaussian mixture model test predictions to. The default is no output.";
01472     farg   = process_mv_gmm_preds_out_opt;
01473     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01474 
01475 #ifdef HAPLO_ENABLE_SVM
01476     l_name = "svm-summary-out";
01477     s_name = 0;
01478     desc   = "File to output the SVM model test performance summary to. The default is stdout.";
01479     farg   = process_svm_summary_out_opt;
01480     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01481 
01482     l_name = "svm-details-cnt-out";
01483     s_name = 0;
01484     desc   = "File to output the SVM model test performance details (counts) to. The default is stdout.";
01485     farg   = process_svm_details_cnt_out_opt;
01486     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01487 
01488     l_name = "svm-details-pct-out";
01489     s_name = 0;
01490     desc   = "File to output the SVM model test performance details (percents) to. The default is stdout.";
01491     farg   = process_svm_details_pct_out_opt;
01492     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01493 
01494     l_name = "svm-confusion-cnt-out";
01495     s_name = 0;
01496     desc   = "File to output the SVM model test confusion matrix (counts) to. The default is stdout.";
01497     farg   = process_svm_confusion_cnt_out_opt;
01498     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01499 
01500     l_name = "svm-confusion-pct-out";
01501     s_name = 0;
01502     desc   = "File to output the SVM model test confusion matrix (percents) to. The default is stdout.";
01503     farg   = process_svm_confusion_pct_out_opt;
01504     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01505 
01506     l_name = "svm-preds-out";
01507     s_name = 0;
01508     desc   = "File to output the SVM model test predictions to. The default is no output.";
01509     farg   = process_svm_preds_out_opt;
01510     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01511 #endif
01512 
01513 #ifdef HAPLO_ENABLE_WEKA
01514     l_name = "weka-j48-summary-out";
01515     s_name = 0;
01516     desc   = "File to output the Weka J48 model test performance summary to. The default is stdout.";
01517     farg   = process_weka_j48_summary_out_opt;
01518     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01519 
01520     l_name = "weka-j48-details-cnt-out";
01521     s_name = 0;
01522     desc   = "File to output the Weka J48 model test performance details (counts) to. The default is stdout.";
01523     farg   = process_weka_j48_details_cnt_out_opt;
01524     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01525 
01526     l_name = "weka-j48-details-pct-out";
01527     s_name = 0;
01528     desc   = "File to output the Weka J48 model test performance details (percents) to. The default is stdout.";
01529     farg   = process_weka_j48_details_pct_out_opt;
01530     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01531 
01532     l_name = "weka-j48-confusion-cnt-out";
01533     s_name = 0;
01534     desc   = "File to output the Weka J48 model test confusion matrix (counts) to. The default is stdout.";
01535     farg   = process_weka_j48_confusion_cnt_out_opt;
01536     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01537 
01538     l_name = "weka-j48-confusion-pct-out";
01539     s_name = 0;
01540     desc   = "File to output the Weka J48 model test confusion matrix (percents) to. The default is stdout.";
01541     farg   = process_weka_j48_confusion_pct_out_opt;
01542     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01543 
01544     l_name = "weka-j48-preds-out";
01545     s_name = 0;
01546     desc   = "File to output the Weka J48 model test predictions to. The default is no output.";
01547     farg   = process_weka_j48_preds_out_opt;
01548     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01549 
01550     l_name = "weka-part-summary-out";
01551     s_name = 0;
01552     desc   = "File to output the Weka PART model test performance summary to. The default is stdout.";
01553     farg   = process_weka_part_summary_out_opt;
01554     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01555 
01556     l_name = "weka-part-details-cnt-out";
01557     s_name = 0;
01558     desc   = "File to output the Weka PART model test performance details (counts) to. The default is stdout.";
01559     farg   = process_weka_part_details_cnt_out_opt;
01560     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01561 
01562     l_name = "weka-part-details-pct-out";
01563     s_name = 0;
01564     desc   = "File to output the Weka PART model test performance details (percents) to. The default is stdout.";
01565     farg   = process_weka_part_details_pct_out_opt;
01566     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01567 
01568     l_name = "weka-part-confusion-cnt-out";
01569     s_name = 0;
01570     desc   = "File to output the Weka PART model test confusion matrix (counts) to. The default is stdout.";
01571     farg   = process_weka_part_confusion_cnt_out_opt;
01572     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01573 
01574     l_name = "weka-part-confusion-pct-out";
01575     s_name = 0;
01576     desc   = "File to output the Weka PART model test confusion matrix (percents) to. The default is stdout.";
01577     farg   = process_weka_part_confusion_pct_out_opt;
01578     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01579 
01580     l_name = "weka-part-preds-out";
01581     s_name = 0;
01582     desc   = "File to output the Weka PART model test predictions to. The default is no output.";
01583     farg   = process_weka_part_preds_out_opt;
01584     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01585 #endif
01586 
01587     l_name = "nearest-summary-out";
01588     s_name = 0;
01589     desc   = "File to output the nearest neighbor model test performance summary to. The default is stdout.";
01590     farg   = process_nearest_summary_out_opt;
01591     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01592 
01593     l_name = "nearest-details-cnt-out";
01594     s_name = 0;
01595     desc   = "File to output the nearest neighbor model test performance details (counts) to. The default is stdout.";
01596     farg   = process_nearest_details_cnt_out_opt;
01597     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01598 
01599     l_name = "nearest-details-pct-out";
01600     s_name = 0;
01601     desc   = "File to output the nearest neighbor model test performance details (percents) to. The default is stdout.";
01602     farg   = process_nearest_details_pct_out_opt;
01603     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01604 
01605     l_name = "nearest-confusion-cnt-out";
01606     s_name = 0;
01607     desc   = "File to output the nearest neighbor model test confusion matrix (counts) to. The default is stdout.";
01608     farg   = process_nearest_confusion_cnt_out_opt;
01609     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01610 
01611     l_name = "nearest-confusion-pct-out";
01612     s_name = 0;
01613     desc   = "File to output the nearest neighbor model test confusion matrix (percents) to. The default is stdout.";
01614     farg   = process_nearest_confusion_pct_out_opt;
01615     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01616 
01617     l_name = "nearest-preds-out";
01618     s_name = 0;
01619     desc   = "File to output the nearest neighbor model test predictions to. The default is no output.";
01620     farg   = process_nearest_preds_out_opt;
01621     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01622 
01623     l_name = "tandem-agree-summary-out";
01624     s_name = 0;
01625     desc   = "File to output the tandem agreement test performance summary to. The default is stdout.";
01626     farg   = process_tandem_agree_summary_out_opt;
01627     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01628 
01629     l_name = "tandem-agree-details-cnt-out";
01630     s_name = 0;
01631     desc   = "File to output the tandem agreement test performance details (counts) to. The default is stdout.";
01632     farg   = process_tandem_agree_details_cnt_out_opt;
01633     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01634 
01635     l_name = "tandem-agree-details-pct-out";
01636     s_name = 0;
01637     desc   = "File to output the tandem agreement test performance details (percents) to. The default is stdout.";
01638     farg   = process_tandem_agree_details_pct_out_opt;
01639     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01640 
01641     l_name = "tandem-summary-out";
01642     s_name = 0;
01643     desc   = "File to output the tandem test performance summary to. The default is stdout.";
01644     farg   = process_tandem_summary_out_opt;
01645     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01646 
01647     l_name = "tandem-details-cnt-out";
01648     s_name = 0;
01649     desc   = "File to output the tandem test performance details (counts) to. The default is stdout.";
01650     farg   = process_tandem_details_cnt_out_opt;
01651     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01652 
01653     l_name = "tandem-details-pct-out";
01654     s_name = 0;
01655     desc   = "File to output the tandem test performance details (percents) to. The default is stdout.";
01656     farg   = process_tandem_details_pct_out_opt;
01657     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01658 
01659     l_name = "tandem-confusion-cnt-out";
01660     s_name = 0;
01661     desc   = "File to output the tandem test confusion matrix (counts) to. The default is stdout.";
01662     farg   = process_tandem_confusion_cnt_out_opt;
01663     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01664 
01665     l_name = "tandem-confusion-pct-out";
01666     s_name = 0;
01667     desc   = "File to output the tandem test confusion matrix (percents) to. The default is stdout.";
01668     farg   = process_tandem_confusion_pct_out_opt;
01669     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01670 
01671     l_name = "tandem-preds-out";
01672     s_name = 0;
01673     desc   = "File to output the tandem test predictions to. The default is no output.";
01674     farg   = process_tandem_preds_out_opt;
01675     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01676     assert(i == NUM_OPTS_WITH_ARG);
01677 }
01678 
01680 static uint8_t num_models_to_test()
01681 {
01682     return (opts.nb_freq_fname   != 0) +
01683            (opts.nb_gauss_fname  != 0) +
01684            (opts.nb_gmm_fname    != 0) +
01685            (opts.mv_gmm_fname    != 0) +
01686 #ifdef HAPLO_ENABLE_SVM
01687            (opts.svm_fname       != 0) +
01688 #endif
01689 #ifdef HAPLO_ENABLE_WEKA
01690            (opts.weka_j48_fname  != 0) +
01691            (opts.weka_part_fname != 0) +
01692 #endif
01693            (opts.nearest_fname   != 0);
01694 }
01695 
01697 static void find_ancestors
01698 (
01699     Vector_u32**      ancestor_types_out,
01700     Vector_u32**      ancestor_labels_out,
01701     const Vector_u32* labels_1,
01702     const Vector_u32* labels_2
01703 )
01704 {
01705     uint32_t i;
01706     uint32_t ancestor_label;
01707     Haplo_ancestor_type ancestor_type;
01708 
01709     if (!labels_1 || !labels_2)
01710         return;
01711 
01712     assert(labels_1->num_elts == labels_2->num_elts);
01713     create_vector_u32(ancestor_types_out, labels_1->num_elts);
01714     create_vector_u32(ancestor_labels_out, labels_1->num_elts);
01715 
01716     for (i = 0; i < labels_1->num_elts; i++)
01717     {
01718         ancestor_type = find_ancestor_index_of_pair(&ancestor_label,
01719                 labels_1->elts[ i ], labels_2->elts[ i ]);
01720 
01721         (*ancestor_types_out)->elts[ i ] = ancestor_type;
01722         (*ancestor_labels_out)->elts[ i ] = ancestor_label;
01723     }
01724 }
01725 
01727 static void find_tandem_ancestors
01728 (
01729     Vector_u32**      ancestor_types_out,
01730     Vector_u32**      ancestor_labels_out,
01731     const Vector_u32* tandem_types,
01732     const Vector_u32* tandem_labels,
01733     const Vector_u32* test_labels
01734 )
01735 {
01736     uint32_t i;
01737     uint32_t ancestor_label;
01738     Haplo_ancestor_type ancestor_type;
01739 
01740     assert(tandem_labels->num_elts == test_labels->num_elts);
01741     create_vector_u32(ancestor_types_out, tandem_labels->num_elts);
01742     create_vector_u32(ancestor_labels_out, tandem_labels->num_elts);
01743 
01744     for (i = 0; i < tandem_labels->num_elts; i++)
01745     {
01746         if (tandem_types->elts[ i ] != HAPLO_ANCESTOR_NONE)
01747         {
01748             ancestor_type = find_ancestor_index_of_pair(&ancestor_label,
01749                     tandem_labels->elts[ i ], test_labels->elts[ i ]);
01750         }
01751         else 
01752         {
01753             ancestor_type = HAPLO_ANCESTOR_NONE;
01754             ancestor_label = 0;
01755         }
01756 
01757         (*ancestor_types_out)->elts[ i ] = ancestor_type;
01758         (*ancestor_labels_out)->elts[ i ] = ancestor_label;
01759     }
01760 }
01761 
01763 static void find_ancestors_of_sets
01764 (
01765     Vector_u32**      ancestor_types_out,
01766     Vector_u32**      ancestor_labels_out,
01767     const Vector_u32* labels_1,
01768     const Vector_u32* labels_2,
01769     const Vector_u32* labels_3,
01770     const Vector_u32* labels_4,
01771     const Vector_u32* labels_5,
01772     const Vector_u32* labels_6,
01773     const Vector_u32* labels_7,
01774     const Vector_u32* labels_8
01775 )
01776 {
01777     uint32_t            n, nn, N;
01778     uint32_t            i;
01779     uint32_t            num_labels;
01780     uint32_t            ancestor_label;
01781     Haplo_ancestor_type ancestor_type;
01782     Vector_u32*         labels;
01783     Vector_u32*         labelss;
01784 
01785     N = 0;
01786 
01787     if (labels_1) {N++; num_labels = labels_1->num_elts;}
01788     if (labels_2) {N++; num_labels = labels_2->num_elts;}
01789     if (labels_3) {N++; num_labels = labels_3->num_elts;}
01790     if (labels_4) {N++; num_labels = labels_4->num_elts;}
01791     if (labels_5) {N++; num_labels = labels_5->num_elts;}
01792     if (labels_6) {N++; num_labels = labels_6->num_elts;}
01793     if (labels_7) {N++; num_labels = labels_7->num_elts;}
01794     if (labels_8) {N++; num_labels = labels_8->num_elts;}
01795 
01796     assert(N > 0);
01797 
01798     labels = NULL;
01799     create_vector_u32(&labels, N);
01800 
01801     create_vector_u32(ancestor_types_out, num_labels);
01802     create_vector_u32(ancestor_labels_out, num_labels);
01803 
01804     for (i = 0; i < num_labels; i++)
01805     {
01806         N = 0;
01807 
01808         if (labels_1) labels->elts[ N++ ] = labels_1->elts[ i ];
01809         if (labels_2) labels->elts[ N++ ] = labels_2->elts[ i ];
01810         if (labels_3) labels->elts[ N++ ] = labels_3->elts[ i ];
01811         if (labels_4) labels->elts[ N++ ] = labels_4->elts[ i ];
01812         if (labels_5) labels->elts[ N++ ] = labels_5->elts[ i ];
01813         if (labels_6) labels->elts[ N++ ] = labels_6->elts[ i ];
01814         if (labels_7) labels->elts[ N++ ] = labels_7->elts[ i ];
01815         if (labels_8) labels->elts[ N++ ] = labels_8->elts[ i ];
01816 
01817         ancestor_type = find_ancestor_index_of_set(&ancestor_label, labels);
01818 
01819         if (opts.exclude_one && ancestor_type == HAPLO_ANCESTOR_NONE && N > 3)
01820         {
01821             labelss = NULL;
01822             create_vector_u32(&labelss, N-1);
01823             for (n = 0; ancestor_type == HAPLO_ANCESTOR_NONE && n < N; n++)
01824             {
01825                 for (nn = 0; nn < N-1; nn++)
01826                 {
01827                     if (nn < n)
01828                     {
01829                         labelss->elts[ nn ] = labels->elts[ nn ];
01830                     }
01831                     else
01832                     {
01833                         labelss->elts[ nn ] = labels->elts[ nn+1 ];
01834                     }
01835                 }
01836 
01837                 ancestor_type = find_ancestor_index_of_set(&ancestor_label,
01838                         labelss);
01839             }
01840             free_vector_u32(labelss);
01841         }
01842 
01843         (*ancestor_types_out)->elts[ i ] = ancestor_type;
01844         (*ancestor_labels_out)->elts[ i ] = ancestor_label;
01845     }
01846 }
01847 
01849 static void write_leave_one_out_summary
01850 (
01851     const Matblock_u8* data_ids,
01852     const Vector_u32*  data_labels,
01853     const Vector_u32*  ancestor_types,
01854     const Vector_u32*  ancestor_labels,
01855     const Vector_u32*  pred_labels,
01856     const Vector_d*    pred_confs,
01857     const Vector_u32*  tandem_types,
01858     const char*        fname
01859 )
01860 {
01861     uint32_t    i;
01862     uint32_t    n, N;
01863     float       D;
01864     const char* fmt;
01865     FILE*       fp;
01866     xmlDoc*     xml_doc  = NULL;
01867     xmlNode*    xml_root = NULL;
01868     xmlNode*    xml_node[3];
01869     char        xml_buf[256];
01870     Error*      err;
01871     Vector_f*   counts = NULL;
01872 
01873     if ((err = open_output(&fp, &xml_doc, "haplo-test-loo-summary-out",
01874                     "haplo-test-loo-summary-out.dtd", fname)))
01875     {
01876         print_error_msg("haplo-test", err->msg);
01877     }
01878 
01879     create_zero_vector_f(&counts, 3);
01880 
01881     if (opts.header_out && opts.output_format != HAPLO_OUTPUT_XML)
01882     {
01883         switch (opts.output_format)
01884         {
01885             case HAPLO_OUTPUT_TXT:
01886                 fmt = "%-10s  %-10s  %-10s\n";
01887                 break;
01888             case HAPLO_OUTPUT_CSV:
01889                 fmt = "%s,%s,%s\n";
01890                 break;
01891             case HAPLO_OUTPUT_XML:
01892                 break;
01893         }
01894         fprintf(fp, fmt, "Direct", "Indirect", "None");
01895     }
01896     else if (opts.output_format == HAPLO_OUTPUT_XML)
01897     {
01898         xml_root = xmlDocGetRootElement(xml_doc);
01899     }
01900 
01901     N = ancestor_types->num_elts;
01902 
01903     for (n = 0; n < N; n++)
01904     {
01905         if (!tandem_types || tandem_types->elts[n] != HAPLO_ANCESTOR_NONE)
01906         {
01907             switch (ancestor_types->elts[ n ])
01908             {
01909                 case HAPLO_ANCESTOR_DIRECT:
01910                     counts->elts[0]++;
01911                     break;
01912                 case HAPLO_ANCESTOR_INDIRECT:
01913                     counts->elts[1]++;
01914                     break;
01915                 case HAPLO_ANCESTOR_NONE:
01916                     counts->elts[2]++;
01917                     break;
01918             }
01919         }
01920     }
01921 
01922     switch (opts.output_format)
01923     {
01924         case HAPLO_OUTPUT_TXT:
01925             fmt = "%-10.0f  %-10.0f  %-10.0f\n";
01926             fprintf(fp, fmt, counts->elts[0], counts->elts[1], counts->elts[2]);
01927             break;
01928         case HAPLO_OUTPUT_CSV:
01929             fmt = "%.0f,%.0f,%.0f\n";
01930             fprintf(fp, fmt, counts->elts[0], counts->elts[1], counts->elts[2]);
01931             break;
01932         case HAPLO_OUTPUT_XML:
01933             xml_node[0] = XMLNewChild(xml_root, "direct", NULL);
01934             xml_node[1] = XMLNewChild(xml_root, "indirect", NULL);
01935             xml_node[2] = XMLNewChild(xml_root, "none", NULL);
01936             for (i = 0; i < 3; i++)
01937             {
01938                 snprintf(xml_buf, 256, "%.0f", counts->elts[ i ]);
01939                 XMLNewChild(xml_node[ i ], "count", xml_buf);
01940             }
01941             break;
01942     }
01943 
01944     D = counts->elts[0] + counts->elts[1] + counts->elts[2];
01945     if (D > 0)
01946     {
01947         multiply_vector_by_scalar_f(&counts, counts, 1/D);
01948     }
01949     switch (opts.output_format)
01950     {
01951         case HAPLO_OUTPUT_TXT:
01952             fmt = "%-10.3f  %-10.3f  %-10.3f\n";
01953             fprintf(fp, fmt, counts->elts[0], counts->elts[1], counts->elts[2]);
01954             break;
01955         case HAPLO_OUTPUT_CSV:
01956             fmt = "%.3f,%.3f,%.3f\n";
01957             fprintf(fp, fmt, counts->elts[0], counts->elts[1], counts->elts[2]);
01958             break;
01959         case HAPLO_OUTPUT_XML:
01960             for (i = 0; i < 3; i++)
01961             {
01962                 snprintf(xml_buf, 256, "%.3f", counts->elts[ i ]);
01963                 XMLNewChild(xml_node[ i ], "percent", xml_buf);
01964             }
01965             break;
01966     }
01967 
01968     free_vector_f(counts);
01969 
01970     if ((err = close_output(fp, xml_doc, fname)))
01971     {
01972         print_error_msg("haplo-test", err->msg);
01973     }
01974 }
01975 
01984 static void write_leave_one_out_details
01985 (
01986     const Matblock_u8* data_ids,
01987     const Vector_u32*  data_labels,
01988     const Vector_u32*  ancestor_types,
01989     const Vector_u32*  ancestor_labels,
01990     const Vector_u32*  pred_labels,
01991     const Vector_d*    pred_confs,
01992     const Vector_u32*  tandem_types,
01993     const char*        cnt_fname,
01994     const char*        pct_fname
01995 )
01996 {
01997     uint32_t    i;
01998     uint32_t    n, nn, N;
01999     float       D;
02000     const char* label;
02001     const char* fmt;
02002     FILE*       cnt_fp;
02003     FILE*       pct_fp;
02004     xmlDoc*     xml_doc   = NULL;
02005     xmlNode*    xml_root  = NULL;
02006     xmlNode*    xml_node  = NULL;
02007     xmlNode*    xml_child[3] = {0};
02008     char        xml_buf[256];
02009     Error*      err;
02010 
02011     Vector_f* counts[3] = {0};
02012 
02013     if (!pred_labels)
02014         return;
02015 
02016     if ((err = open_output(&cnt_fp, &xml_doc, "haplo-test-loo-details-out",
02017                     "haplo-test-loo-details-out.dtd", cnt_fname)) ||
02018         (err = open_output(&pct_fp, NULL, NULL, NULL, pct_fname)))
02019     {
02020         print_error_msg("haplo-test", err->msg);
02021     }
02022 
02023     N = get_num_haplo_groups();
02024     for (i = 0; i < 3; i++)
02025     {
02026         create_zero_vector_f(&(counts[ i ]), N);
02027     }
02028 
02029     for (i = 0; i < ancestor_types->num_elts; i++)
02030     {
02031         if (!tandem_types || tandem_types->elts[i] != HAPLO_ANCESTOR_NONE)
02032         {
02033             assert(pred_labels->elts[ i ] < N);
02034             n = pred_labels->elts[ i ];
02035 
02036             switch (ancestor_types->elts[ i ])
02037             {
02038                 case HAPLO_ANCESTOR_DIRECT:
02039                     counts[0]->elts[ n ]++;
02040                     break;
02041                 case HAPLO_ANCESTOR_INDIRECT:
02042                     counts[1]->elts[ n ]++;
02043                     break;
02044                 case HAPLO_ANCESTOR_NONE:
02045                     counts[2]->elts[ n ]++;
02046                     break;
02047             }
02048         }
02049     }
02050 
02051     if (opts.header_out && opts.output_format != HAPLO_OUTPUT_XML)
02052     {
02053         switch (opts.output_format)
02054         {
02055             case HAPLO_OUTPUT_TXT:
02056                 fmt = "%-10s  %-15s  %-15s  %-15s\n";
02057                 break;
02058             case HAPLO_OUTPUT_CSV:
02059                 fmt = "%s,%s,%s,%s\n";
02060                 break;
02061             case HAPLO_OUTPUT_XML:
02062                 break;
02063         }
02064         fprintf(cnt_fp, fmt, "Predicted", "Direct", "Indirect", "None");
02065         fprintf(pct_fp, fmt, "Predicted", "Direct", "Indirect", "None");
02066     }
02067     else if (opts.output_format == HAPLO_OUTPUT_XML)
02068     {
02069         xml_root = xmlDocGetRootElement(xml_doc);
02070     }
02071 
02072     nn = 0;
02073     for (n = 0; n < N; n++)
02074     {
02075         D = counts[0]->elts[n] + counts[1]->elts[n] + counts[2]->elts[n];
02076         if (D > 0)
02077         {
02078             lookup_haplo_group_label_from_index(&label, n);
02079             switch (opts.output_format)
02080             {
02081                 case HAPLO_OUTPUT_TXT:
02082                     fprintf(cnt_fp, 
02083                          "%-10s  %-15.0f  %-15.0f  %-15.0f\n",
02084                             label, counts[0]->elts[n], counts[1]->elts[n],
02085                             counts[2]->elts[n]);
02086                     fprintf(pct_fp, 
02087                          "%-10s  %-15.3f  %-15.3f  %-15.3f\n",
02088                             label, counts[0]->elts[n]/D, counts[1]->elts[n]/D,
02089                             counts[2]->elts[n]/D);
02090                     break;
02091                 case HAPLO_OUTPUT_CSV:
02092                     fprintf(cnt_fp, "%s,%.0f,%.0f,%.0f\n", label,
02093                             counts[0]->elts[n], counts[1]->elts[n],
02094                             counts[2]->elts[n]);
02095                     fprintf(pct_fp, "%s,%.3f,%.3f,%.3f\n", label,
02096                             counts[0]->elts[n]/D, counts[1]->elts[n]/D,
02097                             counts[2]->elts[n]/D);
02098                     break;
02099                 case HAPLO_OUTPUT_XML:
02100                     xml_node = XMLNewChild(xml_root, "predicted", NULL);
02101                     snprintf(xml_buf, 256, "%d", nn+1);
02102                     XMLNewProp(xml_node, "number", xml_buf);
02103                     XMLNewChild(xml_node, "label", label);
02104                     xml_child[0] = XMLNewChild(xml_node, "direct", 0);
02105                     xml_child[1] = XMLNewChild(xml_node, "indirect", 0);
02106                     xml_child[2] = XMLNewChild(xml_node, "none", 0);
02107                     for (i = 0; i < 3; i++)
02108                     {
02109                         snprintf(xml_buf, 256, "%.0f", counts[i]->elts[n]);
02110                         XMLNewChild(xml_child[ i ], "count", xml_buf);
02111                         snprintf(xml_buf, 256, "%.3f", counts[i]->elts[n]/D);
02112                         XMLNewChild(xml_child[ i ], "percent", xml_buf);
02113                     }
02114                     break;
02115             }
02116 
02117             nn++;
02118         }
02119     }
02120 
02121     for (i = 0; i < 3; i++)
02122     {
02123         free_vector_f(counts[ i ]);
02124     }
02125 
02126     if ((err = close_output(cnt_fp, xml_doc, cnt_fname)) ||
02127         (err = close_output(pct_fp, NULL, pct_fname)))
02128     {
02129         print_error_msg("haplo-test", err->msg);
02130     }
02131 }
02132 
02134 static void write_leave_one_out_confusion
02135 (
02136     const Matblock_u8* data_ids,
02137     const Vector_u32*  data_labels,
02138     const Vector_u32*  ancestor_types,
02139     const Vector_u32*  ancestor_labels,
02140     const Vector_u32*  pred_labels,
02141     const Vector_d*    pred_confs,
02142     const Vector_u32*  tandem_types,
02143     const char*        cnt_fname,
02144     const char*        pct_fname
02145 )
02146 {
02147     uint32_t    i;
02148     uint32_t    n_1, n_2, N;
02149     uint32_t    nn_1, nn_2;
02150     float       D;
02151     const char* label;
02152     FILE*       cnt_fp;
02153     FILE*       pct_fp;
02154     xmlDoc*     xml_doc    = NULL;
02155     xmlNode*    xml_root   = NULL;
02156     xmlNode*    xml_actual = NULL;
02157     xmlNode*    xml_pred   = NULL;
02158     char        xml_buf[256];
02159     Error*      err;
02160 
02161     Vector_f* actual_counts    = NULL;
02162     Vector_f* predicted_counts = NULL;
02163     Matrix_f* confusion        = NULL;
02164     Matrix_f* confusion_pct    = NULL;
02165 
02166     if (!pred_labels)
02167         return;
02168 
02169     assert(data_labels->num_elts == pred_labels->num_elts);
02170 
02171     if ((err = open_output(&cnt_fp, &xml_doc, "haplo-test-loo-confusion-out",
02172                     "haplo-test-loo-confusion-out.dtd", cnt_fname)) ||
02173         (err = open_output(&pct_fp, NULL, NULL, NULL, pct_fname)))
02174     {
02175         print_error_msg("haplo-test", err->msg);
02176     }
02177 
02178     N = get_num_haplo_groups();
02179     create_zero_vector_f(&actual_counts, N);
02180     create_zero_vector_f(&predicted_counts, N);
02181     create_zero_matrix_f(&confusion, N, N);
02182     create_zero_matrix_f(&confusion_pct, N, N);
02183 
02184     for (i = 0; i < data_labels->num_elts; i++)
02185     {
02186         if (!tandem_types || tandem_types->elts[i] != HAPLO_ANCESTOR_NONE)
02187         {
02188         assert(data_labels->elts[ i ] < N);
02189         assert(pred_labels->elts[ i ] < N);
02190 
02191         actual_counts->elts[ data_labels->elts[ i ] ]++;
02192         predicted_counts->elts[ pred_labels->elts[ i ] ]++;
02193         confusion->elts[ data_labels->elts[ i ] ][ pred_labels->elts[ i ] ]++;
02194         }
02195     }
02196 
02197     copy_matrix_f(&confusion_pct, confusion);
02198     for (n_2 = 0; n_2 < N; n_2++)
02199     {
02200         D = predicted_counts->elts[ n_2 ];
02201         if (D > 0)
02202         {
02203             for (n_1 = 0; n_1 < N; n_1++)
02204             {
02205                 confusion_pct->elts[ n_1 ][ n_2 ] /= D;
02206             }
02207         }
02208     }
02209 
02210     if (opts.header_out && opts.output_format != HAPLO_OUTPUT_XML)
02211     {
02212         switch (opts.output_format)
02213         {
02214             case HAPLO_OUTPUT_TXT:
02215                 fprintf(cnt_fp, "%-10s", "Actual");
02216                 fprintf(pct_fp, "%-10s", "Actual");
02217                 for (n_2 = 0; n_2 < N; n_2++)
02218                 {
02219                     if (predicted_counts->elts[ n_2 ])
02220                     {
02221                         lookup_haplo_group_label_from_index(&label, n_2);
02222                         fprintf(cnt_fp, "  %-10s", label);
02223                         fprintf(pct_fp, "  %-10s", label);
02224                     }
02225                 }
02226                 break;
02227             case HAPLO_OUTPUT_CSV:
02228                 fprintf(cnt_fp, "%s", "Actual");
02229                 fprintf(pct_fp, "%s", "Actual");
02230                 for (n_2 = 0; n_2 < N; n_2++)
02231                 {
02232                     if (predicted_counts->elts[ n_2 ])
02233                     {
02234                         lookup_haplo_group_label_from_index(&label, n_2);
02235                         fprintf(cnt_fp, ",%s", label);
02236                         fprintf(pct_fp, ",%s", label);
02237                     }
02238                 }
02239                 break;
02240             case HAPLO_OUTPUT_XML:
02241                 break;
02242         }
02243         fprintf(cnt_fp, "\n");
02244         fprintf(pct_fp, "\n");
02245     }
02246     else if (opts.output_format == HAPLO_OUTPUT_XML)
02247     {
02248         xml_root = xmlDocGetRootElement(xml_doc);
02249     }
02250 
02251     nn_1 = 0;
02252     for (n_1 = 0; n_1 < N; n_1++)
02253     {
02254         switch (opts.output_format)
02255         {
02256             case HAPLO_OUTPUT_TXT:
02257                 if (actual_counts->elts[ n_1 ] > 0)
02258                 {
02259                     lookup_haplo_group_label_from_index(&label, n_1);
02260                     fprintf(cnt_fp, "%-10s", label);
02261                     fprintf(pct_fp, "%-10s", label);
02262 
02263                     nn_2 = 0;
02264                     for (n_2 = 0; n_2 < N; n_2++)
02265                     {
02266                         if (predicted_counts->elts[ n_2 ] > 0)
02267                         {
02268                             fprintf(cnt_fp, "  %-10.0f", 
02269                                     confusion->elts[ n_1 ][ n_2 ]);
02270                             fprintf(pct_fp, "  %-10.3f", 
02271                                     confusion_pct->elts[ n_1 ][ n_2 ]);
02272                             nn_2++;
02273                         }
02274                     }
02275                     fprintf(cnt_fp, "\n");
02276                     fprintf(pct_fp, "\n");
02277                     nn_1++;
02278                 }
02279                 break;
02280             case HAPLO_OUTPUT_CSV:
02281                 if (actual_counts->elts[ n_1 ] > 0)
02282                 {
02283                     lookup_haplo_group_label_from_index(&label, n_1);
02284                     fprintf(cnt_fp, "%s", label);
02285                     fprintf(pct_fp, "%s", label);
02286 
02287                     nn_2 = 0;
02288                     for (n_2 = 0; n_2 < N; n_2++)
02289                     {
02290                         if (predicted_counts->elts[ n_2 ] > 0)
02291                         {
02292                             fprintf(cnt_fp, ",%.0f", 
02293                                     confusion->elts[ n_1 ][ n_2 ]);
02294                             fprintf(pct_fp, ",%.3f", 
02295                                     confusion_pct->elts[ n_1 ][ n_2 ]);
02296                             nn_2++;
02297                         }
02298                     }
02299                     fprintf(cnt_fp, "\n");
02300                     fprintf(pct_fp, "\n");
02301                     nn_1++;
02302                 }
02303                 break;
02304             case HAPLO_OUTPUT_XML:
02305                 if (actual_counts->elts[ n_1 ] > 0)
02306                 {
02307                     lookup_haplo_group_label_from_index(&label, n_1);
02308                     xml_actual = XMLNewChild(xml_root, "actual", NULL);
02309                     snprintf(xml_buf, 256, "%d", nn_1+1);
02310                     XMLNewProp(xml_actual, "number", xml_buf);
02311                     XMLNewChild(xml_actual, "label", label);
02312 
02313                     nn_2 = 0;
02314                     for (n_2 = 0; n_2 < N; n_2++)
02315                     {
02316                         if (predicted_counts->elts[ n_2 ] > 0)
02317                         {
02318                             lookup_haplo_group_label_from_index(&label, n_2);
02319                             xml_pred = XMLNewChild(xml_actual, "predicted", 0);
02320                             snprintf(xml_buf, 256, "%d", nn_2+1);
02321                             XMLNewProp(xml_pred, "number", xml_buf);
02322                             XMLNewChild(xml_pred, "label", label);
02323 
02324                             snprintf(xml_buf, 256, "%.0f", 
02325                                     confusion->elts[n_1][n_2]);
02326                             XMLNewChild(xml_pred, "count", xml_buf);
02327 
02328                             snprintf(xml_buf, 256, "%.3f", 
02329                                     confusion_pct->elts[n_1][n_2]);
02330                             XMLNewChild(xml_pred, "percent", xml_buf);
02331 
02332                             nn_2++;
02333                         }
02334                     }
02335                     nn_1++;
02336                 }
02337                 break;
02338         }
02339     }
02340 
02341     free_vector_f(actual_counts);
02342     free_vector_f(predicted_counts);
02343     free_matrix_f(confusion);
02344     free_matrix_f(confusion_pct);
02345 
02346     if ((err = close_output(cnt_fp, xml_doc, cnt_fname)) ||
02347         (err = close_output(pct_fp, NULL, pct_fname)))
02348     {
02349         print_error_msg("haplo-test", err->msg);
02350     }
02351 }
02352 
02354 static void write_leave_one_out_preds
02355 (
02356     const char*        type,
02357     const Matblock_u8* data_ids,
02358     const Vector_u32*  data_labels,
02359     const Vector_u32*  ancestor_types,
02360     const Vector_u32*  ancestor_labels,
02361     const Vector_u32*  pred_labels,
02362     const Vector_d*    pred_confs,
02363     const Vector_u32*  tandem_types,
02364     const char*        fname
02365 )
02366 {
02367     uint32_t    i, j;
02368     FILE*       fp;
02369     xmlDoc*     xml_doc    = NULL;
02370     xmlNode*    xml_root   = NULL;
02371     xmlNode*    xml_node   = NULL;
02372     char        xml_buf[256] = {0};
02373     Error*      err;
02374 
02375     if (!pred_labels)
02376         return;
02377 
02378     if ((err = open_output(&fp, &xml_doc, "haplo-test-loo-predictions-out",
02379                     "haplo-test-loo-predictions-out.dtd", fname)))
02380     {
02381         print_error_msg("haplo-test", err->msg);
02382     }
02383 
02384     if (opts.header_out && opts.output_format != HAPLO_OUTPUT_XML)
02385     {
02386         if (data_ids)
02387         {
02388             for (j = 0; j < data_ids->num_rows; j++)
02389             {
02390                 switch (opts.output_format)
02391                 {
02392                     case HAPLO_OUTPUT_TXT:
02393                         fprintf(fp, "ID %-7d  ", j+1);
02394                         break;
02395                     case HAPLO_OUTPUT_CSV:
02396                         fprintf(fp, "ID %d,", j+1);
02397                         break;
02398                     case HAPLO_OUTPUT_XML:
02399                         break;
02400                 }
02401             }
02402         }
02403 
02404         switch (opts.output_format)
02405         {
02406             case HAPLO_OUTPUT_TXT:
02407                 fprintf(fp, "%-10s  %-4s  %-10s", "Actual", "Ancestor", "Type");
02408                 if (pred_labels) 
02409                     fprintf(fp, "  %-10s  %-5s", "Prediction", "Conf");
02410                 break;
02411             case HAPLO_OUTPUT_CSV:
02412                 fprintf(fp, "%s,%s,%s", "Actual", "Ancestor", "Type");
02413                 if (pred_labels) 
02414                     fprintf(fp, ",%s,%s", "Prediction", "Conf");
02415                 break;
02416             case HAPLO_OUTPUT_XML:
02417                 break;
02418         }
02419         fprintf(fp, "\n");
02420     }
02421     else if (opts.output_format == HAPLO_OUTPUT_XML)
02422     {
02423         xml_root = xmlDocGetRootElement(xml_doc);
02424     }
02425 
02426     for (i = 0; i < data_labels->num_elts; i++)
02427     {
02428         if (!tandem_types || tandem_types->elts[i] != HAPLO_ANCESTOR_NONE)
02429         {
02430             if (opts.output_format == HAPLO_OUTPUT_XML)
02431             {
02432                 xml_node = XMLNewChild(xml_root, "sample", NULL);
02433                 snprintf(xml_buf, 256, "%d", i+1);
02434                 XMLNewProp(xml_node, "number", xml_buf);
02435             }
02436 
02437             write_ids(data_ids, i, HAPLO_SEP_SUFFIX, fp, xml_node);
02438             write_label(data_labels, i, HAPLO_SEP_SUFFIX, fp, xml_node);
02439 
02440             write_ancestor_label(ancestor_types, ancestor_labels, i, 
02441                     HAPLO_SEP_NONE, fp, xml_node);
02442 
02443             write_prediction(type, pred_labels, pred_confs, i, 
02444                     HAPLO_SEP_PREFIX, fp, xml_node);
02445 
02446             if (opts.output_format != HAPLO_OUTPUT_XML)
02447             {
02448                 fprintf(fp, "\n");
02449             }
02450         }
02451     }
02452 
02453     if ((err = close_output(fp, xml_doc, fname)))
02454     {
02455         print_error_msg("haplo-test", err->msg);
02456     }
02457 }
02458 
02462 static void create_leave_one_out_train_and_test_data
02463 (
02464     Vector_u32**       train_labels_out,
02465     Matrix_i32**       train_markers_out,
02466     Matrix_i32**       test_markers_out,
02467     const Vector_u32*  labels,
02468     const Matrix_i32*  markers,
02469     uint32_t           i
02470 )
02471 {
02472     create_vector_u32(train_labels_out, labels->num_elts - 1);
02473     create_matrix_i32(train_markers_out, markers->num_rows - 1, 
02474             markers->num_cols);
02475     create_matrix_i32(test_markers_out, 1, markers->num_cols);
02476 
02477     if (i > 0)
02478     {
02479         copy_vector_section_into_vector_u32(*train_labels_out, 0, labels, 0, i);
02480         copy_matrix_block_into_matrix_i32(*train_markers_out, 0, 0,
02481                 markers, 0, 0, i, markers->num_cols);
02482     }
02483     if (i < labels->num_elts - 1)
02484     {
02485         copy_vector_section_into_vector_u32(*train_labels_out, i, 
02486                 labels, i+1, labels->num_elts - 1 - i);
02487         copy_matrix_block_into_matrix_i32(*train_markers_out, i, 0,
02488                 markers, i+1, 0, markers->num_rows - 1 - i, markers->num_cols);
02489     }
02490 
02491     copy_matrix_block_into_matrix_i32(*test_markers_out, 0, 0,
02492             markers, i, 0, 1, markers->num_cols);
02493 }
02494 
02496 static void leave_one_out_nb_freq
02497 (
02498     Vector_u32**       labels_out,
02499     Vector_d**         confs_out,
02500     Vector_u32**       ancestor_types_out,
02501     Vector_u32**       ancestor_labels_out,
02502     const Matblock_u8* data_ids,
02503     const Vector_u32*  data_labels,
02504     const Matrix_i32*  data_markers
02505 )
02506 {
02507     uint32_t i;
02508 
02509     NB_freq_model_tree* tree  = NULL;
02510     Vector_u32* train_labels  = NULL;
02511     Matrix_i32* train_markers = NULL;
02512     Vector_u32* test_labels   = NULL;
02513     Vector_d*   test_confs    = NULL;
02514     Matrix_i32* test_markers  = NULL;
02515     Error* err;
02516 
02517     if (!opts.nb_freq_fname)
02518         return;
02519 
02520     create_vector_u32(labels_out, data_labels->num_elts);
02521     create_vector_d(confs_out, data_labels->num_elts);
02522 
02523     for (i = 0; i < data_labels->num_elts; i++)
02524     {
02525         create_leave_one_out_train_and_test_data(&train_labels, &train_markers,
02526                 &test_markers, data_labels, data_markers, i);
02527 
02528         if ((err = train_nb_freq_model_tree(&tree, train_labels, train_markers,
02529                         opts.nb_freq_fname, opts.nb_freq_dtd_fname)) ||
02530             (err = predict_labels_with_nb_freq_model_tree(&test_labels, 
02531                         &test_confs, test_markers, tree, 0)))
02532         {
02533             print_error_msg_exit("haplo-test", err->msg);
02534         }
02535 
02536         assert(test_labels->num_elts == 1);
02537         (*labels_out)->elts[ i ] = test_labels->elts[ 0 ];
02538         (*confs_out)->elts[ i ] = test_confs->elts[ 0 ];
02539     }
02540 
02541     free_vector_u32(test_labels);
02542     free_vector_d(test_confs);
02543     free_matrix_i32(test_markers);
02544     free_vector_u32(train_labels);
02545     free_matrix_i32(train_markers);
02546     free_nb_freq_model_tree(tree);
02547 
02548     find_ancestors(ancestor_types_out, ancestor_labels_out, *labels_out,
02549             data_labels);
02550 
02551     write_leave_one_out_summary(data_ids, data_labels, *ancestor_types_out,
02552             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02553             nb_freq_summary_fname);
02554 
02555     write_leave_one_out_details(data_ids, data_labels, *ancestor_types_out,
02556             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02557             nb_freq_details_cnt_fname, nb_freq_details_pct_fname);
02558 
02559     write_leave_one_out_confusion(data_ids, data_labels, *ancestor_types_out,
02560             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02561             nb_freq_confusion_cnt_fname, nb_freq_confusion_pct_fname);
02562 
02563     write_leave_one_out_preds("nb-freq", data_ids, data_labels,
02564             *ancestor_types_out, *ancestor_labels_out, *labels_out,
02565             *confs_out, NULL, nb_freq_preds_fname);
02566 }
02567 
02569 static void leave_one_out_nb_gauss
02570 (
02571     Vector_u32**       labels_out,
02572     Vector_d**         confs_out,
02573     Vector_u32**       ancestor_types_out,
02574     Vector_u32**       ancestor_labels_out,
02575     const Matblock_u8* data_ids,
02576     const Vector_u32*  data_labels,
02577     const Matrix_i32*  data_markers
02578 )
02579 {
02580     uint32_t i;
02581 
02582     NB_gauss_model_tree* tree  = NULL;
02583     Vector_u32* train_labels  = NULL;
02584     Matrix_i32* train_markers = NULL;
02585     Vector_u32* test_labels   = NULL;
02586     Vector_d*   test_confs    = NULL;
02587     Matrix_i32* test_markers  = NULL;
02588     Error* err;
02589 
02590     if (!opts.nb_gauss_fname)
02591         return;
02592 
02593     create_vector_u32(labels_out, data_labels->num_elts);
02594     create_vector_d(confs_out, data_labels->num_elts);
02595 
02596     for (i = 0; i < data_labels->num_elts; i++)
02597     {
02598         create_leave_one_out_train_and_test_data(&train_labels, &train_markers,
02599                 &test_markers, data_labels, data_markers, i);
02600 
02601         if ((err = train_nb_gauss_model_tree(&tree, train_labels, train_markers,
02602                         opts.nb_gauss_fname, opts.nb_gauss_dtd_fname)) ||
02603             (err = predict_labels_with_nb_gauss_model_tree(&test_labels, 
02604                         &test_confs, test_markers, tree, 0)))
02605         {
02606             print_error_msg_exit("haplo-test", err->msg);
02607         }
02608 
02609         assert(test_labels->num_elts == 1);
02610         (*labels_out)->elts[ i ] = test_labels->elts[ 0 ];
02611         (*confs_out)->elts[ i ] = test_confs->elts[ 0 ];
02612     }
02613 
02614     free_vector_u32(test_labels);
02615     free_vector_d(test_confs);
02616     free_matrix_i32(test_markers);
02617     free_vector_u32(train_labels);
02618     free_matrix_i32(train_markers);
02619     free_nb_gauss_model_tree(tree);
02620 
02621     find_ancestors(ancestor_types_out, ancestor_labels_out, *labels_out,
02622             data_labels);
02623 
02624     write_leave_one_out_summary(data_ids, data_labels, *ancestor_types_out,
02625             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02626             nb_gauss_summary_fname);
02627 
02628     write_leave_one_out_details(data_ids, data_labels, *ancestor_types_out,
02629             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02630             nb_gauss_details_cnt_fname, nb_gauss_details_pct_fname);
02631 
02632     write_leave_one_out_confusion(data_ids, data_labels, *ancestor_types_out,
02633             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02634             nb_gauss_confusion_cnt_fname, nb_gauss_confusion_pct_fname);
02635 
02636     write_leave_one_out_preds("nb-gauss", data_ids, data_labels,
02637             *ancestor_types_out, *ancestor_labels_out, *labels_out,
02638             *confs_out, NULL, nb_gauss_preds_fname);
02639 }
02640 
02642 static void leave_one_out_nb_gmm
02643 (
02644     Vector_u32**       labels_out,
02645     Vector_d**         confs_out,
02646     Vector_u32**       ancestor_types_out,
02647     Vector_u32**       ancestor_labels_out,
02648     const Matblock_u8* data_ids,
02649     const Vector_u32*  data_labels,
02650     const Matrix_i32*  data_markers
02651 )
02652 {
02653     uint32_t i;
02654 
02655     NB_gmm_model_tree* tree  = NULL;
02656     Vector_u32* train_labels  = NULL;
02657     Matrix_i32* train_markers = NULL;
02658     Vector_u32* test_labels   = NULL;
02659     Vector_d*   test_confs    = NULL;
02660     Matrix_i32* test_markers  = NULL;
02661     Error* err;
02662 
02663     if (!opts.nb_gmm_fname)
02664         return;
02665 
02666     create_vector_u32(labels_out, data_labels->num_elts);
02667     create_vector_d(confs_out, data_labels->num_elts);
02668 
02669     for (i = 0; i < data_labels->num_elts; i++)
02670     {
02671         create_leave_one_out_train_and_test_data(&train_labels, &train_markers,
02672                 &test_markers, data_labels, data_markers, i);
02673 
02674         if ((err = train_nb_gmm_model_tree(&tree, train_labels, train_markers,
02675                         opts.nb_gmm_fname, opts.nb_gmm_dtd_fname)) ||
02676             (err = predict_labels_with_nb_gmm_model_tree(&test_labels, 
02677                         &test_confs, test_markers, tree, 0)))
02678         {
02679             print_error_msg_exit("haplo-test", err->msg);
02680         }
02681 
02682         assert(test_labels->num_elts == 1);
02683         (*labels_out)->elts[ i ] = test_labels->elts[ 0 ];
02684         (*confs_out)->elts[ i ] = test_confs->elts[ 0 ];
02685     }
02686 
02687     free_vector_u32(test_labels);
02688     free_vector_d(test_confs);
02689     free_matrix_i32(test_markers);
02690     free_vector_u32(train_labels);
02691     free_matrix_i32(train_markers);
02692     free_nb_gmm_model_tree(tree);
02693 
02694     find_ancestors(ancestor_types_out, ancestor_labels_out, *labels_out,
02695             data_labels);
02696 
02697     write_leave_one_out_summary(data_ids, data_labels, *ancestor_types_out,
02698             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02699             nb_gmm_summary_fname);
02700 
02701     write_leave_one_out_details(data_ids, data_labels, *ancestor_types_out,
02702             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02703             nb_gmm_details_cnt_fname, nb_gmm_details_pct_fname);
02704 
02705     write_leave_one_out_confusion(data_ids, data_labels, *ancestor_types_out,
02706             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02707             nb_gmm_confusion_cnt_fname, nb_gmm_confusion_pct_fname);
02708 
02709     write_leave_one_out_preds("nb-gmm", data_ids, data_labels,
02710             *ancestor_types_out, *ancestor_labels_out, *labels_out,
02711             *confs_out, NULL, nb_gmm_preds_fname);
02712 }
02713 
02715 static void leave_one_out_mv_gmm
02716 (
02717     Vector_u32**       labels_out,
02718     Vector_d**         confs_out,
02719     Vector_u32**       ancestor_types_out,
02720     Vector_u32**       ancestor_labels_out,
02721     const Matblock_u8* data_ids,
02722     const Vector_u32*  data_labels,
02723     const Matrix_i32*  data_markers
02724 )
02725 {
02726     uint32_t i;
02727 
02728     MV_gmm_model_tree* tree  = NULL;
02729     Vector_u32* train_labels  = NULL;
02730     Matrix_i32* train_markers = NULL;
02731     Vector_u32* test_labels   = NULL;
02732     Vector_d*   test_confs    = NULL;
02733     Matrix_i32* test_markers  = NULL;
02734     Error* err;
02735 
02736     if (!opts.mv_gmm_fname)
02737         return;
02738 
02739     create_vector_u32(labels_out, data_labels->num_elts);
02740     create_vector_d(confs_out, data_labels->num_elts);
02741 
02742     for (i = 0; i < data_labels->num_elts; i++)
02743     {
02744         create_leave_one_out_train_and_test_data(&train_labels, &train_markers,
02745                 &test_markers, data_labels, data_markers, i);
02746 
02747         if ((err = train_mv_gmm_model_tree(&tree, train_labels, train_markers,
02748                         opts.mv_gmm_fname, opts.mv_gmm_dtd_fname)) ||
02749             (err = predict_labels_with_mv_gmm_model_tree(&test_labels, 
02750                         &test_confs, test_markers, tree, 0)))
02751         {
02752             print_error_msg_exit("haplo-test", err->msg);
02753         }
02754 
02755         assert(test_labels->num_elts == 1);
02756         (*labels_out)->elts[ i ] = test_labels->elts[ 0 ];
02757         (*confs_out)->elts[ i ] = test_confs->elts[ 0 ];
02758     }
02759 
02760     free_vector_u32(test_labels);
02761     free_vector_d(test_confs);
02762     free_matrix_i32(test_markers);
02763     free_vector_u32(train_labels);
02764     free_matrix_i32(train_markers);
02765     free_mv_gmm_model_tree(tree);
02766 
02767     find_ancestors(ancestor_types_out, ancestor_labels_out, *labels_out,
02768             data_labels);
02769 
02770     write_leave_one_out_summary(data_ids, data_labels, *ancestor_types_out,
02771             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02772             mv_gmm_summary_fname);
02773 
02774     write_leave_one_out_details(data_ids, data_labels, *ancestor_types_out,
02775             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02776             mv_gmm_details_cnt_fname, mv_gmm_details_pct_fname);
02777 
02778     write_leave_one_out_confusion(data_ids, data_labels, *ancestor_types_out,
02779             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02780             mv_gmm_confusion_cnt_fname, mv_gmm_confusion_pct_fname);
02781 
02782     write_leave_one_out_preds("mv-gmm", data_ids, data_labels,
02783             *ancestor_types_out, *ancestor_labels_out, *labels_out,
02784             *confs_out, NULL, mv_gmm_preds_fname);
02785 }
02786 
02788 static void leave_one_out_svm
02789 (
02790     Vector_u32**       labels_out,
02791     Vector_d**         confs_out,
02792     Vector_u32**       ancestor_types_out,
02793     Vector_u32**       ancestor_labels_out,
02794     const Matblock_u8* data_ids,
02795     const Vector_u32*  data_labels,
02796     const Matrix_i32*  data_markers
02797 )
02798 {
02799 #ifdef HAPLO_ENABLE_SVM
02800     uint32_t i;
02801 
02802     SVM_model_tree* tree  = NULL;
02803     Vector_u32* train_labels  = NULL;
02804     Matrix_i32* train_markers = NULL;
02805     Vector_u32* test_labels   = NULL;
02806     Vector_d*   test_confs    = NULL;
02807     Matrix_i32* test_markers  = NULL;
02808     Error* err;
02809 
02810     if (!opts.svm_fname)
02811         return;
02812 
02813     create_vector_u32(labels_out, data_labels->num_elts);
02814     create_vector_d(confs_out, data_labels->num_elts);
02815 
02816     for (i = 0; i < data_labels->num_elts; i++)
02817     {
02818         create_leave_one_out_train_and_test_data(&train_labels, &train_markers,
02819                 &test_markers, data_labels, data_markers, i);
02820 
02821         if ((err = train_svm_model_tree(&tree, train_labels, train_markers,
02822                         opts.svm_fname, opts.svm_dtd_fname)) ||
02823             (err = predict_labels_with_svm_model_tree(&test_labels, 
02824                         &test_confs, test_markers, tree)))
02825         {
02826             print_error_msg_exit("haplo-test", err->msg);
02827         }
02828 
02829         assert(test_labels->num_elts == 1);
02830         (*labels_out)->elts[ i ] = test_labels->elts[ 0 ];
02831         (*confs_out)->elts[ i ] = test_confs->elts[ 0 ];
02832     }
02833 
02834     free_vector_u32(test_labels);
02835     free_vector_d(test_confs);
02836     free_matrix_i32(test_markers);
02837     free_vector_u32(train_labels);
02838     free_matrix_i32(train_markers);
02839     free_svm_model_tree(tree);
02840 
02841     find_ancestors(ancestor_types_out, ancestor_labels_out, *labels_out,
02842             data_labels);
02843 
02844     write_leave_one_out_summary(data_ids, data_labels, *ancestor_types_out,
02845             *ancestor_labels_out, *labels_out, *confs_out, NULL, 
02846             svm_summary_fname);
02847 
02848     write_leave_one_out_details(data_ids, data_labels, *ancestor_types_out,
02849             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02850             svm_details_cnt_fname, svm_details_pct_fname);
02851 
02852     write_leave_one_out_confusion(data_ids, data_labels, *ancestor_types_out,
02853             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02854             svm_confusion_cnt_fname, svm_confusion_pct_fname);
02855 
02856     write_leave_one_out_preds("svm", data_ids, data_labels,
02857             *ancestor_types_out, *ancestor_labels_out, *labels_out,
02858             *confs_out, NULL, svm_preds_fname);
02859 #else
02860     return;
02861 #endif
02862 }
02863 
02865 static void leave_one_out_j48
02866 (
02867     Vector_u32**       labels_out,
02868     Vector_d**         confs_out,
02869     Vector_u32**       ancestor_types_out,
02870     Vector_u32**       ancestor_labels_out,
02871     const Matblock_u8* data_ids,
02872     const Vector_u32*  data_labels,
02873     const Matrix_i32*  data_markers
02874 )
02875 {
02876 #ifdef HAPLO_ENABLE_WEKA
02877     uint32_t i;
02878     pid_t    pid;
02879     char     tmp_dir[1024] = {0};
02880     char     script[4096] = {0};
02881 
02882     Weka_model_tree* tree     = NULL;
02883     Vector_u32* train_labels  = NULL;
02884     Matrix_i32* train_markers = NULL;
02885     Vector_u32* test_labels   = NULL;
02886     Vector_d*   test_confs    = NULL;
02887     Matrix_i32* test_markers  = NULL;
02888     Error* err;
02889 
02890     if (!opts.weka_j48_fname)
02891         return;
02892 
02893     pid = getpid();
02894     snprintf(tmp_dir, 1024, "%s/.haplo_test_leave_one_out_j48_%u", 
02895             tmp_dirname, pid);
02896     snprintf(script, 4096, "mkdir -p %s", tmp_dir);
02897     if (system(script) == 127)
02898     {
02899         print_error_msg_exit("haplo-test", "Could not create tmp files");
02900     }
02901 
02902     create_vector_u32(labels_out, data_labels->num_elts);
02903     create_vector_d(confs_out, data_labels->num_elts);
02904 
02905     for (i = 0; i < data_labels->num_elts; i++)
02906     {
02907         create_leave_one_out_train_and_test_data(&train_labels, &train_markers,
02908                 &test_markers, data_labels, data_markers, i);
02909 
02910         if ((err = train_weka_j48_model_tree(&tree, train_labels, train_markers,
02911                         opts.weka_j48_fname, opts.weka_dtd_fname,
02912                         tmp_dir, opts.weka_jar_fname)) ||
02913             (err = predict_labels_with_weka_j48_model_tree(&test_labels, 
02914                         &test_confs, test_markers, tree, opts.weka_jar_fname)))
02915         {
02916             print_error_msg_exit("haplo-test", err->msg);
02917         }
02918 
02919         assert(test_labels->num_elts == 1);
02920         (*labels_out)->elts[ i ] = test_labels->elts[ 0 ];
02921         (*confs_out)->elts[ i ] = test_confs->elts[ 0 ];
02922     }
02923 
02924     snprintf(script, 4096, "rm -rf %s", tmp_dir);
02925     if (system(script) == 127)
02926     {
02927         print_error_msg_exit("haplo-test", "Could not remove tmp files");
02928     }
02929 
02930     free_vector_u32(test_labels);
02931     free_vector_d(test_confs);
02932     free_matrix_i32(test_markers);
02933     free_vector_u32(train_labels);
02934     free_matrix_i32(train_markers);
02935     free_weka_model_tree(tree);
02936 
02937     find_ancestors(ancestor_types_out, ancestor_labels_out, *labels_out,
02938             data_labels);
02939 
02940     write_leave_one_out_summary(data_ids, data_labels, *ancestor_types_out,
02941             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02942             weka_j48_summary_fname);
02943 
02944     write_leave_one_out_details(data_ids, data_labels, *ancestor_types_out,
02945             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02946             weka_j48_details_cnt_fname, weka_j48_details_pct_fname);
02947 
02948     write_leave_one_out_confusion(data_ids, data_labels, *ancestor_types_out,
02949             *ancestor_labels_out, *labels_out, *confs_out, NULL,
02950             weka_j48_confusion_cnt_fname, weka_j48_confusion_pct_fname);
02951 
02952     write_leave_one_out_preds("j48", data_ids, data_labels,
02953             *ancestor_types_out, *ancestor_labels_out, *labels_out,
02954             *confs_out, NULL, weka_j48_preds_fname);
02955 #else
02956     return;
02957 #endif
02958 }
02959 
02961 static void leave_one_out_part
02962 (
02963     Vector_u32**       labels_out,
02964     Vector_d**         confs_out,
02965     Vector_u32**       ancestor_types_out,
02966     Vector_u32**       ancestor_labels_out,
02967     const Matblock_u8* data_ids,
02968     const Vector_u32*  data_labels,
02969     const Matrix_i32*  data_markers
02970 )
02971 {
02972 #ifdef HAPLO_ENABLE_WEKA
02973     uint32_t i;
02974     pid_t    pid;
02975     char     tmp_dir[1024] = {0};
02976     char     script[4096] = {0};
02977 
02978     Weka_model_tree* tree     = NULL;
02979     Vector_u32* train_labels  = NULL;
02980     Matrix_i32* train_markers = NULL;
02981     Vector_u32* test_labels   = NULL;
02982     Vector_d*   test_confs    = NULL;
02983     Matrix_i32* test_markers  = NULL;
02984     Error* err;
02985 
02986     if (!opts.weka_part_fname)
02987         return;
02988 
02989     pid = getpid();
02990     snprintf(tmp_dir, 1024, "%s/.haplo_test_leave_one_out_part_%u", 
02991             tmp_dirname, pid);
02992     snprintf(script, 4096, "mkdir -p %s", tmp_dir);
02993     if (system(script) == 127)
02994     {
02995         print_error_msg_exit("haplo-test", "Could not create tmp files");
02996     }
02997 
02998     create_vector_u32(labels_out, data_labels->num_elts);
02999     create_vector_d(confs_out, data_labels->num_elts);
03000 
03001     for (i = 0; i < data_labels->num_elts; i++)
03002     {
03003         create_leave_one_out_train_and_test_data(&train_labels, &train_markers,
03004                 &test_markers, data_labels, data_markers, i);
03005 
03006         if ((err = train_weka_part_model_tree(&tree, train_labels, 
03007                         train_markers, opts.weka_part_fname, 
03008                         opts.weka_dtd_fname, tmp_dir, 
03009                         opts.weka_jar_fname)) ||
03010             (err = predict_labels_with_weka_part_model_tree(&test_labels, 
03011                         &test_confs, test_markers, tree, opts.weka_jar_fname)))
03012         {
03013             print_error_msg_exit("haplo-test", err->msg);
03014         }
03015 
03016         assert(test_labels->num_elts == 1);
03017         (*labels_out)->elts[ i ] = test_labels->elts[ 0 ];
03018         (*confs_out)->elts[ i ] = test_confs->elts[ 0 ];
03019     }
03020 
03021     snprintf(script, 4096, "rm -rf %s", tmp_dir);
03022     if (system(script) == 127)
03023     {
03024         print_error_msg_exit("haplo-test", "Could not remove tmp files");
03025     }
03026 
03027     free_vector_u32(test_labels);
03028     free_vector_d(test_confs);
03029     free_matrix_i32(test_markers);
03030     free_vector_u32(train_labels);
03031     free_matrix_i32(train_markers);
03032     free_weka_model_tree(tree);
03033 
03034     find_ancestors(ancestor_types_out, ancestor_labels_out, *labels_out,
03035             data_labels);
03036 
03037     write_leave_one_out_summary(data_ids, data_labels, *ancestor_types_out,
03038             *ancestor_labels_out, *labels_out, *confs_out, NULL,
03039             weka_part_summary_fname);
03040 
03041     write_leave_one_out_details(data_ids, data_labels, *ancestor_types_out,
03042             *ancestor_labels_out, *labels_out, *confs_out, NULL,
03043             weka_part_details_cnt_fname, weka_part_details_pct_fname);
03044 
03045     write_leave_one_out_confusion(data_ids, data_labels, *ancestor_types_out,
03046             *ancestor_labels_out, *labels_out, *confs_out, NULL,
03047             weka_part_confusion_cnt_fname, weka_part_confusion_pct_fname);
03048 
03049     write_leave_one_out_preds("part", data_ids, data_labels,
03050             *ancestor_types_out, *ancestor_labels_out, *labels_out,
03051             *confs_out, NULL, weka_part_preds_fname);
03052 #else
03053     return;
03054 #endif
03055 }
03056 
03060 static void leave_one_out_nearest
03061 (
03062     Vector_u32**       labels_out,
03063     Vector_d**         dists_out,
03064     Vector_u32**       ancestor_types_out,
03065     Vector_u32**       ancestor_labels_out,
03066     const Matblock_u8* data_ids,
03067     const Vector_u32*  data_labels,
03068     const Matrix_i32*  data_markers
03069 )
03070 {
03071     uint32_t i;
03072 
03073     Nearest_model* model      = NULL;
03074     Vector_u32* train_labels  = NULL;
03075     Matrix_i32* train_markers = NULL;
03076     Vector_u32* test_labels   = NULL;
03077     Vector_d*   test_dists    = NULL;
03078     Matrix_i32* test_markers  = NULL;
03079     Error* err;
03080 
03081     if (!opts.nearest_fname)
03082         return;
03083 
03084     create_vector_u32(labels_out, data_labels->num_elts);
03085     create_vector_d(dists_out, data_labels->num_elts);
03086 
03087     for (i = 0; i < data_labels->num_elts; i++)
03088     {
03089         create_leave_one_out_train_and_test_data(&train_labels, &train_markers,
03090                 &test_markers, data_labels, data_markers, i);
03091 
03092         if ((err = train_nearest_model(&model, train_labels, train_markers,
03093                         opts.nearest_fname, opts.nearest_dtd_fname)) ||
03094             (err = predict_labels_with_nearest_model(&test_labels, &test_dists,
03095                         test_markers, model)))
03096         {
03097             print_error_msg_exit("haplo-test", err->msg);
03098         }
03099 
03100         assert(test_labels->num_elts == 1);
03101         (*labels_out)->elts[ i ] = test_labels->elts[ 0 ];
03102         (*dists_out)->elts[ i ] = test_dists->elts[ 0 ];
03103     }
03104 
03105     free_vector_u32(test_labels);
03106     free_vector_d(test_dists);
03107     free_matrix_i32(test_markers);
03108     free_vector_u32(train_labels);
03109     free_matrix_i32(train_markers);
03110     free_nearest_model(model);
03111 
03112     find_ancestors(ancestor_types_out, ancestor_labels_out, *labels_out,
03113             data_labels);
03114 
03115     write_leave_one_out_summary(data_ids, data_labels, *ancestor_types_out,
03116             *ancestor_labels_out, *labels_out, *dists_out, NULL,
03117             nearest_summary_fname);
03118 
03119     write_leave_one_out_details(data_ids, data_labels, *ancestor_types_out,
03120             *ancestor_labels_out, *labels_out, *dists_out, NULL,
03121             nearest_details_cnt_fname, nearest_details_pct_fname);
03122 
03123     write_leave_one_out_confusion(data_ids, data_labels, *ancestor_types_out,
03124             *ancestor_labels_out, *labels_out, *dists_out, NULL,
03125             nearest_confusion_cnt_fname, nearest_confusion_pct_fname);
03126 
03127     write_leave_one_out_preds("nearest", data_ids, data_labels,
03128             *ancestor_types_out, *ancestor_labels_out, *labels_out, *dists_out,
03129             NULL, nearest_preds_fname);
03130 }
03131 
03135 static void leave_one_out_tandem_agree
03136 (
03137     Vector_u32**       types_out,
03138     Vector_u32**       labels_out,
03139     Vector_u32**       ancestor_types_out,
03140     Vector_u32**       ancestor_labels_out,
03141     const Vector_u32*  nb_freq_labels, 
03142     const Vector_u32*  nb_gauss_labels, 
03143     const Vector_u32*  nb_gmm_labels,
03144     const Vector_u32*  mv_gmm_labels, 
03145     const Vector_u32*  svm_labels, 
03146     const Vector_u32*  j48_labels, 
03147     const Vector_u32*  part_labels, 
03148     const Vector_u32*  nearest_labels, 
03149     const Matblock_u8* data_ids,
03150     const Vector_u32*  data_labels
03151 )
03152 {
03153     find_ancestors_of_sets(ancestor_types_out, ancestor_labels_out, 
03154             nb_freq_labels, nb_gauss_labels, nb_gmm_labels, mv_gmm_labels, 
03155             svm_labels, j48_labels, part_labels, nearest_labels);
03156 
03157     copy_vector_u32(types_out, *ancestor_types_out);
03158     copy_vector_u32(labels_out, *ancestor_labels_out);
03159 
03160     write_leave_one_out_summary(data_ids, data_labels, *ancestor_types_out,
03161             *ancestor_labels_out, *labels_out, NULL, NULL,
03162             tandem_agree_summary_fname);
03163 
03164     write_leave_one_out_details(data_ids, data_labels, *ancestor_types_out,
03165             *ancestor_labels_out, *labels_out, NULL, *types_out,
03166             tandem_agree_details_cnt_fname, tandem_agree_details_pct_fname);
03167 }
03168 
03172 static void leave_one_out_tandem
03173 (
03174     const Vector_u32*  tandem_types, 
03175     const Vector_u32*  tandem_labels, 
03176     const Matblock_u8* data_ids,
03177     const Vector_u32*  data_labels,
03178     Vector_u32*        ancestor_types, 
03179     Vector_u32*        ancestor_labels
03180 )
03181 {
03182     find_tandem_ancestors(&ancestor_types, &ancestor_labels, tandem_types,
03183             tandem_labels, data_labels);
03184 
03185     write_leave_one_out_summary(data_ids, data_labels, ancestor_types,
03186             ancestor_labels, tandem_labels, NULL, tandem_types,
03187             tandem_summary_fname);
03188 
03189     write_leave_one_out_details(data_ids, data_labels, ancestor_types,
03190             ancestor_labels, tandem_labels, NULL, tandem_types,
03191             tandem_details_cnt_fname, tandem_details_pct_fname);
03192 
03193     write_leave_one_out_confusion(data_ids, data_labels, ancestor_types,
03194             ancestor_labels, tandem_labels, NULL, tandem_types,
03195             tandem_confusion_cnt_fname, tandem_confusion_pct_fname);
03196 
03197     write_leave_one_out_preds("tandem", data_ids, data_labels,
03198             ancestor_types, ancestor_labels, tandem_labels, NULL,
03199             tandem_types, tandem_preds_fname);
03200 }
03201 
03206 static void create_cross_validation_train_and_test_data
03207 (
03208     Matblock_u8****    train_ids_out,
03209     Vector_u32****     train_labels_out,
03210     Matrix_i32****     train_markers_out, 
03211     Matblock_u8****    test_ids_out,
03212     Vector_u32****     test_labels_out,
03213     Matrix_i32****     test_markers_out,
03214     const Matblock_u8* data_ids, 
03215     const Vector_u32*  data_labels, 
03216     const Matrix_i32*  data_markers
03217 )
03218 {
03219     uint32_t num_samples;
03220     uint32_t sample;
03221     uint32_t num_markers;
03222     uint32_t marker;
03223     uint32_t num_ids;
03224     uint32_t id;
03225     uint32_t iter;
03226     uint32_t fold;
03227     uint32_t num_samples_mod_num_folds;
03228     uint32_t num_samples_per_fold;
03229     uint32_t num_times_subtract_a_sample;
03230     uint32_t train_id_sample;
03231     uint32_t train_label_sample;
03232     uint32_t train_marker_sample;
03233     uint32_t test_id_sample;
03234     uint32_t test_label_sample;
03235     uint32_t test_marker_sample;
03236     uint32_t c;
03237     int32_t  i, j, k;
03238 
03239     Matblock_u8*** train_ids;
03240     Vector_u32***  train_labels;
03241     Matrix_i32***  train_markers;
03242     Matblock_u8*** test_ids;
03243     Vector_u32***  test_labels;
03244     Matrix_i32***  test_markers;
03245     Vector_i32*    fold_members;
03246     Matblock_u8*   folded_ids;
03247     Vector_u32*    folded_labels;
03248     Matrix_i32*    folded_markers;
03249 
03250     num_ids     = (data_ids) ? data_ids->num_rows : 0;
03251     num_samples = data_markers->num_rows;
03252     num_markers = data_markers->num_cols;
03253 
03254     assert(*train_ids_out = malloc(num_cv_iters*sizeof(void**)));
03255     assert(*train_labels_out = malloc(num_cv_iters*sizeof(void**)));
03256     assert(*train_markers_out = malloc(num_cv_iters*sizeof(void**)));
03257     assert(*test_ids_out = malloc(num_cv_iters*sizeof(void**)));
03258     assert(*test_labels_out = malloc(num_cv_iters*sizeof(void**)));
03259     assert(*test_markers_out = malloc(num_cv_iters*sizeof(void**)));
03260     for (iter = 0; iter < num_cv_iters; iter++)
03261     {
03262         assert((*train_ids_out)[iter] = calloc(num_cv_folds,sizeof(void*)));
03263         assert((*train_labels_out)[iter] = calloc(num_cv_folds,sizeof(void*)));
03264         assert((*train_markers_out)[iter] = calloc(num_cv_folds,sizeof(void*)));
03265         assert((*test_ids_out)[iter] = calloc(num_cv_folds,sizeof(void*)));
03266         assert((*test_labels_out)[iter] = calloc(num_cv_folds,sizeof(void*)));
03267         assert((*test_markers_out)[iter] = calloc(num_cv_folds,sizeof(void*)));
03268     }
03269 
03270     train_ids     = *train_ids_out;
03271     train_labels  = *train_labels_out;
03272     train_markers = *train_markers_out;
03273     test_ids      = *test_ids_out;
03274     test_labels   = *test_labels_out;
03275     test_markers  = *test_markers_out;
03276 
03277     fold_members = NULL;
03278 
03279     for (iter = 0; iter < num_cv_iters; iter++)
03280     {
03281         num_samples_mod_num_folds = num_samples % num_cv_folds;
03282         num_samples_per_fold = ceil(num_samples / (double)num_cv_folds);
03283 
03284         num_times_subtract_a_sample = (num_samples_mod_num_folds > 0) ? 
03285             num_cv_folds - num_samples_mod_num_folds : 0;
03286 
03287         create_init_vector_i32(&fold_members, num_samples, -1);
03288 
03289         for (fold = 0; fold < num_cv_folds; fold++)
03290         {
03291             if (num_times_subtract_a_sample > 0)
03292             {
03293                 i = num_samples_per_fold - 1;
03294                 num_times_subtract_a_sample--;
03295             }
03296             else 
03297             {
03298                 i = num_samples_per_fold;
03299             }
03300             j = i;
03301 
03302             while (j > 0)
03303             {
03304                 j--;
03305                 k = -1;
03306 
03307                 while (k < 0)
03308                 {
03309                     k = floor((rand()/(double)RAND_MAX) * num_samples);
03310                     if (k == (int32_t)num_samples) 
03311                     {
03312                         k--;
03313                     }
03314 
03315                     if (fold_members->elts[ k ] < 0)
03316                     {
03317                         fold_members->elts[ k ] = fold;
03318                     }
03319                     else
03320                     {
03321                         k = -1;
03322                     }
03323                 }
03324             }
03325 
03326             if (data_ids)
03327             {
03328                 create_zero_matblock_u8(&(test_ids[ iter ][ fold ]), i,
03329                         num_ids, data_ids->num_cols);
03330                 create_zero_matblock_u8(&(train_ids[ iter ][ fold ]), 
03331                         num_samples - i, num_ids, data_ids->num_cols);
03332             }
03333             create_zero_vector_u32(&(test_labels[ iter ][ fold ]), i);
03334             create_zero_matrix_i32(&(test_markers[ iter ][ fold ]), i, 
03335                     num_markers);
03336             create_zero_vector_u32(&(train_labels[ iter ][ fold ]), 
03337                     num_samples - i);
03338             create_zero_matrix_i32(&(train_markers[ iter ][ fold ]), 
03339                     num_samples - i, num_markers);
03340         }
03341 
03342         for (fold = 0; fold < num_cv_folds; fold++)
03343         {
03344             test_id_sample = 0;
03345             test_label_sample = 0;
03346             test_marker_sample = 0;
03347             train_id_sample = 0;
03348             train_label_sample = 0;
03349             train_marker_sample = 0;
03350 
03351             for (sample = 0; sample < num_samples; sample++)
03352             {
03353                 if (fold_members->elts[ sample ] == (int32_t)fold)
03354                 {
03355                     if (data_ids)
03356                     {
03357                         folded_ids = test_ids[ iter ][ fold ];
03358                         for (id = 0; id < num_ids; id++)
03359                         {
03360                             for (c = 0; c < data_ids->num_cols; c++)
03361                             {
03362                                 folded_ids->elts[ test_id_sample ][ id ][ c ] =
03363                                     data_ids->elts[ sample ][ id ][ c ];
03364                             }
03365                         }
03366                         test_id_sample++;
03367                     }
03368 
03369                     folded_labels = test_labels[ iter ][ fold ];
03370                     folded_labels->elts[ test_label_sample ] = 
03371                         data_labels->elts[ sample ];
03372 
03373                     folded_markers = test_markers[ iter ][ fold ];
03374                     for (marker = 0; marker < num_markers; marker++)
03375                     {
03376                         folded_markers->elts[ test_marker_sample ][ marker ] =
03377                             data_markers->elts[ sample ][ marker ];
03378                     }
03379 
03380                     test_label_sample++;
03381                     test_marker_sample++;
03382                 }
03383                 else
03384                 {
03385                     if (data_ids)
03386                     {
03387                         folded_ids = train_ids[ iter ][ fold ];
03388                         for (id = 0; id < num_ids; id++)
03389                         {
03390                             for (c = 0; c < data_ids->num_cols; c++)
03391                             {
03392                                 folded_ids->elts[ train_id_sample ][ id ][ c ] =
03393                                     data_ids->elts[ sample ][ id ][ c ];
03394                             }
03395                         }
03396                         train_id_sample++;
03397                     }
03398 
03399                     folded_labels = train_labels[ iter ][ fold ];
03400                     folded_labels->elts[ train_label_sample ] = 
03401                         data_labels->elts[ sample ];
03402 
03403                     folded_markers = train_markers[ iter ][ fold ];
03404                     for (marker = 0; marker < num_markers; marker++)
03405                     {
03406                         folded_markers->elts[ train_marker_sample ][ marker ] =
03407                             data_markers->elts[ sample ][ marker ];
03408                     }
03409 
03410                     train_label_sample++;
03411                     train_marker_sample++;
03412                 }
03413             }
03414         }
03415 
03416     }
03417 
03418     free_vector_i32(fold_members);
03419 }
03420 
03422 static void free_cross_validation_train_and_test_data
03423 (
03424     Matblock_u8***    train_ids,
03425     Vector_u32***     train_labels,
03426     Matrix_i32***     train_markers, 
03427     Matblock_u8***    test_ids,
03428     Vector_u32***     test_labels,
03429     Matrix_i32***     test_markers
03430 )
03431 {
03432     uint32_t i, j;
03433 
03434     for (i = 0; i < num_cv_iters; i++)
03435     {
03436         for (j = 0; j < num_cv_folds; j++)
03437         {
03438             free_matblock_u8(train_ids[ i ][ j ]);
03439             free_vector_u32(train_labels[ i ][ j ]);
03440             free_matrix_i32(train_markers[ i ][ j ]);
03441             free_matblock_u8(test_ids[ i ][ j ]);
03442             free_vector_u32(test_labels[ i ][ j ]);
03443             free_matrix_i32(test_markers[ i ][ j ]);
03444         }
03445 
03446         free(train_ids[ i ]);
03447         free(train_labels[ i ]);
03448         free(train_markers[ i ]);
03449         free(test_ids[ i ]);
03450         free(test_labels[ i ]);
03451         free(test_markers[ i ]);
03452     }
03453 
03454     free(train_ids);
03455     free(train_labels);
03456     free(train_markers);
03457     free(test_ids);
03458     free(test_labels);
03459     free(test_markers);
03460 }
03461 
03463 static void write_cross_validation_summary
03464 (
03465     Vector_u32*** ancestor_types,
03466     Vector_u32*** ancestor_labels,
03467     Vector_u32*** pred_labels,
03468     Vector_d***   pred_confs,
03469     Vector_u32*** tandem_types,
03470     const char*   fname
03471 )
03472 {
03473     uint32_t    n, N;
03474     uint32_t    i, j;
03475     float       D;
03476     FILE*       fp;
03477     const char* fmt_1;
03478     const char* fmt_2;
03479     xmlDoc*     xml_doc  = NULL;
03480     xmlNode*    xml_root = NULL;
03481     xmlNode*    xml_node[3];
03482     xmlNode*    xml_child = NULL;
03483     char        xml_buf[256];
03484     Error*      err;
03485 
03486     Matrix_f* counts = NULL;
03487     Vector_f* means  = NULL;
03488     Vector_f* vars   = NULL;
03489     Vector_f* errs   = NULL;
03490 
03491     if ((err = open_output(&fp, &xml_doc, "haplo-test-cv-summary-out",
03492                     "haplo-test-cv-summary-out.dtd", fname)))
03493     {
03494         print_error_msg("haplo-test", err->msg);
03495     }
03496 
03497     create_zero_matrix_f(&counts, num_cv_iters*num_cv_folds, 3);
03498 
03499     if (opts.header_out && opts.output_format != HAPLO_OUTPUT_XML)
03500     {
03501         switch (opts.output_format)
03502         {
03503             case HAPLO_OUTPUT_TXT:
03504                 fmt_1 = "%-21s  %-21s  %-21s\n";
03505                 fmt_2 = "%-8s %-6s %-5s  %-8s %-6s %-5s  %-8s %-6s %-5s\n";
03506                 break;
03507             case HAPLO_OUTPUT_CSV:
03508                 fmt_1 = "%s,,,%s,,,%s,,\n";
03509                 fmt_2 = "%s,%s,%s,%s,%s,%s,%s,%s,%s\n";
03510                 break;
03511             case HAPLO_OUTPUT_XML:
03512                 break;
03513         }
03514         fprintf(fp, fmt_1, "Direct", "Indirect", "None");
03515         fprintf(fp, fmt_2, "Mean", "Dev", "Err", "Mean", "Dev", "Err", "Mean",
03516                 "Dev", "Err");
03517     }
03518     else if (opts.output_format == HAPLO_OUTPUT_XML)
03519     {
03520         xml_root = xmlDocGetRootElement(xml_doc);
03521     }
03522 
03523     for (i = 0; i < num_cv_iters; i++)
03524     {
03525         for (j = 0; j < num_cv_folds; j++)
03526         {
03527             N = ancestor_types[ i ][ j ]->num_elts;
03528 
03529             for (n = 0; n < N; n++)
03530             {
03531                 if (!tandem_types || 
03532                         tandem_types[i][j]->elts[n] != HAPLO_ANCESTOR_NONE)
03533                 {
03534                     switch (ancestor_types[ i ][ j ]->elts[ n ])
03535                     {
03536                         case HAPLO_ANCESTOR_DIRECT:
03537                             counts->elts[ i*num_cv_folds + j ][ 0 ]++;
03538                             break;
03539                         case HAPLO_ANCESTOR_INDIRECT:
03540                             counts->elts[ i*num_cv_folds + j ][ 1 ]++;
03541                             break;
03542                         case HAPLO_ANCESTOR_NONE:
03543                             counts->elts[ i*num_cv_folds + j ][ 2 ]++;
03544                             break;
03545                     }
03546                 }
03547             }
03548         }
03549     }
03550 
03551     ind_mv_sample_stats_f(&means, &vars, &errs, counts);
03552 
03553     switch (opts.output_format)
03554     {
03555         case HAPLO_OUTPUT_TXT:
03556             fmt_1 = "%-8.1f %-6.1f %-5.1f  %-8.1f %-6.1f %-5.1f  %-8.1f %-6.1f %-5.1f\n";
03557             fprintf(fp, fmt_1, means->elts[0], sqrt(vars->elts[0]),
03558                     errs->elts[0], means->elts[1], sqrt(vars->elts[1]),
03559                     errs->elts[1], means->elts[2], sqrt(vars->elts[2]),
03560                     errs->elts[2]);
03561             break;
03562         case HAPLO_OUTPUT_CSV:
03563             fmt_1 = "%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f\n";
03564             fprintf(fp, fmt_1, means->elts[0], sqrt(vars->elts[0]),
03565                     errs->elts[0], means->elts[1], sqrt(vars->elts[1]),
03566                     errs->elts[1], means->elts[2], sqrt(vars->elts[2]),
03567                     errs->elts[2]);
03568             break;
03569         case HAPLO_OUTPUT_XML:
03570             xml_node[0] = XMLNewChild(xml_root, "direct", NULL);
03571             xml_node[1] = XMLNewChild(xml_root, "indirect", NULL);
03572             xml_node[2] = XMLNewChild(xml_root, "none", NULL);
03573             for (i = 0; i < 3; i++)
03574             {
03575                 xml_child = XMLNewChild(xml_node[ i ], "count", NULL);
03576                 snprintf(xml_buf, 256, "%.1f", means->elts[ i ]);
03577                 XMLNewChild(xml_child, "mean", xml_buf);
03578                 snprintf(xml_buf, 256, "%.1f", sqrt(vars->elts[ i ]));
03579                 XMLNewChild(xml_child, "dev", xml_buf);
03580                 snprintf(xml_buf, 256, "%.1f", errs->elts[ i ]);
03581                 XMLNewChild(xml_child, "err", xml_buf);
03582             }
03583             break;
03584     }
03585 
03586     D = means->elts[0] + means->elts[1] + means->elts[2];
03587     if (D > 0)
03588     {
03589         multiply_vector_by_scalar_f(&means, means, 1/D);
03590         multiply_vector_by_scalar_f(&vars, vars, 1/(D*D));
03591         multiply_vector_by_scalar_f(&errs, errs, 1/D);
03592     }
03593 
03594     switch (opts.output_format)
03595     {
03596         case HAPLO_OUTPUT_TXT:
03597             fmt_1 = "%-8.3f %-6.3f %-5.3f  %-8.3f %-6.3f %-5.3f  %-8.3f %-6.3f %-5.3f\n";
03598             fprintf(fp, fmt_1, means->elts[0], sqrt(vars->elts[0]),
03599                     errs->elts[0], means->elts[1], sqrt(vars->elts[1]),
03600                     errs->elts[1], means->elts[2], sqrt(vars->elts[2]),
03601                     errs->elts[2]);
03602             break;
03603         case HAPLO_OUTPUT_CSV:
03604             fmt_1 = "%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n";
03605             fprintf(fp, fmt_1, means->elts[0], sqrt(vars->elts[0]),
03606                     errs->elts[0], means->elts[1], sqrt(vars->elts[1]),
03607                     errs->elts[1], means->elts[2], sqrt(vars->elts[2]),
03608                     errs->elts[2]);
03609             break;
03610         case HAPLO_OUTPUT_XML:
03611             for (i = 0; i < 3; i++)
03612             {
03613                 xml_child = XMLNewChild(xml_node[ i ], "percent", NULL);
03614                 snprintf(xml_buf, 256, "%.3f", means->elts[ i ]);
03615                 XMLNewChild(xml_child, "mean", xml_buf);
03616                 snprintf(xml_buf, 256, "%.3f", sqrt(vars->elts[ i ]));
03617                 XMLNewChild(xml_child, "dev", xml_buf);
03618                 snprintf(xml_buf, 256, "%.3f", errs->elts[ i ]);
03619                 XMLNewChild(xml_child, "err", xml_buf);
03620             }
03621             break;
03622     }
03623 
03624     free_matrix_f(counts);
03625     free_vector_f(means);
03626     free_vector_f(vars);
03627     free_vector_f(errs);
03628 
03629     if ((err = close_output(fp, xml_doc, fname)))
03630     {
03631         print_error_msg("haplo-test", err->msg);
03632     }
03633 }
03634 
03645 static void write_cross_validation_details
03646 (
03647     Vector_u32*** ancestor_types,
03648     Vector_u32*** ancestor_labels,
03649     Vector_u32*** pred_labels,
03650     Vector_d***   pred_confs,
03651     Vector_u32*** tandem_types,
03652     const char*   cnt_fname,
03653     const char*   pct_fname
03654 )
03655 {
03656     uint32_t    i, j, k;
03657     uint32_t    n, nn, N;
03658     float       D;
03659     const char* label;
03660     const char* fmt_1;
03661     const char* fmt_2;
03662     FILE*       cnt_fp;
03663     FILE*       pct_fp;
03664     xmlDoc*     xml_doc    = NULL;
03665     xmlNode*    xml_root   = NULL;
03666     xmlNode*    xml_nd1    = NULL;
03667     xmlNode*    xml_nd2[3] = {0};
03668     xmlNode*    xml_nd3    = NULL;
03669     char        xml_buf[256];
03670     Error*      err;
03671 
03672     Matrix_f* counts[3]   = {0};
03673     Vector_f* stats[3][3] = {{0}};
03674 
03675     if (!(pred_labels[0][0]))
03676         return;
03677 
03678     if ((err = open_output(&cnt_fp, &xml_doc, "haplo-test-cv-details-out",
03679                     "haplo-test-cv-details-out.dtd", cnt_fname)) ||
03680         (err = open_output(&pct_fp, NULL, NULL, NULL, pct_fname)))
03681     {
03682         print_error_msg("haplo-test", err->msg);
03683     }
03684 
03685     N = get_num_haplo_groups();
03686     for (i = 0; i < 3; i++)
03687     {
03688         create_zero_matrix_f(&(counts[ i ]), num_cv_iters*num_cv_folds, N);
03689     }
03690 
03691     for (i = 0; i < num_cv_iters; i++)
03692     {
03693         for (j = 0; j < num_cv_folds; j++)
03694         {
03695             for (k = 0; k < ancestor_types[ i ][ j ]->num_elts; k++)
03696             {
03697                 if (!tandem_types || 
03698                         tandem_types[i][j]->elts[k] != HAPLO_ANCESTOR_NONE)
03699                 {
03700                     assert(pred_labels[ i ][ j ]->elts[ k ] < N);
03701                     n = pred_labels[ i ][ j ]->elts[ k ];
03702 
03703                     switch (ancestor_types[ i ][ j ]->elts[ k ])
03704                     {
03705                         case HAPLO_ANCESTOR_DIRECT:
03706                             counts[0]->elts[ i*num_cv_folds + j ][ n ]++;
03707                             break;
03708                         case HAPLO_ANCESTOR_INDIRECT:
03709                             counts[1]->elts[ i*num_cv_folds + j ][ n ]++;
03710                             break;
03711                         case HAPLO_ANCESTOR_NONE:
03712                             counts[2]->elts[ i*num_cv_folds + j ][ n ]++;
03713                             break;
03714                     }
03715                 }
03716             }
03717         }
03718     }
03719 
03720     for (i = 0; i < 3; i++)
03721     {
03722         ind_mv_sample_stats_f(&(stats[ i ][0]), &(stats[ i ][1]), 
03723                 &(stats[ i ][2]), counts[ i ]);
03724     }
03725 
03726     if (opts.header_out && opts.output_format != HAPLO_OUTPUT_XML)
03727     {
03728         switch (opts.output_format)
03729         {
03730             case HAPLO_OUTPUT_TXT:
03731                 fmt_1 = "%-10s  %-21s  %-21s  %-21s\n";
03732                 fmt_2 = "%-10s  %-8s %-6s %-5s  %-8s %-6s %-5s  %-8s %-6s %-5s\n";
03733                 break;
03734             case HAPLO_OUTPUT_CSV:
03735                 fmt_1 = "%s,%s,,,%s,,,%s,,\n";
03736                 fmt_2 = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n";
03737                 break;
03738             case HAPLO_OUTPUT_XML:
03739                 break;
03740         }
03741         fprintf(cnt_fp, fmt_1, "Predicted", "Direct", "Indirect", "None");
03742         fprintf(cnt_fp, fmt_2, "Label", "Mean", "Dev", "Err", "Mean", "Dev",
03743                 "Err", "Mean", "Dev", "Err");
03744         fprintf(pct_fp, fmt_1, "Predicted", "Direct", "Indirect", "None");
03745         fprintf(pct_fp, fmt_2, "Label", "Mean", "Dev", "Err", "Mean", "Dev",
03746                 "Err", "Mean", "Dev", "Err");
03747     }
03748     else if (opts.output_format == HAPLO_OUTPUT_XML)
03749     {
03750         xml_root = xmlDocGetRootElement(xml_doc);
03751     }
03752 
03753     nn = 0;
03754     for (n = 0; n < N; n++)
03755     {
03756         D = stats[0][0]->elts[n] + stats[1][0]->elts[n] + stats[2][0]->elts[n];
03757         if (D > 0)
03758         {
03759             lookup_haplo_group_label_from_index(&label, n);
03760 
03761             switch (opts.output_format)
03762             {
03763                 case HAPLO_OUTPUT_TXT:
03764                     fmt_1 = "%-10s  %-8.1f %-6.1f %-5.1f  %-8.1f %-6.1f %-5.1f  %-8.1f %-6.1f %-5.1f\n";
03765                     fprintf(cnt_fp, fmt_1, label, 
03766   stats[0][0]->elts[n], sqrt(stats[0][1]->elts[n]), stats[0][2]->elts[n],
03767   stats[1][0]->elts[n], sqrt(stats[1][1]->elts[n]), stats[1][2]->elts[n], 
03768   stats[2][0]->elts[n], sqrt(stats[2][1]->elts[n]), stats[2][2]->elts[n]);
03769                     fmt_1 = "%-10s  %-8.3f %-6.3f %-5.3f  %-8.3f %-6.3f %-5.3f  %-8.3f %-6.3f %-5.3f\n";
03770                     fprintf(pct_fp, fmt_1, label, 
03771   stats[0][0]->elts[n]/D, sqrt(stats[0][1]->elts[n])/D, stats[0][2]->elts[n]/D,
03772   stats[1][0]->elts[n]/D, sqrt(stats[1][1]->elts[n])/D, stats[1][2]->elts[n]/D,
03773   stats[2][0]->elts[n]/D, sqrt(stats[2][1]->elts[n])/D, stats[2][2]->elts[n]/D);
03774                     break;
03775                 case HAPLO_OUTPUT_CSV:
03776                     fmt_1 = "%s,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f\n";
03777                     fprintf(cnt_fp, fmt_1, label, 
03778   stats[0][0]->elts[n], sqrt(stats[0][1]->elts[n]), stats[0][2]->elts[n],
03779   stats[1][0]->elts[n], sqrt(stats[1][1]->elts[n]), stats[1][2]->elts[n], 
03780   stats[2][0]->elts[n], sqrt(stats[2][1]->elts[n]), stats[2][2]->elts[n]);
03781                     fmt_1 = "%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n";
03782                     fprintf(pct_fp, fmt_1, label, 
03783   stats[0][0]->elts[n]/D, sqrt(stats[0][1]->elts[n])/D, stats[0][2]->elts[n]/D,
03784   stats[1][0]->elts[n]/D, sqrt(stats[1][1]->elts[n])/D, stats[1][2]->elts[n]/D,
03785   stats[2][0]->elts[n]/D, sqrt(stats[2][1]->elts[n])/D, stats[2][2]->elts[n]/D);
03786                     break;
03787                 case HAPLO_OUTPUT_XML:
03788                     xml_nd1 = XMLNewChild(xml_root, "predicted", NULL);
03789                     snprintf(xml_buf, 256, "%d", nn+1);
03790                     XMLNewProp(xml_nd1, "number", xml_buf);
03791                     XMLNewChild(xml_nd1, "label", label);
03792                     xml_nd2[0] = XMLNewChild(xml_nd1, "direct", 0);
03793                     xml_nd2[1] = XMLNewChild(xml_nd1, "indirect", 0);
03794                     xml_nd2[2] = XMLNewChild(xml_nd1, "none", 0);
03795                     for (i = 0; i < 3; i++)
03796                     {
03797                         xml_nd3 = XMLNewChild(xml_nd2[i], "count", NULL);
03798                         snprintf(xml_buf, 256, "%.1f", stats[i][0]->elts[n]);
03799                         XMLNewChild(xml_nd3, "mean", xml_buf);
03800                         snprintf(xml_buf, 256, "%.1f", 
03801                                 sqrt(stats[i][1]->elts[n]));
03802                         XMLNewChild(xml_nd3, "dev", xml_buf);
03803                         snprintf(xml_buf, 256, "%.1f", stats[i][2]->elts[n]);
03804                         XMLNewChild(xml_nd3, "err", xml_buf);
03805 
03806                         xml_nd3 = XMLNewChild(xml_nd2[i], "percent", NULL);
03807                         snprintf(xml_buf, 256, "%.1f", 
03808                                 stats[i][0]->elts[n]/D);
03809                         XMLNewChild(xml_nd3, "mean", xml_buf);
03810                         snprintf(xml_buf, 256, "%.1f", 
03811                                 sqrt(stats[i][1]->elts[n])/D);
03812                         XMLNewChild(xml_nd3, "dev", xml_buf);
03813                         snprintf(xml_buf, 256, "%.1f", 
03814                                 stats[i][2]->elts[n]/D);
03815                         XMLNewChild(xml_nd3, "err", xml_buf);
03816                     }
03817                     break;
03818             }
03819 
03820             nn++;
03821         }
03822     }
03823 
03824     for (i = 0; i < 3; i++)
03825     {
03826         free_matrix_f(counts[ i ]);
03827 
03828         for (j = 0; j < 3; j++)
03829         {
03830             free_vector_f(stats[ i ][ j ]);
03831         }
03832     }
03833 
03834     if ((err = close_output(cnt_fp, xml_doc, cnt_fname)) ||
03835         (err = close_output(pct_fp, NULL, pct_fname)))
03836     {
03837         print_error_msg("haplo-test", err->msg);
03838     }
03839 }
03840 
03844 static void write_cross_validation_confusion
03845 (
03846     Vector_u32*** test_labels,
03847     Vector_u32*** pred_labels,
03848     Vector_d***   pred_confs,
03849     Vector_u32*** tandem_types,
03850     const char*   cnt_fname,
03851     const char*   pct_fname
03852 )
03853 {
03854     uint32_t    i, j, k;
03855     uint32_t    n_1, n_2, N;
03856     uint32_t    nn_1, nn_2;
03857     float       D;
03858     const char* label;
03859     FILE*       cnt_fp;
03860     FILE*       pct_fp;
03861     xmlDoc*     xml_doc    = NULL;
03862     xmlNode*    xml_root   = NULL;
03863     xmlNode*    xml_actual = NULL;
03864     xmlNode*    xml_pred   = NULL;
03865     char        xml_buf[256];
03866     Error*      err;
03867 
03868     Vector_f* actual_counts    = NULL;
03869     Vector_f* predicted_counts = NULL;
03870     Matrix_f* confusion        = NULL;
03871     Matrix_f* confusion_pct    = NULL;
03872 
03873     if (!(pred_labels[0][0]))
03874         return;
03875 
03876     if ((err = open_output(&cnt_fp, &xml_doc, "haplo-test-cv-confusion-out",
03877                     "haplo-test-cv-confusion-out.dtd", cnt_fname)) ||
03878         (err = open_output(&pct_fp, NULL, NULL, NULL, pct_fname)))
03879     {
03880         print_error_msg("haplo-test", err->msg);
03881     }
03882 
03883     N = get_num_haplo_groups();
03884     create_zero_vector_f(&actual_counts, N);
03885     create_zero_vector_f(&predicted_counts, N);
03886     create_zero_matrix_f(&confusion, N, N);
03887     create_zero_matrix_f(&confusion_pct, N, N);
03888 
03889     for (i = 0; i < num_cv_iters; i++)
03890     {
03891         for (j = 0; j < num_cv_folds; j++)
03892         {
03893             for (k = 0; k < test_labels[ i ][ j ]->num_elts; k++)
03894             {
03895                 if (!tandem_types || 
03896                         tandem_types[i][j]->elts[k] != HAPLO_ANCESTOR_NONE)
03897                 {
03898                 assert(test_labels[ i ][ j ]->elts[ k ] < N);
03899                 assert(pred_labels[ i ][ j ]->elts[ k ] < N);
03900 
03901                 actual_counts->elts[ test_labels[ i ][ j ]->elts[ k ] ]++;
03902                 predicted_counts->elts[ pred_labels[ i ][ j ]->elts[ k ] ]++;
03903                 confusion->elts[ test_labels[ i ][ j ]->elts[ k ] ]
03904                                [ pred_labels[ i ][ j ]->elts[ k ] ]++;
03905                 }
03906             }
03907         }
03908     }
03909 
03910     multiply_vector_by_scalar_f(&actual_counts, actual_counts,
03911             1.0f/(num_cv_iters*num_cv_folds));
03912     multiply_vector_by_scalar_f(&predicted_counts, predicted_counts,
03913             1.0f/(num_cv_iters*num_cv_folds));
03914     multiply_matrix_by_scalar_f(&confusion, confusion,
03915             1.0f/(num_cv_iters*num_cv_folds));
03916 
03917     copy_matrix_f(&confusion_pct, confusion);
03918     for (n_2 = 0; n_2 < N; n_2++)
03919     {
03920         D = predicted_counts->elts[ n_2 ];
03921         if (D > 0)
03922         {
03923             for (n_1 = 0; n_1 < N; n_1++)
03924             {
03925                 confusion_pct->elts[ n_1 ][ n_2 ] /= D;
03926             }
03927         }
03928     }
03929 
03930     if (opts.header_out && opts.output_format != HAPLO_OUTPUT_XML)
03931     {
03932         switch (opts.output_format)
03933         {
03934             case HAPLO_OUTPUT_TXT:
03935                 fprintf(cnt_fp, "%-10s", "Actual");
03936                 fprintf(pct_fp, "%-10s", "Actual");
03937                 for (n_2 = 0; n_2 < N; n_2++)
03938                 {
03939                     if (predicted_counts->elts[ n_2 ])
03940                     {
03941                         lookup_haplo_group_label_from_index(&label, n_2);
03942                         fprintf(cnt_fp, "  %-10s", label);
03943                         fprintf(pct_fp, "  %-10s", label);
03944                     }
03945                 }
03946                 break;
03947             case HAPLO_OUTPUT_CSV:
03948                 fprintf(cnt_fp, "%s", "Actual");
03949                 fprintf(pct_fp, "%s", "Actual");
03950                 for (n_2 = 0; n_2 < N; n_2++)
03951                 {
03952                     if (predicted_counts->elts[ n_2 ])
03953                     {
03954                         lookup_haplo_group_label_from_index(&label, n_2);
03955                         fprintf(cnt_fp, ",%s", label);
03956                         fprintf(pct_fp, ",%s", label);
03957                     }
03958                 }
03959                 break;
03960             case HAPLO_OUTPUT_XML:
03961                 break;
03962         }
03963         fprintf(cnt_fp, "\n");
03964         fprintf(pct_fp, "\n");
03965     }
03966     else if (opts.output_format == HAPLO_OUTPUT_XML)
03967     {
03968         xml_root = xmlDocGetRootElement(xml_doc);
03969     }
03970 
03971     nn_1 = 0;
03972     for (n_1 = 0; n_1 < N; n_1++)
03973     {
03974         switch (opts.output_format)
03975         {
03976             case HAPLO_OUTPUT_TXT:
03977                 if (actual_counts->elts[ n_1 ] > 0)
03978                 {
03979                     lookup_haplo_group_label_from_index(&label, n_1);
03980                     fprintf(cnt_fp, "%-10s", label);
03981                     fprintf(pct_fp, "%-10s", label);
03982 
03983                     nn_2 = 0;
03984                     for (n_2 = 0; n_2 < N; n_2++)
03985                     {
03986                         if (predicted_counts->elts[ n_2 ] > 0)
03987                         {
03988                             fprintf(cnt_fp, "  %-10.1f", 
03989                                     confusion->elts[ n_1 ][ n_2 ]);
03990                             fprintf(pct_fp, "  %-10.3f", 
03991                                     confusion_pct->elts[ n_1 ][ n_2 ]);
03992                             nn_2++;
03993                         }
03994                     }
03995                     fprintf(cnt_fp, "\n");
03996                     fprintf(pct_fp, "\n");
03997                     nn_1++;
03998                 }
03999                 break;
04000             case HAPLO_OUTPUT_CSV:
04001                 if (actual_counts->elts[ n_1 ] > 0)
04002                 {
04003                     lookup_haplo_group_label_from_index(&label, n_1);
04004                     fprintf(cnt_fp, "%s", label);
04005                     fprintf(pct_fp, "%s", label);
04006 
04007                     nn_2 = 0;
04008                     for (n_2 = 0; n_2 < N; n_2++)
04009                     {
04010                         if (predicted_counts->elts[ n_2 ] > 0)
04011                         {
04012                             fprintf(cnt_fp, ",%.1f", 
04013                                     confusion->elts[ n_1 ][ n_2 ]);
04014                             fprintf(pct_fp, ",%.3f", 
04015                                     confusion_pct->elts[ n_1 ][ n_2 ]);
04016                             nn_2++;
04017                         }
04018                     }
04019                     fprintf(cnt_fp, "\n");
04020                     fprintf(pct_fp, "\n");
04021                     nn_1++;
04022                 }
04023                 break;
04024             case HAPLO_OUTPUT_XML:
04025                 if (actual_counts->elts[ n_1 ] > 0)
04026                 {
04027                     lookup_haplo_group_label_from_index(&label, n_1);
04028                     xml_actual = XMLNewChild(xml_root, "actual", NULL);
04029                     snprintf(xml_buf, 256, "%d", nn_1+1);
04030                     XMLNewProp(xml_actual, "number", xml_buf);
04031                     XMLNewChild(xml_actual, "label", label);
04032 
04033                     nn_2 = 0;
04034                     for (n_2 = 0; n_2 < N; n_2++)
04035                     {
04036                         if (predicted_counts->elts[ n_2 ] > 0)
04037                         {
04038                             lookup_haplo_group_label_from_index(&label, n_2);
04039                             xml_pred = XMLNewChild(xml_actual, "predicted", 0);
04040                             snprintf(xml_buf, 256, "%d", nn_2+1);
04041                             XMLNewProp(xml_pred, "number", xml_buf);
04042                             XMLNewChild(xml_pred, "label", label);
04043 
04044                             snprintf(xml_buf, 256, "%.1f", 
04045                                     confusion->elts[n_1][n_2]);
04046                             XMLNewChild(xml_pred, "count", xml_buf);
04047 
04048                             snprintf(xml_buf, 256, "%.3f", 
04049                                     confusion_pct->elts[n_1][n_2]);
04050                             XMLNewChild(xml_pred, "percent", xml_buf);
04051 
04052                             nn_2++;
04053                         }
04054                     }
04055                     nn_1++;
04056                 }
04057                 break;
04058         }
04059     }
04060 
04061     free_vector_f(actual_counts);
04062     free_vector_f(predicted_counts);
04063     free_matrix_f(confusion);
04064     free_matrix_f(confusion_pct);
04065 
04066     if ((err = close_output(cnt_fp, xml_doc, cnt_fname)) ||
04067         (err = close_output(pct_fp, NULL, pct_fname)))
04068     {
04069         print_error_msg("haplo-test", err->msg);
04070     }
04071 }
04072 
04074 static void write_cross_validation_preds
04075 (
04076     const char*    type,
04077     Matblock_u8*** data_ids,
04078     Vector_u32***  data_labels,
04079     Vector_u32***  ancestor_types,
04080     Vector_u32***  ancestor_labels,
04081     Vector_u32***  pred_labels,
04082     Vector_d***    pred_confs,
04083     Vector_u32***  tandem_types,
04084     const char*    fname
04085 )
04086 {
04087     uint32_t    i, j, k;
04088     FILE*       fp;
04089     xmlDoc*     xml_doc    = NULL;
04090     xmlNode*    xml_root   = NULL;
04091     xmlNode*    xml_node   = NULL;
04092     char        xml_buf[256] = {0};
04093     Error*      err;
04094 
04095     if (!(pred_labels[0][0]))
04096         return;
04097 
04098     if ((err = open_output(&fp, &xml_doc, "haplo-test-cv-predictions-out",
04099                     "haplo-test-cv-predictions-out.dtd", fname)))
04100     {
04101         print_error_msg("haplo-test", err->msg);
04102     }
04103 
04104     if (opts.header_out && opts.output_format != HAPLO_OUTPUT_XML)
04105     {
04106         if (data_ids)
04107         {
04108             for (j = 0; j < data_ids[0][0]->num_rows; j++)
04109             {
04110                 switch (opts.output_format)
04111                 {
04112                     case HAPLO_OUTPUT_TXT:
04113                         fprintf(fp, "ID %-7d  ", j+1);
04114                         break;
04115                     case HAPLO_OUTPUT_CSV:
04116                         fprintf(fp, "ID %d,", j+1);
04117                         break;
04118                     case HAPLO_OUTPUT_XML:
04119                         break;
04120                 }
04121             }
04122         }
04123 
04124         switch (opts.output_format)
04125         {
04126             case HAPLO_OUTPUT_TXT:
04127                 fprintf(fp, "%-10s  %-10s  %-4s", "Actual", "Ancestor", "Type");
04128                 if (pred_labels) 
04129                     fprintf(fp, "  %-10s  %-5s", "Prediction", "Conf");
04130                 break;
04131             case HAPLO_OUTPUT_CSV:
04132                 fprintf(fp, "%s,%s,%s", "Actual", "Ancestor", "Type");
04133                 if (pred_labels) 
04134                     fprintf(fp, ",%s,%s", "Prediction", "Conf");
04135                 break;
04136             case HAPLO_OUTPUT_XML:
04137                 break;
04138         }
04139         fprintf(fp, "\n");
04140     }
04141     else if (opts.output_format == HAPLO_OUTPUT_XML)
04142     {
04143         xml_root = xmlDocGetRootElement(xml_doc);
04144     }
04145 
04146     for (i = 0; i < num_cv_iters; i++)
04147     {
04148         for (j = 0; j < num_cv_folds; j++)
04149         {
04150             for (k = 0; k < data_labels[i][j]->num_elts; k++)
04151             {
04152                 if (!tandem_types || 
04153                         tandem_types[i][j]->elts[k] != HAPLO_ANCESTOR_NONE)
04154                 {
04155                 if (opts.output_format == HAPLO_OUTPUT_XML)
04156                 {
04157                     xml_node = XMLNewChild(xml_root, "sample", NULL);
04158                     snprintf(xml_buf, 256, "%d", k+1);
04159                     XMLNewProp(xml_node, "number", xml_buf);
04160                 }
04161 
04162                 write_ids(data_ids[i][j], k, HAPLO_SEP_SUFFIX, fp, xml_node);
04163                 write_label(data_labels[i][j], k, HAPLO_SEP_SUFFIX, fp, 
04164                         xml_node);
04165 
04166                 write_ancestor_label(ancestor_types[i][j], 
04167                         ancestor_labels[i][j], k, HAPLO_SEP_NONE, fp, xml_node);
04168 
04169                 write_prediction(type, pred_labels[i][j], (pred_confs) ?
04170                         pred_confs[i][j] : NULL, k, HAPLO_SEP_PREFIX, fp,
04171                         xml_node);
04172 
04173                 if (opts.output_format != HAPLO_OUTPUT_XML)
04174                 {
04175                     fprintf(fp, "\n");
04176                 }
04177                 }
04178             }
04179         }
04180     }
04181 
04182     if ((err = close_output(fp, xml_doc, fname)))
04183     {
04184         print_error_msg("haplo-test", err->msg);
04185     }
04186 }
04187 
04189 static void cross_validate_nb_freq
04190 (
04191     Vector_u32**** labels_out,
04192     Vector_d****   confs_out,
04193     Matblock_u8*** train_ids,
04194     Vector_u32***  train_labels,
04195     Matrix_i32***  train_markers,
04196     Matblock_u8*** test_ids,
04197     Vector_u32***  test_labels,
04198     Matrix_i32***  test_markers,
04199     Vector_u32***  ancestor_types,
04200     Vector_u32***  ancestor_labels
04201 )
04202 {
04203     uint32_t i, j;
04204 
04205     NB_freq_model_tree* tree = NULL;
04206     Error* err;
04207 
04208     if (!opts.nb_freq_fname)
04209         return;
04210 
04211     for (i = 0; i < num_cv_iters; i++)
04212     {
04213         for (j = 0; j < num_cv_folds; j++)
04214         {
04215             if ((err = train_nb_freq_model_tree(&tree, train_labels[i][j], 
04216                             train_markers[i][j], opts.nb_freq_fname, 
04217                             opts.nb_freq_dtd_fname)) ||
04218                 (err = predict_labels_with_nb_freq_model_tree(
04219                             &((*labels_out)[i][j]), 
04220                             &((*confs_out)[i][j]), test_markers[i][j], 
04221                             tree, 0)))
04222             {
04223                 print_error_msg_exit("haplo-test", err->msg);
04224             }
04225 
04226             find_ancestors(&(ancestor_types[i][j]), 
04227                     &(ancestor_labels[i][j]), (*labels_out)[i][j], 
04228                     test_labels[i][j]);
04229         }
04230     }
04231 
04232     free_nb_freq_model_tree(tree);
04233 
04234     write_cross_validation_summary(ancestor_types, ancestor_labels,
04235             *labels_out, *confs_out, NULL, nb_freq_summary_fname);
04236 
04237     write_cross_validation_details(ancestor_types, ancestor_labels, 
04238             *labels_out, *confs_out, NULL, nb_freq_details_cnt_fname,
04239             nb_freq_details_pct_fname);
04240 
04241     write_cross_validation_confusion(test_labels, *labels_out, *confs_out,
04242             NULL, nb_freq_confusion_cnt_fname, nb_freq_confusion_pct_fname);
04243 
04244     write_cross_validation_preds("nb-freq", test_ids, test_labels, 
04245             ancestor_types, ancestor_labels, *labels_out, *confs_out, 
04246             NULL, nb_freq_preds_fname);
04247 }
04248 
04250 static void cross_validate_nb_gauss
04251 (
04252     Vector_u32**** labels_out,
04253     Vector_d****   confs_out,
04254     Matblock_u8*** train_ids,
04255     Vector_u32***  train_labels,
04256     Matrix_i32***  train_markers,
04257     Matblock_u8*** test_ids,
04258     Vector_u32***  test_labels,
04259     Matrix_i32***  test_markers,
04260     Vector_u32***  ancestor_types,
04261     Vector_u32***  ancestor_labels
04262 )
04263 {
04264     uint32_t i, j;
04265 
04266     NB_gauss_model_tree* tree = NULL;
04267     Error* err;
04268 
04269     if (!opts.nb_gauss_fname)
04270         return;
04271 
04272     for (i = 0; i < num_cv_iters; i++)
04273     {
04274         for (j = 0; j < num_cv_folds; j++)
04275         {
04276             if ((err = train_nb_gauss_model_tree(&tree, train_labels[i][j], 
04277                             train_markers[i][j], opts.nb_gauss_fname, 
04278                             opts.nb_gauss_dtd_fname)) ||
04279                 (err = predict_labels_with_nb_gauss_model_tree(
04280                             &((*labels_out)[i][j]), 
04281                             &((*confs_out)[i][j]), test_markers[i][j], 
04282                             tree, 0)))
04283             {
04284                 print_error_msg_exit("haplo-test", err->msg);
04285             }
04286 
04287             find_ancestors(&(ancestor_types[i][j]), 
04288                     &(ancestor_labels[i][j]), (*labels_out)[i][j], 
04289                     test_labels[i][j]);
04290         }
04291     }
04292 
04293     free_nb_gauss_model_tree(tree);
04294 
04295     write_cross_validation_summary(ancestor_types, ancestor_labels,
04296             *labels_out, *confs_out, NULL, nb_gauss_summary_fname);
04297 
04298     write_cross_validation_details(ancestor_types, ancestor_labels, 
04299             *labels_out, *confs_out, NULL, nb_gauss_details_cnt_fname,
04300             nb_gauss_details_pct_fname);
04301 
04302     write_cross_validation_confusion(test_labels, *labels_out, *confs_out,
04303             NULL, nb_gauss_confusion_cnt_fname, nb_gauss_confusion_pct_fname);
04304 
04305     write_cross_validation_preds("nb-gauss", test_ids, test_labels, 
04306             ancestor_types, ancestor_labels, *labels_out, *confs_out, 
04307             NULL, nb_gauss_preds_fname);
04308 }
04309 
04311 static void cross_validate_nb_gmm
04312 (
04313     Vector_u32**** labels_out,
04314     Vector_d****   confs_out,
04315     Matblock_u8*** train_ids,
04316     Vector_u32***  train_labels,
04317     Matrix_i32***  train_markers,
04318     Matblock_u8*** test_ids,
04319     Vector_u32***  test_labels,
04320     Matrix_i32***  test_markers,
04321     Vector_u32***  ancestor_types,
04322     Vector_u32***  ancestor_labels
04323 )
04324 {
04325     uint32_t i, j;
04326 
04327     NB_gmm_model_tree* tree = NULL;
04328     Error* err;
04329 
04330     if (!opts.nb_gmm_fname)
04331         return;
04332 
04333     for (i = 0; i < num_cv_iters; i++)
04334     {
04335         for (j = 0; j < num_cv_folds; j++)
04336         {
04337             if ((err = train_nb_gmm_model_tree(&tree, train_labels[i][j], 
04338                             train_markers[i][j], opts.nb_gmm_fname, 
04339                             opts.nb_gmm_dtd_fname)) ||
04340                 (err = predict_labels_with_nb_gmm_model_tree(
04341                             &((*labels_out)[i][j]), 
04342                             &((*confs_out)[i][j]), test_markers[i][j], 
04343                             tree, 0)))
04344             {
04345                 print_error_msg_exit("haplo-test", err->msg);
04346             }
04347 
04348             find_ancestors(&(ancestor_types[i][j]), 
04349                     &(ancestor_labels[i][j]), (*labels_out)[i][j], 
04350                     test_labels[i][j]);
04351         }
04352     }
04353 
04354     free_nb_gmm_model_tree(tree);
04355 
04356     write_cross_validation_summary(ancestor_types, ancestor_labels,
04357             *labels_out, *confs_out, NULL, nb_gmm_summary_fname);
04358 
04359     write_cross_validation_details(ancestor_types, ancestor_labels, 
04360             *labels_out, *confs_out, NULL, nb_gmm_details_cnt_fname,
04361             nb_gmm_details_pct_fname);
04362 
04363     write_cross_validation_confusion(test_labels, *labels_out, *confs_out,
04364             NULL, nb_gmm_confusion_cnt_fname, nb_gmm_confusion_pct_fname);
04365 
04366     write_cross_validation_preds("nb-gmm", test_ids, test_labels, 
04367             ancestor_types, ancestor_labels, *labels_out, *confs_out, 
04368             NULL, nb_gmm_preds_fname);
04369 }
04370 
04372 static void cross_validate_mv_gmm
04373 (
04374     Vector_u32**** labels_out,
04375     Vector_d****   confs_out,
04376     Matblock_u8*** train_ids,
04377     Vector_u32***  train_labels,
04378     Matrix_i32***  train_markers,
04379     Matblock_u8*** test_ids,
04380     Vector_u32***  test_labels,
04381     Matrix_i32***  test_markers,
04382     Vector_u32***  ancestor_types,
04383     Vector_u32***  ancestor_labels
04384 )
04385 {
04386     uint32_t i, j;
04387 
04388     MV_gmm_model_tree* tree = NULL;
04389     Error* err;
04390 
04391     if (!opts.mv_gmm_fname)
04392         return;
04393 
04394     for (i = 0; i < num_cv_iters; i++)
04395     {
04396         for (j = 0; j < num_cv_folds; j++)
04397         {
04398             if ((err = train_mv_gmm_model_tree(&tree, train_labels[i][j], 
04399                             train_markers[i][j], opts.mv_gmm_fname, 
04400                             opts.mv_gmm_dtd_fname)) ||
04401                 (err = predict_labels_with_mv_gmm_model_tree(
04402                             &((*labels_out)[i][j]), 
04403                             &((*confs_out)[i][j]), test_markers[i][j], 
04404                             tree, 0)))
04405             {
04406                 print_error_msg_exit("haplo-test", err->msg);
04407             }
04408 
04409             find_ancestors(&(ancestor_types[i][j]), 
04410                     &(ancestor_labels[i][j]), (*labels_out)[i][j], 
04411                     test_labels[i][j]);
04412         }
04413     }
04414 
04415     free_mv_gmm_model_tree(tree);
04416 
04417     write_cross_validation_summary(ancestor_types, ancestor_labels,
04418             *labels_out, *confs_out, NULL, mv_gmm_summary_fname);
04419 
04420     write_cross_validation_details(ancestor_types, ancestor_labels, 
04421             *labels_out, *confs_out, NULL, mv_gmm_details_cnt_fname,
04422             mv_gmm_details_pct_fname);
04423 
04424     write_cross_validation_confusion(test_labels, *labels_out, *confs_out,
04425             NULL, mv_gmm_confusion_cnt_fname, mv_gmm_confusion_pct_fname);
04426 
04427     write_cross_validation_preds("mv-gmm", test_ids, test_labels, 
04428             ancestor_types, ancestor_labels, *labels_out, *confs_out, 
04429             NULL, mv_gmm_preds_fname);
04430 }
04431 
04433 static void cross_validate_svm
04434 (
04435     Vector_u32**** labels_out,
04436     Vector_d****   confs_out,
04437     Matblock_u8*** train_ids,
04438     Vector_u32***  train_labels,
04439     Matrix_i32***  train_markers,
04440     Matblock_u8*** test_ids,
04441     Vector_u32***  test_labels,
04442     Matrix_i32***  test_markers,
04443     Vector_u32***  ancestor_types,
04444     Vector_u32***  ancestor_labels
04445 )
04446 {
04447 #ifdef HAPLO_ENABLE_SVM
04448     uint32_t i, j;
04449 
04450     SVM_model_tree* tree = NULL;
04451     Error* err;
04452 
04453     if (!opts.svm_fname)
04454         return;
04455 
04456     for (i = 0; i < num_cv_iters; i++)
04457     {
04458         for (j = 0; j < num_cv_folds; j++)
04459         {
04460             if ((err = train_svm_model_tree(&tree, train_labels[i][j], 
04461                             train_markers[i][j], opts.svm_fname, 
04462                             opts.svm_dtd_fname)) ||
04463                 (err = predict_labels_with_svm_model_tree(
04464                             &((*labels_out)[i][j]), 
04465                             &((*confs_out)[i][j]), test_markers[i][j], tree)))
04466             {
04467                 print_error_msg_exit("haplo-test", err->msg);
04468             }
04469 
04470             find_ancestors(&(ancestor_types[i][j]), 
04471                     &(ancestor_labels[i][j]), (*labels_out)[i][j], 
04472                     test_labels[i][j]);
04473         }
04474     }
04475 
04476     free_svm_model_tree(tree);
04477 
04478     write_cross_validation_summary(ancestor_types, ancestor_labels,
04479             *labels_out, *confs_out, NULL, svm_summary_fname);
04480 
04481     write_cross_validation_details(ancestor_types, ancestor_labels, 
04482             *labels_out, *confs_out, NULL, svm_details_cnt_fname,
04483             svm_details_pct_fname);
04484 
04485     write_cross_validation_confusion(test_labels, *labels_out, *confs_out,
04486             NULL, svm_confusion_cnt_fname, svm_confusion_pct_fname);
04487 
04488     write_cross_validation_preds("svm", test_ids, test_labels, 
04489             ancestor_types, ancestor_labels, *labels_out, *confs_out, 
04490             NULL, svm_preds_fname);
04491 #else
04492     return;
04493 #endif
04494 }
04495 
04497 static void cross_validate_j48
04498 (
04499     Vector_u32**** labels_out,
04500     Vector_d****   confs_out,
04501     Matblock_u8*** train_ids,
04502     Vector_u32***  train_labels,
04503     Matrix_i32***  train_markers,
04504     Matblock_u8*** test_ids,
04505     Vector_u32***  test_labels,
04506     Matrix_i32***  test_markers,
04507     Vector_u32***  ancestor_types,
04508     Vector_u32***  ancestor_labels
04509 )
04510 {
04511 #ifdef HAPLO_ENABLE_WEKA
04512     uint32_t i, j;
04513     pid_t    pid;
04514     char     tmp_dir[1024] = {0};
04515     char     script[4096] = {0};
04516 
04517     Weka_model_tree* tree = NULL;
04518     Error* err;
04519 
04520     if (!opts.weka_j48_fname)
04521         return;
04522 
04523     pid = getpid();
04524     snprintf(tmp_dir, 1024, "%s/.haplo_test_cross_validate_j48_%u", 
04525             tmp_dirname, pid);
04526     snprintf(script, 4096, "mkdir -p %s", tmp_dir);
04527     if (system(script) == 127)
04528     {
04529         print_error_msg_exit("haplo-test", "Could not create tmp files");
04530     }
04531 
04532     for (i = 0; i < num_cv_iters; i++)
04533     {
04534         for (j = 0; j < num_cv_folds; j++)
04535         {
04536             if ((err = train_weka_j48_model_tree(&tree, train_labels[i][j], 
04537                             train_markers[i][j], opts.weka_j48_fname, 
04538                             opts.weka_dtd_fname, tmp_dir,
04539                             opts.weka_jar_fname)) ||
04540                 (err = predict_labels_with_weka_j48_model_tree(
04541                             &((*labels_out)[i][j]), 
04542                             &((*confs_out)[i][j]), test_markers[i][j], 
04543                             tree, opts.weka_jar_fname)))
04544             {
04545                 print_error_msg_exit("haplo-test", err->msg);
04546             }
04547 
04548             find_ancestors(&(ancestor_types[i][j]), 
04549                     &(ancestor_labels[i][j]), (*labels_out)[i][j], 
04550                     test_labels[i][j]);
04551         }
04552     }
04553 
04554     snprintf(script, 4096, "rm -rf %s", tmp_dir);
04555     if (system(script) == 127)
04556     {
04557         print_error_msg_exit("haplo-test", "Could not remove tmp files");
04558     }
04559 
04560     free_weka_model_tree(tree);
04561 
04562     write_cross_validation_summary(ancestor_types, ancestor_labels,
04563             *labels_out, *confs_out, NULL, weka_j48_summary_fname);
04564 
04565     write_cross_validation_details(ancestor_types, ancestor_labels, 
04566             *labels_out, *confs_out, NULL, weka_j48_details_cnt_fname,
04567             weka_j48_details_pct_fname);
04568 
04569     write_cross_validation_confusion(test_labels, *labels_out, *confs_out,
04570             NULL, weka_j48_confusion_cnt_fname, weka_j48_confusion_pct_fname);
04571 
04572     write_cross_validation_preds("j48", test_ids, test_labels, 
04573             ancestor_types, ancestor_labels, *labels_out, *confs_out, 
04574             NULL, weka_j48_preds_fname);
04575 #else
04576     return;
04577 #endif
04578 }
04579 
04581 static void cross_validate_part
04582 (
04583     Vector_u32**** labels_out,
04584     Vector_d****   confs_out,
04585     Matblock_u8*** train_ids,
04586     Vector_u32***  train_labels,
04587     Matrix_i32***  train_markers,
04588     Matblock_u8*** test_ids,
04589     Vector_u32***  test_labels,
04590     Matrix_i32***  test_markers,
04591     Vector_u32***  ancestor_types,
04592     Vector_u32***  ancestor_labels
04593 )
04594 {
04595 #ifdef HAPLO_ENABLE_WEKA
04596     uint32_t i, j;
04597     pid_t    pid;
04598     char     tmp_dir[1024] = {0};
04599     char     script[4096] = {0};
04600 
04601     Weka_model_tree* tree = NULL;
04602     Error* err;
04603 
04604     if (!opts.weka_part_fname)
04605         return;
04606 
04607     pid = getpid();
04608     snprintf(tmp_dir, 1024, "%s/.haplo_test_cross_validate_part_%u", 
04609             tmp_dirname, pid);
04610     snprintf(script, 4096, "mkdir -p %s", tmp_dir);
04611     if (system(script) == 127)
04612     {
04613         print_error_msg_exit("haplo-test", "Could not create tmp files");
04614     }
04615 
04616     for (i = 0; i < num_cv_iters; i++)
04617     {
04618         for (j = 0; j < num_cv_folds; j++)
04619         {
04620             if ((err = train_weka_part_model_tree(&tree, train_labels[i][j], 
04621                             train_markers[i][j], opts.weka_part_fname, 
04622                             opts.weka_dtd_fname, tmp_dir,
04623                             opts.weka_jar_fname)) ||
04624                 (err = predict_labels_with_weka_part_model_tree(
04625                             &((*labels_out)[i][j]), 
04626                             &((*confs_out)[i][j]), test_markers[i][j], 
04627                             tree, opts.weka_jar_fname)))
04628             {
04629                 print_error_msg_exit("haplo-test", err->msg);
04630             }
04631 
04632             find_ancestors(&(ancestor_types[i][j]), 
04633                     &(ancestor_labels[i][j]), (*labels_out)[i][j], 
04634                     test_labels[i][j]);
04635         }
04636     }
04637 
04638     snprintf(script, 4096, "rm -rf %s", tmp_dir);
04639     if (system(script) == 127)
04640     {
04641         print_error_msg_exit("haplo-test", "Could not remove tmp files");
04642     }
04643 
04644     free_weka_model_tree(tree);
04645 
04646     write_cross_validation_summary(ancestor_types, ancestor_labels,
04647             *labels_out, *confs_out, NULL, weka_part_summary_fname);
04648 
04649     write_cross_validation_details(ancestor_types, ancestor_labels, 
04650             *labels_out, *confs_out, NULL, weka_part_details_cnt_fname,
04651             weka_part_details_pct_fname);
04652 
04653     write_cross_validation_confusion(test_labels, *labels_out, *confs_out,
04654             NULL, weka_part_confusion_cnt_fname, weka_part_confusion_pct_fname);
04655 
04656     write_cross_validation_preds("part", test_ids, test_labels, 
04657             ancestor_types, ancestor_labels, *labels_out, *confs_out, 
04658             NULL, weka_part_preds_fname);
04659 #else
04660     return;
04661 #endif
04662 }
04663 
04668 static void cross_validate_nearest
04669 (
04670     Vector_u32**** labels_out,
04671     Vector_d****   dists_out,
04672     Matblock_u8*** train_ids,
04673     Vector_u32***  train_labels,
04674     Matrix_i32***  train_markers,
04675     Matblock_u8*** test_ids,
04676     Vector_u32***  test_labels,
04677     Matrix_i32***  test_markers,
04678     Vector_u32***  ancestor_types,
04679     Vector_u32***  ancestor_labels
04680 )
04681 {
04682     uint32_t i, j;
04683 
04684     Nearest_model* model = NULL;
04685     Error* err;
04686 
04687     if (!opts.nearest_fname)
04688         return;
04689 
04690     for (i = 0; i < num_cv_iters; i++)
04691     {
04692         for (j = 0; j < num_cv_folds; j++)
04693         {
04694             if ((err = train_nearest_model(&model, train_labels[i][j], 
04695                             train_markers[i][j], opts.nearest_fname, 
04696                             opts.nearest_dtd_fname)) ||
04697                 (err = predict_labels_with_nearest_model(
04698                             &((*labels_out)[i][j]), 
04699                             &((*dists_out)[i][j]), test_markers[i][j], model)))
04700             {
04701                 print_error_msg_exit("haplo-test", err->msg);
04702             }
04703 
04704             find_ancestors(&(ancestor_types[i][j]), 
04705                     &(ancestor_labels[i][j]), (*labels_out)[i][j], 
04706                     test_labels[i][j]);
04707         }
04708     }
04709 
04710     free_nearest_model(model);
04711 
04712     write_cross_validation_summary(ancestor_types, ancestor_labels, *labels_out,
04713             *dists_out, NULL, nearest_summary_fname);
04714 
04715     write_cross_validation_details(ancestor_types, ancestor_labels, *labels_out,
04716             *dists_out, NULL, nearest_details_cnt_fname,
04717             nearest_details_pct_fname);
04718 
04719     write_cross_validation_confusion(test_labels, *labels_out, *dists_out,
04720             NULL, nearest_confusion_cnt_fname, nearest_confusion_pct_fname);
04721 
04722     write_cross_validation_preds("nearest", test_ids, test_labels,
04723             ancestor_types, ancestor_labels, *labels_out, *dists_out,
04724             NULL, nearest_preds_fname);
04725 }
04726 
04731 static void cross_validate_tandem_agree
04732 (
04733     Vector_u32**** types_out, 
04734     Vector_u32**** labels_out,
04735     Vector_u32***  nb_freq_labels, 
04736     Vector_u32***  nb_gauss_labels, 
04737     Vector_u32***  nb_gmm_labels,
04738     Vector_u32***  mv_gmm_labels, 
04739     Vector_u32***  svm_labels, 
04740     Vector_u32***  j48_labels, 
04741     Vector_u32***  part_labels, 
04742     Vector_u32***  nearest_labels, 
04743     Matblock_u8*** test_ids,
04744     Vector_u32***  test_labels,
04745     Vector_u32***  ancestor_types, 
04746     Vector_u32***  ancestor_labels
04747 )
04748 {
04749     uint32_t i, j;
04750     const Vector_u32*  nb_freq;
04751     const Vector_u32*  nb_gauss;
04752     const Vector_u32*  nb_gmm;
04753     const Vector_u32*  mv_gmm;
04754     const Vector_u32*  svm;
04755     const Vector_u32*  j48;
04756     const Vector_u32*  part;
04757     const Vector_u32*  nearest;
04758 
04759     for (i = 0; i < num_cv_iters; i++)
04760     {
04761         for (j = 0; j < num_cv_folds; j++)
04762         {
04763             nb_freq  = (opts.nb_freq_fname) ? nb_freq_labels[i][j] : NULL;
04764             nb_gauss = (opts.nb_gauss_fname) ? nb_gauss_labels[i][j] : NULL;
04765             nb_gmm   = (opts.nb_gmm_fname) ? nb_gmm_labels[i][j] : NULL;
04766             mv_gmm   = (opts.mv_gmm_fname) ? mv_gmm_labels[i][j] : NULL;
04767 #ifdef HAPLO_ENABLE_SVM
04768             svm      = (opts.svm_fname) ? svm_labels[i][j] : NULL;
04769 #else    
04770             svm      = NULL;
04771 #endif
04772 #ifdef HAPLO_ENABLE_WEKA
04773             j48      = (opts.weka_j48_fname) ? j48_labels[i][j] : NULL;
04774             part     = (opts.weka_part_fname) ? part_labels[i][j] : NULL;
04775 #else
04776             j48      = NULL;
04777             part     = NULL;
04778 #endif
04779             nearest  = (opts.nearest_fname) ? nearest_labels[i][j] : NULL;
04780 
04781             find_ancestors_of_sets(&(ancestor_types[i][j]),
04782                     &(ancestor_labels[i][j]), nb_freq, nb_gauss, nb_gmm,
04783                     mv_gmm, svm, j48, part, nearest);
04784 
04785             copy_vector_u32(&((*types_out)[i][j]), ancestor_types[i][j]);
04786             copy_vector_u32(&((*labels_out)[i][j]), ancestor_labels[i][j]);
04787         }
04788     }
04789 
04790     write_cross_validation_summary(ancestor_types, ancestor_labels,
04791             *labels_out, NULL, NULL, tandem_agree_summary_fname);
04792 
04793     write_cross_validation_details(ancestor_types, ancestor_labels,
04794             *labels_out, NULL, *types_out, tandem_agree_details_cnt_fname,
04795             tandem_agree_details_pct_fname);
04796 }
04797 
04802 static void cross_validate_tandem
04803 (
04804     Vector_u32***  tandem_types, 
04805     Vector_u32***  tandem_labels, 
04806     Matblock_u8*** test_ids,
04807     Vector_u32***  test_labels,
04808     Vector_u32***  ancestor_types, 
04809     Vector_u32***  ancestor_labels
04810 )
04811 {
04812     uint32_t i, j;
04813 
04814     for (i = 0; i < num_cv_iters; i++)
04815     {
04816         for (j = 0; j < num_cv_folds; j++)
04817         {
04818             find_tandem_ancestors(&(ancestor_types[i][j]), 
04819                     &(ancestor_labels[i][j]), tandem_types[i][j],
04820                     tandem_labels[i][j], test_labels[i][j]);
04821         }
04822     }
04823 
04824     write_cross_validation_summary(ancestor_types, ancestor_labels,
04825             tandem_labels, NULL, tandem_types, tandem_summary_fname);
04826 
04827     write_cross_validation_details(ancestor_types, ancestor_labels,
04828             tandem_labels, NULL, tandem_types, tandem_details_cnt_fname,
04829             tandem_details_pct_fname);
04830 
04831     write_cross_validation_confusion(test_labels, tandem_labels, NULL,
04832             tandem_types, tandem_confusion_cnt_fname,
04833             tandem_confusion_pct_fname);
04834 
04835     write_cross_validation_preds("tandem", test_ids, test_labels,
04836             ancestor_types, ancestor_labels, tandem_labels, NULL,
04837             tandem_types, tandem_preds_fname);
04838 }
04839 
04841 static void leave_one_out
04842 (
04843     const Matblock_u8* ids,
04844     const Vector_u32*  labels,
04845     const Matrix_i32*  markers
04846 )
04847 {
04848     Vector_u32*  nb_freq_labels  = NULL;
04849     Vector_d*    nb_freq_confs   = NULL;
04850     Vector_u32*  nb_gauss_labels = NULL;
04851     Vector_d*    nb_gauss_confs  = NULL;
04852     Vector_u32*  nb_gmm_labels   = NULL;
04853     Vector_d*    nb_gmm_confs    = NULL;
04854     Vector_u32*  mv_gmm_labels   = NULL;
04855     Vector_d*    mv_gmm_confs    = NULL;
04856     Vector_u32*  svm_labels      = NULL;
04857     Vector_d*    svm_confs       = NULL;
04858     Vector_u32*  j48_labels      = NULL;
04859     Vector_d*    j48_confs       = NULL;
04860     Vector_u32*  part_labels     = NULL;
04861     Vector_d*    part_confs      = NULL;
04862     Vector_u32*  nearest_labels  = NULL;
04863     Vector_d*    nearest_dists   = NULL;
04864     Vector_u32*  tandem_types    = NULL;
04865     Vector_u32*  tandem_labels   = NULL;
04866     Vector_u32*  ancestor_types  = NULL;
04867     Vector_u32*  ancestor_labels = NULL;
04868 
04869     leave_one_out_nb_freq(&nb_freq_labels, &nb_freq_confs, &ancestor_types,
04870             &ancestor_labels, ids, labels, markers);
04871 
04872     leave_one_out_nb_gauss(&nb_gauss_labels, &nb_gauss_confs, &ancestor_types,
04873             &ancestor_labels, ids, labels, markers);
04874 
04875     leave_one_out_nb_gmm(&nb_gmm_labels, &nb_gmm_confs, &ancestor_types,
04876             &ancestor_labels, ids, labels, markers);
04877 
04878     leave_one_out_mv_gmm(&mv_gmm_labels, &mv_gmm_confs, &ancestor_types,
04879             &ancestor_labels, ids, labels, markers);
04880 
04881     leave_one_out_svm(&svm_labels, &svm_confs, &ancestor_types,
04882             &ancestor_labels, ids, labels, markers);
04883 
04884     leave_one_out_j48(&j48_labels, &j48_confs, &ancestor_types,
04885             &ancestor_labels, ids, labels, markers);
04886 
04887     leave_one_out_part(&part_labels, &part_confs, &ancestor_types,
04888             &ancestor_labels, ids, labels, markers);
04889 
04890     leave_one_out_nearest(&nearest_labels, &nearest_dists, &ancestor_types,
04891             &ancestor_labels, ids, labels, markers);
04892 
04893     if (tandem)
04894     {
04895         leave_one_out_tandem_agree(&tandem_types, &tandem_labels,
04896                 &ancestor_types, &ancestor_labels, nb_freq_labels,
04897                 nb_gauss_labels, nb_gmm_labels, mv_gmm_labels, svm_labels,
04898                 j48_labels, part_labels, nearest_labels, ids, labels);
04899 
04900         leave_one_out_tandem(tandem_types, tandem_labels, ids, labels,
04901                 ancestor_types, ancestor_labels);
04902     }
04903 
04904     free_vector_u32(nb_freq_labels);
04905     free_vector_d(nb_freq_confs);
04906     free_vector_u32(nb_gauss_labels);
04907     free_vector_d(nb_gauss_confs);
04908     free_vector_u32(nb_gmm_labels);
04909     free_vector_d(nb_gmm_confs);
04910     free_vector_u32(mv_gmm_labels);
04911     free_vector_d(mv_gmm_confs);
04912     free_vector_u32(svm_labels);
04913     free_vector_d(svm_confs);
04914     free_vector_u32(j48_labels);
04915     free_vector_d(j48_confs);
04916     free_vector_u32(part_labels);
04917     free_vector_d(part_confs);
04918     free_vector_u32(nearest_labels);
04919     free_vector_d(nearest_dists);
04920     free_vector_u32(tandem_labels);
04921     free_vector_u32(ancestor_types);
04922     free_vector_u32(ancestor_labels);
04923 }
04924 
04929 static void allocate_cross_validation_results
04930 (
04931     Vector_u32****  nb_freq_labels_out,
04932     Vector_d****    nb_freq_confs_out,
04933     Vector_u32****  nb_gauss_labels_out,
04934     Vector_d****    nb_gauss_confs_out,
04935     Vector_u32****  nb_gmm_labels_out,
04936     Vector_d****    nb_gmm_confs_out,
04937     Vector_u32****  mv_gmm_labels_out,
04938     Vector_d****    mv_gmm_confs_out,
04939     Vector_u32****  svm_labels_out,
04940     Vector_d****    svm_confs_out,
04941     Vector_u32****  j48_labels_out,
04942     Vector_d****    j48_confs_out,
04943     Vector_u32****  part_labels_out,
04944     Vector_d****    part_confs_out,
04945     Vector_u32****  nearest_labels_out,
04946     Vector_d****    nearest_dists_out,
04947     Vector_u32****  tandem_types_out,
04948     Vector_u32****  tandem_labels_out,
04949     Vector_u32****  ancestor_types_out,
04950     Vector_u32****  ancestor_labels_out,
04951     uint32_t        num_samples
04952 )
04953 {
04954     uint32_t i, j;
04955 
04956     assert(*nb_freq_labels_out = malloc(num_cv_iters*sizeof(void**)));
04957     assert(*nb_freq_confs_out = malloc(num_cv_iters*sizeof(void**)));
04958     assert(*nb_gauss_labels_out = malloc(num_cv_iters*sizeof(void**)));
04959     assert(*nb_gauss_confs_out = malloc(num_cv_iters*sizeof(void**)));
04960     assert(*nb_gmm_labels_out = malloc(num_cv_iters*sizeof(void**)));
04961     assert(*nb_gmm_confs_out = malloc(num_cv_iters*sizeof(void**)));
04962     assert(*mv_gmm_labels_out = malloc(num_cv_iters*sizeof(void**)));
04963     assert(*mv_gmm_confs_out = malloc(num_cv_iters*sizeof(void**)));
04964     assert(*svm_labels_out = malloc(num_cv_iters*sizeof(void**)));
04965     assert(*svm_confs_out = malloc(num_cv_iters*sizeof(void**)));
04966     assert(*j48_labels_out = malloc(num_cv_iters*sizeof(void**)));
04967     assert(*j48_confs_out = malloc(num_cv_iters*sizeof(void**)));
04968     assert(*part_labels_out = malloc(num_cv_iters*sizeof(void**)));
04969     assert(*part_confs_out = malloc(num_cv_iters*sizeof(void**)));
04970     assert(*nearest_labels_out = malloc(num_cv_iters*sizeof(void**)));
04971     assert(*nearest_dists_out = malloc(num_cv_iters*sizeof(void**)));
04972     assert(*tandem_types_out = malloc(num_cv_iters*sizeof(void**)));
04973     assert(*tandem_labels_out = malloc(num_cv_iters*sizeof(void**)));
04974     assert(*ancestor_types_out = malloc(num_cv_iters*sizeof(void**)));
04975     assert(*ancestor_labels_out = malloc(num_cv_iters*sizeof(void**)));
04976 
04977     for (i = 0; i < num_cv_iters; i++)
04978     {
04979         assert((*nb_freq_labels_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04980         assert((*nb_freq_confs_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04981         assert((*nb_gauss_labels_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04982         assert((*nb_gauss_confs_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04983         assert((*nb_gmm_labels_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04984         assert((*nb_gmm_confs_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04985         assert((*mv_gmm_labels_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04986         assert((*mv_gmm_confs_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04987         assert((*svm_labels_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04988         assert((*svm_confs_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04989         assert((*j48_labels_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04990         assert((*j48_confs_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04991         assert((*part_labels_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04992         assert((*part_confs_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04993         assert((*nearest_labels_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04994         assert((*nearest_dists_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04995         assert((*tandem_types_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04996         assert((*tandem_labels_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04997         assert((*ancestor_types_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04998         assert((*ancestor_labels_out)[i] = calloc(num_cv_folds,sizeof(void*)));
04999 
05000         for (j = 0; j < num_cv_folds; j++)
05001         {
05002             create_zero_vector_u32(&((*nb_freq_labels_out)[i][j]), num_samples);
05003             create_zero_vector_d(&((*nb_freq_confs_out)[i][j]), num_samples);
05004             create_zero_vector_u32(&((*nb_gauss_labels_out)[i][j]), num_samples);
05005             create_zero_vector_d(&((*nb_gauss_confs_out)[i][j]), num_samples);
05006             create_zero_vector_u32(&((*nb_gmm_labels_out)[i][j]), num_samples);
05007             create_zero_vector_d(&((*nb_gmm_confs_out)[i][j]), num_samples);
05008             create_zero_vector_u32(&((*mv_gmm_labels_out)[i][j]), num_samples);
05009             create_zero_vector_d(&((*mv_gmm_confs_out)[i][j]), num_samples);
05010             create_zero_vector_u32(&((*svm_labels_out)[i][j]), num_samples);
05011             create_zero_vector_d(&((*svm_confs_out)[i][j]), num_samples);
05012             create_zero_vector_u32(&((*j48_labels_out)[i][j]), num_samples);
05013             create_zero_vector_d(&((*j48_confs_out)[i][j]), num_samples);
05014             create_zero_vector_u32(&((*part_labels_out)[i][j]), num_samples);
05015             create_zero_vector_d(&((*part_confs_out)[i][j]), num_samples);
05016             create_zero_vector_u32(&((*nearest_labels_out)[i][j]), num_samples);
05017             create_zero_vector_d(&((*nearest_dists_out)[i][j]), num_samples);
05018             create_zero_vector_u32(&((*tandem_types_out)[i][j]), num_samples);
05019             create_zero_vector_u32(&((*tandem_labels_out)[i][j]), num_samples);
05020             create_zero_vector_u32(&((*ancestor_types_out)[i][j]), num_samples);
05021             create_zero_vector_u32(&((*ancestor_labels_out)[i][j]),num_samples);
05022         }
05023     }
05024 }
05025 
05026 static void free_cross_validation_results
05027 (
05028     Vector_u32***  nb_freq_labels,
05029     Vector_d***    nb_freq_confs,
05030     Vector_u32***  nb_gauss_labels,
05031     Vector_d***    nb_gauss_confs,
05032     Vector_u32***  nb_gmm_labels,
05033     Vector_d***    nb_gmm_confs,
05034     Vector_u32***  mv_gmm_labels,
05035     Vector_d***    mv_gmm_confs,
05036     Vector_u32***  svm_labels,
05037     Vector_d***    svm_confs,
05038     Vector_u32***  j48_labels,
05039     Vector_d***    j48_confs,
05040     Vector_u32***  part_labels,
05041     Vector_d***    part_confs,
05042     Vector_u32***  nearest_labels,
05043     Vector_d***    nearest_dists,
05044     Vector_u32***  tandem_types,
05045     Vector_u32***  tandem_labels,
05046     Vector_u32***  ancestor_types,
05047     Vector_u32***  ancestor_labels
05048 )
05049 {
05050     uint32_t i, j;
05051 
05052     for (i = 0; i < num_cv_iters; i++)
05053     {
05054         for (j =0; j < num_cv_folds; j++)
05055         {
05056             free_vector_u32(nb_freq_labels[ i ][ j ]);
05057             free_vector_d(nb_freq_confs[ i ][ j ]);
05058             free_vector_u32(nb_gauss_labels[ i ][ j ]);
05059             free_vector_d(nb_gauss_confs[ i ][ j ]);
05060             free_vector_u32(nb_gmm_labels[ i ][ j ]);
05061             free_vector_d(nb_gmm_confs[ i ][ j ]);
05062             free_vector_u32(mv_gmm_labels[ i ][ j ]);
05063             free_vector_d(mv_gmm_confs[ i ][ j ]);
05064             free_vector_u32(svm_labels[ i ][ j ]);
05065             free_vector_d(svm_confs[ i ][ j ]);
05066             free_vector_u32(j48_labels[ i ][ j ]);
05067             free_vector_d(j48_confs[ i ][ j ]);
05068             free_vector_u32(part_labels[ i ][ j ]);
05069             free_vector_d(part_confs[ i ][ j ]);
05070             free_vector_u32(nearest_labels[ i ][ j ]);
05071             free_vector_d(nearest_dists[ i ][ j ]);
05072             free_vector_u32(tandem_types[ i ][ j ]);
05073             free_vector_u32(tandem_labels[ i ][ j ]);
05074             free_vector_u32(ancestor_labels[ i ][ j ]);
05075             free_vector_u32(ancestor_types[ i ][ j ]);
05076         }
05077 
05078         free(nb_freq_labels[ i ]); 
05079         free(nb_freq_confs[ i ]);
05080         free(nb_gauss_labels[ i ]); 
05081         free(nb_gauss_confs[ i ]);
05082         free(nb_gmm_labels[ i ]); 
05083         free(nb_gmm_confs[ i ]);
05084         free(mv_gmm_labels[ i ]); 
05085         free(mv_gmm_confs[ i ]);
05086         free(svm_labels[ i ]); 
05087         free(svm_confs[ i ]);
05088         free(j48_labels[ i ]); 
05089         free(j48_confs[ i ]);
05090         free(part_labels[ i ]); 
05091         free(part_confs[ i ]);
05092         free(nearest_labels[ i ]);
05093         free(nearest_dists[ i ]);
05094         free(tandem_types[ i ]);
05095         free(tandem_labels[ i ]);
05096         free(ancestor_labels[ i ]);
05097         free(ancestor_types[ i ]);
05098     }
05099 
05100     free(nb_freq_labels); 
05101     free(nb_freq_confs);
05102     free(nb_gauss_labels); 
05103     free(nb_gauss_confs);
05104     free(nb_gmm_labels); 
05105     free(nb_gmm_confs);
05106     free(mv_gmm_labels); 
05107     free(mv_gmm_confs);
05108     free(svm_labels); 
05109     free(svm_confs);
05110     free(j48_labels); 
05111     free(j48_confs);
05112     free(part_labels); 
05113     free(part_confs);
05114     free(nearest_labels);
05115     free(nearest_dists);
05116     free(tandem_types);
05117     free(tandem_labels);
05118     free(ancestor_labels);
05119     free(ancestor_types);
05120 }
05121 
05123 static void cross_validate
05124 (
05125     const Matblock_u8* data_ids,
05126     const Vector_u32*  data_labels,
05127     const Matrix_i32*  data_markers
05128 )
05129 {
05130     Matblock_u8*** train_ids       = NULL;
05131     Vector_u32***  train_labels    = NULL;
05132     Matrix_i32***  train_markers   = NULL;
05133     Matblock_u8*** test_ids        = NULL;
05134     Vector_u32***  test_labels     = NULL;
05135     Matrix_i32***  test_markers    = NULL;
05136     Vector_u32***  nb_freq_labels  = NULL;
05137     Vector_d***    nb_freq_confs   = NULL;
05138     Vector_u32***  nb_gauss_labels = NULL;
05139     Vector_d***    nb_gauss_confs  = NULL;
05140     Vector_u32***  nb_gmm_labels   = NULL;
05141     Vector_d***    nb_gmm_confs    = NULL;
05142     Vector_u32***  mv_gmm_labels   = NULL;
05143     Vector_d***    mv_gmm_confs    = NULL;
05144     Vector_u32***  svm_labels      = NULL;
05145     Vector_d***    svm_confs       = NULL;
05146     Vector_u32***  j48_labels      = NULL;
05147     Vector_d***    j48_confs       = NULL;
05148     Vector_u32***  part_labels     = NULL;
05149     Vector_d***    part_confs      = NULL;
05150     Vector_u32***  nearest_labels  = NULL;
05151     Vector_d***    nearest_dists   = NULL;
05152     Vector_u32***  tandem_types    = NULL;
05153     Vector_u32***  tandem_labels   = NULL;
05154     Vector_u32***  ancestor_types  = NULL;
05155     Vector_u32***  ancestor_labels = NULL;
05156 
05157     create_cross_validation_train_and_test_data(&train_ids, &train_labels,
05158             &train_markers, &test_ids, &test_labels, &test_markers, data_ids,
05159             data_labels, data_markers);
05160 
05161     allocate_cross_validation_results(&nb_freq_labels, &nb_freq_confs,
05162             &nb_gauss_labels, &nb_gauss_confs, &nb_gmm_labels, &nb_gmm_confs,
05163             &mv_gmm_labels, &mv_gmm_confs, &svm_labels, &svm_confs, &j48_labels,
05164             &j48_confs, &part_labels, &part_confs, &nearest_labels,
05165             &nearest_dists, &tandem_types, &tandem_labels, &ancestor_types,
05166             &ancestor_labels, data_labels->num_elts);
05167 
05168     cross_validate_nb_freq(&nb_freq_labels, &nb_freq_confs, train_ids,
05169             train_labels, train_markers, test_ids, test_labels, test_markers,
05170             ancestor_types, ancestor_labels);
05171 
05172     cross_validate_nb_gauss(&nb_gauss_labels, &nb_gauss_confs, train_ids,
05173             train_labels, train_markers, test_ids, test_labels, test_markers,
05174             ancestor_types, ancestor_labels);
05175 
05176     cross_validate_nb_gmm(&nb_gmm_labels, &nb_gmm_confs, train_ids,
05177             train_labels, train_markers, test_ids, test_labels, test_markers,
05178             ancestor_types, ancestor_labels);
05179 
05180     cross_validate_mv_gmm(&mv_gmm_labels, &mv_gmm_confs, train_ids,
05181             train_labels, train_markers, test_ids, test_labels, test_markers,
05182             ancestor_types, ancestor_labels);
05183 
05184     cross_validate_svm(&svm_labels, &svm_confs, train_ids, train_labels,
05185             train_markers, test_ids, test_labels, test_markers,
05186             ancestor_types, ancestor_labels);
05187 
05188     cross_validate_j48(&j48_labels, &j48_confs, train_ids, train_labels,
05189             train_markers, test_ids, test_labels, test_markers,
05190             ancestor_types, ancestor_labels);
05191 
05192     cross_validate_part(&part_labels, &part_confs, train_ids, train_labels,
05193             train_markers, test_ids, test_labels, test_markers,
05194             ancestor_types, ancestor_labels);
05195 
05196     cross_validate_nearest(&nearest_labels, &nearest_dists, train_ids,
05197             train_labels, train_markers, test_ids, test_labels, test_markers,
05198             ancestor_types, ancestor_labels);
05199 
05200     if (tandem)
05201     {
05202         cross_validate_tandem_agree(&tandem_types, &tandem_labels,
05203                 nb_freq_labels, nb_gauss_labels, nb_gmm_labels, mv_gmm_labels,
05204                 svm_labels, j48_labels, part_labels, nearest_labels, test_ids,
05205                 test_labels, ancestor_types, ancestor_labels);
05206 
05207         cross_validate_tandem(tandem_types, tandem_labels, test_ids,
05208                 test_labels, ancestor_types, ancestor_labels);
05209     }
05210 
05211     free_cross_validation_results(nb_freq_labels, nb_freq_confs,
05212             nb_gauss_labels, nb_gauss_confs, nb_gmm_labels, nb_gmm_confs,
05213             mv_gmm_labels, mv_gmm_confs, svm_labels, svm_confs, j48_labels,
05214             j48_confs, part_labels, part_confs, nearest_labels, nearest_dists, 
05215             tandem_types, tandem_labels, ancestor_types, ancestor_labels);
05216 
05217     free_cross_validation_train_and_test_data(train_ids, train_labels,
05218             train_markers, test_ids, test_labels, test_markers);
05219 }
05220 
05221 
05223 int main(int argc, const char** argv)
05224 {
05225     int         argi;
05226     const char* data_fname = "/dev/stdin";
05227     Error*      err;
05228 
05229     Matblock_u8* ids            = NULL;
05230     Vector_u32*  labels         = NULL;
05231     Matrix_i32*  markers        = NULL;
05232 
05233     init_test_options();
05234 
05235     if ((err = process_options(argc, argv, &argi, NUM_OPTS_NO_ARG, opts_no_arg,
05236                     NUM_OPTS_WITH_ARG, opts_with_arg)) != NULL)
05237     {
05238         print_error_msg_exit("haplo-test", err->msg);
05239     }
05240 
05241     if ((argc - argi) == 1)
05242     {
05243         data_fname = argv[ argi ];
05244     }
05245 
05246     if (num_models_to_test() == 0)
05247     {
05248         print_error_msg_exit("haplo-test", "No models to test");
05249     }
05250 
05251     if ((err = read_haplo_groups(opts.labels_fname)))
05252     {
05253         print_error_msg_exit("haplo-test", err->msg);
05254     }
05255 
05256     if ((err = read_input(&ids, &labels, &markers, data_fname)))
05257     {
05258         print_error_msg_exit("haplo-test", err->msg);
05259     }
05260 
05261     if (!labels)
05262     {
05263         print_error_msg("haplo-test", NULL);
05264         print_error_msg_exit(data_fname, "No labels to test with");
05265     }
05266 
05267     switch (test_type)
05268     {
05269         case HAPLO_TEST_LEAVE_ONE_OUT:
05270             leave_one_out(ids, labels, markers);
05271             break;
05272         case HAPLO_TEST_CROSS_VALIDATE:
05273             cross_validate(ids, labels, markers);
05274             break;
05275     }
05276 
05277     free_matblock_u8(ids);
05278     free_vector_u32(labels);
05279     free_matrix_i32(markers);
05280 
05281     if (get_num_unhandled_errors() > 0)
05282     {
05283         print_error_msg_exit("haplo-test", "Unhandled errors");
05284     }
05285 
05286     return EXIT_SUCCESS;
05287 }