Haplo Prediction
predict haplogroups
options.c
Go to the documentation of this file.
00001 /*
00002  * This work is licensed under a Creative Commons 
00003  * Attribution-Noncommercial-Share Alike 3.0 United States License.
00004  * 
00005  *    http://creativecommons.org/licenses/by-nc-sa/3.0/us/
00006  * 
00007  * You are free:
00008  * 
00009  *    to Share - to copy, distribute, display, and perform the work
00010  *    to Remix - to make derivative works
00011  * 
00012  * Under the following conditions:
00013  * 
00014  *    Attribution. You must attribute the work in the manner specified by the
00015  *    author or licensor (but not in any way that suggests that they endorse you
00016  *    or your use of the work).
00017  * 
00018  *    Noncommercial. You may not use this work for commercial purposes.
00019  * 
00020  *    Share Alike. If you alter, transform, or build upon this work, you may
00021  *    distribute the resulting work only under the same or similar license to
00022  *    this one.
00023  * 
00024  * For any reuse or distribution, you must make clear to others the license
00025  * terms of this work. The best way to do this is by including this header.
00026  * 
00027  * Any of the above conditions can be waived if you get permission from the
00028  * copyright holder.
00029  * 
00030  * Apart from the remix rights granted under this license, nothing in this
00031  * license impairs or restricts the author's moral rights.
00032  */
00033 
00034 
00046 #include <config.h>
00047 
00048 #include <stdlib.h>
00049 #include <stdio.h>
00050 #include <string.h>
00051 #include <inttypes.h>
00052 #include <assert.h>
00053 
00054 #ifdef HAPLO_HAVE_DMALLOC
00055 #include <dmalloc.h>
00056 #endif
00057 
00058 #include <jwsc/base/error.h>
00059 #include <jwsc/base/option.h>
00060 #include <jwsc/vector/vector.h>
00061 
00062 #include "haplo_groups.h"
00063 #include "options.h"
00064 
00065 
/*
 * Compile-time defaults for the program options (see init_options()).
 * File-name defaults are null pointers, meaning "no file given".
 */

/* Flags and general settings. */
#define  HEADER_IN                 0
#define  HEADER_OUT                0
#define  EXCLUDE_ONE               0
#define  NUM_THREADS               1

/* Input / output formats and the labels file. */
#define  INPUT_FORMAT              HAPLO_INPUT_TXT
#define  INPUT_DTD_FNAME           NULL
#define  OUTPUT_FORMAT             HAPLO_OUTPUT_TXT
#define  LABELS_FNAME              NULL
#define  LABELS_DTD_FNAME          NULL

/* Column layout of the primary input (columns are counted from 1). */
#define  FIRST_MARKER_COL          2
#define  NUM_MARKERS               9

/* Auxiliary input and its column layout. */
#define  AUX_INPUT_FNAME           NULL
#define  AUX_LABEL_COL             0
#define  AUX_FIRST_MARKER_COL      2
#define  AUX_NUM_MARKERS           9

/* Model files and the DTDs used to validate them. */
#define  NB_FREQ_FNAME             NULL
#define  NB_FREQ_DTD_FNAME         NULL
#define  NB_GAUSS_FNAME            NULL
#define  NB_GAUSS_DTD_FNAME        NULL
#define  NB_GMM_FNAME              NULL
#define  NB_GMM_DTD_FNAME          NULL
#define  MV_GMM_FNAME              NULL
#define  MV_GMM_DTD_FNAME          NULL
#define  MV_MMM_FNAME              NULL
#define  MV_MMM_DTD_FNAME          NULL
#define  SVM_FNAME                 NULL
#define  SVM_DTD_FNAME             NULL
#define  WEKA_J48_FNAME            NULL
#define  WEKA_PART_FNAME           NULL
#define  WEKA_JAR_FNAME            NULL
#define  WEKA_DTD_FNAME            NULL
#define  NEAREST_FNAME             NULL
#define  NEAREST_DTD_FNAME         NULL
#define  NEAREST_MAX_D             1
#define  BMM_TREE_FNAME            NULL
#define  BMM_INFO_FNAME            NULL
#define  BMM_CONF_THRESH_FNAME     NULL
#define  GMM_BMM_TREE_FNAME        NULL
#define  GMM_BMM_INFO_FNAME        NULL
#define  GMM_BMM_CONF_THRESH_FNAME NULL

/* Directory searched for model files. */
#define  MODEL_DIRNAME             "."
00107 
00108 
00109 extern Option_no_arg opts_no_arg[];
00110 extern Option_with_arg opts_with_arg[];
00111 extern uint32_t get_num_opts_no_arg();
00112 extern uint32_t get_num_opts_with_arg();
00113 extern void print_usage();
00114 
00115 
/**
 * Set to 1 once '1st-marker-col' or 'num-markers' has been processed.
 * process_marker_cols_opt() checks it to reject mixing the sequential marker
 * options with an explicit 'marker-cols' list.
 */
uint8_t sequential_markers = 0;
00121 
00122 
/**
 * Set to 1 once 'aux-1st-marker-col' or 'aux-num-markers' has been processed.
 * process_aux_marker_cols_opt() checks it to reject mixing the sequential
 * auxiliary marker options with an explicit 'aux-marker-cols' list.
 */
uint8_t aux_sequential_markers = 0;
00128 
00129 
00131 Error* process_help_opt()
00132 {
00133     print_usage();
00134     exit(EXIT_SUCCESS);
00135     return NULL;
00136 }
00137 
00139 Error* process_version_opt()
00140 {
00141     fprintf(stderr, "%s\n", HAPLO_PACKAGE_STRING);
00142     exit(EXIT_SUCCESS);
00143     return NULL;
00144 }
00145 
00147 Error* process_header_in_opt()
00148 {
00149     opts.header_in = 1;
00150     return NULL;
00151 }
00152 
00154 Error* process_header_out_opt()
00155 {
00156     opts.header_out = 1;
00157     return NULL;
00158 }
00159 
00160 Error* process_exclude_one_opt()
00161 {
00162     opts.exclude_one = 1;
00163     return NULL;
00164 }
00165 
00167 Error* process_options_opt(Option_arg arg)
00168 {
00169     Error* err;
00170     int    slen = 256;
00171     char   s[slen];
00172 
00173     if (arg == NULL)
00174     {
00175         return JWSC_EARG("Option 'options' requires an argument");
00176     }
00177 
00178     if ((err = process_options_from_file(arg, get_num_opts_no_arg(), 
00179                     opts_no_arg, get_num_opts_with_arg(), opts_with_arg)) 
00180             != NULL)
00181     {
00182         snprintf(s, slen, "%s: %s", arg, err->msg);
00183         return JWSC_EARG(s);
00184     }
00185 
00186     return NULL;
00187 }
00188 
00190 Error* process_num_threads_opt(Option_arg arg)
00191 {
00192     if (arg == NULL)
00193     {
00194         return JWSC_EARG("Option 'num-threads' requires an argument");
00195     }
00196     if (sscanf(arg, "%u", &(opts.num_threads)) != 1 || 
00197             opts.num_threads < 1)
00198     {
00199         return JWSC_EARG("Option 'num-threads' must be > 0");
00200     }
00201 
00202     return NULL;
00203 }
00204 
00206 Error* process_seed_opt(Option_arg arg)
00207 {
00208     if (arg == NULL)
00209     {
00210         return JWSC_EARG("Option 'seed' requires an argument");
00211     }
00212     if (sscanf(arg, "%u", &(opts.seed)) != 1)
00213     {
00214         return JWSC_EARG("Option 'seed' requires an argument");
00215     }
00216 
00217     srand(opts.seed);
00218 
00219     return NULL;
00220 }
00221 
00223 static Error* process_input_format_opt(Option_arg arg)
00224 {
00225     if (arg == NULL)
00226     {
00227         return JWSC_EARG("Option 'input-format' requires an argument");
00228     }
00229     if (strncmp(arg, "txt", 3) == 0)
00230     {
00231         opts.input_format = HAPLO_INPUT_TXT;
00232     }
00233     else if (strncmp(arg, "csv", 3) == 0)
00234     {
00235         opts.input_format = HAPLO_INPUT_CSV;
00236     }
00237     else if (strncmp(arg, "xml", 3) == 0)
00238     {
00239         opts.input_format = HAPLO_INPUT_XML;
00240     }
00241     else
00242     {
00243         return JWSC_EARG("Option 'input-format' must be one of {txt, csv, xml}");
00244     }
00245     return NULL;
00246 }
00247 
00249 Error* process_input_dtd_opt(Option_arg arg)
00250 {
00251     if (arg == NULL)
00252     {
00253         return JWSC_EARG("Option 'input-dtd' requires an argument");
00254     }
00255     opts.input_dtd_fname = arg;
00256 
00257     return NULL;
00258 }
00259 
00261 static Error* process_output_format_opt(Option_arg arg)
00262 {
00263     if (arg == NULL)
00264     {
00265         return JWSC_EARG("Option 'output-format' requires an argument");
00266     }
00267     if (strncmp(arg, "txt", 3) == 0)
00268     {
00269         opts.output_format = HAPLO_OUTPUT_TXT;
00270     }
00271     else if (strncmp(arg, "csv", 3) == 0)
00272     {
00273         opts.output_format = HAPLO_OUTPUT_CSV;
00274     }
00275     else if (strncmp(arg, "xml", 3) == 0)
00276     {
00277         opts.output_format = HAPLO_OUTPUT_XML;
00278     }
00279     else
00280     {
00281         return JWSC_EARG("Option 'output-format' must be one of {txt, csv, xml}");
00282     }
00283     return NULL;
00284 }
00285 
00287 Error* process_labels_opt(Option_arg arg)
00288 {
00289     if (arg == NULL)
00290     {
00291         return JWSC_EARG("Option 'labels' requires an argument");
00292     }
00293     opts.labels_fname = arg;
00294 
00295     return NULL;
00296 }
00297 
00299 Error* process_labels_dtd_opt(Option_arg arg)
00300 {
00301     if (arg == NULL)
00302     {
00303         return JWSC_EARG("Option 'labels-dtd' requires an argument");
00304     }
00305     opts.labels_dtd_fname = arg;
00306 
00307     return NULL;
00308 }
00309 
00311 Error* process_id_cols_opt(Option_arg arg)
00312 {
00313     uint32_t num_ids;
00314     uint32_t id_col;
00315     uint32_t n;
00316     char*    str;
00317     char*    token;
00318 
00319     if (arg == NULL)
00320     {
00321         return JWSC_EARG("Option 'id-cols' requires an argument");
00322     }
00323 
00324     n = strlen(arg);
00325     str = malloc((n+1)*sizeof(char));
00326     strncpy(str, arg, n);
00327     str[ n ] = 0;
00328     token = str;
00329 
00330     free_vector_u32(opts.id_cols);
00331     opts.id_cols = NULL;
00332     num_ids = 0;
00333 
00334     while ((token = strtok(token, ",")) != NULL)
00335     {
00336         if (sscanf(token, "%u", &id_col) != 1)
00337         {
00338             return JWSC_EARG("Option 'id-cols' must be >= 0");
00339         }
00340         if (num_ids == 0 && id_col == 0)
00341         {
00342             break;
00343         }
00344         else if (num_ids > 0 && id_col == 0)
00345         {
00346             return JWSC_EARG("Option 'id-cols': IDs must be > 0");
00347         }
00348         create_vector_u32(&(opts.id_cols), num_ids+1);
00349         opts.id_cols->elts[ num_ids++ ] = id_col;
00350         token = NULL;
00351     }
00352 
00353     free(str);
00354 
00355     return NULL;
00356 }
00357 
00359 Error* process_label_col_opt(Option_arg arg)
00360 {
00361     if (arg == NULL)
00362     {
00363         return JWSC_EARG("Option 'label-col' requires an argument");
00364     }
00365     if (sscanf(arg, "%u", &(opts.label_col)) != 1)
00366     {
00367         return JWSC_EARG("Option 'label-col' must be >= 0");
00368     }
00369 
00370     return NULL;
00371 }
00372 
00374 Error* process_first_marker_col_opt(Option_arg arg)
00375 {
00376     if (opts.marker_cols)
00377     {
00378         return JWSC_EARG("Option '1st-marker-col' cannot be used with 'marker-cols'");
00379     }
00380     if (arg == NULL)
00381     {
00382         return JWSC_EARG("Option '1st-marker-col' requires an argument");
00383     }
00384     if (sscanf(arg, "%u", &(opts.first_marker_col)) != 1 || 
00385             opts.first_marker_col < 1)
00386     {
00387         return JWSC_EARG("Option '1st-marker-col' must be > 0");
00388     }
00389     sequential_markers = 1;
00390 
00391     return NULL;
00392 }
00393 
00395 Error* process_num_markers_opt(Option_arg arg)
00396 {
00397     if (opts.marker_cols)
00398     {
00399         return JWSC_EARG("Option 'num-markers' cannot be used with 'marker-cols'");
00400     }
00401     if (arg == NULL)
00402     {
00403         return JWSC_EARG("Option 'num-markers' requires an argument");
00404     }
00405     if (sscanf(arg, "%u", &(opts.num_markers)) != 1 || opts.num_markers < 1)
00406     {
00407         return JWSC_EARG("Option 'num-markers' must be > 0");
00408     }
00409     sequential_markers = 1;
00410 
00411     return NULL;
00412 }
00413 
00415 Error* process_marker_cols_opt(Option_arg arg)
00416 {
00417     uint32_t n;
00418     uint32_t m;
00419     char*    str;
00420     char*    token;
00421 
00422     if (sequential_markers)
00423     {
00424         return JWSC_EARG("Option 'marker-cols' cannot be used with 'num-markers' or 'marker-col'");
00425     }
00426 
00427     if (arg == NULL)
00428     {
00429         return JWSC_EARG("Option 'marker-cols' requires an argument");
00430     }
00431 
00432     n = strlen(arg);
00433     str = malloc((n+1)*sizeof(char));
00434     strncpy(str, arg, n);
00435     str[ n ] = 0;
00436     token = str;
00437 
00438     opts.num_markers = 0;
00439 
00440     while ((token = strtok(token, ",")) != NULL)
00441     {
00442         if (sscanf(token, "%u", &m) != 1 || m < 1)
00443         {
00444             return JWSC_EARG("Option 'marker-cols': Invalid marker");
00445         }
00446         create_vector_u32(&(opts.marker_cols), opts.num_markers+1);
00447         opts.marker_cols->elts[ opts.num_markers++ ] = m;
00448         token = NULL;
00449     }
00450 
00451     free(str);
00452     
00453     if (opts.num_markers == 0)
00454     {
00455         return JWSC_EARG("Option 'marker_cols' must number > 0");
00456     }
00457     opts.first_marker_col = opts.marker_cols->elts[ 0 ];
00458 
00459     return NULL;
00460 }
00461 
00462 Error* process_aux_input_opt(Option_arg arg)
00463 {
00464     if (arg == NULL)
00465     {
00466         return JWSC_EARG("Option 'aux-input' requires an argument");
00467     }
00468     opts.aux_input_fname = arg;
00469 
00470     return NULL;
00471 }
00472 
00473 Error* process_aux_id_cols_opt(Option_arg arg)
00474 {
00475     uint32_t num_ids;
00476     uint32_t id_col;
00477     uint32_t n;
00478     char*    str;
00479     char*    token;
00480 
00481     if (arg == NULL)
00482     {
00483         return JWSC_EARG("Option 'aux-id-cols' requires an argument");
00484     }
00485 
00486     n = strlen(arg);
00487     str = malloc((n+1)*sizeof(char));
00488     strncpy(str, arg, n);
00489     str[ n ] = 0;
00490     token = str;
00491 
00492     num_ids = 0;
00493 
00494     while ((token = strtok(token, ",")) != NULL)
00495     {
00496         if (sscanf(token, "%u", &id_col) != 1)
00497         {
00498             return JWSC_EARG("Option 'aux-id-cols' must be >= 0");
00499         }
00500         if (num_ids == 0 && id_col == 0)
00501         {
00502             break;
00503         }
00504         else if (num_ids > 0 && id_col == 0)
00505         {
00506             return JWSC_EARG("Option 'aux-id-cols': IDs must be > 0");
00507         }
00508         create_vector_u32(&(opts.aux_id_cols), num_ids+1);
00509         opts.aux_id_cols->elts[ num_ids++ ] = id_col;
00510         token = NULL;
00511     }
00512 
00513     free(str);
00514 
00515     return NULL;
00516 }
00517 
00518 Error* process_aux_label_col_opt(Option_arg arg)
00519 {
00520     if (arg == NULL)
00521     {
00522         return JWSC_EARG("Option 'aux-label-col' requires an argument");
00523     }
00524     if (sscanf(arg, "%u", &(opts.aux_label_col)) != 1)
00525     {
00526         return JWSC_EARG("Option 'aux-label-col' must be >= 0");
00527     }
00528 
00529     return NULL;
00530 }
00531 
00532 Error* process_aux_first_marker_col_opt(Option_arg arg)
00533 {
00534     if (opts.aux_marker_cols)
00535     {
00536         return JWSC_EARG("Option 'aux-1st-marker-col' cannot be used with 'aux-marker-cols'");
00537     }
00538     if (arg == NULL)
00539     {
00540         return JWSC_EARG("Option 'aux-1st-marker-col' requires an argument");
00541     }
00542     if (sscanf(arg, "%u", &(opts.aux_first_marker_col)) != 1 || 
00543             opts.aux_first_marker_col < 1)
00544     {
00545         return JWSC_EARG("Option 'aux-1st-marker-col' must be > 0");
00546     }
00547     aux_sequential_markers = 1;
00548 
00549     return NULL;
00550 }
00551 
00552 Error* process_aux_num_markers_opt(Option_arg arg)
00553 {
00554     if (opts.aux_marker_cols)
00555     {
00556         return JWSC_EARG("Option 'aux-num-markers' cannot be used with 'aux-marker-cols'");
00557     }
00558     if (arg == NULL)
00559     {
00560         return JWSC_EARG("Option 'aux-num-markers' requires an argument");
00561     }
00562     if (sscanf(arg, "%u", &(opts.aux_num_markers)) != 1 || 
00563             opts.aux_num_markers < 1)
00564     {
00565         return JWSC_EARG("Option 'aux-num-markers' must be > 0");
00566     }
00567     aux_sequential_markers = 1;
00568 
00569     return NULL;
00570 }
00571 
00572 Error* process_aux_marker_cols_opt(Option_arg arg)
00573 {
00574     uint32_t n;
00575     uint32_t m;
00576     char*    str;
00577     char*    token;
00578 
00579     if (aux_sequential_markers)
00580     {
00581         return JWSC_EARG("Option 'aux-marker-cols' cannot be used with 'aux-num-markers' or 'aux-marker-col'");
00582     }
00583 
00584     if (arg == NULL)
00585     {
00586         return JWSC_EARG("Option 'aux-marker-cols' requires an argument");
00587     }
00588 
00589     n = strlen(arg);
00590     str = malloc((n+1)*sizeof(char));
00591     strncpy(str, arg, n);
00592     str[ n ] = 0;
00593     token = str;
00594 
00595     opts.aux_num_markers = 0;
00596 
00597     while ((token = strtok(token, ",")) != NULL)
00598     {
00599         if (sscanf(token, "%u", &m) != 1 || m < 1)
00600         {
00601             return JWSC_EARG("Option 'aux-marker-cols': Invalid marker");
00602         }
00603         create_vector_u32(&(opts.aux_marker_cols), opts.aux_num_markers+1);
00604         opts.aux_marker_cols->elts[ opts.aux_num_markers++ ] = m;
00605         token = NULL;
00606     }
00607 
00608     free(str);
00609     
00610     if (opts.aux_num_markers == 0)
00611     {
00612         return JWSC_EARG("Option 'aux-marker-cols' must number > 0");
00613     }
00614     opts.aux_first_marker_col = opts.aux_marker_cols->elts[ 0 ];
00615 
00616     return NULL;
00617 }
00618 
00619 Error* process_num_gmm_bmm_markers_opt(Option_arg arg)
00620 {
00621     uint32_t n,m;
00622 
00623     if (arg == NULL)
00624     {
00625         return JWSC_EARG("Option 'num-gmm-bmm-markers' requires an argument");
00626     }
00627     if (sscanf(arg, "%d,%d", &n, &m) < 2)
00628     {
00629         return JWSC_EARG("Option 'num-markers' has format gmm,bmm");
00630     }
00631     opts.num_gmm_markers = (uint32_t)n;
00632 
00633     return NULL;
00634 }
00635 
00636 Error* process_model_dir_opt(Option_arg arg)
00637 {
00638     if (arg == NULL)
00639     {
00640         return JWSC_EARG("Option 'model-dir' requires an argument");
00641     }
00642     opts.model_dirname = arg;
00643 
00644     return NULL;
00645 }
00646 
00647 Error* process_nb_freq_opt(Option_arg arg)
00648 {
00649     if (arg == NULL)
00650     {
00651         return JWSC_EARG("Option 'nb-freq' requires an argument");
00652     }
00653     opts.nb_freq_fname = arg;
00654 
00655     return NULL;
00656 }
00657 
00658 Error* process_nb_freq_dtd_opt(Option_arg arg)
00659 {
00660     if (arg == NULL)
00661     {
00662         return JWSC_EARG("Option 'nb-freq-dtd' requires an argument");
00663     }
00664     opts.nb_freq_dtd_fname = arg;
00665 
00666     return NULL;
00667 }
00668 
00669 Error* process_nb_gauss_opt(Option_arg arg)
00670 {
00671     if (arg == NULL)
00672     {
00673         return JWSC_EARG("Option 'nb-gauss' requires an argument");
00674     }
00675     opts.nb_gauss_fname = arg;
00676 
00677     return NULL;
00678 }
00679 
00680 Error* process_nb_gauss_dtd_opt(Option_arg arg)
00681 {
00682     if (arg == NULL)
00683     {
00684         return JWSC_EARG("Option 'nb-gauss-dtd' requires an argument");
00685     }
00686     opts.nb_gauss_dtd_fname = arg;
00687 
00688     return NULL;
00689 }
00690 
00691 Error* process_nb_gmm_opt(Option_arg arg)
00692 {
00693     if (arg == NULL)
00694     {
00695         return JWSC_EARG("Option 'nb-gmm' requires an argument");
00696     }
00697     opts.nb_gmm_fname = arg;
00698 
00699     return NULL;
00700 }
00701 
00702 Error* process_nb_gmm_dtd_opt(Option_arg arg)
00703 {
00704     if (arg == NULL)
00705     {
00706         return JWSC_EARG("Option 'nb-gmm-dtd' requires an argument");
00707     }
00708     opts.nb_gmm_dtd_fname = arg;
00709 
00710     return NULL;
00711 }
00712 
00713 Error* process_mv_gmm_opt(Option_arg arg)
00714 {
00715     if (arg == NULL)
00716     {
00717         return JWSC_EARG("Option 'mv-gmm' requires an argument");
00718     }
00719     opts.mv_gmm_fname = arg;
00720 
00721     return NULL;
00722 }
00723 
00724 Error* process_mv_gmm_dtd_opt(Option_arg arg)
00725 {
00726     if (arg == NULL)
00727     {
00728         return JWSC_EARG("Option 'mv-gmm-dtd' requires an argument");
00729     }
00730     opts.mv_gmm_dtd_fname = arg;
00731 
00732     return NULL;
00733 }
00734 
00735 Error* process_mv_mmm_opt(Option_arg arg)
00736 {
00737     if (arg == NULL)
00738     {
00739         return JWSC_EARG("Option 'mv-mmm' requires an argument");
00740     }
00741     opts.mv_mmm_fname = arg;
00742 
00743     return NULL;
00744 }
00745 
00746 Error* process_mv_mmm_dtd_opt(Option_arg arg)
00747 {
00748     if (arg == NULL)
00749     {
00750         return JWSC_EARG("Option 'mv-mmm-dtd' requires an argument");
00751     }
00752     opts.mv_mmm_dtd_fname = arg;
00753 
00754     return NULL;
00755 }
00756 
00757 #ifdef HAPLO_ENABLE_SVM
00758 Error* process_svm_opt(Option_arg arg)
00759 {
00760     if (arg == NULL)
00761     {
00762         return JWSC_EARG("Option 'svm' requires an argument");
00763     }
00764     opts.svm_fname = arg;
00765 
00766     return NULL;
00767 }
00768 
00769 Error* process_svm_dtd_opt(Option_arg arg)
00770 {
00771     if (arg == NULL)
00772     {
00773         return JWSC_EARG("Option 'svm-dtd' requires an argument");
00774     }
00775     opts.svm_dtd_fname = arg;
00776 
00777     return NULL;
00778 }
00779 #endif
00780 
00781 #ifdef HAPLO_ENABLE_WEKA
00782 Error* process_weka_j48_opt(Option_arg arg)
00783 {
00784     if (arg == NULL)
00785     {
00786         return JWSC_EARG("Option 'weka-j48' requires an argument");
00787     }
00788     opts.weka_j48_fname = arg;
00789 
00790     return NULL;
00791 }
00792 
00793 Error* process_weka_part_opt(Option_arg arg)
00794 {
00795     if (arg == NULL)
00796     {
00797         return JWSC_EARG("Option 'weka-part' requires an argument");
00798     }
00799     opts.weka_part_fname = arg;
00800 
00801     return NULL;
00802 }
00803 
00804 Error* process_weka_jar_opt(Option_arg arg)
00805 {
00806     if (arg == NULL)
00807     {
00808         return JWSC_EARG("Option 'weka-jar' requires an argument");
00809     }
00810     opts.weka_jar_fname = arg;
00811 
00812     return NULL;
00813 }
00814 
00815 Error* process_weka_dtd_opt(Option_arg arg)
00816 {
00817     if (arg == NULL)
00818     {
00819         return JWSC_EARG("Option 'weka-dtd' requires an argument");
00820     }
00821     opts.weka_dtd_fname = arg;
00822 
00823     return NULL;
00824 }
00825 #endif
00826 
00827 Error* process_nearest_max_d_opt(Option_arg arg)
00828 {
00829     if (arg == NULL)
00830     {
00831         return JWSC_EARG("Option 'nearest-max-d' requires an argument");
00832     }
00833     if (sscanf(arg, "%u", &(opts.nearest_max_d)) < 1)
00834     {
00835         return JWSC_EARG("Option 'nearest-max-d' must be > 0");
00836     }
00837 
00838     return NULL;
00839 }
00840 
00841 Error* process_nearest_opt(Option_arg arg)
00842 {
00843     if (arg == NULL)
00844     {
00845         return JWSC_EARG("Option 'nearest' requires an argument");
00846     }
00847     opts.nearest_fname = arg;
00848 
00849     return NULL;
00850 }
00851 
00852 Error* process_nearest_dtd_opt(Option_arg arg)
00853 {
00854     if (arg == NULL)
00855     {
00856         return JWSC_EARG("Option 'nearest-dtd' requires an argument");
00857     }
00858     opts.nearest_dtd_fname = arg;
00859 
00860     return NULL;
00861 }
00862 
00863 Error* process_bmm_tree_opt(Option_arg arg)
00864 {
00865     if (arg == NULL)
00866     {
00867         return JWSC_EARG("Option 'bmm-tree' requires an argument");
00868     }
00869     opts.bmm_tree_fname = arg;
00870 
00871     return NULL;
00872 }
00873 
00874 Error* process_bmm_info_opt(Option_arg arg)
00875 {
00876     if (arg == NULL)
00877     {
00878         return JWSC_EARG("Option 'bmm-info' requires an argument");
00879     }
00880     opts.bmm_info_fname = arg;
00881 
00882     return NULL;
00883 }
00884 
00885 Error* process_bmm_conf_thresh_opt(Option_arg arg)
00886 {
00887     if (arg == NULL)
00888     {
00889         return JWSC_EARG("Option 'bmm-conf-thresh' requires an argument");
00890     }
00891     opts.bmm_conf_thresh_fname = arg;
00892 
00893     return NULL;
00894 }
00895 
00896 Error* process_gmm_bmm_tree_opt(Option_arg arg)
00897 {
00898     if (arg == NULL)
00899     {
00900         return JWSC_EARG("Option 'gmm-bmm-tree' requires an argument");
00901     }
00902     opts.gmm_bmm_tree_fname = arg;
00903 
00904     return NULL;
00905 }
00906 
00907 Error* process_gmm_bmm_info_opt(Option_arg arg)
00908 {
00909     if (arg == NULL)
00910     {
00911         return JWSC_EARG("Option 'gmm-bmm-info' requires an argument");
00912     }
00913     opts.gmm_bmm_info_fname = arg;
00914 
00915     return NULL;
00916 }
00917 
00918 Error* process_gmm_bmm_conf_thresh_opt(Option_arg arg)
00919 {
00920     if (arg == NULL)
00921     {
00922         return JWSC_EARG("Option 'gmm-bmm-conf-thresh' requires an argument");
00923     }
00924     opts.gmm_bmm_conf_thresh_fname = arg;
00925 
00926     return NULL;
00927 }
00928 
00929 void init_options(Option_no_arg* opts_no_arg, Option_with_arg* opts_with_arg)
00930 {
00931     uint32_t i;
00932 
00933     char s_name;
00934     const char* l_name;
00935     const char* desc;
00936 
00937     Error* (*fnoarg)();
00938     Error* (*farg)(const char*);
00939 
00940     opts.header_in               = HEADER_IN;
00941     opts.header_out              = HEADER_OUT;
00942     opts.exclude_one             = EXCLUDE_ONE;
00943     opts.num_threads             = NUM_THREADS;
00944     opts.seed                    = 0;
00945     opts.input_format            = INPUT_FORMAT;
00946     opts.input_dtd_fname         = INPUT_DTD_FNAME;
00947     opts.output_format           = OUTPUT_FORMAT;
00948     opts.labels_fname            = INPUT_DTD_FNAME;
00949     opts.labels_dtd_fname        = INPUT_DTD_FNAME;
00950     opts.id_cols                 = NULL;
00951     opts.first_marker_col        = FIRST_MARKER_COL;
00952     opts.num_markers             = NUM_MARKERS;
00953     opts.marker_cols             = NULL;
00954     opts.aux_id_cols             = NULL;
00955     opts.aux_label_col           = AUX_LABEL_COL;
00956     opts.aux_first_marker_col    = AUX_FIRST_MARKER_COL;
00957     opts.aux_num_markers         = AUX_NUM_MARKERS;
00958     opts.aux_marker_cols         = NULL;
00959     opts.num_gmm_markers         = NUM_MARKERS;
00960     opts.nb_freq_fname           = NB_FREQ_FNAME;
00961     opts.nb_freq_dtd_fname       = NB_FREQ_DTD_FNAME;
00962     opts.nb_gauss_fname          = NB_GAUSS_FNAME;
00963     opts.nb_gauss_dtd_fname      = NB_GAUSS_DTD_FNAME;
00964     opts.nb_gmm_fname            = NB_GMM_FNAME;
00965     opts.nb_gmm_dtd_fname        = NB_GMM_DTD_FNAME;
00966     opts.mv_gmm_fname            = MV_GMM_FNAME;
00967     opts.mv_gmm_dtd_fname        = MV_GMM_DTD_FNAME;
00968     opts.mv_mmm_fname            = MV_MMM_FNAME;
00969     opts.mv_mmm_dtd_fname        = MV_MMM_DTD_FNAME;
00970 #ifdef HAPLO_ENABLE_SVM
00971     opts.svm_fname               = SVM_FNAME;
00972     opts.svm_dtd_fname           = SVM_DTD_FNAME;
00973 #endif
00974 #ifdef HAPLO_ENABLE_WEKA
00975     opts.weka_j48_fname          = WEKA_J48_FNAME;
00976     opts.weka_part_fname         = WEKA_PART_FNAME;
00977     opts.weka_jar_fname          = WEKA_JAR_FNAME;
00978     opts.weka_dtd_fname          = WEKA_DTD_FNAME;
00979 #endif
00980     opts.nearest_fname             = NEAREST_FNAME;
00981     opts.nearest_dtd_fname         = NEAREST_DTD_FNAME;
00982     opts.nearest_max_d             = NEAREST_MAX_D;
00983     opts.bmm_tree_fname            = BMM_TREE_FNAME;
00984     opts.bmm_info_fname            = BMM_INFO_FNAME;
00985     opts.bmm_conf_thresh_fname     = BMM_CONF_THRESH_FNAME;
00986     opts.gmm_bmm_tree_fname        = GMM_BMM_TREE_FNAME;
00987     opts.gmm_bmm_info_fname        = GMM_BMM_INFO_FNAME;
00988     opts.gmm_bmm_conf_thresh_fname = GMM_BMM_CONF_THRESH_FNAME;
00989     opts.model_dirname             = MODEL_DIRNAME;
00990 
00991     i = 0;
00992     l_name = "help";
00993     s_name = 'h';
00994     desc   = "Prints program usage.";
00995     fnoarg = process_help_opt;
00996     init_option_no_arg(&(opts_no_arg[i++]), l_name, s_name, desc, fnoarg);
00997 
00998     l_name = "version";
00999     s_name = 'v';
01000     desc   = "Prints program version.";
01001     fnoarg = process_version_opt;
01002     init_option_no_arg(&(opts_no_arg[i++]), l_name, s_name, desc, fnoarg);
01003 
01004     l_name = "header-in";
01005     s_name = 0;
01006     desc   = "The input data contains a header (descriptive) line, which should be discarded.";
01007     fnoarg = process_header_in_opt;
01008     init_option_no_arg(&(opts_no_arg[i++]), l_name, s_name, desc, fnoarg);
01009 
01010     l_name = "header-out";
01011     s_name = 0;
01012     desc   = "Write a header (descriptive) line to the first line of the output results.";
01013     fnoarg = process_header_out_opt;
01014     init_option_no_arg(&(opts_no_arg[i++]), l_name, s_name, desc, fnoarg);
01015     assert(i == NUM_SHARED_OPTS_NO_ARG);
01016 
01017 
01018     i = 0;
01019     l_name = "options";
01020     s_name = 0;
01021     desc   = "File containing program options. Any options appearing on the command line following this option take precendence over those in the options file.";
01022     farg   = process_options_opt;
01023     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01024 
01025 #ifdef HAPLO_HAVE_PTHREAD
01026     l_name = "num-threads";
01027     s_name = 0;
01028     desc   = "Number of concurrent threads to use at any given time.";
01029     farg   = process_num_threads_opt;
01030     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01031 #endif
01032 
01033     l_name = "seed";
01034     s_name = 0;
01035     desc   = "Random seed.";
01036     farg   = process_seed_opt;
01037     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01038 
01039     l_name = "input-format";
01040     s_name = 0;
01041     desc   = "Input file format. Must be one of {txt, csv, xml}. If the input is XML, it must conform to the XML DTD haplo-input.dtd.";
01042     farg   = process_input_format_opt;
01043     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01044 
01045     l_name = "input-dtd";
01046     s_name = 0;
01047     desc   = "If the input format is XML, validate it with this DTD.";
01048     farg   = process_input_dtd_opt;
01049     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01050 
01051     l_name = "output-format";
01052     s_name = 0;
01053     desc   = "Output file format. Must be one of {txt, csv, xml}.";
01054     farg   = process_output_format_opt;
01055     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01056 
01057     l_name = "labels";
01058     s_name = 0;
01059     desc   = "XML file containing the organization and listing of possible haplo groups labels for the samples. Must conform to the XML DTD haplo-labels.dtd.";
01060     farg   = process_labels_opt;
01061     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01062 
01063     l_name = "labels-dtd";
01064     s_name = 0;
01065     desc   = "Validate the XML labels file with this DTD.";
01066     farg   = process_labels_dtd_opt;
01067     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01068 
01069     l_name = "id-cols";
01070     s_name = 0;
01071     desc   = "Comma separated ordered list of columns to use for sample identification. Prefixes the output of each sample. Count begins with 1 at the first column of the file. Set to zero to ignore the id column";
01072     farg   = process_id_cols_opt;
01073     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01074 
01075     l_name = "label-col";
01076     s_name = 0;
01077     desc   = "Column containing the haplo group labels. Count begins with 1 at the first column of the file. Set to zero to ignore the label column.";
01078     farg   = process_label_col_opt;
01079     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01080 
01081     l_name = "1st-marker-col";
01082     s_name = 0;
01083     desc   = "Column containing the first marker. Use in conjunction with num-markers to specify the markers for reading. All other markers are assumed to follow this one. Count begins with 1.";
01084     farg   = process_first_marker_col_opt;
01085     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01086 
01087     l_name = "num-markers";
01088     s_name = 0;
01089     desc   = "Number of markers to read. Use in conjunction with 1st-marker-col to specify the markers for reading.";
01090     farg   = process_num_markers_opt;
01091     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01092 
01093     l_name = "marker-cols";
01094     s_name = 0;
01095     desc   = "Comma separated ordered list of markers to use for training. Use instead of 1st-marker-col and num-markers. Count begins with 1 at the first column of the CSV file.";
01096     farg   = process_marker_cols_opt;
01097     init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg);
01098     assert(i == NUM_SHARED_OPTS_WITH_ARG);
01099 }