Haplo Prediction
predict haplogroups
|
00001 /* 00002 * This work is licensed under a Creative Commons 00003 * Attribution-Noncommercial-Share Alike 3.0 United States License. 00004 * 00005 * http://creativecommons.org/licenses/by-nc-sa/3.0/us/ 00006 * 00007 * You are free: 00008 * 00009 * to Share - to copy, distribute, display, and perform the work 00010 * to Remix - to make derivative works 00011 * 00012 * Under the following conditions: 00013 * 00014 * Attribution. You must attribute the work in the manner specified by the 00015 * author or licensor (but not in any way that suggests that they endorse you 00016 * or your use of the work). 00017 * 00018 * Noncommercial. You may not use this work for commercial purposes. 00019 * 00020 * Share Alike. If you alter, transform, or build upon this work, you may 00021 * distribute the resulting work only under the same or similar license to 00022 * this one. 00023 * 00024 * For any reuse or distribution, you must make clear to others the license 00025 * terms of this work. The best way to do this is by including this header. 00026 * 00027 * Any of the above conditions can be waived if you get permission from the 00028 * copyright holder. 00029 * 00030 * Apart from the remix rights granted under this license, nothing in this 00031 * license impairs or restricts the author's moral rights. 00032 */ 00033 00034 00046 #include <config.h> 00047 00048 #include <stdlib.h> 00049 #include <stdio.h> 00050 #include <string.h> 00051 #include <inttypes.h> 00052 #include <assert.h> 00053 00054 #ifdef HAPLO_HAVE_DMALLOC 00055 #include <dmalloc.h> 00056 #endif 00057 00058 #include <jwsc/base/error.h> 00059 #include <jwsc/base/option.h> 00060 #include <jwsc/vector/vector.h> 00061 00062 #include "haplo_groups.h" 00063 #include "options.h" 00064 00065 00066 #define HEADER_IN 0 00067 #define HEADER_OUT 0 00068 #define EXCLUDE_ONE 0 00069 #define NUM_THREADS 1 00070 #define INPUT_FORMAT HAPLO_INPUT_TXT 00071 #define INPUT_DTD_FNAME 0 00072 #define OUTPUT_FORMAT HAPLO_OUTPUT_TXT 00073 #define LABELS_FNAME 0 00074 #define LABELS_DTD_FNAME 0 00075 #define FIRST_MARKER_COL 2 00076 #define NUM_MARKERS 9 00077 #define AUX_INPUT_FNAME 0 00078 #define AUX_LABEL_COL 0 00079 #define AUX_FIRST_MARKER_COL 2 00080 #define AUX_NUM_MARKERS 9 00081 #define NB_FREQ_FNAME 0 00082 #define NB_FREQ_DTD_FNAME 0 00083 #define NB_GAUSS_FNAME 0 00084 #define NB_GAUSS_DTD_FNAME 0 00085 #define NB_GMM_FNAME 0 00086 #define NB_GMM_DTD_FNAME 0 00087 #define MV_GMM_FNAME 0 00088 #define MV_GMM_DTD_FNAME 0 00089 #define MV_MMM_FNAME 0 00090 #define MV_MMM_DTD_FNAME 0 00091 #define SVM_FNAME 0 00092 #define SVM_DTD_FNAME 0 00093 #define WEKA_J48_FNAME 0 00094 #define WEKA_PART_FNAME 0 00095 #define WEKA_JAR_FNAME 0 00096 #define WEKA_DTD_FNAME 0 00097 #define NEAREST_FNAME 0 00098 #define NEAREST_DTD_FNAME 0 00099 #define NEAREST_MAX_D 1 00100 #define BMM_TREE_FNAME 0 00101 #define BMM_INFO_FNAME 0 00102 #define BMM_CONF_THRESH_FNAME 0 00103 #define GMM_BMM_TREE_FNAME 0 00104 #define GMM_BMM_INFO_FNAME 0 00105 #define GMM_BMM_CONF_THRESH_FNAME 0 00106 #define MODEL_DIRNAME "." 00107 00108 00109 extern Option_no_arg opts_no_arg[]; 00110 extern Option_with_arg opts_with_arg[]; 00111 extern uint32_t get_num_opts_no_arg(); 00112 extern uint32_t get_num_opts_with_arg(); 00113 extern void print_usage(); 00114 00115 00120 uint8_t sequential_markers = 0; 00121 00122 00127 uint8_t aux_sequential_markers = 0; 00128 00129 00131 Error* process_help_opt() 00132 { 00133 print_usage(); 00134 exit(EXIT_SUCCESS); 00135 return NULL; 00136 } 00137 00139 Error* process_version_opt() 00140 { 00141 fprintf(stderr, "%s\n", HAPLO_PACKAGE_STRING); 00142 exit(EXIT_SUCCESS); 00143 return NULL; 00144 } 00145 00147 Error* process_header_in_opt() 00148 { 00149 opts.header_in = 1; 00150 return NULL; 00151 } 00152 00154 Error* process_header_out_opt() 00155 { 00156 opts.header_out = 1; 00157 return NULL; 00158 } 00159 00160 Error* process_exclude_one_opt() 00161 { 00162 opts.exclude_one = 1; 00163 return NULL; 00164 } 00165 00167 Error* process_options_opt(Option_arg arg) 00168 { 00169 Error* err; 00170 int slen = 256; 00171 char s[slen]; 00172 00173 if (arg == NULL) 00174 { 00175 return JWSC_EARG("Option 'options' requires an argument"); 00176 } 00177 00178 if ((err = process_options_from_file(arg, get_num_opts_no_arg(), 00179 opts_no_arg, get_num_opts_with_arg(), opts_with_arg)) 00180 != NULL) 00181 { 00182 snprintf(s, slen, "%s: %s", arg, err->msg); 00183 return JWSC_EARG(s); 00184 } 00185 00186 return NULL; 00187 } 00188 00190 Error* process_num_threads_opt(Option_arg arg) 00191 { 00192 if (arg == NULL) 00193 { 00194 return JWSC_EARG("Option 'num-threads' requires an argument"); 00195 } 00196 if (sscanf(arg, "%u", &(opts.num_threads)) != 1 || 00197 opts.num_threads < 1) 00198 { 00199 return JWSC_EARG("Option 'num-threads' must be > 0"); 00200 } 00201 00202 return NULL; 00203 } 00204 00206 Error* process_seed_opt(Option_arg arg) 00207 { 00208 if (arg == NULL) 00209 { 00210 return JWSC_EARG("Option 'seed' requires an argument"); 00211 } 00212 if (sscanf(arg, "%u", &(opts.seed)) != 1) 00213 { 00214 return JWSC_EARG("Option 'seed' requires an argument"); 00215 } 00216 00217 srand(opts.seed); 00218 00219 return NULL; 00220 } 00221 00223 static Error* process_input_format_opt(Option_arg arg) 00224 { 00225 if (arg == NULL) 00226 { 00227 return JWSC_EARG("Option 'input-format' requires an argument"); 00228 } 00229 if (strncmp(arg, "txt", 3) == 0) 00230 { 00231 opts.input_format = HAPLO_INPUT_TXT; 00232 } 00233 else if (strncmp(arg, "csv", 3) == 0) 00234 { 00235 opts.input_format = HAPLO_INPUT_CSV; 00236 } 00237 else if (strncmp(arg, "xml", 3) == 0) 00238 { 00239 opts.input_format = HAPLO_INPUT_XML; 00240 } 00241 else 00242 { 00243 return JWSC_EARG("Option 'input-format' must be one of {txt, csv, xml}"); 00244 } 00245 return NULL; 00246 } 00247 00249 Error* process_input_dtd_opt(Option_arg arg) 00250 { 00251 if (arg == NULL) 00252 { 00253 return JWSC_EARG("Option 'input-dtd' requires an argument"); 00254 } 00255 opts.input_dtd_fname = arg; 00256 00257 return NULL; 00258 } 00259 00261 static Error* process_output_format_opt(Option_arg arg) 00262 { 00263 if (arg == NULL) 00264 { 00265 return JWSC_EARG("Option 'output-format' requires an argument"); 00266 } 00267 if (strncmp(arg, "txt", 3) == 0) 00268 { 00269 opts.output_format = HAPLO_OUTPUT_TXT; 00270 } 00271 else if (strncmp(arg, "csv", 3) == 0) 00272 { 00273 opts.output_format = HAPLO_OUTPUT_CSV; 00274 } 00275 else if (strncmp(arg, "xml", 3) == 0) 00276 { 00277 opts.output_format = HAPLO_OUTPUT_XML; 00278 } 00279 else 00280 { 00281 return JWSC_EARG("Option 'output-format' must be one of {txt, csv, xml}"); 00282 } 00283 return NULL; 00284 } 00285 00287 Error* process_labels_opt(Option_arg arg) 00288 { 00289 if (arg == NULL) 00290 { 00291 return JWSC_EARG("Option 'labels' requires an argument"); 00292 } 00293 opts.labels_fname = arg; 00294 00295 return NULL; 00296 } 00297 00299 Error* process_labels_dtd_opt(Option_arg arg) 00300 { 00301 if (arg == NULL) 00302 { 00303 return JWSC_EARG("Option 'labels-dtd' requires an argument"); 00304 } 00305 opts.labels_dtd_fname = arg; 00306 00307 return NULL; 00308 } 00309 00311 Error* process_id_cols_opt(Option_arg arg) 00312 { 00313 uint32_t num_ids; 00314 uint32_t id_col; 00315 uint32_t n; 00316 char* str; 00317 char* token; 00318 00319 if (arg == NULL) 00320 { 00321 return JWSC_EARG("Option 'id-cols' requires an argument"); 00322 } 00323 00324 n = strlen(arg); 00325 str = malloc((n+1)*sizeof(char)); 00326 strncpy(str, arg, n); 00327 str[ n ] = 0; 00328 token = str; 00329 00330 free_vector_u32(opts.id_cols); 00331 opts.id_cols = NULL; 00332 num_ids = 0; 00333 00334 while ((token = strtok(token, ",")) != NULL) 00335 { 00336 if (sscanf(token, "%u", &id_col) != 1) 00337 { 00338 return JWSC_EARG("Option 'id-cols' must be >= 0"); 00339 } 00340 if (num_ids == 0 && id_col == 0) 00341 { 00342 break; 00343 } 00344 else if (num_ids > 0 && id_col == 0) 00345 { 00346 return JWSC_EARG("Option 'id-cols': IDs must be > 0"); 00347 } 00348 create_vector_u32(&(opts.id_cols), num_ids+1); 00349 opts.id_cols->elts[ num_ids++ ] = id_col; 00350 token = NULL; 00351 } 00352 00353 free(str); 00354 00355 return NULL; 00356 } 00357 00359 Error* process_label_col_opt(Option_arg arg) 00360 { 00361 if (arg == NULL) 00362 { 00363 return JWSC_EARG("Option 'label-col' requires an argument"); 00364 } 00365 if (sscanf(arg, "%u", &(opts.label_col)) != 1) 00366 { 00367 return JWSC_EARG("Option 'label-col' must be >= 0"); 00368 } 00369 00370 return NULL; 00371 } 00372 00374 Error* process_first_marker_col_opt(Option_arg arg) 00375 { 00376 if (opts.marker_cols) 00377 { 00378 return JWSC_EARG("Option '1st-marker-col' cannot be used with 'marker-cols'"); 00379 } 00380 if (arg == NULL) 00381 { 00382 return JWSC_EARG("Option '1st-marker-col' requires an argument"); 00383 } 00384 if (sscanf(arg, "%u", &(opts.first_marker_col)) != 1 || 00385 opts.first_marker_col < 1) 00386 { 00387 return JWSC_EARG("Option '1st-marker-col' must be > 0"); 00388 } 00389 sequential_markers = 1; 00390 00391 return NULL; 00392 } 00393 00395 Error* process_num_markers_opt(Option_arg arg) 00396 { 00397 if (opts.marker_cols) 00398 { 00399 return JWSC_EARG("Option 'num-markers' cannot be used with 'marker-cols'"); 00400 } 00401 if (arg == NULL) 00402 { 00403 return JWSC_EARG("Option 'num-markers' requires an argument"); 00404 } 00405 if (sscanf(arg, "%u", &(opts.num_markers)) != 1 || opts.num_markers < 1) 00406 { 00407 return JWSC_EARG("Option 'num-markers' must be > 0"); 00408 } 00409 sequential_markers = 1; 00410 00411 return NULL; 00412 } 00413 00415 Error* process_marker_cols_opt(Option_arg arg) 00416 { 00417 uint32_t n; 00418 uint32_t m; 00419 char* str; 00420 char* token; 00421 00422 if (sequential_markers) 00423 { 00424 return JWSC_EARG("Option 'marker-cols' cannot be used with 'num-markers' or 'marker-col'"); 00425 } 00426 00427 if (arg == NULL) 00428 { 00429 return JWSC_EARG("Option 'marker-cols' requires an argument"); 00430 } 00431 00432 n = strlen(arg); 00433 str = malloc((n+1)*sizeof(char)); 00434 strncpy(str, arg, n); 00435 str[ n ] = 0; 00436 token = str; 00437 00438 opts.num_markers = 0; 00439 00440 while ((token = strtok(token, ",")) != NULL) 00441 { 00442 if (sscanf(token, "%u", &m) != 1 || m < 1) 00443 { 00444 return JWSC_EARG("Option 'marker-cols': Invalid marker"); 00445 } 00446 create_vector_u32(&(opts.marker_cols), opts.num_markers+1); 00447 opts.marker_cols->elts[ opts.num_markers++ ] = m; 00448 token = NULL; 00449 } 00450 00451 free(str); 00452 00453 if (opts.num_markers == 0) 00454 { 00455 return JWSC_EARG("Option 'marker_cols' must number > 0"); 00456 } 00457 opts.first_marker_col = opts.marker_cols->elts[ 0 ]; 00458 00459 return NULL; 00460 } 00461 00462 Error* process_aux_input_opt(Option_arg arg) 00463 { 00464 if (arg == NULL) 00465 { 00466 return JWSC_EARG("Option 'aux-input' requires an argument"); 00467 } 00468 opts.aux_input_fname = arg; 00469 00470 return NULL; 00471 } 00472 00473 Error* process_aux_id_cols_opt(Option_arg arg) 00474 { 00475 uint32_t num_ids; 00476 uint32_t id_col; 00477 uint32_t n; 00478 char* str; 00479 char* token; 00480 00481 if (arg == NULL) 00482 { 00483 return JWSC_EARG("Option 'aux-id-cols' requires an argument"); 00484 } 00485 00486 n = strlen(arg); 00487 str = malloc((n+1)*sizeof(char)); 00488 strncpy(str, arg, n); 00489 str[ n ] = 0; 00490 token = str; 00491 00492 num_ids = 0; 00493 00494 while ((token = strtok(token, ",")) != NULL) 00495 { 00496 if (sscanf(token, "%u", &id_col) != 1) 00497 { 00498 return JWSC_EARG("Option 'aux-id-cols' must be >= 0"); 00499 } 00500 if (num_ids == 0 && id_col == 0) 00501 { 00502 break; 00503 } 00504 else if (num_ids > 0 && id_col == 0) 00505 { 00506 return JWSC_EARG("Option 'aux-id-cols': IDs must be > 0"); 00507 } 00508 create_vector_u32(&(opts.aux_id_cols), num_ids+1); 00509 opts.aux_id_cols->elts[ num_ids++ ] = id_col; 00510 token = NULL; 00511 } 00512 00513 free(str); 00514 00515 return NULL; 00516 } 00517 00518 Error* process_aux_label_col_opt(Option_arg arg) 00519 { 00520 if (arg == NULL) 00521 { 00522 return JWSC_EARG("Option 'aux-label-col' requires an argument"); 00523 } 00524 if (sscanf(arg, "%u", &(opts.aux_label_col)) != 1) 00525 { 00526 return JWSC_EARG("Option 'aux-label-col' must be >= 0"); 00527 } 00528 00529 return NULL; 00530 } 00531 00532 Error* process_aux_first_marker_col_opt(Option_arg arg) 00533 { 00534 if (opts.aux_marker_cols) 00535 { 00536 return JWSC_EARG("Option 'aux-1st-marker-col' cannot be used with 'aux-marker-cols'"); 00537 } 00538 if (arg == NULL) 00539 { 00540 return JWSC_EARG("Option 'aux-1st-marker-col' requires an argument"); 00541 } 00542 if (sscanf(arg, "%u", &(opts.aux_first_marker_col)) != 1 || 00543 opts.aux_first_marker_col < 1) 00544 { 00545 return JWSC_EARG("Option 'aux-1st-marker-col' must be > 0"); 00546 } 00547 aux_sequential_markers = 1; 00548 00549 return NULL; 00550 } 00551 00552 Error* process_aux_num_markers_opt(Option_arg arg) 00553 { 00554 if (opts.aux_marker_cols) 00555 { 00556 return JWSC_EARG("Option 'aux-num-markers' cannot be used with 'aux-marker-cols'"); 00557 } 00558 if (arg == NULL) 00559 { 00560 return JWSC_EARG("Option 'aux-num-markers' requires an argument"); 00561 } 00562 if (sscanf(arg, "%u", &(opts.aux_num_markers)) != 1 || 00563 opts.aux_num_markers < 1) 00564 { 00565 return JWSC_EARG("Option 'aux-num-markers' must be > 0"); 00566 } 00567 aux_sequential_markers = 1; 00568 00569 return NULL; 00570 } 00571 00572 Error* process_aux_marker_cols_opt(Option_arg arg) 00573 { 00574 uint32_t n; 00575 uint32_t m; 00576 char* str; 00577 char* token; 00578 00579 if (aux_sequential_markers) 00580 { 00581 return JWSC_EARG("Option 'aux-marker-cols' cannot be used with 'aux-num-markers' or 'aux-marker-col'"); 00582 } 00583 00584 if (arg == NULL) 00585 { 00586 return JWSC_EARG("Option 'aux-marker-cols' requires an argument"); 00587 } 00588 00589 n = strlen(arg); 00590 str = malloc((n+1)*sizeof(char)); 00591 strncpy(str, arg, n); 00592 str[ n ] = 0; 00593 token = str; 00594 00595 opts.aux_num_markers = 0; 00596 00597 while ((token = strtok(token, ",")) != NULL) 00598 { 00599 if (sscanf(token, "%u", &m) != 1 || m < 1) 00600 { 00601 return JWSC_EARG("Option 'aux-marker-cols': Invalid marker"); 00602 } 00603 create_vector_u32(&(opts.aux_marker_cols), opts.aux_num_markers+1); 00604 opts.aux_marker_cols->elts[ opts.aux_num_markers++ ] = m; 00605 token = NULL; 00606 } 00607 00608 free(str); 00609 00610 if (opts.aux_num_markers == 0) 00611 { 00612 return JWSC_EARG("Option 'aux-marker-cols' must number > 0"); 00613 } 00614 opts.aux_first_marker_col = opts.aux_marker_cols->elts[ 0 ]; 00615 00616 return NULL; 00617 } 00618 00619 Error* process_num_gmm_bmm_markers_opt(Option_arg arg) 00620 { 00621 uint32_t n,m; 00622 00623 if (arg == NULL) 00624 { 00625 return JWSC_EARG("Option 'num-gmm-bmm-markers' requires an argument"); 00626 } 00627 if (sscanf(arg, "%d,%d", &n, &m) < 2) 00628 { 00629 return JWSC_EARG("Option 'num-markers' has format gmm,bmm"); 00630 } 00631 opts.num_gmm_markers = (uint32_t)n; 00632 00633 return NULL; 00634 } 00635 00636 Error* process_model_dir_opt(Option_arg arg) 00637 { 00638 if (arg == NULL) 00639 { 00640 return JWSC_EARG("Option 'model-dir' requires an argument"); 00641 } 00642 opts.model_dirname = arg; 00643 00644 return NULL; 00645 } 00646 00647 Error* process_nb_freq_opt(Option_arg arg) 00648 { 00649 if (arg == NULL) 00650 { 00651 return JWSC_EARG("Option 'nb-freq' requires an argument"); 00652 } 00653 opts.nb_freq_fname = arg; 00654 00655 return NULL; 00656 } 00657 00658 Error* process_nb_freq_dtd_opt(Option_arg arg) 00659 { 00660 if (arg == NULL) 00661 { 00662 return JWSC_EARG("Option 'nb-freq-dtd' requires an argument"); 00663 } 00664 opts.nb_freq_dtd_fname = arg; 00665 00666 return NULL; 00667 } 00668 00669 Error* process_nb_gauss_opt(Option_arg arg) 00670 { 00671 if (arg == NULL) 00672 { 00673 return JWSC_EARG("Option 'nb-gauss' requires an argument"); 00674 } 00675 opts.nb_gauss_fname = arg; 00676 00677 return NULL; 00678 } 00679 00680 Error* process_nb_gauss_dtd_opt(Option_arg arg) 00681 { 00682 if (arg == NULL) 00683 { 00684 return JWSC_EARG("Option 'nb-gauss-dtd' requires an argument"); 00685 } 00686 opts.nb_gauss_dtd_fname = arg; 00687 00688 return NULL; 00689 } 00690 00691 Error* process_nb_gmm_opt(Option_arg arg) 00692 { 00693 if (arg == NULL) 00694 { 00695 return JWSC_EARG("Option 'nb-gmm' requires an argument"); 00696 } 00697 opts.nb_gmm_fname = arg; 00698 00699 return NULL; 00700 } 00701 00702 Error* process_nb_gmm_dtd_opt(Option_arg arg) 00703 { 00704 if (arg == NULL) 00705 { 00706 return JWSC_EARG("Option 'nb-gmm-dtd' requires an argument"); 00707 } 00708 opts.nb_gmm_dtd_fname = arg; 00709 00710 return NULL; 00711 } 00712 00713 Error* process_mv_gmm_opt(Option_arg arg) 00714 { 00715 if (arg == NULL) 00716 { 00717 return JWSC_EARG("Option 'mv-gmm' requires an argument"); 00718 } 00719 opts.mv_gmm_fname = arg; 00720 00721 return NULL; 00722 } 00723 00724 Error* process_mv_gmm_dtd_opt(Option_arg arg) 00725 { 00726 if (arg == NULL) 00727 { 00728 return JWSC_EARG("Option 'mv-gmm-dtd' requires an argument"); 00729 } 00730 opts.mv_gmm_dtd_fname = arg; 00731 00732 return NULL; 00733 } 00734 00735 Error* process_mv_mmm_opt(Option_arg arg) 00736 { 00737 if (arg == NULL) 00738 { 00739 return JWSC_EARG("Option 'mv-mmm' requires an argument"); 00740 } 00741 opts.mv_mmm_fname = arg; 00742 00743 return NULL; 00744 } 00745 00746 Error* process_mv_mmm_dtd_opt(Option_arg arg) 00747 { 00748 if (arg == NULL) 00749 { 00750 return JWSC_EARG("Option 'mv-mmm-dtd' requires an argument"); 00751 } 00752 opts.mv_mmm_dtd_fname = arg; 00753 00754 return NULL; 00755 } 00756 00757 #ifdef HAPLO_ENABLE_SVM 00758 Error* process_svm_opt(Option_arg arg) 00759 { 00760 if (arg == NULL) 00761 { 00762 return JWSC_EARG("Option 'svm' requires an argument"); 00763 } 00764 opts.svm_fname = arg; 00765 00766 return NULL; 00767 } 00768 00769 Error* process_svm_dtd_opt(Option_arg arg) 00770 { 00771 if (arg == NULL) 00772 { 00773 return JWSC_EARG("Option 'svm-dtd' requires an argument"); 00774 } 00775 opts.svm_dtd_fname = arg; 00776 00777 return NULL; 00778 } 00779 #endif 00780 00781 #ifdef HAPLO_ENABLE_WEKA 00782 Error* process_weka_j48_opt(Option_arg arg) 00783 { 00784 if (arg == NULL) 00785 { 00786 return JWSC_EARG("Option 'weka-j48' requires an argument"); 00787 } 00788 opts.weka_j48_fname = arg; 00789 00790 return NULL; 00791 } 00792 00793 Error* process_weka_part_opt(Option_arg arg) 00794 { 00795 if (arg == NULL) 00796 { 00797 return JWSC_EARG("Option 'weka-part' requires an argument"); 00798 } 00799 opts.weka_part_fname = arg; 00800 00801 return NULL; 00802 } 00803 00804 Error* process_weka_jar_opt(Option_arg arg) 00805 { 00806 if (arg == NULL) 00807 { 00808 return JWSC_EARG("Option 'weka-jar' requires an argument"); 00809 } 00810 opts.weka_jar_fname = arg; 00811 00812 return NULL; 00813 } 00814 00815 Error* process_weka_dtd_opt(Option_arg arg) 00816 { 00817 if (arg == NULL) 00818 { 00819 return JWSC_EARG("Option 'weka-dtd' requires an argument"); 00820 } 00821 opts.weka_dtd_fname = arg; 00822 00823 return NULL; 00824 } 00825 #endif 00826 00827 Error* process_nearest_max_d_opt(Option_arg arg) 00828 { 00829 if (arg == NULL) 00830 { 00831 return JWSC_EARG("Option 'nearest-max-d' requires an argument"); 00832 } 00833 if (sscanf(arg, "%u", &(opts.nearest_max_d)) < 1) 00834 { 00835 return JWSC_EARG("Option 'nearest-max-d' must be > 0"); 00836 } 00837 00838 return NULL; 00839 } 00840 00841 Error* process_nearest_opt(Option_arg arg) 00842 { 00843 if (arg == NULL) 00844 { 00845 return JWSC_EARG("Option 'nearest' requires an argument"); 00846 } 00847 opts.nearest_fname = arg; 00848 00849 return NULL; 00850 } 00851 00852 Error* process_nearest_dtd_opt(Option_arg arg) 00853 { 00854 if (arg == NULL) 00855 { 00856 return JWSC_EARG("Option 'nearest-dtd' requires an argument"); 00857 } 00858 opts.nearest_dtd_fname = arg; 00859 00860 return NULL; 00861 } 00862 00863 Error* process_bmm_tree_opt(Option_arg arg) 00864 { 00865 if (arg == NULL) 00866 { 00867 return JWSC_EARG("Option 'bmm-tree' requires an argument"); 00868 } 00869 opts.bmm_tree_fname = arg; 00870 00871 return NULL; 00872 } 00873 00874 Error* process_bmm_info_opt(Option_arg arg) 00875 { 00876 if (arg == NULL) 00877 { 00878 return JWSC_EARG("Option 'bmm-info' requires an argument"); 00879 } 00880 opts.bmm_info_fname = arg; 00881 00882 return NULL; 00883 } 00884 00885 Error* process_bmm_conf_thresh_opt(Option_arg arg) 00886 { 00887 if (arg == NULL) 00888 { 00889 return JWSC_EARG("Option 'bmm-conf-thresh' requires an argument"); 00890 } 00891 opts.bmm_conf_thresh_fname = arg; 00892 00893 return NULL; 00894 } 00895 00896 Error* process_gmm_bmm_tree_opt(Option_arg arg) 00897 { 00898 if (arg == NULL) 00899 { 00900 return JWSC_EARG("Option 'gmm-bmm-tree' requires an argument"); 00901 } 00902 opts.gmm_bmm_tree_fname = arg; 00903 00904 return NULL; 00905 } 00906 00907 Error* process_gmm_bmm_info_opt(Option_arg arg) 00908 { 00909 if (arg == NULL) 00910 { 00911 return JWSC_EARG("Option 'gmm-bmm-info' requires an argument"); 00912 } 00913 opts.gmm_bmm_info_fname = arg; 00914 00915 return NULL; 00916 } 00917 00918 Error* process_gmm_bmm_conf_thresh_opt(Option_arg arg) 00919 { 00920 if (arg == NULL) 00921 { 00922 return JWSC_EARG("Option 'gmm-bmm-conf-thresh' requires an argument"); 00923 } 00924 opts.gmm_bmm_conf_thresh_fname = arg; 00925 00926 return NULL; 00927 } 00928 00929 void init_options(Option_no_arg* opts_no_arg, Option_with_arg* opts_with_arg) 00930 { 00931 uint32_t i; 00932 00933 char s_name; 00934 const char* l_name; 00935 const char* desc; 00936 00937 Error* (*fnoarg)(); 00938 Error* (*farg)(const char*); 00939 00940 opts.header_in = HEADER_IN; 00941 opts.header_out = HEADER_OUT; 00942 opts.exclude_one = EXCLUDE_ONE; 00943 opts.num_threads = NUM_THREADS; 00944 opts.seed = 0; 00945 opts.input_format = INPUT_FORMAT; 00946 opts.input_dtd_fname = INPUT_DTD_FNAME; 00947 opts.output_format = OUTPUT_FORMAT; 00948 opts.labels_fname = INPUT_DTD_FNAME; 00949 opts.labels_dtd_fname = INPUT_DTD_FNAME; 00950 opts.id_cols = NULL; 00951 opts.first_marker_col = FIRST_MARKER_COL; 00952 opts.num_markers = NUM_MARKERS; 00953 opts.marker_cols = NULL; 00954 opts.aux_id_cols = NULL; 00955 opts.aux_label_col = AUX_LABEL_COL; 00956 opts.aux_first_marker_col = AUX_FIRST_MARKER_COL; 00957 opts.aux_num_markers = AUX_NUM_MARKERS; 00958 opts.aux_marker_cols = NULL; 00959 opts.num_gmm_markers = NUM_MARKERS; 00960 opts.nb_freq_fname = NB_FREQ_FNAME; 00961 opts.nb_freq_dtd_fname = NB_FREQ_DTD_FNAME; 00962 opts.nb_gauss_fname = NB_GAUSS_FNAME; 00963 opts.nb_gauss_dtd_fname = NB_GAUSS_DTD_FNAME; 00964 opts.nb_gmm_fname = NB_GMM_FNAME; 00965 opts.nb_gmm_dtd_fname = NB_GMM_DTD_FNAME; 00966 opts.mv_gmm_fname = MV_GMM_FNAME; 00967 opts.mv_gmm_dtd_fname = MV_GMM_DTD_FNAME; 00968 opts.mv_mmm_fname = MV_MMM_FNAME; 00969 opts.mv_mmm_dtd_fname = MV_MMM_DTD_FNAME; 00970 #ifdef HAPLO_ENABLE_SVM 00971 opts.svm_fname = SVM_FNAME; 00972 opts.svm_dtd_fname = SVM_DTD_FNAME; 00973 #endif 00974 #ifdef HAPLO_ENABLE_WEKA 00975 opts.weka_j48_fname = WEKA_J48_FNAME; 00976 opts.weka_part_fname = WEKA_PART_FNAME; 00977 opts.weka_jar_fname = WEKA_JAR_FNAME; 00978 opts.weka_dtd_fname = WEKA_DTD_FNAME; 00979 #endif 00980 opts.nearest_fname = NEAREST_FNAME; 00981 opts.nearest_dtd_fname = NEAREST_DTD_FNAME; 00982 opts.nearest_max_d = NEAREST_MAX_D; 00983 opts.bmm_tree_fname = BMM_TREE_FNAME; 00984 opts.bmm_info_fname = BMM_INFO_FNAME; 00985 opts.bmm_conf_thresh_fname = BMM_CONF_THRESH_FNAME; 00986 opts.gmm_bmm_tree_fname = GMM_BMM_TREE_FNAME; 00987 opts.gmm_bmm_info_fname = GMM_BMM_INFO_FNAME; 00988 opts.gmm_bmm_conf_thresh_fname = GMM_BMM_CONF_THRESH_FNAME; 00989 opts.model_dirname = MODEL_DIRNAME; 00990 00991 i = 0; 00992 l_name = "help"; 00993 s_name = 'h'; 00994 desc = "Prints program usage."; 00995 fnoarg = process_help_opt; 00996 init_option_no_arg(&(opts_no_arg[i++]), l_name, s_name, desc, fnoarg); 00997 00998 l_name = "version"; 00999 s_name = 'v'; 01000 desc = "Prints program version."; 01001 fnoarg = process_version_opt; 01002 init_option_no_arg(&(opts_no_arg[i++]), l_name, s_name, desc, fnoarg); 01003 01004 l_name = "header-in"; 01005 s_name = 0; 01006 desc = "The input data contains a header (descriptive) line, which should be discarded."; 01007 fnoarg = process_header_in_opt; 01008 init_option_no_arg(&(opts_no_arg[i++]), l_name, s_name, desc, fnoarg); 01009 01010 l_name = "header-out"; 01011 s_name = 0; 01012 desc = "Write a header (descriptive) line to the first line of the output results."; 01013 fnoarg = process_header_out_opt; 01014 init_option_no_arg(&(opts_no_arg[i++]), l_name, s_name, desc, fnoarg); 01015 assert(i == NUM_SHARED_OPTS_NO_ARG); 01016 01017 01018 i = 0; 01019 l_name = "options"; 01020 s_name = 0; 01021 desc = "File containing program options. Any options appearing on the command line following this option take precendence over those in the options file."; 01022 farg = process_options_opt; 01023 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01024 01025 #ifdef HAPLO_HAVE_PTHREAD 01026 l_name = "num-threads"; 01027 s_name = 0; 01028 desc = "Number of concurrent threads to use at any given time."; 01029 farg = process_num_threads_opt; 01030 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01031 #endif 01032 01033 l_name = "seed"; 01034 s_name = 0; 01035 desc = "Random seed."; 01036 farg = process_seed_opt; 01037 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01038 01039 l_name = "input-format"; 01040 s_name = 0; 01041 desc = "Input file format. Must be one of {txt, csv, xml}. If the input is XML, it must conform to the XML DTD haplo-input.dtd."; 01042 farg = process_input_format_opt; 01043 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01044 01045 l_name = "input-dtd"; 01046 s_name = 0; 01047 desc = "If the input format is XML, validate it with this DTD."; 01048 farg = process_input_dtd_opt; 01049 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01050 01051 l_name = "output-format"; 01052 s_name = 0; 01053 desc = "Output file format. Must be one of {txt, csv, xml}."; 01054 farg = process_output_format_opt; 01055 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01056 01057 l_name = "labels"; 01058 s_name = 0; 01059 desc = "XML file containing the organization and listing of possible haplo groups labels for the samples. Must conform to the XML DTD haplo-labels.dtd."; 01060 farg = process_labels_opt; 01061 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01062 01063 l_name = "labels-dtd"; 01064 s_name = 0; 01065 desc = "Validate the XML labels file with this DTD."; 01066 farg = process_labels_dtd_opt; 01067 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01068 01069 l_name = "id-cols"; 01070 s_name = 0; 01071 desc = "Comma separated ordered list of columns to use for sample identification. Prefixes the output of each sample. Count begins with 1 at the first column of the file. Set to zero to ignore the id column"; 01072 farg = process_id_cols_opt; 01073 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01074 01075 l_name = "label-col"; 01076 s_name = 0; 01077 desc = "Column containing the haplo group labels. Count begins with 1 at the first column of the file. Set to zero to ignore the label column."; 01078 farg = process_label_col_opt; 01079 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01080 01081 l_name = "1st-marker-col"; 01082 s_name = 0; 01083 desc = "Column containing the first marker. Use in conjunction with num-markers to specify the markers for reading. All other markers are assumed to follow this one. Count begins with 1."; 01084 farg = process_first_marker_col_opt; 01085 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01086 01087 l_name = "num-markers"; 01088 s_name = 0; 01089 desc = "Number of markers to read. Use in conjunction with 1st-marker-col to specify the markers for reading."; 01090 farg = process_num_markers_opt; 01091 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01092 01093 l_name = "marker-cols"; 01094 s_name = 0; 01095 desc = "Comma separated ordered list of markers to use for training. Use instead of 1st-marker-col and num-markers. Count begins with 1 at the first column of the CSV file."; 01096 farg = process_marker_cols_opt; 01097 init_option_with_arg(&(opts_with_arg[i++]), l_name, s_name, desc, farg); 01098 assert(i == NUM_SHARED_OPTS_WITH_ARG); 01099 }