Haplo Prediction
predict haplogroups
|
Predicts a Y-STR haplotype label using a set of trained classifiers. More...
#include <config.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <inttypes.h>
#include <libxml/tree.h>
#include <jwsc/base/error.h>
#include <jwsc/base/option.h>
#include <jwsc/base/file_io.h>
#include <jwsc/vector/vector.h>
#include <jwsc/matrix/matrix.h>
#include <jwsc/matblock/matblock.h>
#include "haplo_groups.h"
#include "options.h"
#include "output.h"
#include "input.h"
#include "xml.h"
#include "nb_freq.h"
#include "nb_gauss.h"
#include "nb_gmm.h"
#include "mv_gmm.h"
#include "nearest.h"
Go to the source code of this file.
Data Structures | |
struct | Predict_params |
Prediction function parameter set. More... | |
Defines | |
#define | NUM_SVM_OPTS 0 |
#define | NUM_WEKA_OPTS 0 |
#define | NUM_OPTS_NO_ARG 1 + NUM_SHARED_OPTS_NO_ARG |
#define | NUM_OPTS_WITH_ARG 13 + NUM_SVM_OPTS + NUM_WEKA_OPTS + NUM_SHARED_OPTS_WITH_ARG |
#define | NUM_ALGOS 8 |
#define | LABEL_COL 0 |
#define | OUTPUT_FNAME "/dev/stdout" |
Functions | |
uint32_t | get_num_opts_no_arg () |
Returns the number of options not taking an argument. | |
uint32_t | get_num_opts_with_arg () |
Returns the number of options taking an argument. | |
void | print_usage () |
Prints the program usage to stderr. | |
Error * | process_output_opt (Option_arg arg) |
Process the 'output' program option. | |
static void | init_predict_options (void) |
Initializes the program options. | |
static uint8_t | num_models_to_predict () |
Returns the number of models to use for prediction. | |
static void | predict_nb_freq (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers) |
Predict haplo labels using a Naive Bayes frequency model. | |
static void | predict_nb_gauss (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers) |
Predict haplo labels using a Naive Bayes Gaussian model. | |
static void | predict_nb_gmm (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers) |
Predict haplo labels using a Naive Bayes Gaussian mixture model. | |
static void | predict_mv_gmm (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers) |
Predict haplo labels using a multivariate Gaussian mixture model. | |
static void | predict_svm (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers) |
Predict haplo labels using an SVM model. | |
static void | predict_j48 (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers) |
Predict haplo labels using Weka's J48 decision trees. | |
static void | predict_part (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers) |
Predict haplo labels using Weka's PART decision trees. | |
static void | predict_nearest (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers) |
Predict haplo labels using a nearest neighbor model. | |
static void | write_results_header (const Matblock_u8 *ids, const Vector_u32 *labels, const Vector_u32 *nb_freq_labels, const Vector_u32 *nb_gauss_labels, const Vector_u32 *nb_gmm_labels, const Vector_u32 *mv_gmm_labels, const Vector_u32 *svm_labels, const Vector_u32 *j48_labels, const Vector_u32 *part_labels, const Vector_u32 *nearest_labels, FILE *fp) |
Writes the prediction results header to the output file. | |
static void | find_ancestors (Vector_u32 **ancestor_types_out, Vector_u32 **ancestor_labels_out, const Vector_u32 *labels_1, const Vector_u32 *labels_2, const Vector_u32 *labels_3, const Vector_u32 *labels_4, const Vector_u32 *labels_5, const Vector_u32 *labels_6, const Vector_u32 *labels_7, const Vector_u32 *labels_8) |
Looks up the common ancestor for sets of labels. | |
static void | write_results (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_i32 *markers, const Vector_u32 *nb_freq_labels, const Vector_d *nb_freq_confs, const Vector_u32 *nb_gauss_labels, const Vector_d *nb_gauss_confs, const Vector_u32 *nb_gmm_labels, const Vector_d *nb_gmm_confs, const Vector_u32 *mv_gmm_labels, const Vector_d *mv_gmm_confs, const Vector_u32 *svm_labels, const Vector_d *svm_confs, const Vector_u32 *j48_labels, const Vector_d *j48_confs, const Vector_u32 *part_labels, const Vector_d *part_confs, const Vector_u32 *nearest_labels, const Vector_d *nearest_dists, const Vector_u32 *ancestor_types, const Vector_u32 *ancestor_labels) |
Writes the prediction results to the output file. | |
static void | predict (Predict_params *p) |
int | main (int argc, const char **argv) |
Main function for the haplo-predict program. | |
Variables | |
Option_no_arg | opts_no_arg [NUM_OPTS_NO_ARG] |
Program options not taking an argument. | |
Option_with_arg | opts_with_arg [NUM_OPTS_WITH_ARG] |
Program options taking an argument. | |
static const char * | output_fname = OUTPUT_FNAME |
Prediction output file name. |
Predicts a Y-STR haplotype label using a set of trained classifiers.
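Classifier types are: Naive Bayes frequency, Naive Bayes Gaussian, Naive Bayes Gaussian mixture, multivariate Gaussian mixture, SVM, Weka J48 decision tree, Weka PART decision tree, and nearest neighbor.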
Definition in file haplo_predict.c.
#define NUM_SVM_OPTS 0 |
Definition at line 101 of file haplo_predict.c.
#define NUM_WEKA_OPTS 0 |
Definition at line 107 of file haplo_predict.c.
#define NUM_OPTS_NO_ARG 1 + NUM_SHARED_OPTS_NO_ARG |
Definition at line 110 of file haplo_predict.c.
#define NUM_OPTS_WITH_ARG 13 + NUM_SVM_OPTS + NUM_WEKA_OPTS + NUM_SHARED_OPTS_WITH_ARG |
Definition at line 111 of file haplo_predict.c.
#define NUM_ALGOS 8 |
Definition at line 114 of file haplo_predict.c.
#define LABEL_COL 0 |
Definition at line 115 of file haplo_predict.c.
#define OUTPUT_FNAME "/dev/stdout" |
Definition at line 116 of file haplo_predict.c.
uint32_t get_num_opts_no_arg | ( | ) |
Returns the number of options not taking an argument.
Definition at line 148 of file haplo_predict.c.
uint32_t get_num_opts_with_arg | ( | ) |
Returns the number of options taking an argument.
Definition at line 154 of file haplo_predict.c.
void print_usage | ( | void | ) |
Prints the program usage to stderr.
Definition at line 160 of file haplo_predict.c.
Error* process_output_opt | ( | Option_arg | arg | ) |
Process the 'output' program option.
Definition at line 168 of file haplo_predict.c.
static void init_predict_options | ( | void | ) | [static] |
Initializes the program options.
Definition at line 179 of file haplo_predict.c.
static uint8_t num_models_to_predict | ( | ) | [static] |
Returns the number of models to use for prediction.
Definition at line 324 of file haplo_predict.c.
static void predict_nb_freq | ( | Vector_u32 ** | labels_out, |
Vector_d ** | confs_out, | ||
const Matrix_i32 * | markers | ||
) | [static] |
Predict haplo labels using a Naive Bayes frequency model.
Definition at line 342 of file haplo_predict.c.
static void predict_nb_gauss | ( | Vector_u32 ** | labels_out, |
Vector_d ** | confs_out, | ||
const Matrix_i32 * | markers | ||
) | [static] |
Predict haplo labels using a Naive Bayes Gaussian model.
Definition at line 367 of file haplo_predict.c.
static void predict_nb_gmm | ( | Vector_u32 ** | labels_out, |
Vector_d ** | confs_out, | ||
const Matrix_i32 * | markers | ||
) | [static] |
Predict haplo labels using a Naive Bayes Gaussian mixture model.
Definition at line 392 of file haplo_predict.c.
static void predict_mv_gmm | ( | Vector_u32 ** | labels_out, |
Vector_d ** | confs_out, | ||
const Matrix_i32 * | markers | ||
) | [static] |
Predict haplo labels using a multivariate Gaussian mixture model.
Definition at line 417 of file haplo_predict.c.
static void predict_svm | ( | Vector_u32 ** | labels_out, |
Vector_d ** | confs_out, | ||
const Matrix_i32 * | markers | ||
) | [static] |
Predict haplo labels using an SVM model.
Definition at line 442 of file haplo_predict.c.
static void predict_j48 | ( | Vector_u32 ** | labels_out, |
Vector_d ** | confs_out, | ||
const Matrix_i32 * | markers | ||
) | [static] |
Predict haplo labels using Weka's J48 decision trees.
Definition at line 471 of file haplo_predict.c.
static void predict_part | ( | Vector_u32 ** | labels_out, |
Vector_d ** | confs_out, | ||
const Matrix_i32 * | markers | ||
) | [static] |
Predict haplo labels using Weka's PART decision trees.
Definition at line 500 of file haplo_predict.c.
static void predict_nearest | ( | Vector_u32 ** | labels_out, |
Vector_d ** | confs_out, | ||
const Matrix_i32 * | markers | ||
) | [static] |
Predict haplo labels using a nearest neighbor model.
Definition at line 529 of file haplo_predict.c.
static void write_results_header | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Vector_u32 * | nb_freq_labels, | ||
const Vector_u32 * | nb_gauss_labels, | ||
const Vector_u32 * | nb_gmm_labels, | ||
const Vector_u32 * | mv_gmm_labels, | ||
const Vector_u32 * | svm_labels, | ||
const Vector_u32 * | j48_labels, | ||
const Vector_u32 * | part_labels, | ||
const Vector_u32 * | nearest_labels, | ||
FILE * | fp | ||
) | [static] |
Writes the prediction results header to the output file.
Definition at line 554 of file haplo_predict.c.
static void find_ancestors | ( | Vector_u32 ** | ancestor_types_out, |
Vector_u32 ** | ancestor_labels_out, | ||
const Vector_u32 * | labels_1, | ||
const Vector_u32 * | labels_2, | ||
const Vector_u32 * | labels_3, | ||
const Vector_u32 * | labels_4, | ||
const Vector_u32 * | labels_5, | ||
const Vector_u32 * | labels_6, | ||
const Vector_u32 * | labels_7, | ||
const Vector_u32 * | labels_8 | ||
) | [static] |
Looks up the common ancestor for sets of labels.
Definition at line 654 of file haplo_predict.c.
static void write_results | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Matrix_i32 * | markers, | ||
const Vector_u32 * | nb_freq_labels, | ||
const Vector_d * | nb_freq_confs, | ||
const Vector_u32 * | nb_gauss_labels, | ||
const Vector_d * | nb_gauss_confs, | ||
const Vector_u32 * | nb_gmm_labels, | ||
const Vector_d * | nb_gmm_confs, | ||
const Vector_u32 * | mv_gmm_labels, | ||
const Vector_d * | mv_gmm_confs, | ||
const Vector_u32 * | svm_labels, | ||
const Vector_d * | svm_confs, | ||
const Vector_u32 * | j48_labels, | ||
const Vector_d * | j48_confs, | ||
const Vector_u32 * | part_labels, | ||
const Vector_d * | part_confs, | ||
const Vector_u32 * | nearest_labels, | ||
const Vector_d * | nearest_dists, | ||
const Vector_u32 * | ancestor_types, | ||
const Vector_u32 * | ancestor_labels | ||
) | [static] |
Writes the prediction results to the output file.
Definition at line 740 of file haplo_predict.c.
static void predict | ( | Predict_params * | p | ) | [static] |
Definition at line 843 of file haplo_predict.c.
int main | ( | int | argc, |
const char ** | argv | ||
) |
Main function for the haplo-predict program.
Definition at line 894 of file haplo_predict.c.
Option_no_arg opts_no_arg[NUM_OPTS_NO_ARG] |
Program options not taking an argument.
Definition at line 138 of file haplo_predict.c.
Option_with_arg opts_with_arg[NUM_OPTS_WITH_ARG] |
Program options taking an argument.
Definition at line 141 of file haplo_predict.c.
const char* output_fname = OUTPUT_FNAME [static] |
Prediction output file name.
Definition at line 144 of file haplo_predict.c.