Haplo Prediction
predict haplogroups
Data Structures | Defines | Functions | Variables
haplo_predict.c File Reference

Predicts a Y-STR haplotype label using a set of trained classifiers. More...

#include <config.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <inttypes.h>
#include <libxml/tree.h>
#include <jwsc/base/error.h>
#include <jwsc/base/option.h>
#include <jwsc/base/file_io.h>
#include <jwsc/vector/vector.h>
#include <jwsc/matrix/matrix.h>
#include <jwsc/matblock/matblock.h>
#include "haplo_groups.h"
#include "options.h"
#include "output.h"
#include "input.h"
#include "xml.h"
#include "nb_freq.h"
#include "nb_gauss.h"
#include "nb_gmm.h"
#include "mv_gmm.h"
#include "nearest.h"

Go to the source code of this file.

Data Structures

struct  Predict_params
 Prediction function parameter set. More...

Defines

#define NUM_SVM_OPTS   0
#define NUM_WEKA_OPTS   0
#define NUM_OPTS_NO_ARG   1 + NUM_SHARED_OPTS_NO_ARG
#define NUM_OPTS_WITH_ARG   13 + NUM_SVM_OPTS + NUM_WEKA_OPTS + NUM_SHARED_OPTS_WITH_ARG
#define NUM_ALGOS   8
#define LABEL_COL   0
#define OUTPUT_FNAME   "/dev/stdout"

Functions

uint32_t get_num_opts_no_arg ()
 Returns the number of options not taking an argument.
uint32_t get_num_opts_with_arg ()
 Returns the number of options taking an argument.
void print_usage ()
 Prints the program usage to stderr.
Error * process_output_opt (Option_arg arg)
 Process the 'output' program option.
static void init_predict_options (void)
 Initializes the program options.
static uint8_t num_models_to_predict ()
 Returns the number of models to use for prediction.
static void predict_nb_freq (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers)
 Predict haplo labels using a Naive Bayes frequency model.
static void predict_nb_gauss (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers)
 Predict haplo labels using a Naive Bayes Gaussian model.
static void predict_nb_gmm (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers)
 Predict haplo labels using a Naive Bayes Gaussian mixture model.
static void predict_mv_gmm (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers)
 Predict haplo labels using a multivariate Gaussian mixture model.
static void predict_svm (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers)
 Predict haplo labels using a SVM model.
static void predict_j48 (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers)
 Predict haplo labels using Weka's J48 decision trees.
static void predict_part (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers)
 Predict haplo labels using Weka's PART decision trees.
static void predict_nearest (Vector_u32 **labels_out, Vector_d **confs_out, const Matrix_i32 *markers)
 Predict haplo labels using a nearest neighbor model.
static void write_results_header (const Matblock_u8 *ids, const Vector_u32 *labels, const Vector_u32 *nb_freq_labels, const Vector_u32 *nb_gauss_labels, const Vector_u32 *nb_gmm_labels, const Vector_u32 *mv_gmm_labels, const Vector_u32 *svm_labels, const Vector_u32 *j48_labels, const Vector_u32 *part_labels, const Vector_u32 *nearest_labels, FILE *fp)
 Writes the prediction results header to the output file.
static void find_ancestors (Vector_u32 **ancestor_types_out, Vector_u32 **ancestor_labels_out, const Vector_u32 *labels_1, const Vector_u32 *labels_2, const Vector_u32 *labels_3, const Vector_u32 *labels_4, const Vector_u32 *labels_5, const Vector_u32 *labels_6, const Vector_u32 *labels_7, const Vector_u32 *labels_8)
 Looks up the common ancestor for sets of labels.
static void write_results (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_i32 *markers, const Vector_u32 *nb_freq_labels, const Vector_d *nb_freq_confs, const Vector_u32 *nb_gauss_labels, const Vector_d *nb_gauss_confs, const Vector_u32 *nb_gmm_labels, const Vector_d *nb_gmm_confs, const Vector_u32 *mv_gmm_labels, const Vector_d *mv_gmm_confs, const Vector_u32 *svm_labels, const Vector_d *svm_confs, const Vector_u32 *j48_labels, const Vector_d *j48_confs, const Vector_u32 *part_labels, const Vector_d *part_confs, const Vector_u32 *nearest_labels, const Vector_d *nearest_dists, const Vector_u32 *ancestor_types, const Vector_u32 *ancestor_labels)
 Writes the prediction results to the output file.
static void predict (Predict_params *p)
int main (int argc, const char **argv)
 Main function for the haplo-predict program.

Variables

Option_no_arg opts_no_arg [NUM_OPTS_NO_ARG]
 Program options not taking an argument.
Option_with_arg opts_with_arg [NUM_OPTS_WITH_ARG]
 Program options taking an argument.
static const char * output_fname = OUTPUT_FNAME
 Prediction output file name.

Detailed Description

Predicts a Y-STR haplotype label using a set of trained classifiers.

Author:
Joseph Schlecht
License:
Creative Commons BY-NC-SA 3.0

Classifier types are

  1. Naive Bayes non-parametric marker frequency model
  2. Naive Bayes Gaussian model
  3. Naive Bayes Gaussian mixture model
  4. Multivariate Gaussian mixture model
  5. Support Vector Machines
  6. Weka PART and J48
  7. Nearest neighbor

Definition in file haplo_predict.c.


Define Documentation

#define NUM_SVM_OPTS   0

Definition at line 101 of file haplo_predict.c.

#define NUM_WEKA_OPTS   0

Definition at line 107 of file haplo_predict.c.

#define NUM_OPTS_NO_ARG   1 + NUM_SHARED_OPTS_NO_ARG

Definition at line 110 of file haplo_predict.c.

#define NUM_OPTS_WITH_ARG   13 + NUM_SVM_OPTS + NUM_WEKA_OPTS + NUM_SHARED_OPTS_WITH_ARG

Definition at line 111 of file haplo_predict.c.

#define NUM_ALGOS   8

Definition at line 114 of file haplo_predict.c.

#define LABEL_COL   0

Definition at line 115 of file haplo_predict.c.

#define OUTPUT_FNAME   "/dev/stdout"

Definition at line 116 of file haplo_predict.c.


Function Documentation

uint32_t get_num_opts_no_arg ( )

Returns the number of options not taking an argument.

Definition at line 148 of file haplo_predict.c.

uint32_t get_num_opts_with_arg ( )

Returns the number of options taking an argument.

Definition at line 154 of file haplo_predict.c.

void print_usage ( void  )

Prints the program usage to stderr.

Definition at line 160 of file haplo_predict.c.

Error* process_output_opt ( Option_arg  arg)

Process the 'output' program option.

Definition at line 168 of file haplo_predict.c.

static void init_predict_options ( void  ) [static]

Initializes the program options.

Definition at line 179 of file haplo_predict.c.

static uint8_t num_models_to_predict ( ) [static]

Returns the number of models to use for prediction.

Definition at line 324 of file haplo_predict.c.

static void predict_nb_freq ( Vector_u32 **  labels_out,
Vector_d **  confs_out,
const Matrix_i32 *  markers 
) [static]

Predict haplo labels using a Naive Bayes frequency model.

Definition at line 342 of file haplo_predict.c.

static void predict_nb_gauss ( Vector_u32 **  labels_out,
Vector_d **  confs_out,
const Matrix_i32 *  markers 
) [static]

Predict haplo labels using a Naive Bayes Gaussian model.

Definition at line 367 of file haplo_predict.c.

static void predict_nb_gmm ( Vector_u32 **  labels_out,
Vector_d **  confs_out,
const Matrix_i32 *  markers 
) [static]

Predict haplo labels using a Naive Bayes Gaussian mixture model.

Definition at line 392 of file haplo_predict.c.

static void predict_mv_gmm ( Vector_u32 **  labels_out,
Vector_d **  confs_out,
const Matrix_i32 *  markers 
) [static]

Predict haplo labels using a multivariate Gaussian mixture model.

Definition at line 417 of file haplo_predict.c.

static void predict_svm ( Vector_u32 **  labels_out,
Vector_d **  confs_out,
const Matrix_i32 *  markers 
) [static]

Predict haplo labels using a SVM model.

Definition at line 442 of file haplo_predict.c.

static void predict_j48 ( Vector_u32 **  labels_out,
Vector_d **  confs_out,
const Matrix_i32 *  markers 
) [static]

Predict haplo labels using Weka's J48 decision trees.

Definition at line 471 of file haplo_predict.c.

static void predict_part ( Vector_u32 **  labels_out,
Vector_d **  confs_out,
const Matrix_i32 *  markers 
) [static]

Predict haplo labels using Weka's PART decision trees.

Definition at line 500 of file haplo_predict.c.

static void predict_nearest ( Vector_u32 **  labels_out,
Vector_d **  confs_out,
const Matrix_i32 *  markers 
) [static]

Predict haplo labels using a nearest neighbor model.

Definition at line 529 of file haplo_predict.c.

static void write_results_header ( const Matblock_u8 *  ids,
const Vector_u32 *  labels,
const Vector_u32 *  nb_freq_labels,
const Vector_u32 *  nb_gauss_labels,
const Vector_u32 *  nb_gmm_labels,
const Vector_u32 *  mv_gmm_labels,
const Vector_u32 *  svm_labels,
const Vector_u32 *  j48_labels,
const Vector_u32 *  part_labels,
const Vector_u32 *  nearest_labels,
FILE *  fp 
) [static]

Writes the prediction results header to the output file.

Definition at line 554 of file haplo_predict.c.

static void find_ancestors ( Vector_u32 **  ancestor_types_out,
Vector_u32 **  ancestor_labels_out,
const Vector_u32 *  labels_1,
const Vector_u32 *  labels_2,
const Vector_u32 *  labels_3,
const Vector_u32 *  labels_4,
const Vector_u32 *  labels_5,
const Vector_u32 *  labels_6,
const Vector_u32 *  labels_7,
const Vector_u32 *  labels_8 
) [static]

Looks up the common ancestor for sets of labels.

Definition at line 654 of file haplo_predict.c.

static void write_results ( const Matblock_u8 *  ids,
const Vector_u32 *  labels,
const Matrix_i32 *  markers,
const Vector_u32 *  nb_freq_labels,
const Vector_d *  nb_freq_confs,
const Vector_u32 *  nb_gauss_labels,
const Vector_d *  nb_gauss_confs,
const Vector_u32 *  nb_gmm_labels,
const Vector_d *  nb_gmm_confs,
const Vector_u32 *  mv_gmm_labels,
const Vector_d *  mv_gmm_confs,
const Vector_u32 *  svm_labels,
const Vector_d *  svm_confs,
const Vector_u32 *  j48_labels,
const Vector_d *  j48_confs,
const Vector_u32 *  part_labels,
const Vector_d *  part_confs,
const Vector_u32 *  nearest_labels,
const Vector_d *  nearest_dists,
const Vector_u32 *  ancestor_types,
const Vector_u32 *  ancestor_labels 
) [static]

Writes the prediction results to the output file.

Definition at line 740 of file haplo_predict.c.

static void predict ( Predict_params *  p) [static]

Definition at line 843 of file haplo_predict.c.

int main ( int  argc,
const char **  argv 
)

Main function for the haplo-predict program.

Definition at line 894 of file haplo_predict.c.


Variable Documentation

Option_no_arg opts_no_arg[NUM_OPTS_NO_ARG]

Program options not taking an argument.

Definition at line 138 of file haplo_predict.c.

Option_with_arg opts_with_arg[NUM_OPTS_WITH_ARG]

Program options taking an argument.

Definition at line 141 of file haplo_predict.c.

const char* output_fname = OUTPUT_FNAME [static]

Prediction output file name.

Definition at line 144 of file haplo_predict.c.