Haplo Prediction
predict haplogroups
Functions
svm_tree.c File Reference

Support vector machine model classifier. More...

#include <config.h>
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <assert.h>
#include <string.h>
#include <errno.h>
#include <math.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
#include <libxml/valid.h>
#include <jwsc/base/error.h>
#include <jwsc/base/limits.h>
#include <jwsc/base/file_io.h>
#include <jwsc/vector/vector.h>
#include <jwsc/vector/vector_io.h>
#include <jwsc/vector/vector_math.h>
#include <jwsc/matrix/matrix.h>
#include <jwsc/matrix/matrix_io.h>
#include <jwsc/matblock/matblock.h>
#include <jwsc/matblock/matblock_io.h>
#include <jwsc/stat/gmm.h>
#include "xml.h"
#include "haplo_groups.h"
#include "svm_tree.h"

Go to the source code of this file.

Functions

void train_svm_model (SVM_model **model_out, const Vector_u32 *labels, const Matrix_i32 *markers, double cost, double gamma)
 Trains a SVM model.
Error* predict_label_with_svm_model (uint32_t *label_out, double **confidence_out, const struct svm_node *markers, const SVM_model *model)
 Predicts the label for a marker sample using a SVM model.
static void get_predicted_labels_confidence (Vector_d **confidence_out, const Vector_u32 *labels_v, const Matrix_d *confidence_matrix, const SVM_model *model)
 Gets the confidence value for each predicted label.
Error* predict_labels_with_svm_model (Vector_u32 **labels_out, Vector_d **confidence_out, const Matrix_i32 *markers, const SVM_model *model)
 Predicts the labels for a set of marker samples using a SVM model.
Error* read_svm_model (SVM_model **model_out, const char *fname)
 Reads a SVM model.
Error* write_svm_model (SVM_model *model, const char *fname)
 Writes a SVM model.
Error* write_svm_model_training_data (const Vector_u32 *labels, const Matrix_i32 *markers, const char *fname)
 Writes the training data for an SVM model to a file.
void free_svm_model (SVM_model *model)
 Frees a SVM model.
static Error* read_svm_xml_doc (xmlDoc **xml_doc_out, const char *xml_fname, const char *dtd_fname)
 Reads and optionally validates an XML document.
static Error* create_svm_model_tree_from_xml_node (SVM_model_tree **tree_out, const SVM_model_tree *parent, uint32_t parent_label, xmlNode *xml_node)
 Allocates and initializes a model tree from an xml node.
static Error* create_svm_model_node_from_xml_node (xmlNode *xml_node, SVM_model_node *svm_node, uint32_t i)
 Fills in an SVM node from an xml node.
static Error* create_model_training_data (Vector_u32 **train_labels_out, Matrix_i32 **train_markers_out, const Vector_u32 *data_labels, const Matrix_i32 *data_markers, uint32_t *model_labels, const Vector_u32 **model_altlabels)
 Creates a set of training data as a model-specific labeled subset of a larger data set.
static Error* train_svm_model_node (SVM_model_node *node, const Vector_u32 *labels, const Matrix_i32 *markers)
 Trains a model in the tree using node-specific data.
Error* train_svm_model_tree (SVM_model_tree **tree_out, const Vector_u32 *labels, const Matrix_i32 *markers, const char *tree_xml_fname, const char *tree_dtd_fname)
 Trains a SVM model tree.
static double get_predicted_label_confidence (uint32_t label, double *confs, const SVM_model *model)
 Gets the confidence value for a predicted label.
static uint32_t get_best_one_against_all_model (const SVM_model_tree *tree, Vector_u32 *labels, Vector_d *confs)
 Gets the best SVM model (index).
static uint32_t get_best_one_against_one_model (const SVM_model_tree *tree, Vector_u32 *labels, Vector_d *confs)
 Gets the best SVM model (index).
static Error* recursively_predict_label_in_model_tree (uint32_t *label_out, double *confidence_out, const SVM_model_tree *tree, const struct svm_node *markers)
 Recursively predicts a label from a model tree.
Error* predict_labels_with_svm_model_tree (Vector_u32 **labels_out, Vector_d **confidence_out, const Matrix_i32 *markers, const SVM_model_tree *tree)
 Predicts the labels for a set of marker samples using a SVM model tree.
static Error* read_svm_model_node (SVM_model_node *node, const char *model_dirname)
 Recursively reads the model in a node from its file name.
Error* read_svm_model_tree (SVM_model_tree **tree_out, const char *tree_xml_fname, const char *tree_dtd_fname, const char *model_dirname)
 Reads a SVM model tree from an XML file and a directory of model files.
Error* write_svm_model_tree (const SVM_model_tree *tree, const char *model_dirname)
 Writes a SVM model tree.
static Error* write_svm_model_node_training_data (SVM_model_node *node, const Vector_u32 *labels, const Matrix_i32 *markers, const char *data_dirname)
 Writes the training data for a model in the tree using node-specific data.
Error* write_svm_model_tree_training_data (const Vector_u32 *labels, const Matrix_i32 *markers, const char *tree_xml_fname, const char *tree_dtd_fname, const char *data_dirname)
 Writes the training data for an SVM model tree to a file.
void free_svm_model_tree (SVM_model_tree *tree)
 Frees a SVM model tree.

Detailed Description

Support vector machine model classifier.

Author:
Joseph Schlecht
License:
Creative Commons BY-NC-SA 3.0

Definition in file svm_tree.c.


Function Documentation

void train_svm_model ( SVM_model **  model_out,
const Vector_u32 *  labels,
const Matrix_i32 *  markers,
double  cost,
double  gamma 
)

Trains a SVM model.

Parameters:
model_out: Result parameter. If *model_out is NULL, a model is allocated; otherwise its space is re-used.
labels: Labels for training, with the ith element corresponding to the ith sample in markers.
markers: Markers for training, with the ith row being a sample corresponding to the ith label in labels.
cost: Cost parameter for the SVM model.
gamma: Gamma parameter for the SVM model.

Definition at line 98 of file svm_tree.c.

Error* predict_label_with_svm_model ( uint32_t *  label_out,
double **  confidence_out,
const struct svm_node *  markers,
const SVM_model *  model 
)

Predicts the label for a marker sample using a SVM model.

Parameters:
label_out: Result parameter.
confidence_out: Result parameter. Must be an array of doubles of length svm_get_nr_class(model).
markers: Marker data to predict.
model: Trained model to use for predicting.
Returns:
On success, NULL is returned; otherwise an error is returned.

Definition at line 172 of file svm_tree.c.

static void get_predicted_labels_confidence ( Vector_d **  confidence_out,
const Vector_u32 *  labels_v,
const Matrix_d *  confidence_matrix,
const SVM_model *  model 
) [static]

Gets the confidence value for each predicted label.

Definition at line 188 of file svm_tree.c.

Error* predict_labels_with_svm_model ( Vector_u32 **  labels_out,
Vector_d **  confidence_out,
const Matrix_i32 *  markers,
const SVM_model *  model 
)

Predicts the labels for a set of marker samples using a SVM model.

Parameters:
labels_out: Result parameter. If *labels_out is NULL, it is allocated; otherwise its space is re-used.
confidence_out: Result parameter. If *confidence_out is NULL, it is allocated; otherwise its space is re-used.
markers: Marker data to predict. Each row is a sample for prediction, corresponding to an element in the result parameters.
model: Trained model to use for predicting.
Returns:
On success, NULL is returned; otherwise an error is returned, but the result parameters are not freed.

Definition at line 241 of file svm_tree.c.

Error* read_svm_model ( SVM_model **  model_out,
const char *  fname 
)

Reads a SVM model.

Parameters:
model_out: Result parameter. If *model_out is NULL, a model is allocated; otherwise its space is re-used.
fname: File to read the model from.
Returns:
On success, NULL is returned; otherwise an error is returned, but the result parameter is not freed.

Definition at line 299 of file svm_tree.c.

Error* write_svm_model ( SVM_model *  model,
const char *  fname 
)

Writes a SVM model.

Parameters:
model: Model to write.
fname: File to write to.
Returns:
On success, NULL is returned; otherwise an error is returned.

Definition at line 328 of file svm_tree.c.

Error* write_svm_model_training_data ( const Vector_u32 *  labels,
const Matrix_i32 *  markers,
const char *  fname 
)

Writes the training data for an SVM model to a file.

The format is txt-based and consistent with input to libsvm.

  label-index  1:marker-val 2:marker-val 3:marker-val ...
Parameters:
labels: Model training labels to write.
markers: Model training markers to write.
fname: File to write to.
Returns:
On success, NULL is returned; otherwise an error is returned.

Definition at line 357 of file svm_tree.c.

void free_svm_model ( SVM_model *  model)

Frees a SVM model.

Parameters:
model: Model to free.

Definition at line 399 of file svm_tree.c.

static Error* read_svm_xml_doc ( xmlDoc **  xml_doc_out,
const char *  xml_fname,
const char *  dtd_fname 
) [static]

Reads and optionally validates an XML document.

Definition at line 425 of file svm_tree.c.

static Error * create_svm_model_tree_from_xml_node ( SVM_model_tree **  tree_out,
const SVM_model_tree *  parent,
uint32_t  parent_label,
xmlNode *  xml_node 
) [static]

Allocates and initializes a model tree from an xml node.

Does not train or read the model; leaves the model field as NULL in each tree node.

Definition at line 600 of file svm_tree.c.

static Error* create_svm_model_node_from_xml_node ( xmlNode *  xml_node,
SVM_model_node *  svm_node,
uint32_t  i 
) [static]

Fills in an SVM node from an xml node.

Definition at line 480 of file svm_tree.c.

static Error* create_model_training_data ( Vector_u32 **  train_labels_out,
Matrix_i32 **  train_markers_out,
const Vector_u32 *  data_labels,
const Matrix_i32 *  data_markers,
uint32_t *  model_labels,
const Vector_u32 **  model_altlabels 
) [static]

Creates a set of training data as a model-specific labeled subset of a larger data set.

Definition at line 687 of file svm_tree.c.

static Error* train_svm_model_node ( SVM_model_node *  node,
const Vector_u32 *  labels,
const Matrix_i32 *  markers 
) [static]

Trains a model in the tree using node-specific data.

Definition at line 768 of file svm_tree.c.

Error* train_svm_model_tree ( SVM_model_tree **  tree_out,
const Vector_u32 *  labels,
const Matrix_i32 *  markers,
const char *  tree_xml_fname,
const char *  tree_dtd_fname 
)

Trains a SVM model tree.

Parameters:
tree_out: Result parameter.
labels: Sample group labels.
markers: Sample marker values.
tree_xml_fname: XML file containing the model tree information.
tree_dtd_fname: DTD file for validating the XML file, can be NULL.

Definition at line 829 of file svm_tree.c.

static double get_predicted_label_confidence ( uint32_t  label,
double *  confs,
const SVM_model *  model 
) [static]

Gets the confidence value for a predicted label.

Definition at line 884 of file svm_tree.c.

static uint32_t get_best_one_against_all_model ( const SVM_model_tree *  tree,
Vector_u32 *  labels,
Vector_d *  confs 
) [static]

Gets the best SVM model (index).

Definition at line 914 of file svm_tree.c.

static uint32_t get_best_one_against_one_model ( const SVM_model_tree *  tree,
Vector_u32 *  labels,
Vector_d *  confs 
) [static]

Gets the best SVM model (index).

Definition at line 1015 of file svm_tree.c.

static Error* recursively_predict_label_in_model_tree ( uint32_t *  label_out,
double *  confidence_out,
const SVM_model_tree *  tree,
const struct svm_node *  markers 
) [static]

Recursively predicts a label from a model tree.

Definition at line 1083 of file svm_tree.c.

Error* predict_labels_with_svm_model_tree ( Vector_u32 **  labels_out,
Vector_d **  confidence_out,
const Matrix_i32 *  markers,
const SVM_model_tree *  tree 
)

Predicts the labels for a set of marker samples using a SVM model tree.

Parameters:
labels_out: Result parameter. If *labels_out is NULL, it is allocated; otherwise its space is re-used.
confidence_out: Result parameter. If *confidence_out is NULL, it is allocated; otherwise its space is re-used.
markers: Marker data to predict. Each row is a sample for prediction, corresponding to an element in the result parameters.
tree: Trained model tree to use for predicting.
Returns:
On success, NULL is returned; otherwise an error is returned, but the result parameters are not freed.

Definition at line 1170 of file svm_tree.c.

static Error* read_svm_model_node ( SVM_model_node *  node,
const char *  model_dirname 
) [static]

Recursively reads the model in a node from its file name.

Definition at line 1223 of file svm_tree.c.

Error* read_svm_model_tree ( SVM_model_tree **  tree_out,
const char *  tree_xml_fname,
const char *  tree_dtd_fname,
const char *  model_dirname 
)

Reads a SVM model tree from an XML file and a directory of model files.

Parameters:
tree_out: Result parameter.
tree_xml_fname: XML file containing the model tree information.
tree_dtd_fname: DTD file for validating the XML file, can be NULL.
model_dirname: Directory prefixing each model file in the tree.

Definition at line 1264 of file svm_tree.c.

Error* write_svm_model_tree ( const SVM_model_tree *  tree,
const char *  model_dirname 
)

Writes a SVM model tree.

Parameters:
tree: Model tree to write.
model_dirname: Directory to prefix to each model file in the tree.

Definition at line 1321 of file svm_tree.c.

static Error* write_svm_model_node_training_data ( SVM_model_node *  node,
const Vector_u32 *  labels,
const Matrix_i32 *  markers,
const char *  data_dirname 
) [static]

Writes the training data for a model in the tree using node-specific data.

Definition at line 1364 of file svm_tree.c.

Error* write_svm_model_tree_training_data ( const Vector_u32 *  labels,
const Matrix_i32 *  markers,
const char *  tree_xml_fname,
const char *  tree_dtd_fname,
const char *  data_dirname 
)

Writes the training data for an SVM model tree to a file.

Writes the data files, each named after its model, to the data_dirname directory.

Parameters:
labels: Sample group labels.
markers: Sample marker values.
tree_xml_fname: XML file containing the model tree information.
tree_dtd_fname: DTD file for validating the XML file, can be NULL.
data_dirname: Directory to write the training data files.

Definition at line 1436 of file svm_tree.c.

void free_svm_model_tree ( SVM_model_tree *  tree)

Frees a SVM model tree.

Parameters:
tree: Model tree to free.

Definition at line 1493 of file svm_tree.c.