Haplo Prediction
predict haplogroups
|
Clusters a set of Y-STR samples. More...
#include <config.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <inttypes.h>
#include <string.h>
#include <math.h>
#include <libxml/tree.h>
#include <jwsc/base/error.h>
#include <jwsc/base/option.h>
#include <jwsc/base/limits.h>
#include <jwsc/vector/vector.h>
#include <jwsc/matrix/matrix.h>
#include <jwsc/matrix/matrix_io.h>
#include <jwsc/matblock/matblock.h>
#include <jwsc/stat/kmeans.h>
#include <jwsc/stat/gmm.h>
#include <jwsc/stat/mmm.h>
#include "haplo_groups.h"
#include "options.h"
#include "output.h"
#include "input.h"
#include "xml.h"
Go to the source code of this file.
Defines | |
#define | NUM_OPTS_NO_ARG 0 + NUM_SHARED_OPTS_NO_ARG |
#define | NUM_OPTS_WITH_ARG 12 + NUM_SHARED_OPTS_WITH_ARG |
#define | LABEL_COL 0 |
#define | NUM_CLUSTERS 4 |
#define | CLUSTER_TYPE HAPLO_CLUSTER_KMEANS |
#define | MEANS_OUT_FNAME "/dev/null" |
#define | WEIGHTS_OUT_FNAME "/dev/null" |
#define | RESPONSES_OUT_FNAME "/dev/null" |
#define | MEMBERS_OUT_FNAME "/dev/stdout" |
Enumerations | |
enum | Haplo_cluster_type { HAPLO_CLUSTER_KMEANS, HAPLO_CLUSTER_GMM, HAPLO_CLUSTER_MMM } |
Types of clustering supported. More... | |
Functions | |
uint32_t | get_num_opts_no_arg () |
Returns the number of options not taking an argument. | |
uint32_t | get_num_opts_with_arg () |
Returns the number of options taking an argument. | |
void | print_usage () |
Prints the program usage to stderr. | |
static Error * | process_num_clusters_opt (Option_arg arg) |
Process the 'num-clusters' program option. | |
static Error * | process_cluster_type_opt (Option_arg arg) |
Process the 'cluster-type' program option. | |
Error * | process_means_out_opt (Option_arg arg) |
Process the 'means-out' program option. | |
Error * | process_weights_out_opt (Option_arg arg) |
Process the 'weights-out' program option. | |
Error * | process_responses_out_opt (Option_arg arg) |
Process the 'responses-out' program option. | |
Error * | process_members_out_opt (Option_arg arg) |
Process the 'members-out' program option. | |
static void | init_cluster_options () |
Initializes the haplo-cluster program options. | |
static void | write_means (const Matrix_d *means) |
Writes the clustering means. | |
static void | write_mmm_means (const Matblock_d *means) |
Writes the MMM clustering means. | |
static void | write_members_header (const Matblock_u8 *ids, const Vector_u32 *labels, FILE *fp) |
Writes the file header information for kmeans membership. | |
static void | write_members (const Matblock_u8 *ids, const Vector_u32 *labels, const Vector_i32 *members) |
Writes the clustering membership. | |
static void | write_weights (const Vector_d *weights) |
Writes the clustering weights. | |
static void | write_responses_header (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_d *responses, FILE *fp) |
Writes the file header for cluster responsibilities. | |
static void | write_responses (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_d *responses) |
Writes the clustering responsibilities. | |
static void | write_kmeans_results (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_d *means, const Vector_i32 *members) |
Writes the kmeans results to files. | |
static void | assign_gmm_members (Vector_i32 **members_out, const Matrix_d *responses) |
Assigns hard cluster membership from GMM responsibilities. | |
static void | assign_mmm_members (Vector_i32 **members_out, const Matrix_d *responses) |
Assigns hard cluster membership from MMM responsibilities. | |
static void | assign_mmm_aux_members (Vector_i32 **members_out, const Matblock_d *means, const Vector_d *weights, const Matblock_u32 *markers) |
Assigns hard cluster membership to auxiliary samples from the MMM means, weights, and the samples' markers. | |
static void | write_gmm_results (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_d *means, const Matblock_d *covars, const Vector_d *weights, const Matrix_d *responses, const Vector_i32 *members) |
Writes the Gaussian mixture model results to files. | |
static void | write_mmm_results (const Matblock_u8 *ids, const Vector_u32 *labels, const Matblock_d *means, const Vector_d *weights, const Matrix_d *responses, const Vector_i32 *members) |
Writes the multinomial mixture model results to files. | |
static void | write_mmm_aux_results (const Matblock_u8 *ids, const Vector_u32 *labels, const Vector_i32 *members) |
Writes the auxiliary multinomial mixture model results to files. | |
static void | cluster_kmeans (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_i32 *markers) |
static void | cluster_gmm (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_i32 *markers) |
static void | cluster_mmm (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_i32 *markers, const Matblock_u8 *aux_ids, const Vector_u32 *aux_labels, const Matrix_i32 *aux_markers) |
int | main (int argc, const char **argv) |
Main function for the haplo-cluster program. | |
Variables | |
Option_no_arg | opts_no_arg [NUM_OPTS_NO_ARG] |
Program options not taking an argument. | |
Option_with_arg | opts_with_arg [NUM_OPTS_WITH_ARG] |
Program options taking an argument. | |
static uint32_t | num_clusters = NUM_CLUSTERS |
Number of clusters to use in the K-means algorithm. | |
static Haplo_cluster_type | cluster_type = CLUSTER_TYPE |
Type of clustering to use. | |
static const char * | means_out_fname = MEANS_OUT_FNAME |
Cluster means output file name. | |
static const char * | weights_out_fname = WEIGHTS_OUT_FNAME |
Cluster weights output file name. | |
static const char * | responses_out_fname = RESPONSES_OUT_FNAME |
Cluster responses output file name. | |
static const char * | members_out_fname = MEMBERS_OUT_FNAME |
Cluster members output file name. |
Clusters a set of Y-STR samples.
Available algorithms for clustering include K-means, Gaussian mixture model, and multinomial mixture model.
Definition in file haplo_cluster.c.
#define NUM_OPTS_NO_ARG 0 + NUM_SHARED_OPTS_NO_ARG |
Definition at line 82 of file haplo_cluster.c.
#define NUM_OPTS_WITH_ARG 12 + NUM_SHARED_OPTS_WITH_ARG |
Definition at line 83 of file haplo_cluster.c.
#define LABEL_COL 0 |
Definition at line 86 of file haplo_cluster.c.
#define NUM_CLUSTERS 4 |
Definition at line 87 of file haplo_cluster.c.
#define CLUSTER_TYPE HAPLO_CLUSTER_KMEANS |
Definition at line 88 of file haplo_cluster.c.
#define MEANS_OUT_FNAME "/dev/null" |
Definition at line 89 of file haplo_cluster.c.
#define WEIGHTS_OUT_FNAME "/dev/null" |
Definition at line 90 of file haplo_cluster.c.
#define RESPONSES_OUT_FNAME "/dev/null" |
Definition at line 91 of file haplo_cluster.c.
#define MEMBERS_OUT_FNAME "/dev/stdout" |
Definition at line 92 of file haplo_cluster.c.
enum Haplo_cluster_type |
Types of clustering supported.
Definition at line 96 of file haplo_cluster.c.
uint32_t get_num_opts_no_arg | ( | ) |
Returns the number of options not taking an argument.
Definition at line 131 of file haplo_cluster.c.
uint32_t get_num_opts_with_arg | ( | ) |
Returns the number of options taking an argument.
Definition at line 137 of file haplo_cluster.c.
void print_usage | ( | void | ) |
Prints the program usage to stderr.
Definition at line 143 of file haplo_cluster.c.
static Error* process_num_clusters_opt | ( | Option_arg | arg | ) | [static] |
Process the 'num-clusters' program option.
Definition at line 151 of file haplo_cluster.c.
static Error* process_cluster_type_opt | ( | Option_arg | arg | ) | [static] |
Process the 'cluster-type' program option.
Definition at line 165 of file haplo_cluster.c.
Error* process_means_out_opt | ( | Option_arg | arg | ) |
Process the 'means-out' program option.
Definition at line 191 of file haplo_cluster.c.
Error* process_weights_out_opt | ( | Option_arg | arg | ) |
Process the 'weights-out' program option.
Definition at line 202 of file haplo_cluster.c.
Error* process_responses_out_opt | ( | Option_arg | arg | ) |
Process the 'responses-out' program option.
Definition at line 213 of file haplo_cluster.c.
Error* process_members_out_opt | ( | Option_arg | arg | ) |
Process the 'members-out' program option.
Definition at line 224 of file haplo_cluster.c.
static void init_cluster_options | ( | ) | [static] |
Initializes the haplo-cluster program options.
Definition at line 235 of file haplo_cluster.c.
static void write_means | ( | const Matrix_d * | means | ) | [static] |
Writes the clustering means.
Definition at line 325 of file haplo_cluster.c.
static void write_mmm_means | ( | const Matblock_d * | means | ) | [static] |
Writes the MMM clustering means.
Definition at line 389 of file haplo_cluster.c.
static void write_members_header | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
FILE * | fp | ||
) | [static] |
Writes the file header information for kmeans membership.
Definition at line 469 of file haplo_cluster.c.
static void write_members | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Vector_i32 * | members | ||
) | [static] |
Writes the clustering membership.
Definition at line 528 of file haplo_cluster.c.
static void write_weights | ( | const Vector_d * | weights | ) | [static] |
Writes the clustering weights.
Definition at line 596 of file haplo_cluster.c.
static void write_responses_header | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Matrix_d * | responses, | ||
FILE * | fp | ||
) | [static] |
Writes the file header for cluster responsibilities.
Definition at line 644 of file haplo_cluster.c.
static void write_responses | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Matrix_d * | responses | ||
) | [static] |
Writes the clustering responsibilities.
Definition at line 713 of file haplo_cluster.c.
static void write_kmeans_results | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Matrix_d * | means, | ||
const Vector_i32 * | members | ||
) | [static] |
Writes the kmeans results to files.
Definition at line 798 of file haplo_cluster.c.
static void assign_gmm_members | ( | Vector_i32 ** | members_out, |
const Matrix_d * | responses | ||
) | [static] |
Assigns hard cluster membership from GMM responsibilities.
Definition at line 811 of file haplo_cluster.c.
static void assign_mmm_members | ( | Vector_i32 ** | members_out, |
const Matrix_d * | responses | ||
) | [static] |
Assigns hard cluster membership from MMM responsibilities.
Definition at line 837 of file haplo_cluster.c.
static void assign_mmm_aux_members | ( | Vector_i32 ** | members_out, |
const Matblock_d * | means, | ||
const Vector_d * | weights, | ||
const Matblock_u32 * | markers | ||
) | [static] |
Assigns hard cluster membership to auxiliary samples from the MMM means, weights, and the samples' markers.
Definition at line 863 of file haplo_cluster.c.
static void write_gmm_results | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Matrix_d * | means, | ||
const Matblock_d * | covars, | ||
const Vector_d * | weights, | ||
const Matrix_d * | responses, | ||
const Vector_i32 * | members | ||
) | [static] |
Writes the Gaussian mixture model results to files.
Definition at line 927 of file haplo_cluster.c.
static void write_mmm_results | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Matblock_d * | means, | ||
const Vector_d * | weights, | ||
const Matrix_d * | responses, | ||
const Vector_i32 * | members | ||
) | [static] |
Writes the multinomial mixture model results to files.
Definition at line 945 of file haplo_cluster.c.
static void write_mmm_aux_results | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Vector_i32 * | members | ||
) | [static] |
Writes the auxiliary multinomial mixture model results to files.
Definition at line 962 of file haplo_cluster.c.
static void cluster_kmeans | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Matrix_i32 * | markers | ||
) | [static] |
Definition at line 972 of file haplo_cluster.c.
static void cluster_gmm | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Matrix_i32 * | markers | ||
) | [static] |
Definition at line 1006 of file haplo_cluster.c.
static void cluster_mmm | ( | const Matblock_u8 * | ids, |
const Vector_u32 * | labels, | ||
const Matrix_i32 * | markers, | ||
const Matblock_u8 * | aux_ids, | ||
const Vector_u32 * | aux_labels, | ||
const Matrix_i32 * | aux_markers | ||
) | [static] |
Definition at line 1047 of file haplo_cluster.c.
int main | ( | int | argc, |
const char ** | argv | ||
) |
Main function for the haplo-cluster program.
Definition at line 1140 of file haplo_cluster.c.
Option_no_arg opts_no_arg[NUM_OPTS_NO_ARG] |
Program options not taking an argument.
Definition at line 106 of file haplo_cluster.c.
Option_with_arg opts_with_arg[NUM_OPTS_WITH_ARG] |
Program options taking an argument.
Definition at line 109 of file haplo_cluster.c.
uint32_t num_clusters = NUM_CLUSTERS [static] |
Number of clusters to use in the K-means algorithm.
Definition at line 112 of file haplo_cluster.c.
Haplo_cluster_type cluster_type = CLUSTER_TYPE [static] |
Type of clustering to use.
Definition at line 115 of file haplo_cluster.c.
const char* means_out_fname = MEANS_OUT_FNAME [static] |
Cluster means output file name.
Definition at line 118 of file haplo_cluster.c.
const char* weights_out_fname = WEIGHTS_OUT_FNAME [static] |
Cluster weights output file name.
Definition at line 121 of file haplo_cluster.c.
const char* responses_out_fname = RESPONSES_OUT_FNAME [static] |
Cluster responses output file name.
Definition at line 124 of file haplo_cluster.c.
const char* members_out_fname = MEMBERS_OUT_FNAME [static] |
Cluster members output file name.
Definition at line 127 of file haplo_cluster.c.