Haplo Prediction
predict haplogroups
Defines | Enumerations | Functions | Variables
haplo_cluster.c File Reference

Clusters a set of Y-STR samples. More...

#include <config.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <inttypes.h>
#include <string.h>
#include <math.h>
#include <libxml/tree.h>
#include <jwsc/base/error.h>
#include <jwsc/base/option.h>
#include <jwsc/base/limits.h>
#include <jwsc/vector/vector.h>
#include <jwsc/matrix/matrix.h>
#include <jwsc/matrix/matrix_io.h>
#include <jwsc/matblock/matblock.h>
#include <jwsc/stat/kmeans.h>
#include <jwsc/stat/gmm.h>
#include <jwsc/stat/mmm.h>
#include "haplo_groups.h"
#include "options.h"
#include "output.h"
#include "input.h"
#include "xml.h"

Go to the source code of this file.

Defines

#define NUM_OPTS_NO_ARG   0 + NUM_SHARED_OPTS_NO_ARG
#define NUM_OPTS_WITH_ARG   12 + NUM_SHARED_OPTS_WITH_ARG
#define LABEL_COL   0
#define NUM_CLUSTERS   4
#define CLUSTER_TYPE   HAPLO_CLUSTER_KMEANS
#define MEANS_OUT_FNAME   "/dev/null"
#define WEIGHTS_OUT_FNAME   "/dev/null"
#define RESPONSES_OUT_FNAME   "/dev/null"
#define MEMBERS_OUT_FNAME   "/dev/stdout"

Enumerations

enum  Haplo_cluster_type { HAPLO_CLUSTER_KMEANS, HAPLO_CLUSTER_GMM, HAPLO_CLUSTER_MMM }
 Types of clustering supported. More...

Functions

uint32_t get_num_opts_no_arg ()
 Returns the number of options not taking an argument.
uint32_t get_num_opts_with_arg ()
 Returns the number of options taking an argument.
void print_usage ()
 Prints the program usage to stderr.
static Error * process_num_clusters_opt (Option_arg arg)
 Process the 'num-clusters' program option.
static Error * process_cluster_type_opt (Option_arg arg)
 Process the 'cluster-type' program option.
Error * process_means_out_opt (Option_arg arg)
 Process the 'means-out' program option.
Error * process_weights_out_opt (Option_arg arg)
 Process the 'weights-out' program option.
Error * process_responses_out_opt (Option_arg arg)
 Process the 'responses-out' program option.
Error * process_members_out_opt (Option_arg arg)
 Process the 'members-out' program option.
static void init_cluster_options ()
 Initializes the haplo-cluster program options.
static void write_means (const Matrix_d *means)
 Writes the clustering means.
static void write_mmm_means (const Matblock_d *means)
 Writes the MMM clustering means.
static void write_members_header (const Matblock_u8 *ids, const Vector_u32 *labels, FILE *fp)
 Writes the file header information for kmeans membership.
static void write_members (const Matblock_u8 *ids, const Vector_u32 *labels, const Vector_i32 *members)
 Writes the clustering membership.
static void write_weights (const Vector_d *weights)
 Writes the clustering weights.
static void write_responses_header (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_d *responses, FILE *fp)
 Writes the file header for cluster responsibilities.
static void write_responses (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_d *responses)
 Writes the clustering responsibilities.
static void write_kmeans_results (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_d *means, const Vector_i32 *members)
 Writes the kmeans results to files.
static void assign_gmm_members (Vector_i32 **members_out, const Matrix_d *responses)
 Assigns hard cluster membership from GMM responsibilities.
static void assign_mmm_members (Vector_i32 **members_out, const Matrix_d *responses)
 Assigns hard cluster membership from MMM responsibilities.
static void assign_mmm_aux_members (Vector_i32 **members_out, const Matblock_d *means, const Vector_d *weights, const Matblock_u32 *markers)
 Assigns hard cluster membership for auxiliary markers using the MMM means and weights.
static void write_gmm_results (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_d *means, const Matblock_d *covars, const Vector_d *weights, const Matrix_d *responses, const Vector_i32 *members)
 Writes the Gaussian mixture model results to files.
static void write_mmm_results (const Matblock_u8 *ids, const Vector_u32 *labels, const Matblock_d *means, const Vector_d *weights, const Matrix_d *responses, const Vector_i32 *members)
 Writes the multinomial mixture model results to files.
static void write_mmm_aux_results (const Matblock_u8 *ids, const Vector_u32 *labels, const Vector_i32 *members)
 Writes the auxiliary multinomial mixture model results to files.
static void cluster_kmeans (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_i32 *markers)
static void cluster_gmm (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_i32 *markers)
static void cluster_mmm (const Matblock_u8 *ids, const Vector_u32 *labels, const Matrix_i32 *markers, const Matblock_u8 *aux_ids, const Vector_u32 *aux_labels, const Matrix_i32 *aux_markers)
int main (int argc, const char **argv)
 Main function for the haplo-cluster program.

Variables

Option_no_arg opts_no_arg [NUM_OPTS_NO_ARG]
 Program options not taking an argument.
Option_with_arg opts_with_arg [NUM_OPTS_WITH_ARG]
 Program options taking an argument.
static uint32_t num_clusters = NUM_CLUSTERS
 Number of clusters to use in the K-means algorithm.
static Haplo_cluster_type cluster_type = CLUSTER_TYPE
 Type of clustering to use.
static const char * means_out_fname = MEANS_OUT_FNAME
 Cluster means output file name.
static const char * weights_out_fname = WEIGHTS_OUT_FNAME
 Cluster weights output file name.
static const char * responses_out_fname = RESPONSES_OUT_FNAME
 Cluster responses output file name.
static const char * members_out_fname = MEMBERS_OUT_FNAME
 Cluster members output file name.

Detailed Description

Clusters a set of Y-STR samples.

Author:
Joseph Schlecht
License:
Creative Commons BY-NC-SA 3.0

Available algorithms for clustering include K-means, Gaussian mixture model, and multinomial mixture model.

Definition in file haplo_cluster.c.


Define Documentation

#define NUM_OPTS_NO_ARG   0 + NUM_SHARED_OPTS_NO_ARG

Definition at line 82 of file haplo_cluster.c.

#define NUM_OPTS_WITH_ARG   12 + NUM_SHARED_OPTS_WITH_ARG

Definition at line 83 of file haplo_cluster.c.

#define LABEL_COL   0

Definition at line 86 of file haplo_cluster.c.

#define NUM_CLUSTERS   4

Definition at line 87 of file haplo_cluster.c.

#define CLUSTER_TYPE   HAPLO_CLUSTER_KMEANS

Definition at line 88 of file haplo_cluster.c.

#define MEANS_OUT_FNAME   "/dev/null"

Definition at line 89 of file haplo_cluster.c.

#define WEIGHTS_OUT_FNAME   "/dev/null"

Definition at line 90 of file haplo_cluster.c.

#define RESPONSES_OUT_FNAME   "/dev/null"

Definition at line 91 of file haplo_cluster.c.

#define MEMBERS_OUT_FNAME   "/dev/stdout"

Definition at line 92 of file haplo_cluster.c.


Enumeration Type Documentation

enum Haplo_cluster_type

Types of clustering supported.

Enumerator:
HAPLO_CLUSTER_KMEANS 
HAPLO_CLUSTER_GMM 
HAPLO_CLUSTER_MMM 

Definition at line 96 of file haplo_cluster.c.


Function Documentation

uint32_t get_num_opts_no_arg ( )

Returns the number of options not taking an argument.

Definition at line 131 of file haplo_cluster.c.

uint32_t get_num_opts_with_arg ( )

Returns the number of options taking an argument.

Definition at line 137 of file haplo_cluster.c.

void print_usage ( void  )

Prints the program usage to stderr.

Definition at line 143 of file haplo_cluster.c.

static Error* process_num_clusters_opt ( Option_arg  arg) [static]

Process the 'num-clusters' program option.

Definition at line 151 of file haplo_cluster.c.

static Error* process_cluster_type_opt ( Option_arg  arg) [static]

Process the 'cluster-type' program option.

Definition at line 165 of file haplo_cluster.c.

Error* process_means_out_opt ( Option_arg  arg)

Process the 'means-out' program option.

Definition at line 191 of file haplo_cluster.c.

Error* process_weights_out_opt ( Option_arg  arg)

Process the 'weights-out' program option.

Definition at line 202 of file haplo_cluster.c.

Error* process_responses_out_opt ( Option_arg  arg)

Process the 'responses-out' program option.

Definition at line 213 of file haplo_cluster.c.

Error* process_members_out_opt ( Option_arg  arg)

Process the 'members-out' program option.

Definition at line 224 of file haplo_cluster.c.

static void init_cluster_options ( ) [static]

Initializes the haplo-cluster program options.

Definition at line 235 of file haplo_cluster.c.

static void write_means ( const Matrix_d * means) [static]

Writes the clustering means.

Definition at line 325 of file haplo_cluster.c.

static void write_mmm_means ( const Matblock_d * means) [static]

Writes the MMM clustering means.

Definition at line 389 of file haplo_cluster.c.

static void write_members_header ( const Matblock_u8 * ids,
const Vector_u32 * labels,
FILE *  fp 
) [static]

Writes the file header information for kmeans membership.

Definition at line 469 of file haplo_cluster.c.

static void write_members ( const Matblock_u8 * ids,
const Vector_u32 * labels,
const Vector_i32 * members 
) [static]

Writes the clustering membership.

Definition at line 528 of file haplo_cluster.c.

static void write_weights ( const Vector_d * weights) [static]

Writes the clustering weights.

Definition at line 596 of file haplo_cluster.c.

static void write_responses_header ( const Matblock_u8 * ids,
const Vector_u32 * labels,
const Matrix_d * responses,
FILE *  fp 
) [static]

Writes the file header for cluster responsibilities.

Definition at line 644 of file haplo_cluster.c.

static void write_responses ( const Matblock_u8 * ids,
const Vector_u32 * labels,
const Matrix_d * responses 
) [static]

Writes the clustering responsibilities.

Definition at line 713 of file haplo_cluster.c.

static void write_kmeans_results ( const Matblock_u8 * ids,
const Vector_u32 * labels,
const Matrix_d * means,
const Vector_i32 * members 
) [static]

Writes the kmeans results to files.

Definition at line 798 of file haplo_cluster.c.

static void assign_gmm_members ( Vector_i32 **  members_out,
const Matrix_d * responses 
) [static]

Assigns hard cluster membership from GMM responsibilities.

Definition at line 811 of file haplo_cluster.c.

static void assign_mmm_members ( Vector_i32 **  members_out,
const Matrix_d * responses 
) [static]

Assigns hard cluster membership from MMM responsibilities.

Definition at line 837 of file haplo_cluster.c.

static void assign_mmm_aux_members ( Vector_i32 **  members_out,
const Matblock_d * means,
const Vector_d * weights,
const Matblock_u32 * markers 
) [static]

Assigns hard cluster membership for auxiliary markers using the MMM means and weights.

Definition at line 863 of file haplo_cluster.c.

static void write_gmm_results ( const Matblock_u8 * ids,
const Vector_u32 * labels,
const Matrix_d * means,
const Matblock_d * covars,
const Vector_d * weights,
const Matrix_d * responses,
const Vector_i32 * members 
) [static]

Writes the Gaussian mixture model results to files.

Definition at line 927 of file haplo_cluster.c.

static void write_mmm_results ( const Matblock_u8 * ids,
const Vector_u32 * labels,
const Matblock_d * means,
const Vector_d * weights,
const Matrix_d * responses,
const Vector_i32 * members 
) [static]

Writes the multinomial mixture model results to files.

Definition at line 945 of file haplo_cluster.c.

static void write_mmm_aux_results ( const Matblock_u8 * ids,
const Vector_u32 * labels,
const Vector_i32 * members 
) [static]

Writes the auxiliary multinomial mixture model results to files.

Definition at line 962 of file haplo_cluster.c.

static void cluster_kmeans ( const Matblock_u8 * ids,
const Vector_u32 * labels,
const Matrix_i32 * markers 
) [static]

Definition at line 972 of file haplo_cluster.c.

static void cluster_gmm ( const Matblock_u8 * ids,
const Vector_u32 * labels,
const Matrix_i32 * markers 
) [static]

Definition at line 1006 of file haplo_cluster.c.

static void cluster_mmm ( const Matblock_u8 * ids,
const Vector_u32 * labels,
const Matrix_i32 * markers,
const Matblock_u8 * aux_ids,
const Vector_u32 * aux_labels,
const Matrix_i32 * aux_markers 
) [static]

Definition at line 1047 of file haplo_cluster.c.

int main ( int  argc,
const char **  argv 
)

Main function for the haplo-cluster program.

Definition at line 1140 of file haplo_cluster.c.


Variable Documentation

Option_no_arg opts_no_arg[NUM_OPTS_NO_ARG]

Program options not taking an argument.

Definition at line 106 of file haplo_cluster.c.

Option_with_arg opts_with_arg[NUM_OPTS_WITH_ARG]

Program options taking an argument.

Definition at line 109 of file haplo_cluster.c.

uint32_t num_clusters = NUM_CLUSTERS [static]

Number of clusters to use in the K-means algorithm.

Definition at line 112 of file haplo_cluster.c.

Haplo_cluster_type cluster_type = CLUSTER_TYPE [static]

Type of clustering to use.

Definition at line 115 of file haplo_cluster.c.

const char* means_out_fname = MEANS_OUT_FNAME [static]

Cluster means output file name.

Definition at line 118 of file haplo_cluster.c.

const char* weights_out_fname = WEIGHTS_OUT_FNAME [static]

Cluster weights output file name.

Definition at line 121 of file haplo_cluster.c.

const char* responses_out_fname = RESPONSES_OUT_FNAME [static]

Cluster responses output file name.

Definition at line 124 of file haplo_cluster.c.

const char* members_out_fname = MEMBERS_OUT_FNAME [static]

Cluster members output file name.

Definition at line 127 of file haplo_cluster.c.