Haplo Prediction
predict haplogroups
output.c
Go to the documentation of this file.
00001 /*
00002  * This work is licensed under a Creative Commons 
00003  * Attribution-Noncommercial-Share Alike 3.0 United States License.
00004  * 
00005  *    http://creativecommons.org/licenses/by-nc-sa/3.0/us/
00006  * 
00007  * You are free:
00008  * 
00009  *    to Share - to copy, distribute, display, and perform the work
00010  *    to Remix - to make derivative works
00011  * 
00012  * Under the following conditions:
00013  * 
00014  *    Attribution. You must attribute the work in the manner specified by the
00015  *    author or licensor (but not in any way that suggests that they endorse you
00016  *    or your use of the work).
00017  * 
00018  *    Noncommercial. You may not use this work for commercial purposes.
00019  * 
00020  *    Share Alike. If you alter, transform, or build upon this work, you may
00021  *    distribute the resulting work only under the same or similar license to
00022  *    this one.
00023  * 
00024  * For any reuse or distribution, you must make clear to others the license
00025  * terms of this work. The best way to do this is by including this header.
00026  * 
00027  * Any of the above conditions can be waived if you get permission from the
00028  * copyright holder.
00029  * 
00030  * Apart from the remix rights granted under this license, nothing in this
00031  * license impairs or restricts the author's moral rights.
00032  */
00033 
00034 
00046 #include <config.h>
00047 
00048 #include <stdlib.h>
00049 #include <stdio.h>
00050 #include <inttypes.h>
00051 #include <assert.h>
00052 #include <errno.h>
00053 #include <string.h>
00054 
00055 #include <libxml/tree.h>
00056 
00057 #ifdef HAPLO_HAVE_DMALLOC
00058 #include <dmalloc.h>
00059 #endif
00060 
00061 #include <jwsc/base/error.h>
00062 #include <jwsc/vector/vector.h>
00063 #include <jwsc/matblock/matblock.h>
00064 
00065 #include "options.h"
00066 #include "xml.h"
00067 #include "haplo_groups.h"
00068 #include "output.h"
00069 
00070 
00082 Error* open_output
00083 (
00084     FILE**      fp_out, 
00085     xmlDoc**    xml_doc_out, 
00086     const char* xml_name,
00087     const char* xml_dtd,
00088     const char* fname
00089 )
00090 {
00091     const char* fp_mode;
00092     int   slen = 256;
00093     char  str[slen];
00094 
00095     switch (opts.output_format)
00096     {
00097         case HAPLO_OUTPUT_TXT:
00098         case HAPLO_OUTPUT_CSV:
00099             fp_mode = (strcmp("/dev/stdout", fname) == 0) ? "a" : "w";
00100             if (!((*fp_out) = fopen(fname, fp_mode)))
00101             {
00102                 snprintf(str, slen, "%s: %s", fname, strerror(errno));
00103                 return JWSC_EIO(str);
00104             }
00105             break;
00106         case HAPLO_OUTPUT_XML:
00107             if (!xml_doc_out)
00108                 return NULL;
00109             *xml_doc_out = xmlNewDoc((xmlChar*)"1.0");
00110             xmlDocSetRootElement(*xml_doc_out, XMLNewNode(xml_name));
00111             XMLCreateIntSubset(*xml_doc_out, xml_name, NULL, xml_dtd);
00112             break;
00113     }
00114 
00115     return NULL;
00116 }
00117 
00118 
00128 Error* close_output(FILE* fp, xmlDoc* xml_doc, const char* fname)
00129 {
00130     int  slen = 256;
00131     char str[slen];
00132 
00133     switch (opts.output_format)
00134     {
00135         case HAPLO_OUTPUT_TXT:
00136         case HAPLO_OUTPUT_CSV:
00137             if (fclose(fp) != 0)
00138             {
00139                 snprintf(str, slen, "%s: %s", fname, strerror(errno));
00140                 return JWSC_EIO(str);
00141             }
00142             break;
00143         case HAPLO_OUTPUT_XML:
00144             if (!xml_doc)
00145                 return NULL;
00146             if (xmlSaveFormatFile(fname, xml_doc, 1) <= 0)
00147             {
00148                 snprintf(str, slen, "%s: %s", fname, strerror(errno));
00149                 return JWSC_EIO(str);
00150             }
00151             xmlFreeDoc(xml_doc);
00152             xmlCleanupParser();
00153             break;
00154     }
00155 
00156     return NULL;
00157 }
00158 
00166 void write_ids
00167 (
00168     const Matblock_u8* ids,
00169     uint32_t           i,
00170     Haplo_field_sep    field_sep,
00171     FILE*              fp,
00172     xmlNode*           xml_node
00173 )
00174 {
00175     uint32_t    j;
00176     const char* fmt;
00177     xmlNode*    xml_child = NULL;
00178     char*       xml_buf   = NULL;
00179     uint32_t    xml_len;
00180 
00181     if (!ids)
00182         return;
00183 
00184     for (j = 0; j < ids->num_rows; j++)
00185     {
00186         switch (opts.output_format)
00187         {
00188             case HAPLO_OUTPUT_TXT:
00189                 switch (field_sep)
00190                 {
00191                     case HAPLO_SEP_PREFIX:
00192                         fmt = "  %-10s"; break;
00193                     case HAPLO_SEP_SUFFIX:
00194                         fmt = "%-10s  "; break;
00195                     case HAPLO_SEP_BOTH:
00196                         fmt = "  %-10s  "; break;
00197                     case HAPLO_SEP_NONE:
00198                         fmt = "%-10s"; break;
00199                 }
00200                 fprintf(fp, fmt, ids->elts[ i ][ j ]);
00201                 break;
00202             case HAPLO_OUTPUT_CSV:
00203                 switch (field_sep)
00204                 {
00205                     case HAPLO_SEP_PREFIX:
00206                         fmt = ",%s"; break;
00207                     case HAPLO_SEP_SUFFIX:
00208                         fmt = "%s,"; break;
00209                     case HAPLO_SEP_BOTH:
00210                         fmt = ",%s,"; break;
00211                     case HAPLO_SEP_NONE:
00212                         fmt = "%s"; break;
00213                 }
00214                 fprintf(fp, fmt, ids->elts[ i ][ j ]);
00215                 break;
00216             case HAPLO_OUTPUT_XML:
00217                 xml_len = strlen((char*)ids->elts[ i ][ j ]) + 256;
00218                 xml_buf = malloc(xml_len*sizeof(char));
00219                 snprintf(xml_buf, xml_len, "%s", ids->elts[ i ][ j ]);
00220                 xml_child = XMLNewChild(xml_node, "id", xml_buf);
00221                 snprintf(xml_buf, xml_len, "%d", j+1);
00222                 XMLNewProp(xml_child, "number", xml_buf);
00223                 free(xml_buf);
00224                 break;
00225         }
00226     }
00227 }
00228 
00236 void write_label
00237 (
00238     const Vector_u32* labels,
00239     uint32_t          i,
00240     Haplo_field_sep   field_sep,
00241     FILE*             fp,
00242     xmlNode*          xml_node
00243 )
00244 {
00245     const char* fmt;
00246     const char* label;
00247     char*       xml_buf = NULL;
00248     uint32_t    xml_len;
00249 
00250     if (!labels)
00251         return;
00252 
00253     assert(lookup_haplo_group_label_from_index(&label, labels->elts[ i ]) 
00254             == NULL);
00255 
00256     switch (opts.output_format)
00257     {
00258         case HAPLO_OUTPUT_TXT:
00259             switch (field_sep)
00260             {
00261                 case HAPLO_SEP_PREFIX:
00262                     fmt = "  %-10s"; break;
00263                 case HAPLO_SEP_SUFFIX:
00264                     fmt = "%-10s  "; break;
00265                 case HAPLO_SEP_BOTH:
00266                     fmt = "  %-10s  "; break;
00267                 case HAPLO_SEP_NONE:
00268                     fmt = "%-10s"; break;
00269             }
00270             fprintf(fp, fmt, label);
00271             break;
00272         case HAPLO_OUTPUT_CSV:
00273             switch (field_sep)
00274             {
00275                 case HAPLO_SEP_PREFIX:
00276                     fmt = ",%s"; break;
00277                 case HAPLO_SEP_SUFFIX:
00278                     fmt = "%s,"; break;
00279                 case HAPLO_SEP_BOTH:
00280                     fmt = ",%s,"; break;
00281                 case HAPLO_SEP_NONE:
00282                     fmt = "%s"; break;
00283             }
00284             fprintf(fp, fmt, label);
00285             break;
00286         case HAPLO_OUTPUT_XML:
00287             xml_len = strlen(label) + 1;
00288             xml_buf = malloc(xml_len*sizeof(char));
00289             snprintf(xml_buf, xml_len, "%s", label);
00290             XMLNewChild(xml_node, "label", xml_buf);
00291             free(xml_buf);
00292             break;
00293     }
00294 }
00295 
00296 
00304 void write_conf
00305 (
00306     const Vector_d* confs,
00307     uint32_t        i,
00308     Haplo_field_sep field_sep,
00309     FILE*           fp,
00310     xmlNode*        xml_node
00311 )
00312 {
00313     double      conf;
00314     const char* fmt;
00315     char        xml_buf[256] = {0};
00316 
00317     if (!confs)
00318         return;
00319 
00320     conf = confs->elts[ i ];
00321 
00322     switch (opts.output_format)
00323     {
00324         case HAPLO_OUTPUT_TXT:
00325             switch (field_sep)
00326             {
00327                 case HAPLO_SEP_PREFIX:
00328                     fmt = "  %-5.3f"; break;
00329                 case HAPLO_SEP_SUFFIX:
00330                     fmt = "%-5.3f  "; break;
00331                 case HAPLO_SEP_BOTH:
00332                     fmt = "  %-5.3f  "; break;
00333                 case HAPLO_SEP_NONE:
00334                     fmt = "%-5.3f"; break;
00335             }
00336             fprintf(fp, fmt, conf);
00337             break;
00338         case HAPLO_OUTPUT_CSV:
00339             switch (field_sep)
00340             {
00341                 case HAPLO_SEP_PREFIX:
00342                     fmt = ",%.3f"; break;
00343                 case HAPLO_SEP_SUFFIX:
00344                     fmt = "%.3f,"; break;
00345                 case HAPLO_SEP_BOTH:
00346                     fmt = ",%.3f,"; break;
00347                 case HAPLO_SEP_NONE:
00348                     fmt = "%.3f"; break;
00349             }
00350             fprintf(fp, fmt, conf);
00351             break;
00352         case HAPLO_OUTPUT_XML:
00353             snprintf(xml_buf, 256, "%5.3f", conf);
00354             XMLNewChild(xml_node, "confidence", xml_buf);
00355             break;
00356     }
00357 }
00358 
00359 
00369 void write_prediction
00370 (
00371     const char*       type,
00372     const Vector_u32* labels,
00373     const Vector_d*   confs,
00374     uint32_t          i,
00375     Haplo_field_sep   field_sep,
00376     FILE*             fp,
00377     xmlNode*          xml_node
00378 )
00379 {
00380     xmlNode* xml_child;
00381 
00382     if (!labels)
00383         return;
00384 
00385     if (opts.output_format == HAPLO_OUTPUT_XML)
00386     {
00387         xml_child = XMLNewChild(xml_node, "prediction", NULL);
00388         XMLNewProp(xml_child, "type", type);
00389     }
00390 
00391     write_label(labels, i, field_sep, fp, xml_child);
00392     write_conf(confs, i, field_sep, fp, xml_child);
00393 }
00394 
00395 
00404 void write_ancestor_label
00405 (
00406     const Vector_u32*  ancestor_types,
00407     const Vector_u32*  ancestor_labels,
00408     uint32_t           i,
00409     Haplo_field_sep    field_sep,
00410     FILE*              fp,
00411     xmlNode*           xml_node
00412 )
00413 {
00414     uint32_t            ancestor;
00415     Haplo_ancestor_type type;
00416     const char*         fmt;
00417     const char*         label;
00418     xmlNode*            xml_child = NULL;
00419     char*               xml_buf   = NULL;
00420     uint32_t            xml_len;
00421 
00422     assert(ancestor_types && ancestor_labels);
00423 
00424     type = ancestor_types->elts[ i ];
00425     ancestor = ancestor_labels->elts[ i ];
00426 
00427     switch (type)
00428     {
00429         case HAPLO_ANCESTOR_INDIRECT:
00430             assert(lookup_haplo_group_label_from_index(&label, ancestor)==NULL);
00431             break;
00432         case HAPLO_ANCESTOR_DIRECT:
00433             assert(lookup_haplo_group_label_from_index(&label, ancestor)==NULL);
00434             break;
00435         case HAPLO_ANCESTOR_NONE:
00436             label = "NONE";
00437             break;
00438     }
00439 
00440     switch (opts.output_format)
00441     {
00442         case HAPLO_OUTPUT_TXT:
00443             switch (field_sep)
00444             {
00445                 case HAPLO_SEP_PREFIX:
00446                     fmt = "  %-10s  %-4d"; break;
00447                 case HAPLO_SEP_SUFFIX:
00448                     fmt = "%-10s  %-4d  "; break;
00449                 case HAPLO_SEP_BOTH:
00450                     fmt = "  %-10s  %-4d  "; break;
00451                 case HAPLO_SEP_NONE:
00452                     fmt = "%-10s  %-4d"; break;
00453             }
00454             fprintf(fp, fmt, label, type);
00455             break;
00456         case HAPLO_OUTPUT_CSV:
00457             switch (field_sep)
00458             {
00459                 case HAPLO_SEP_PREFIX:
00460                     fmt = ",%s,%d"; break;
00461                 case HAPLO_SEP_SUFFIX:
00462                     fmt = "%s,%d,"; break;
00463                 case HAPLO_SEP_BOTH:
00464                     fmt = ",%s,%d,"; break;
00465                 case HAPLO_SEP_NONE:
00466                     fmt = "%s,%d"; break;
00467             }
00468             fprintf(fp, fmt, label, type);
00469             break;
00470         case HAPLO_OUTPUT_XML:
00471             xml_len = strlen(label) + 9;
00472             xml_buf = malloc(xml_len*sizeof(char));
00473             snprintf(xml_buf, xml_len, "%s", label);
00474             xml_child = XMLNewChild(xml_node, "ancestor", xml_buf);
00475             switch (type)
00476             {
00477                 case HAPLO_ANCESTOR_DIRECT:
00478                     snprintf(xml_buf, 9, "%s", "direct"); 
00479                     break;
00480                 case HAPLO_ANCESTOR_INDIRECT:
00481                     snprintf(xml_buf, 9, "%s", "indirect"); 
00482                     break;
00483                 case HAPLO_ANCESTOR_NONE:
00484                     snprintf(xml_buf, 9, "%s", "none"); 
00485                     break;
00486             }
00487             XMLNewProp(xml_child, "type", xml_buf);
00488             free(xml_buf);
00489             break;
00490     }
00491 }