Haplo Prediction
predict haplogroups
|
00001 /* 00002 * This work is licensed under a Creative Commons 00003 * Attribution-Noncommercial-Share Alike 3.0 United States License. 00004 * 00005 * http://creativecommons.org/licenses/by-nc-sa/3.0/us/ 00006 * 00007 * You are free: 00008 * 00009 * to Share - to copy, distribute, display, and perform the work 00010 * to Remix - to make derivative works 00011 * 00012 * Under the following conditions: 00013 * 00014 * Attribution. You must attribute the work in the manner specified by the 00015 * author or licensor (but not in any way that suggests that they endorse you 00016 * or your use of the work). 00017 * 00018 * Noncommercial. You may not use this work for commercial purposes. 00019 * 00020 * Share Alike. If you alter, transform, or build upon this work, you may 00021 * distribute the resulting work only under the same or similar license to 00022 * this one. 00023 * 00024 * For any reuse or distribution, you must make clear to others the license 00025 * terms of this work. The best way to do this is by including this header. 00026 * 00027 * Any of the above conditions can be waived if you get permission from the 00028 * copyright holder. 00029 * 00030 * Apart from the remix rights granted under this license, nothing in this 00031 * license impairs or restricts the author's moral rights. 00032 */ 00033 00034 00046 #include <config.h> 00047 00048 #include <stdlib.h> 00049 #include <stdio.h> 00050 #include <inttypes.h> 00051 #include <assert.h> 00052 #include <errno.h> 00053 #include <string.h> 00054 00055 #include <libxml/tree.h> 00056 00057 #ifdef HAPLO_HAVE_DMALLOC 00058 #include <dmalloc.h> 00059 #endif 00060 00061 #include <jwsc/base/error.h> 00062 #include <jwsc/vector/vector.h> 00063 #include <jwsc/matblock/matblock.h> 00064 00065 #include "options.h" 00066 #include "xml.h" 00067 #include "haplo_groups.h" 00068 #include "output.h" 00069 00070 00082 Error* open_output 00083 ( 00084 FILE** fp_out, 00085 xmlDoc** xml_doc_out, 00086 const char* xml_name, 00087 const char* xml_dtd, 00088 const char* fname 00089 ) 00090 { 00091 const char* fp_mode; 00092 int slen = 256; 00093 char str[slen]; 00094 00095 switch (opts.output_format) 00096 { 00097 case HAPLO_OUTPUT_TXT: 00098 case HAPLO_OUTPUT_CSV: 00099 fp_mode = (strcmp("/dev/stdout", fname) == 0) ? "a" : "w"; 00100 if (!((*fp_out) = fopen(fname, fp_mode))) 00101 { 00102 snprintf(str, slen, "%s: %s", fname, strerror(errno)); 00103 return JWSC_EIO(str); 00104 } 00105 break; 00106 case HAPLO_OUTPUT_XML: 00107 if (!xml_doc_out) 00108 return NULL; 00109 *xml_doc_out = xmlNewDoc((xmlChar*)"1.0"); 00110 xmlDocSetRootElement(*xml_doc_out, XMLNewNode(xml_name)); 00111 XMLCreateIntSubset(*xml_doc_out, xml_name, NULL, xml_dtd); 00112 break; 00113 } 00114 00115 return NULL; 00116 } 00117 00118 00128 Error* close_output(FILE* fp, xmlDoc* xml_doc, const char* fname) 00129 { 00130 int slen = 256; 00131 char str[slen]; 00132 00133 switch (opts.output_format) 00134 { 00135 case HAPLO_OUTPUT_TXT: 00136 case HAPLO_OUTPUT_CSV: 00137 if (fclose(fp) != 0) 00138 { 00139 snprintf(str, slen, "%s: %s", fname, strerror(errno)); 00140 return JWSC_EIO(str); 00141 } 00142 break; 00143 case HAPLO_OUTPUT_XML: 00144 if (!xml_doc) 00145 return NULL; 00146 if (xmlSaveFormatFile(fname, xml_doc, 1) <= 0) 00147 { 00148 snprintf(str, slen, "%s: %s", fname, strerror(errno)); 00149 return JWSC_EIO(str); 00150 } 00151 xmlFreeDoc(xml_doc); 00152 xmlCleanupParser(); 00153 break; 00154 } 00155 00156 return NULL; 00157 } 00158 00166 void write_ids 00167 ( 00168 const Matblock_u8* ids, 00169 uint32_t i, 00170 Haplo_field_sep field_sep, 00171 FILE* fp, 00172 xmlNode* xml_node 00173 ) 00174 { 00175 uint32_t j; 00176 const char* fmt; 00177 xmlNode* xml_child = NULL; 00178 char* xml_buf = NULL; 00179 uint32_t xml_len; 00180 00181 if (!ids) 00182 return; 00183 00184 for (j = 0; j < ids->num_rows; j++) 00185 { 00186 switch (opts.output_format) 00187 { 00188 case HAPLO_OUTPUT_TXT: 00189 switch (field_sep) 00190 { 00191 case HAPLO_SEP_PREFIX: 00192 fmt = " %-10s"; break; 00193 case HAPLO_SEP_SUFFIX: 00194 fmt = "%-10s "; break; 00195 case HAPLO_SEP_BOTH: 00196 fmt = " %-10s "; break; 00197 case HAPLO_SEP_NONE: 00198 fmt = "%-10s"; break; 00199 } 00200 fprintf(fp, fmt, ids->elts[ i ][ j ]); 00201 break; 00202 case HAPLO_OUTPUT_CSV: 00203 switch (field_sep) 00204 { 00205 case HAPLO_SEP_PREFIX: 00206 fmt = ",%s"; break; 00207 case HAPLO_SEP_SUFFIX: 00208 fmt = "%s,"; break; 00209 case HAPLO_SEP_BOTH: 00210 fmt = ",%s,"; break; 00211 case HAPLO_SEP_NONE: 00212 fmt = "%s"; break; 00213 } 00214 fprintf(fp, fmt, ids->elts[ i ][ j ]); 00215 break; 00216 case HAPLO_OUTPUT_XML: 00217 xml_len = strlen((char*)ids->elts[ i ][ j ]) + 256; 00218 xml_buf = malloc(xml_len*sizeof(char)); 00219 snprintf(xml_buf, xml_len, "%s", ids->elts[ i ][ j ]); 00220 xml_child = XMLNewChild(xml_node, "id", xml_buf); 00221 snprintf(xml_buf, xml_len, "%d", j+1); 00222 XMLNewProp(xml_child, "number", xml_buf); 00223 free(xml_buf); 00224 break; 00225 } 00226 } 00227 } 00228 00236 void write_label 00237 ( 00238 const Vector_u32* labels, 00239 uint32_t i, 00240 Haplo_field_sep field_sep, 00241 FILE* fp, 00242 xmlNode* xml_node 00243 ) 00244 { 00245 const char* fmt; 00246 const char* label; 00247 char* xml_buf = NULL; 00248 uint32_t xml_len; 00249 00250 if (!labels) 00251 return; 00252 00253 assert(lookup_haplo_group_label_from_index(&label, labels->elts[ i ]) 00254 == NULL); 00255 00256 switch (opts.output_format) 00257 { 00258 case HAPLO_OUTPUT_TXT: 00259 switch (field_sep) 00260 { 00261 case HAPLO_SEP_PREFIX: 00262 fmt = " %-10s"; break; 00263 case HAPLO_SEP_SUFFIX: 00264 fmt = "%-10s "; break; 00265 case HAPLO_SEP_BOTH: 00266 fmt = " %-10s "; break; 00267 case HAPLO_SEP_NONE: 00268 fmt = "%-10s"; break; 00269 } 00270 fprintf(fp, fmt, label); 00271 break; 00272 case HAPLO_OUTPUT_CSV: 00273 switch (field_sep) 00274 { 00275 case HAPLO_SEP_PREFIX: 00276 fmt = ",%s"; break; 00277 case HAPLO_SEP_SUFFIX: 00278 fmt = "%s,"; break; 00279 case HAPLO_SEP_BOTH: 00280 fmt = ",%s,"; break; 00281 case HAPLO_SEP_NONE: 00282 fmt = "%s"; break; 00283 } 00284 fprintf(fp, fmt, label); 00285 break; 00286 case HAPLO_OUTPUT_XML: 00287 xml_len = strlen(label) + 1; 00288 xml_buf = malloc(xml_len*sizeof(char)); 00289 snprintf(xml_buf, xml_len, "%s", label); 00290 XMLNewChild(xml_node, "label", xml_buf); 00291 free(xml_buf); 00292 break; 00293 } 00294 } 00295 00296 00304 void write_conf 00305 ( 00306 const Vector_d* confs, 00307 uint32_t i, 00308 Haplo_field_sep field_sep, 00309 FILE* fp, 00310 xmlNode* xml_node 00311 ) 00312 { 00313 double conf; 00314 const char* fmt; 00315 char xml_buf[256] = {0}; 00316 00317 if (!confs) 00318 return; 00319 00320 conf = confs->elts[ i ]; 00321 00322 switch (opts.output_format) 00323 { 00324 case HAPLO_OUTPUT_TXT: 00325 switch (field_sep) 00326 { 00327 case HAPLO_SEP_PREFIX: 00328 fmt = " %-5.3f"; break; 00329 case HAPLO_SEP_SUFFIX: 00330 fmt = "%-5.3f "; break; 00331 case HAPLO_SEP_BOTH: 00332 fmt = " %-5.3f "; break; 00333 case HAPLO_SEP_NONE: 00334 fmt = "%-5.3f"; break; 00335 } 00336 fprintf(fp, fmt, conf); 00337 break; 00338 case HAPLO_OUTPUT_CSV: 00339 switch (field_sep) 00340 { 00341 case HAPLO_SEP_PREFIX: 00342 fmt = ",%.3f"; break; 00343 case HAPLO_SEP_SUFFIX: 00344 fmt = "%.3f,"; break; 00345 case HAPLO_SEP_BOTH: 00346 fmt = ",%.3f,"; break; 00347 case HAPLO_SEP_NONE: 00348 fmt = "%.3f"; break; 00349 } 00350 fprintf(fp, fmt, conf); 00351 break; 00352 case HAPLO_OUTPUT_XML: 00353 snprintf(xml_buf, 256, "%5.3f", conf); 00354 XMLNewChild(xml_node, "confidence", xml_buf); 00355 break; 00356 } 00357 } 00358 00359 00369 void write_prediction 00370 ( 00371 const char* type, 00372 const Vector_u32* labels, 00373 const Vector_d* confs, 00374 uint32_t i, 00375 Haplo_field_sep field_sep, 00376 FILE* fp, 00377 xmlNode* xml_node 00378 ) 00379 { 00380 xmlNode* xml_child; 00381 00382 if (!labels) 00383 return; 00384 00385 if (opts.output_format == HAPLO_OUTPUT_XML) 00386 { 00387 xml_child = XMLNewChild(xml_node, "prediction", NULL); 00388 XMLNewProp(xml_child, "type", type); 00389 } 00390 00391 write_label(labels, i, field_sep, fp, xml_child); 00392 write_conf(confs, i, field_sep, fp, xml_child); 00393 } 00394 00395 00404 void write_ancestor_label 00405 ( 00406 const Vector_u32* ancestor_types, 00407 const Vector_u32* ancestor_labels, 00408 uint32_t i, 00409 Haplo_field_sep field_sep, 00410 FILE* fp, 00411 xmlNode* xml_node 00412 ) 00413 { 00414 uint32_t ancestor; 00415 Haplo_ancestor_type type; 00416 const char* fmt; 00417 const char* label; 00418 xmlNode* xml_child = NULL; 00419 char* xml_buf = NULL; 00420 uint32_t xml_len; 00421 00422 assert(ancestor_types && ancestor_labels); 00423 00424 type = ancestor_types->elts[ i ]; 00425 ancestor = ancestor_labels->elts[ i ]; 00426 00427 switch (type) 00428 { 00429 case HAPLO_ANCESTOR_INDIRECT: 00430 assert(lookup_haplo_group_label_from_index(&label, ancestor)==NULL); 00431 break; 00432 case HAPLO_ANCESTOR_DIRECT: 00433 assert(lookup_haplo_group_label_from_index(&label, ancestor)==NULL); 00434 break; 00435 case HAPLO_ANCESTOR_NONE: 00436 label = "NONE"; 00437 break; 00438 } 00439 00440 switch (opts.output_format) 00441 { 00442 case HAPLO_OUTPUT_TXT: 00443 switch (field_sep) 00444 { 00445 case HAPLO_SEP_PREFIX: 00446 fmt = " %-10s %-4d"; break; 00447 case HAPLO_SEP_SUFFIX: 00448 fmt = "%-10s %-4d "; break; 00449 case HAPLO_SEP_BOTH: 00450 fmt = " %-10s %-4d "; break; 00451 case HAPLO_SEP_NONE: 00452 fmt = "%-10s %-4d"; break; 00453 } 00454 fprintf(fp, fmt, label, type); 00455 break; 00456 case HAPLO_OUTPUT_CSV: 00457 switch (field_sep) 00458 { 00459 case HAPLO_SEP_PREFIX: 00460 fmt = ",%s,%d"; break; 00461 case HAPLO_SEP_SUFFIX: 00462 fmt = "%s,%d,"; break; 00463 case HAPLO_SEP_BOTH: 00464 fmt = ",%s,%d,"; break; 00465 case HAPLO_SEP_NONE: 00466 fmt = "%s,%d"; break; 00467 } 00468 fprintf(fp, fmt, label, type); 00469 break; 00470 case HAPLO_OUTPUT_XML: 00471 xml_len = strlen(label) + 9; 00472 xml_buf = malloc(xml_len*sizeof(char)); 00473 snprintf(xml_buf, xml_len, "%s", label); 00474 xml_child = XMLNewChild(xml_node, "ancestor", xml_buf); 00475 switch (type) 00476 { 00477 case HAPLO_ANCESTOR_DIRECT: 00478 snprintf(xml_buf, 9, "%s", "direct"); 00479 break; 00480 case HAPLO_ANCESTOR_INDIRECT: 00481 snprintf(xml_buf, 9, "%s", "indirect"); 00482 break; 00483 case HAPLO_ANCESTOR_NONE: 00484 snprintf(xml_buf, 9, "%s", "none"); 00485 break; 00486 } 00487 XMLNewProp(xml_child, "type", xml_buf); 00488 free(xml_buf); 00489 break; 00490 } 00491 }