323{
324 columns.Clear();
325 columnHash.Clear();
326 columnCount = 0;
327
330
331 ReadLineHelper(input, buffer, tokens);
332
333 if (tokens.Length() != 4 || tokens[2].AsInteger() != (int) chromosomeX ||
334 tokens[0].AsInteger() < 0)
335 error("Cannot handle first line of data file\n\n"
336 "Expecting four (4) numeric values, which correspond to:\n"
337 " num-loci -- number of loci in the pedigree\n"
338 " this value must be positive\n"
339 " risk-locus -- locus for which risks should be calculated\n"
340 " this value will be ignored\n"
341 " sex-link -- are the loci sex linked [0 - No, 1 - Yes]\n"
342 " %s\n"
343 " program -- which LINKAGE program do you want to use?\n"
344 " this value will also be ignored\n\n"
345 "The actual input read:\n%s\n",
346 chromosomeX ? "expecting X-linked data, so this value must be ONE (1)"
347 : "expecting autosomal data, so this must be ZERO (0)",
348 (const char *) buffer);
349
350 int numloci = tokens[0];
351
352 ReadLineHelper(input, buffer, tokens);
353
354 if (tokens.Length() != 4 ||
355 tokens[0].AsInteger() != 0 ||
356 tokens[3].AsInteger() != 0)
357 error("Cannot handle second line of data file\n\n"
358 "Expecting four (4) numeric values, which correspond to:\n"
359 " mutation-model -- must be zero, corresponding to no mutation\n"
360 " male-mutation-rate -- ignored\n"
361 " female-mutation-rate -- ignored\n"
362 " linkage-disequilibrium -- must be zero, may be used in the future to\n"
363 " read haplotype frequencies\n\n"
364 "The actual input read:\n%s\n", (const char *) buffer);
365
367 int unknown = 0;
368
369 ReadLineHelper(input, buffer, markerOrder);
370
371 if (markerOrder.Length() > numloci)
372 error("The third line of the data file lists marker order\n\n"
373 "Although %d loci are defined [in the first line],\n"
374 "this line includes %d values:\n%s\n",
375 numloci, markerOrder.Length(), (const char *) buffer);
376
378 bool need_blank_line = false;
379
380 while (!
ifeof(input) && numloci--)
381 {
382 if (ReadLineHelper(input, buffer, tokens) == 0)
383 error("Linkage data file ends unexpectedly");
384
385 if (tokens.Length() < 2)
386 error("Incomplete locus information in data file\n"
387 "Information for each locus should include 2 or more fiels\n"
388 "The expected fields are:\n"
389 " field_type -- indicator of locus type (trait, marker,...)\n"
390 " alleles -- number of alleles\n"
391 " name -- locus name, preceded by hash (#) sign\n\n"
392 "The actual input read:\n%s\n", (const char *) buffer);
393
394 int locus_type = (int) tokens[0];
395 int alleles = (int) tokens[1];
396
397 String locus_name(
"LOCUS");
398 locus_name += ++unknown;
399
400 if (tokens.Length() > 2 && tokens[2][0] == '#')
401 {
402 if (tokens[2][1] != 0)
403 locus_name = tokens[2].SubStr(1);
404 else if (tokens.Length() > 3)
405 locus_name = tokens[3];
406 }
407
408 if ((locus_type == 4 && alleles == 0) ||
409 (locus_type == 4 && alleles == 1))
410 {
411 columnHash.Push(GetCovariateID(locus_name));
412 columns.Push(pcCovariate);
413 columnCount++;
414 continue;
415 }
416
417 if (locus_type == 0 && alleles == 0)
418 {
419 columnHash.Push(GetTraitID(locus_name));
420 columns.Push(pcTrait);
421 columnCount++;
422 continue;
423 }
424
425 if (ReadLineHelper(input, buffer, tokens) != alleles)
426 error("Expecting %d allele frequencies, but input has %d columns:\n"
427 "%s\n", alleles, tokens.Length(), (const char *) buffer);
428
429 Vector frequencies(alleles + 1);
430
431 frequencies[0] = 0.0;
432 for (int i = 1; i <= alleles; i++)
433 frequencies[i] = (double) tokens[i - 1];
434
435 double sum = frequencies.Sum();
436
437 if (sum <= 0.0)
438 error("Allele frequencies at %s sum to %f, which doesn't make sense\n",
439 (const char *) locus_name, sum);
440
441 if (fabs(sum - 1.0) > 1.2e-5)
442 {
443 printf("Allele frequencies at %s sum to %f, adjusted to 1.0\n",
444 (const char *) locus_name, sum);
445 need_blank_line = true;
446 }
447
448 if (sum != 1.0)
449 frequencies *= 1.0 / sum;
450
451 switch (locus_type)
452 {
453 case 1 :
454 {
455
456 columnHash.Push(GetAffectionID(locus_name));
457 columns.Push(pcAffection);
458 columnCount++;
459
460
461 if (ReadLineHelper(input, buffer, tokens) == 0)
462 error("Linkage data file ends unexpectedly\n");
463
464
465 int classes = tokens[0];
466 if (classes > 1)
467 {
468 columnHash.Push(0);
469 columns.Push(pcSkip);
470 columnCount++;
471 }
472
473
474 if (chromosomeX) classes *= 2;
475
476 while (classes--)
477 if (ReadLineHelper(input, buffer, tokens) == 0)
478 error("Linkage data file ends unexpectedly\n");
479
480
481 locus.Push(-1);
482 }
483 break;
484 case 3 :
485 {
486 columnHash.Push(GetMarkerID(locus_name));
487 columns.Push(pcMarker);
488 columnCount++;
489
490
491 MarkerInfo * info = GetMarkerInfo(locus_name);
492
493 info->freq = frequencies;
494
495
496 info->alleleLabels.Clear();
497 for (int i = 0; i < frequencies.Length(); i++)
498 info->alleleLabels.Push(label = i);
499 info->IndexAlleles();
500
501
502 locus.Push(GetMarkerID(locus_name));
503 }
504 break;
505 case 0 :
506 {
507
508 if (ReadLineHelper(input, buffer, tokens) == 0)
509 error("Linkage data file ends unexpectedly\n");
510
511
512
513 for (int vars = tokens[0], i = 0; i < vars; i++)
514 {
515 if (ReadLineHelper(input, buffer, tokens) == 0)
516 error("Linkage data file ends unexpectedly\n");
517
518 String trait_name(locus_name);
519
520 if (i)
521 {
522 trait_name += ".";
523 trait_name += i + 1;
524 }
525
526 columnHash.Push(GetTraitID(trait_name));
527 columns.Push(pcTrait);
528 columnCount++;
529 }
530
531
532 if (ReadLineHelper(input, buffer, tokens) == 0)
533 error("Linkage data file ends unexpectedly\n");
534
535
536 if (ReadLineHelper(input, buffer, tokens) == 0)
537 error("Linkage data file ends unexpectedly\n");
538
539
540 locus.Push(-1);
541 }
542 break;
543 case 2 :
544 error("The data file includes binary factors\n"
545 "Regretably, loci of this type are not supported\n\n");
546 break;
547 default :
548 error("Unsupported locus type [%d] in data file", locus_type);
549 break;
550 }
551 }
552
553 if (need_blank_line) printf("\n");
554
555 columns.Push(pcEnd);
556 columnHash.Push(0);
557
558 ReadLineHelper(input, buffer, tokens);
559 int sexDifference = tokens.Length() ? tokens[0].AsInteger() : -1;
560 if (tokens.Length() != 2 ||
561 (sexDifference != 0 && sexDifference != 2) ||
562 tokens[1].AsInteger() != 0)
563 error("Error retrieving recombination information\n\n"
564 "Expecting two (2) numeric values, which correspond to:\n"
565 " sex-difference -- must be zero (no difference) or two (sex specific recombination)\n"
566 " map-function -- must be zero, that is, no interference\n"
567 "The actual input read:\n%s\n", (const char *) buffer);
568
570 bool distance_in_centimorgans = false;
571
572 for (int r = 0; r <= sexDifference; r += 2)
573 {
574 ReadLineHelper(input, buffer, tokens);
575 if (tokens.Length() != markerOrder.Length() - 1)
576 error("Error retrieving recombination information\n\n"
577 "Expecting %d recombination fractions (current map includes %d loci)\n"
578 "Instead the following line was input:\n%s\n",
579 markerOrder.Length() - 1, markerOrder.Length(), (const char *) buffer);
580
581 distances[r >> 1].Dimension(tokens.Length());
582 for (int i = 0; i < tokens.Length(); i++)
583 distances[r >> 1][i] = (double) tokens[i];
584
585 if (distances[r >> 1].Min() < 0.0)
586 error("Linkage datafile specifies negative recombination fractions");
587
588 bool centimorgans = distances[r >> 1].Max() > 0.5;
589
590 if (centimorgans && !distance_in_centimorgans)
591 printf(" Some recombination fractions in datafile are greater than 0.5,\n"
592 " so recombination fractions will be interpreted as cM distances\n\n");
593
594 distance_in_centimorgans |= centimorgans;
595 }
596
597 double position = 0.0, positionMale = 0.0;
598
599 for (int i = 0, moving = false; i < markerOrder.Length(); i++)
600 {
601 int m = markerOrder[i].AsInteger() - 1;
602
603 if (m < 0 || m >= locus.Length())
604 error("The marker order in the linkage datafile is invalid\n");
605
606 m = locus[m];
607
608 if (m != -1)
609 {
611 info->chromosome = chromosomeX ? 9999 : 0;
612
613 if (sexDifference == 2)
614 info->position = (position + positionMale) * 0.5,
615 info->positionFemale = position,
616 info->positionMale = positionMale;
617 else
618 info->position = info->positionMale = info->positionFemale = position;
619
620 moving = true;
621 }
622
623 if (i < markerOrder.Length() - 1 && moving)
624 position += distance_in_centimorgans ?
625 0.01 * distances[0][i] : RecombinationToDistance(distances[0][i]);
626
627 if (sexDifference == 2 && i < markerOrder.Length() - 1 && moving)
628 positionMale += distance_in_centimorgans ?
629 0.01 * distances[1][i] : RecombinationToDistance(distances[1][i]);
630 }
631}