libStatGen Software 1
Loading...
Searching...
No Matches
Pedigree Class Reference
Inheritance diagram for Pedigree:
Collaboration diagram for Pedigree:

Public Member Functions

void Prepare (IFILE &input)
 
void Load (IFILE &input)
 
void LoadMendel (IFILE &input)
 
void Prepare (const char *input)
 
void Load (const char *input, bool allowFailures=false)
 
int TranslateSexCode (const char *code, bool &failure)
 
void PrepareDichotomization ()
 
int Dichotomize (int trait, double mean=_NAN_)
 
void DichotomizeAll (double mean=_NAN_)
 
void WriteDataFile (FILE *output)
 
void WritePedigreeFile (FILE *output)
 
void WriteDataFile (const char *output)
 
void WritePedigreeFile (const char *output)
 
void WritePerson (FILE *output, int who, const char *famid=NULL, const char *pid=NULL, const char *fatid=NULL, const char *motid=NULL)
 
void WriteRecodedPerson (FILE *output, int who, MarkerInfo **markerInfo, const char *famid=NULL, const char *pid=NULL, const char *fatid=NULL, const char *motid=NULL)
 
void Sort ()
 
FamilyFindFamily (const char *famid)
 
PersonFindPerson (const char *famid, const char *pid)
 
int CountAlleles (int marker)
 
void LumpAlleles (double treshold, bool reorder=true)
 
void EstimateFrequencies (int estimator, bool quiet=false)
 
Personoperator[] (int i)
 
bool InheritanceCheck (bool abortIfInconsistent=true)
 
bool AutosomalCheck ()
 
bool SexLinkedCheck ()
 
bool TwinCheck ()
 
void MergeTwins ()
 
void Trim (bool quiet=false, int *informative=NULL)
 
void AddPerson (const char *famid, const char *pid, const char *fatid, const char *motid, int sex, bool delay_sort=false)
 
void ExtractFamily (int id, Pedigree &new_ped)
 
void ExtractOnAffection (int a, Pedigree &new_ped, int target_status=2)
 
void Filter (IntArray &filter)
 
void ShowMemoryInfo ()
 

Public Attributes

int size
 
int count
 
Person ** persons
 
int familyCount
 
Family ** families
 
int haveTwins
 
PedigreeDescription pd
 
PedigreeDescriptionmultiPd
 
int multiFileCount
 

Static Public Attributes

static bool sexAsCovariate = false
 
static String missing
 
- Static Public Attributes inherited from PedigreeGlobals
static int traitCount = 0
 
static int markerCount = 0
 
static int affectionCount = 0
 
static int covariateCount = 0
 
static int stringCount = 0
 
static bool chromosomeX = false
 
static bool sexSpecificMap = false
 
static StringArray traitNames
 
static StringArray covariateNames
 
static StringArray affectionNames
 
static StringArray markerNames
 
static StringArray stringNames
 
static StringIntHash markerLookup
 
static StringIntHash traitLookup
 
static StringIntHash affectionLookup
 
static StringIntHash covariateLookup
 
static StringIntHash stringLookup
 
static int markerInfoCount = 0
 
static int markerInfoSize = 0
 
static MarkerInfo ** markerInfo = NULL
 
static StringHash markerInfoByName
 
static MarkerInfo ** markerInfoByInteger = NULL
 

Additional Inherited Members

- Static Public Member Functions inherited from PedigreeGlobals
static int GetTraitID (const char *name)
 
static int GetMarkerID (const char *name)
 
static int GetCovariateID (const char *name)
 
static int GetAffectionID (const char *name)
 
static int GetStringID (const char *name)
 
static int LookupTrait (const char *name)
 
static int LookupMarker (const char *name)
 
static int LookupCovariate (const char *name)
 
static int LookupAffection (const char *name)
 
static int LookupString (const char *name)
 
static void GrowMarkerInfo ()
 
static MarkerInfoGetMarkerInfo (String &name)
 
static MarkerInfoGetMarkerInfo (int marker)
 
static int SortMarkersInMapOrder (IntArray &markers, int chromosome=-1)
 
static void GetOrderedMarkers (IntArray &markers)
 
static void FlagMissingMarkers (IntArray &missingMarkers)
 
static bool MarkerPositionsAvailable ()
 
static bool AlleleFrequenciesAvailable ()
 
static void VerifySexSpecificOrder ()
 
static void LoadAlleleFrequencies (const char *filename, bool required=false)
 
static void LoadAlleleFrequencies (IFILE &file)
 
static void LoadMarkerMap (const char *filename, bool filter=false)
 
static void LoadMarkerMap (IFILE &file, bool filter=false)
 
static void LoadBasepairMap (const char *filename)
 
static void LoadBasepairMap (IFILE &file)
 
static void WriteMapFile (const char *filename)
 
static void WriteMapFile (FILE *file)
 
static void WriteFreqFile (const char *filename, bool old_format=false)
 
static void WriteFreqFile (FILE *file, bool old_format=false)
 
static int LoadAllele (int marker, String &label)
 
static int LoadAllele (MarkerInfo *info, String &label)
 

Detailed Description

Definition at line 32 of file Pedigree.h.

Constructor & Destructor Documentation

◆ Pedigree()

Pedigree::Pedigree ( )

Definition at line 30 of file Pedigree.cpp.

30 : pd()
31{
32 haveTwins = count = 0;
33 size = 10000;
34 persons = new Person *[size];
35 familyCount = 0;
36 families = new Family * [1];
37 multiPd = NULL;
38 multiFileCount = 0;
39}

◆ ~Pedigree()

Pedigree::~Pedigree ( )

Definition at line 41 of file Pedigree.cpp.

42{
43 for (int i = 0; i < count; i++)
44 delete persons[i];
45
46 for (int i = 0; i < familyCount; i++)
47 delete families[i];
48
49 delete [] families;
50 delete [] persons;
51
52 if (multiPd != NULL)
53 delete [] multiPd;
54}

Member Function Documentation

◆ AddPerson()

void Pedigree::AddPerson ( const char *  famid,
const char *  pid,
const char *  fatid,
const char *  motid,
int  sex,
bool  delay_sort = false 
)

Definition at line 921 of file Pedigree.cpp.

924{
925 if (count == size) Grow();
926
927 persons[count] = new Person;
928
929 persons[count]->famid = famid;
930 persons[count]->pid = pid;
931 persons[count]->fatid = fatid;
932 persons[count]->motid = motid;
933 persons[count]->sex = sex;
934
935 count++;
936
937 if (!delay_sort) Sort();
938}

◆ AutosomalCheck()

bool Pedigree::AutosomalCheck ( )

Definition at line 520 of file Pedigree.cpp.

521{
522 // Arrays indicating which alleles and homozygotes occur
523 IntArray haplos, genos, counts, failedFamilies;
524
525 bool fail = false;
526
527 // For each marker ...
528 for (int m = 0; m < markerCount; m++)
529 {
530 MarkerInfo * info = GetMarkerInfo(m);
531
532 // Summary for marker
533 int alleleCount = CountAlleles(m);
534 int genoCount = alleleCount * (alleleCount + 1) / 2;
535
536 // Initialize arrays
537 haplos.Dimension(alleleCount + 1);
538 haplos.Set(-1);
539
540 genos.Dimension(genoCount + 1);
541 genos.Set(-1);
542
543 failedFamilies.Dimension(familyCount);
544 failedFamilies.Zero();
545
546 counts.Dimension(alleleCount + 1);
547
548 for (int f = 0; f < familyCount; f++)
549 for (int i = families[f]->first; i <= families[f]->last; i++)
550 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
551 {
552 // This loop runs once per sibship
553 Alleles fat = persons[i]->father->markers[m];
554 Alleles mot = persons[i]->mother->markers[m];
555 bool fgeno = fat.isKnown();
556 bool mgeno = mot.isKnown();
557
558 // Number of alleles, homozygotes and genotypes in this sibship
559 int haplo = 0, homo = 0, diplo = 0;
560
561 // No. of different genotypes per allele
562 counts.Zero();
563
564 // In general, there should be no more than 3 genotypes per allele
565 bool too_many_genos = false;
566
567 for (int j = 0; j < persons[i]->sibCount; j++)
568 if (persons[i]->sibs[j]->isGenotyped(m))
569 {
570 Alleles geno = persons[i]->sibs[j]->markers[m];
571
572 int fat1 = fat.hasAllele(geno.one);
573 int fat2 = fat.hasAllele(geno.two);
574 int mot1 = mot.hasAllele(geno.one);
575 int mot2 = mot.hasAllele(geno.two);
576
577 if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
578 (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
579 {
580 printf("%s - Fam %s: Child %s [%s/%s] has ",
581 (const char *) markerNames[m],
582 (const char *) persons[i]->sibs[j]->famid,
583 (const char *) persons[i]->sibs[j]->pid,
584 (const char *) info->GetAlleleLabel(geno.one),
585 (const char *) info->GetAlleleLabel(geno.two));
586
587 if (!fgeno || !mgeno)
588 printf("%s [%s/%s]\n",
589 fgeno ? "father" : "mother",
590 (const char *) info->GetAlleleLabel(fgeno ? fat.one : mot.one),
591 (const char *) info->GetAlleleLabel(fgeno ? fat.two : mot.two));
592 else
593 printf("parents [%s/%s]*[%s/%s]\n",
594 (const char *) info->GetAlleleLabel(fat.one),
595 (const char *) info->GetAlleleLabel(fat.two),
596 (const char *) info->GetAlleleLabel(mot.one),
597 (const char *) info->GetAlleleLabel(mot.two));
598
599 fail = true;
600 failedFamilies[f] = true;
601 }
602 else
603 {
604 if (haplos[geno.one] != i)
605 {
606 haplo++;
607 haplos[geno.one] = i;
608 };
609 if (haplos[geno.two] != i)
610 {
611 haplo++;
612 haplos[geno.two] = i;
613 };
614
615 int index = geno.SequenceCoded();
616
617 if (genos[index] != i)
618 {
619 genos[index] = i;
620 diplo++;
621 counts[geno.one]++;
622 if (geno.isHomozygous())
623 homo++;
624 else
625 counts[geno.two]++;
626 if (counts[geno.one] > 2) too_many_genos = true;
627 if (counts[geno.two] > 2) too_many_genos = true;
628 }
629 }
630 }
631
632 if (fgeno)
633 {
634 if (haplos[fat.one] != i)
635 {
636 haplo++;
637 haplos[fat.one] = i;
638 }
639 if (haplos[fat.two] != i)
640 {
641 haplo++;
642 haplos[fat.two] = i;
643 }
644 homo += fat.isHomozygous();
645 }
646
647 if (mgeno)
648 {
649 if (haplos[mot.one] != i)
650 {
651 haplo++;
652 haplos[mot.one] = i;
653 }
654 if (haplos[mot.two] != i)
655 {
656 haplo++;
657 haplos[mot.two] = i;
658 }
659 homo += mot.isHomozygous();
660 }
661
662 if (diplo > 4 || haplo + homo > 4 || (haplo == 4 && too_many_genos))
663 {
664 printf("%s - Fam %s: ",
665 (const char *) markerNames[m],
666 (const char *) persons[i]->famid);
667 if (persons[i]->father->markers[m].isKnown())
668 printf("Father %s [%s/%s] has children [",
669 (const char *) persons[i]->father->pid,
670 (const char *) info->GetAlleleLabel(fat.one),
671 (const char *) info->GetAlleleLabel(fat.two));
672 else if (persons[i]->mother->markers[m].isKnown())
673 printf("Mother %s [%s/%s] has children [",
674 (const char *) persons[i]->mother->pid,
675 (const char *) info->GetAlleleLabel(mot.one),
676 (const char *) info->GetAlleleLabel(mot.two));
677 else
678 printf("Couple %s * %s has children [",
679 (const char *) persons[i]->mother->pid,
680 (const char *) persons[i]->father->pid);
681
682 for (int j = 0; j < persons[i]->sibCount; j++)
683 printf("%s%s/%s", j == 0 ? "" : " ",
684 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
685 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
686 printf("]\n");
687
688 fail = true;
689 failedFamilies[f] = true;
690 }
691 }
692
693 for (int f = 0; f < familyCount; f++)
694 if (!failedFamilies[f] &&
695 (families[f]->count > families[f]->founders + 1) &&
696 !families[f]->isNuclear())
697 fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m);
698 }
699
700 if (fail)
701 printf("\nMendelian inheritance errors detected\n");
702
703 return fail;
704}

◆ CountAlleles()

int Pedigree::CountAlleles ( int  marker)

Definition at line 225 of file Pedigree.cpp.

226{
227 return ::CountAlleles(*this, marker);
228}

◆ Dichotomize()

int Pedigree::Dichotomize ( int  trait,
double  mean = _NAN_ 
)

Definition at line 460 of file Pedigree.cpp.

461{
462 String new_affection = traitNames[t] + "*";
463
464 int af = GetAffectionID(new_affection);
465
466 if (mean == _NAN_)
467 {
468 mean = 0.0;
469 double dcount = 0;
470 for (int i = 0; i < count; i++)
471 if (persons[i]->isPhenotyped(t) &&
472 !persons[i]->isFounder())
473 {
474 mean += persons[i]->traits[t];
475 dcount ++;
476 }
477
478 if (!dcount) return af;
479
480 mean /= dcount;
481 }
482
483 printf("Dichotomizing %s around mean of %.3f ...\n",
484 (const char *) traitNames[t], mean);
485
486 for (int i = 0; i < count; i++)
487 if (persons[i]->isPhenotyped(t) && !persons[i]->isFounder())
488 persons[i]->affections[af] = persons[i]->traits[t] > mean ? 2 : 1;
489 else
490 persons[i]->affections[af] = 0;
491
492 Sort();
493
494 return af;
495}

◆ DichotomizeAll()

void Pedigree::DichotomizeAll ( double  mean = _NAN_)

Definition at line 497 of file Pedigree.cpp.

498{
499 for (int t = 0; t < traitCount; t++)
500 Dichotomize(t, mean);
501}

◆ EstimateFrequencies()

void Pedigree::EstimateFrequencies ( int  estimator,
bool  quiet = false 
)

Definition at line 239 of file Pedigree.cpp.

240{
241 bool estimated = false;
242 int line = 3;
243
244 const char * estimators[] =
245 { "using all genotypes", "using founder genotypes", "assumed equal" };
246
247 bool condensed = markerCount > 100;
248 int grain = markerCount / 50, estimates = 0;
249
250 for (int m=0; m < markerCount; m++)
251 if (::EstimateFrequencies(*this, m, estimator))
252 if (!quiet)
253 {
254 if (!estimated)
255 printf("Estimating allele frequencies... [%s]\n ",
256 estimators[estimator]), estimated = true;
257
258 if (condensed)
259 {
260 if (estimates++ % grain == 0)
261 {
262 printf(".");
263 fflush(stdout);
264 }
265 continue;
266 }
267
268 if (line + markerNames[m].Length() + 1 > 79)
269 printf("\n "), line = 3;
270
271 printf("%s ", (const char *) markerNames[m]);
272 line += markerNames[m].Length() + 1;
273 }
274
275 if (estimated)
276 printf(condensed ? "\nDone estimating frequencies for %d markers\n\n" : "\n\n", estimates);
277}

◆ ExtractFamily()

void Pedigree::ExtractFamily ( int  id,
Pedigree new_ped 
)

Definition at line 885 of file Pedigree.cpp.

886{
887 for (int i = families[id]->first; i <= families[id]->last; i++)
888 single_fam_ped.Add(*persons[i]);
889
890 single_fam_ped.Sort();
891}

◆ ExtractOnAffection()

void Pedigree::ExtractOnAffection ( int  a,
Pedigree new_ped,
int  target_status = 2 
)

Definition at line 893 of file Pedigree.cpp.

894{
895 for (int i = 0; i < count; i++)
896 if (persons[i]->affections[a] == target_status)
897 new_ped.Add(*persons[i]);
898 else
899 {
900 Person blank_person;
901 blank_person.CopyIDs(*persons[i]);
902 new_ped.Add(blank_person);
903 }
904
905 new_ped.Sort();
906}

◆ Filter()

void Pedigree::Filter ( IntArray filter)

Definition at line 908 of file Pedigree.cpp.

909{
910 if (filter.Length() != count)
911 error("Pedigree:Size of pedigree filter doesn't match number of persons in pedigree");
912
913 for (int i = 0; i < count; i++)
914 if (filter[i] == 1)
915 {
916 persons[i]->WipePhenotypes();
917 persons[i]->filter = true;
918 }
919}

◆ FindFamily()

Family * Pedigree::FindFamily ( const char *  famid)

Definition at line 213 of file Pedigree.cpp.

214{
215 PedigreeKey key;
216 key.famid = famid;
217
218 Family ** result = (Family **) BinarySearch
219 (&key, families, familyCount, sizeof(Family *),
220 COMPAREFUNC CompareKeyToFamily);
221
222 return (result == NULL) ? (Family *) NULL : *result;
223}

◆ FindPerson()

Person * Pedigree::FindPerson ( const char *  famid,
const char *  pid 
)

Definition at line 187 of file Pedigree.cpp.

188{
189 PedigreeKey key;
190 key.famid = famid;
191 key.pid = pid;
192
193 Person ** result = (Person **) BinarySearch
194 (&key, persons, count, sizeof(Person *),
195 COMPAREFUNC CompareKeyToPerson);
196
197 return (result == NULL) ? (Person *) NULL : *result;
198}

◆ InheritanceCheck()

bool Pedigree::InheritanceCheck ( bool  abortIfInconsistent = true)

Definition at line 503 of file Pedigree.cpp.

504{
505 bool fail = false;
506
507 if (haveTwins) fail |= TwinCheck();
508
509 if (chromosomeX)
510 fail |= SexLinkedCheck();
511 else
512 fail |= AutosomalCheck();
513
514 if (fail && abortIfInconsistent)
515 error("Mendelian inheritance errors detected\n");
516
517 return !fail;
518}

◆ Load() [1/2]

void Pedigree::Load ( const char *  input,
bool  allowFailures = false 
)

Definition at line 564 of file PedigreeLoader.cpp.

565{
566 if (multiFileCount <= 1)
567 {
568 IFILE f = ifopen(filename, "rb");
569
570 if (f == NULL && allowFailures)
571 return;
572
573 if (f == NULL)
574 error(
575 "The pedigree file %s cannot be opened\n\n"
576 "Common causes for this problem are:\n"
577 " * You might not have used the correct options to specify input file names,\n"
578 " please check the program documentation for information on how to do this\n\n"
579 " * The file doesn't exist or the filename might have been misspelt\n\n"
580 " * The file exists but it is being used by another program which you will need\n"
581 " to close\n\n"
582 " * The file is larger than 2GB and you haven't compiled this application with\n"
583 " large file support.\n\n",
584 filename);
585
586 Load(f);
587 ifclose(f);
588 }
589 else
590 {
591 StringArray filenames;
592
593 filenames.AddColumns(filename, ',');
594
595 if (filenames.Length() != multiFileCount)
596 error("Different numbers of comma separated data and pedigree file names provided\n");
597
598 for (int i = 0; i < filenames.Length(); i++)
599 {
600 printf(" AUTOMATIC MERGE: Datafile '%s' matched to pedigree '%s' ...\n",
601 (const char *) multiPd[i].filename, (const char *) filenames[i]);
602
603 pd = multiPd[i];
604
605 IFILE f = ifopen(filenames[i], "rb");
606
607 if (f == NULL)
608 error("The pedigree file '%s' cannot be opened\n\n",
609 (const char *) filenames[i]);
610
611 Load(f);
612 ifclose(f);
613 }
614
615 printf("\n");
616 }
617}
IFILE ifopen(const char *filename, const char *mode, InputFile::ifileCompression compressionMode=InputFile::DEFAULT)
Open a file with the specified name and mode, using a filename of "-" to indicate stdin/stdout.
Definition InputFile.h:562
int ifclose(IFILE &file)
Close the file.
Definition InputFile.h:580
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition InputFile.h:37

◆ Load() [2/2]

void Pedigree::Load ( IFILE input)

Definition at line 31 of file PedigreeLoader.cpp.

32{
33 if (pd.mendelFormat)
34 {
35 LoadMendel(input);
36 return;
37 }
38
39 int sexCovariate = sexAsCovariate ? GetCovariateID("sex") : -1;
40
41 int textCols = pd.CountTextColumns() + 5;
42 int oldCount = count;
43 bool warn = true;
44 int line = 0;
45
46 String buffer;
47 StringArray tokens;
48
49 while (!ifeof(input))
50 {
51 int field = 0;
52
53 buffer.ReadLine(input);
54
55 tokens.Clear();
56 tokens.AddTokens(buffer, WHITESPACE);
57
58 if (tokens.Length() == 0) continue;
59 if (tokens[0].SlowCompare("end") == 0) break;
60
61 line++;
62
63 if (tokens.Length() < textCols)
64 {
65 if (buffer.Length() > 79)
66 {
67 buffer.SetLength(75);
68 buffer += " ...";
69 }
70
71 String description;
72
73 pd.ColumnSummary(description);
74 error("Loading Pedigree...\n\n"
75 "Expecting %d columns (%s),\n"
76 "but read only %d columns in line %d.\n\n"
77 "The problem line is transcribed below:\n%s\n",
78 textCols, (const char *) description,
79 tokens.Length(), line, (const char *) buffer);
80 }
81
82 if (tokens.Length() > textCols && warn && textCols > 5)
83 {
84 pd.ColumnSummary(buffer);
85 printf("WARNING -- Trailing columns in pedigree file will be ignored\n"
86 " Expecting %d data columns (%s)\n"
87 " However line %d, for example, has %d data columns\n\n",
88 textCols - 5, (const char *) buffer, line, tokens.Length() - 5);
89 warn = false;
90 }
91
92 Person * p;
93
94 // create a new person if necessary
95 if (oldCount==0 || (p = FindPerson(tokens[0], tokens[1], oldCount))==NULL)
96 {
97 if (count == size) Grow();
98
99 p = persons[count++] = new Person;
100 }
101
102 p->famid = tokens[field++]; // famid
103 p->pid = tokens[field++]; // pid
104 p->fatid = tokens[field++]; // fatid
105 p->motid = tokens[field++]; // motid
106
107 bool failure = false;
108 p->sex = TranslateSexCode(tokens[field++], failure);
109 if (failure)
110 error("Can't interpret the sex of individual #%d\n"
111 "Family: %s Individual: %s Sex Code: %s", count,
112 (const char *) p->famid, (const char *) p->pid,
113 (const char *) tokens[field-1]);
114
115 if (sexAsCovariate)
116 {
117 if (p->sex)
118 p->covariates[sexCovariate] = p->sex;
119 else
120 p->covariates[sexCovariate] = _NAN_;
121 }
122
123 for (int col = 0; col < pd.columnCount; col++)
124 switch (pd.columns[col])
125 {
126 case pcAffection :
127 {
128 int a = pd.columnHash[col];
129 int new_status;
130
131 const char * affection = tokens[field++];
132
133 switch (toupper(affection[0]))
134 {
135 case '1' :
136 case 'N' :
137 case 'U' :
138 new_status = 1;
139 break;
140 case '2' :
141 case 'D' :
142 case 'A' :
143 case 'Y' :
144 new_status = 2;
145 break;
146 default :
147 new_status = atoi(affection);
148 if (new_status < 0 || new_status > 2)
149 error("Incorrect formatting for affection status "
150 "Col %d, Affection %s\n"
151 "Family: %s Individual: %s Status: %s",
152 col, (const char *) affectionNames[a],
153 (const char *) p->famid, (const char *) p->pid,
154 affection);
155 }
156 if (new_status != 0 && p->affections[a] != 0 &&
157 new_status != p->affections[a])
158 error("Conflict with previous affection status - "
159 "Col %d, Affection %s\n"
160 "Family: %s Individual: %s Old: %d New: %d",
161 col, (const char *) affectionNames[a],
162 (const char *) p->famid, (const char *) p->pid,
163 p->affections[a], new_status);
164 if (new_status) p->affections[a] = new_status;
165 break;
166 }
167 case pcMarker :
168 {
169 int m = pd.columnHash[col];
170
171 Alleles new_genotype;
172
173 new_genotype[0] = LoadAllele(m, tokens[field++]);
174 new_genotype[1] = LoadAllele(m, tokens[field++]);
175
176 if (p->markers[m].isKnown() && new_genotype.isKnown() &&
177 new_genotype != p->markers[m])
178 {
179 MarkerInfo * info = GetMarkerInfo(m);
180
181 error("Conflict with previous genotype - Col %d, Marker %s\n"
182 "Family: %s Individual: %s Old: %s/%s New: %s/%s",
183 col, (const char *) markerNames[m],
184 (const char *) p->famid, (const char *) p->pid,
185 (const char *) info->GetAlleleLabel(p->markers[m][0]),
186 (const char *) info->GetAlleleLabel(p->markers[m][1]),
187 (const char *) info->GetAlleleLabel(new_genotype[0]),
188 (const char *) info->GetAlleleLabel(new_genotype[1]));
189 }
190
191 if (new_genotype.isKnown()) p->markers[m] = new_genotype;
192 break;
193 }
194 case pcTrait :
195 case pcUndocumentedTraitCovariate :
196 {
197 int t = pd.columnHash[col];
198 double new_pheno = _NAN_;
199
200 if (pd.columns[col] == pcUndocumentedTraitCovariate)
201 t = t / 32768;
202
203 const char * value = tokens[field++];
204 char * flag = NULL;
205
206 if (missing == (const char *) NULL || strcmp(value, missing) != 0)
207 new_pheno = strtod(value, &flag);
208 if (flag != NULL && *flag) new_pheno = _NAN_;
209
210 if (p->traits[t] != _NAN_ && new_pheno != _NAN_ &&
211 new_pheno != p->traits[t])
212 error("Conflict with previous phenotype - Col %d, Trait %s\n"
213 "Family: %s Individual: %s Old: %f New: %f",
214 col, (const char *) traitNames[t],
215 (const char *) p->famid, (const char *) p->pid,
216 p->traits[t], new_pheno);
217
218 if (new_pheno != _NAN_) p->traits[t] = new_pheno;
219 if (pd.columns[col] == pcTrait) break;
220 }
221 case pcCovariate :
222 {
223 int c = pd.columnHash[col];
224 double new_covar = _NAN_;
225
226 if (pd.columns[col] == pcUndocumentedTraitCovariate)
227 {
228 c = c % 32768;
229 field--;
230 }
231
232 const char * value = tokens[field++];
233 char * flag = NULL;
234
235 if (missing == (const char *) NULL || strcmp(value, missing) != 0)
236 new_covar = strtod(value, &flag);
237 if (flag != NULL && *flag) new_covar = _NAN_;
238
239 if (p->covariates[c] != _NAN_ && new_covar != _NAN_ &&
240 new_covar != p->covariates[c])
241 error("Conflict with previous value - Col %d, Covariate %s\n"
242 "Family: %s Individual: %s Old: %f New: %f",
243 col, (const char *) covariateNames[c],
244 (const char *) p->famid, (const char *) p->pid,
245 p->covariates[c], new_covar);
246
247 if (new_covar != _NAN_) p->covariates[c] = new_covar;
248 break;
249 }
250 case pcString :
251 {
252 int c = pd.columnHash[col];
253
254 if (!p->strings[c].IsEmpty() && p->strings[c] != tokens[field])
255 error("Conflict with previous value - Col %d, String %s\n"
256 "Family: %s Individual: %s Old: %s New: %s",
257 col, (const char *) stringNames[c],
258 (const char *) p->famid, (const char *) p->pid,
259 (const char *) p->strings[c], (const char *) tokens[field]);
260
261 p->strings[c] = tokens[field++];
262
263 break;
264 }
265 case pcSkip :
266 field++;
267 break;
268 case pcZygosity :
269 {
270 int new_zygosity;
271
272 const char * zygosity = tokens[field++];
273
274 switch (zygosity[0])
275 {
276 case 'D' :
277 case 'd' :
278 new_zygosity = 2;
279 break;
280 case 'M' :
281 case 'm' :
282 new_zygosity = 1;
283 break;
284 default :
285 new_zygosity = atoi(zygosity);
286 }
287 if (p->zygosity != 0 && new_zygosity != p->zygosity)
288 error("Conflict with previous zygosity - "
289 "Column %d in pedigree\n"
290 "Family: %s Individual: %s Old: %d New: %d\n",
291 col, (const char *) p->famid, (const char *) p->pid,
292 p->zygosity, new_zygosity);
293 p->zygosity = new_zygosity;
294 break;
295 }
296 case pcEnd :
297 break;
298 default :
299 error("Inconsistent Pedigree Description -- Internal Error");
300 }
301 }
302
303 Sort();
304}
int ifeof(IFILE file)
Check to see if we have reached the EOF (returns 0 if not EOF).
Definition InputFile.h:654

◆ LoadMendel()

void Pedigree::LoadMendel ( IFILE input)

Definition at line 306 of file PedigreeLoader.cpp.

307{
308 // First, retrieve the two format statements from file
309 String familyHeader;
310 String individualRecord;
311
312 familyHeader.ReadLine(input);
313 individualRecord.ReadLine(input);
314
315 // Then create two FORTRAN input streams...
316 // One will be used for retrieving family labels and sizes, the other
317 // will be used for individual information
318 FortranFormat headers, records;
319
320 headers.SetInputFile(input);
321 headers.SetFormat(familyHeader);
322
323 records.SetInputFile(input);
324 records.SetFormat(individualRecord);
325
326 // Storage for key pieces of information
327 String famid;
328 String phenotype;
329 String affectionCode;
330 String affectionStem;
331 int familySize;
332
333 String allele1, allele2;
334
335 int sexCovariate = sexAsCovariate ? GetCovariateID("sex") : -1;
336
337 while (!ifeof(input))
338 {
339 if (count == size)
340 Grow();
341
342 // Retrieve header for next family
343 familySize = headers.GetNextInteger();
344 headers.GetNextField(famid);
345 headers.Flush();
346
347 if (famid.IsEmpty())
348 {
349 if (ifeof(input) && familySize == 0)
350 break;
351 else
352 error("Blank family id encountered\n");
353 }
354
355 // Retrieve each individual in the family
356 for (int i = 0; i < familySize; i++)
357 {
358 Person * p = persons[count++] = new Person;
359
360 // Retrieve basic pedigree structure
361 p->famid = famid;
362 records.GetNextField(p->pid);
363 records.GetNextField(p->fatid);
364 records.GetNextField(p->motid);
365
366 if (p->pid.IsEmpty())
367 error("No unique identifier for individual #%d in family %s\n",
368 i + 1, (const char *) famid);
369
370 if (p->pid.Compare(".") == 0)
371 error("Family %s has an individual named '.', but this code is\n"
372 "reserved to indicate missing parents\n");
373
374 if (p->fatid.IsEmpty()) p->fatid = ".";
375 if (p->motid.IsEmpty()) p->motid = ".";
376
377 // Retrieve and decode sex code
378 char sex = records.GetNextCharacter();
379
380 switch (sex)
381 {
382 case '0' :
383 case 'x' :
384 case 'X' :
385 case '?' :
386 case 0 :
387 p->sex = 0;
388 break;
389 case '1' :
390 case 'm' :
391 case 'M' :
392 p->sex = 1;
393 break;
394 case '2' :
395 case 'f' :
396 case 'F' :
397 p->sex = 2;
398 break;
399 default :
400 error("Can't interpret the sex of individual #%d\n"
401 "Family: %s Individual: %s Sex Code: %s", count,
402 (const char *) p->famid, (const char *) p->pid, sex);
403 };
404
405 if (sexAsCovariate)
406 {
407 if (p->sex)
408 p->covariates[sexCovariate] = p->sex;
409 else
410 p->covariates[sexCovariate] = _NAN_;
411 }
412
413 // Retrieve and decode zygosity
414 char zygosity = records.GetNextCharacter();
415
416 // Mendel uses a unique character to indicate each MZ pair,
417 // we use a unique odd number...
418 if (zygosity)
419 p->zygosity = (zygosity - ' ') * 2 - 1;
420
421 affectionStem.Clear();
422 for (int col = 0; col < pd.columnCount; col++)
423 switch (pd.columns[col])
424 {
425 case pcAffection :
426 {
427 int a = pd.columnHash[col];
428
429 // We expand each Mendel non-codominant trait into multiple
430 // affection status column... First, if this is not a
431 // continuation of a previous expansion we first retrieve
432 // and encode the affection status.
433 if (affectionStem.Length() == 0 ||
434 affectionNames[a].CompareToStem(affectionStem) != 0)
435 {
436 affectionStem.Copy(affectionNames[a], 0, affectionNames[a].FindChar('>') + 1);
437 records.GetNextField(phenotype);
438 affectionCode = affectionStem + phenotype;
439 }
440
441 // Then encode each phenotype appropriately
442 if (phenotype.IsEmpty())
443 p->affections[a] = 0;
444 else
445 p->affections[a] = affectionCode.Compare(affectionNames[a]) == 0 ? 2 : 1;
446
447 break;
448 }
449 case pcMarker :
450 {
451 int m = pd.columnHash[col];
452
453 records.GetNextField(phenotype);
454
455 if (phenotype.IsEmpty())
456 {
457 p->markers[m].one = p->markers[m].two = 0;
458 continue;
459 }
460
461 int separator = phenotype.FindChar('/');
462 if (separator == -1) separator = phenotype.FindChar('|');
463
464 if (separator == -1)
465 error("At marker %s, person %s in family %s has genotype %s.\n"
466 "This genotype is not in the 'al1/al2' format.\n",
467 (const char *) markerNames[m],
468 (const char *) p->pid,
469 (const char *) p->famid,
470 (const char *) phenotype);
471
472 allele1.Copy(phenotype, 0, separator);
473 allele1.Trim();
474
475 allele2.Copy(phenotype, separator + 1, 8);
476 allele2.Trim();
477
478 MarkerInfo * info = GetMarkerInfo(m);
479
480 int one = info->alleleNumbers.Integer(allele1);
481
482 if (one < 0)
483 {
484 if (info->freq.Length() == 0)
485 one = info->NewAllele(allele1);
486 else
487 error("At marker %s, person %s in family %s has genotype %s.\n"
488 "However, '%s' is not a valid allele for this marker.\n",
489 (const char *) markerNames[m],
490 (const char *) p->pid,
491 (const char *) p->famid,
492 (const char *) phenotype,
493 (const char *) allele1);
494 }
495
496 int two = info->alleleNumbers.Integer(allele2);
497
498 if (two < 0)
499 {
500 if (info->freq.Length() == 0)
501 two = info->NewAllele(allele2);
502 else
503 error("At marker %s, person %s in family %s has genotype %s.\n"
504 "However, '%s' is not a valid allele for this marker.\n",
505 (const char *) markerNames[m],
506 (const char *) p->pid,
507 (const char *) p->famid,
508 (const char *) phenotype,
509 (const char *) allele2);
510 }
511
512 p->markers[m].one = one;
513 p->markers[m].two = two;
514 break;
515 }
516 case pcEnd :
517 break;
518 case pcTrait :
519 case pcCovariate :
520 case pcSkip :
521 case pcZygosity :
522 default:
523 error("Inconsistent Pedigree Description -- Internal Error");
524 }
525
526 records.Flush();
527 }
528 }
529
530 Sort();
531}

◆ LumpAlleles()

void Pedigree::LumpAlleles ( double  treshold,
bool  reorder = true 
)

Definition at line 230 of file Pedigree.cpp.

231{
232 if (min > 0.0)
233 printf("Lumping alleles with frequencies of %.2f or less...\n\n", min);
234
235 for (int m=0; m < markerCount; m++)
236 ::LumpAlleles(*this, m, min, reorder);
237}

◆ MergeTwins()

void Pedigree::MergeTwins ( )

Definition at line 100 of file PedigreeTwin.cpp.

101{
102 if (!haveTwins) return;
103
104 IntArray mzTwins, surplus;
105
106 printf("Merging MZ twins into a single individual...\n");
107
108 for (int f = 0; f < familyCount; f++)
109 {
110 mzTwins.Clear();
111
112 for (int i = families[f]->first, j; i <= families[f]->last; i++)
113 if (persons[i]->isMzTwin(*persons[i]))
114 {
115 // Have we got another identical sib yet?
116 for (j = 0; j < mzTwins.Length(); j++)
117 if (persons[i]->isMzTwin(*persons[mzTwins[j]]))
118 break;
119
120 // If not, add to list of twins
121 if (j == mzTwins.Length())
122 {
123 mzTwins.Push(i);
124 continue;
125 }
126
127 // Append name to first twins name
128 persons[mzTwins[j]]->pid += ((char) '$') + persons[i]->pid;
129
130 // Set the first twin to affected if at least one of the cotwins is affected
131 for (int j = 0; j < affectionCount; j++)
132 if (persons[i]->affections[j] == 2)
133 persons[mzTwins[j]]->affections[j] = 2;
134
135 surplus.Push(i);
136 }
137
138 // More than one representative of each twin-pair...
139 if (surplus.Length())
140 {
141 // Reassign parent names for each offspring
142 for (int i = families[f]->first, j; i < families[f]->last; i++)
143 if (!persons[i]->isFounder())
144 {
145 if (persons[i]->father->isMzTwin(*persons[i]->father))
146 {
147 for (j = 0; j < mzTwins.Length(); j++)
148 if (persons[i]->father->isMzTwin(*persons[mzTwins[j]]))
149 break;
150 persons[i]->fatid = persons[mzTwins[j]]->pid;
151 }
152 if (persons[i]->mother->isMzTwin(*persons[i]->mother))
153 {
154 for (j = 0; j < mzTwins.Length(); j++)
155 if (persons[i]->mother->isMzTwin(*persons[mzTwins[j]]))
156 break;
157 persons[i]->motid = persons[mzTwins[j]]->pid;
158 }
159 }
160
161 // Delete surplus individuals
162 while (surplus.Length())
163 {
164 int serial = surplus.Pop();
165
166 delete persons[serial];
167
168 for (count--; serial < count; serial++)
169 persons[serial] = persons[serial + 1];
170 }
171
172 // Resort pedigree
173 Sort();
174 }
175 }
176}

◆ operator[]()

Person & Pedigree::operator[] ( int  i)
inline

Definition at line 102 of file Pedigree.h.

103 {
104 return *(persons[i]);
105 }

◆ Prepare() [1/2]

void Pedigree::Prepare ( const char *  input)

Definition at line 533 of file PedigreeLoader.cpp.

534{
535 // Clear any previously loaded pedigree description
536 if (multiPd != NULL)
537 delete [] multiPd;
538
539 multiFileCount = 1;
540
541 // Enable multifile support
542 StringArray filenames;
543
544 filenames.AddColumns(filename, ',');
545
546 if (filenames.Length() <= 1)
547 pd.Load(filename);
548 else
549 {
550 printf("AUTOMATIC MERGE ENABLED: Detected multiple datafile names, separated by commas...\n");
551
552 multiPd = new PedigreeDescription[filenames.Length()];
553
554 for (int i = 0; i < filenames.Length(); i++)
555 {
556 printf(" AUTOMATIC MERGE: Reading data file '%s' ...\n", (const char *) filenames[i]);
557 multiPd[i].Load(filenames[i], false);
558 }
559
560 multiFileCount = filenames.Length();
561 }
562}

◆ Prepare() [2/2]

void Pedigree::Prepare ( IFILE input)

Definition at line 26 of file PedigreeLoader.cpp.

27{
28 pd.Load(input);
29}

◆ PrepareDichotomization()

void Pedigree::PrepareDichotomization ( )

Definition at line 450 of file Pedigree.cpp.

451{
452
453 for (int t = 0; t < traitCount; t++)
454 {
455 String new_affection = traitNames[t] + "*";
456 GetAffectionID(new_affection);
457 }
458}

◆ SexLinkedCheck()

bool Pedigree::SexLinkedCheck ( )

Definition at line 706 of file Pedigree.cpp.

707{
708 bool fail = false;
709
710 // Keep track of what families fail the basic inheritance check,
711 // so that we can run later run genotype elimination check on the remainder
712 IntArray failedFamilies(familyCount);
713
714 // For each marker ...
715 for (int m = 0; m < markerCount; m++)
716 {
717 MarkerInfo * info = GetMarkerInfo(m);
718
719 failedFamilies.Zero();
720
721 // Check for homozygous males
722 for (int f = 0; f < familyCount; f++)
723 for (int i = families[f]->first; i <= families[f]->last; i++)
724 if (persons[i]->sex == SEX_MALE && persons[i]->markers[m].isKnown() &&
725 !persons[i]->markers[m].isHomozygous())
726 {
727 printf("%s - Fam %s: Male %s has two X alleles [%s/%s]\n",
728 (const char *) markerNames[m],
729 (const char *) persons[i]->famid, (const char *) persons[i]->pid,
730 (const char *) info->GetAlleleLabel(persons[i]->markers[m].one),
731 (const char *) info->GetAlleleLabel(persons[i]->markers[m].two));
732
733 // Wipe this genotype so we don't get cascading errors below
734 persons[i]->markers[m][0] = persons[i]->markers[m][1] = 0;
735
736 fail = true;
737 failedFamilies[f] = true;
738 }
739
740 // Check full sibships for errors
741 // TODO -- We could do better by grouping male half-sibs
742 for (int f = 0; f < familyCount; f++)
743 for (int i = families[f]->first; i <= families[f]->last; i++)
744 if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
745 {
746 // This loop runs once per sibship
747 Alleles fat = persons[i]->father->markers[m];
748 Alleles mot = persons[i]->mother->markers[m];
749
750 bool fgeno = fat.isKnown();
751 bool mgeno = mot.isKnown();
752
753 Alleles inferred_mother = mot;
754 Alleles first_sister;
755 Alleles inferred_father;
756
757 bool mother_ok = true;
758
759 int sisters = 0;
760
761 for (int j = 0; j < persons[i]->sibCount; j++)
762 if (persons[i]->sibs[j]->isGenotyped(m))
763 {
764 Alleles geno = persons[i]->sibs[j]->markers[m];
765
766 bool fat1 = fat.hasAllele(geno.one);
767 bool fat2 = fat.hasAllele(geno.two);
768 bool mot1 = mot.hasAllele(geno.one);
769 bool mot2 = mot.hasAllele(geno.two);
770
771 int sex = persons[i]->sibs[j]->sex;
772
773 if (sex == SEX_MALE)
774 {
775 if (mgeno && !mot1)
776 {
777 printf("%s - Fam %s: Child %s [%s/Y] has mother [%s/%s]\n",
778 (const char *) markerNames[m],
779 (const char *) persons[i]->famid,
780 (const char *) persons[i]->sibs[j]->pid,
781 (const char *) info->GetAlleleLabel(geno.one),
782 (const char *) info->GetAlleleLabel(mot.one),
783 (const char *) info->GetAlleleLabel(mot.two));
784 fail = true;
785 failedFamilies[f] = true;
786 }
787 else
788 mother_ok &= inferred_mother.AddAllele(geno.one);
789 }
790 if (sex == SEX_FEMALE)
791 {
792 if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
793 (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
794 {
795 printf("%s - Fam %s: Child %s [%s/%s] has ",
796 (const char *) markerNames[m],
797 (const char *) persons[i]->famid,
798 (const char *) persons[i]->sibs[j]->pid,
799 (const char *) info->GetAlleleLabel(geno.one),
800 (const char *) info->GetAlleleLabel(geno.two));
801
802 if (!fgeno)
803 printf("mother [%s/%s]\n",
804 (const char *) info->GetAlleleLabel(mot.one),
805 (const char *) info->GetAlleleLabel(mot.two));
806 else if (!mgeno)
807 printf("father [%s/Y]\n",
808 (const char *) info->GetAlleleLabel(fat.one));
809 else
810 printf("parents [%s/Y]*[%s/%s]\n",
811 (const char *) info->GetAlleleLabel(fat.one),
812 (const char *) info->GetAlleleLabel(mot.one),
813 (const char *) info->GetAlleleLabel(mot.two));
814
815 fail = true;
816 failedFamilies[f] = true;
817 }
818 else
819 {
820 if (!sisters++)
821 inferred_father = first_sister = geno;
822 else if (first_sister != geno)
823 {
824 inferred_father.Intersect(geno);
825
826 mother_ok &= inferred_mother.AddAllele(
827 geno.otherAllele(inferred_father.one));
828 mother_ok &= inferred_mother.AddAllele(
829 first_sister.otherAllele(inferred_father.one));
830 }
831
832 if (!fgeno && (mot1 ^ mot2))
833 inferred_father.Intersect(mot1 ? geno.two : geno.one);
834
835 if (!mgeno && (fat1 ^ fat2))
836 mother_ok &= inferred_mother.AddAllele(fat1 ? geno.two : geno.one);
837 }
838 }
839 }
840
841 if (!mother_ok || (sisters && !inferred_father.isKnown()))
842 {
843 printf("%s - Fam %s: ",
844 (const char *) markerNames[m],
845 (const char *) persons[i]->famid);
846 if (fgeno)
847 printf("Father %s [%s/Y] has children [",
848 (const char *) persons[i]->father->pid,
849 (const char *) info->GetAlleleLabel(fat.one));
850 else if (mgeno)
851 printf("Mother %s [%s/%s] has children [",
852 (const char *) persons[i]->mother->pid,
853 (const char *) info->GetAlleleLabel(mot.one),
854 (const char *) info->GetAlleleLabel(mot.two));
855 else
856 printf("Couple %s * %s has children [",
857 (const char *) persons[i]->mother->pid,
858 (const char *) persons[i]->father->pid);
859
860 for (int j = 0; j < persons[i]->sibCount; j++)
861 printf(
862 persons[i]->sibs[j]->sex == SEX_MALE ? "%s%s/Y" : "%s%s/%s",
863 j == 0 ? "" : " ",
864 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
865 (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
866 printf("]\n");
867 fail = true;
868 failedFamilies[f] = true;
869 }
870 }
871
872 for (int f = 0; f < familyCount; f++)
873 if (!failedFamilies[f] &&
874 (families[f]->count > families[f]->founders + 1) &&
875 !families[f]->isNuclear())
876 fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m);
877 }
878
879 if (fail)
880 printf("\nMendelian inheritance errors detected\n");
881
882 return fail;
883}

◆ ShowMemoryInfo()

void Pedigree::ShowMemoryInfo ( )

Definition at line 940 of file Pedigree.cpp.

941{
942 unsigned int bytes = 0;
943
944 for (int i = 0; i < count; i++)
945 bytes += persons[i]->famid.BufferSize() + persons[i]->pid.BufferSize() +
946 persons[i]->fatid.BufferSize() + persons[i]->motid.BufferSize();
947
948 bytes += count * (markerCount * sizeof(Alleles) + traitCount * sizeof(double) +
949 covariateCount * sizeof(double) + affectionCount * sizeof(char) +
950 sizeof(Person));
951
952 printf(" %40s %s\n", "Pedigree file ...", (const char *) MemoryInfo(bytes));
953}

◆ Sort()

void Pedigree::Sort ( )

Definition at line 56 of file Pedigree.cpp.

57{
58 QuickSort(persons, count, sizeof(Person *),
59 COMPAREFUNC Pedigree::ComparePersons);
60
61 haveTwins = 0;
62
63 // Check for structural problems in input pedigree
64 bool problem = false;
65
66 // Check that we have no duplicates...
67 for (int i = 1; i < count; i++)
68 if (ComparePersons((const Person **) &persons[i-1],
69 (const Person **) &persons[i]) == 0)
70 {
71 printf("Family %s: Person %s is duplicated\n",
72 (const char *) persons[i]->famid,
73 (const char *) persons[i]->pid);
74 problem = true;
75
76 do
77 {
78 i++;
79 }
80 while (i < count &&
81 ComparePersons((const Person **) &persons[i-1],
82 (const Person **) &persons[i]) == 0);
83 }
84
85 // Assign parents...
86 for (int i = 0; i < count; i++)
87 {
88 persons[i]->serial = i;
89 persons[i]->father = FindPerson(persons[i]->famid, persons[i]->fatid);
90 persons[i]->mother = FindPerson(persons[i]->famid, persons[i]->motid);
91
92 problem |= !persons[i]->CheckParents();
93
94 persons[i]->AssessStatus();
95
96 // Check if we have any twins...
97 haveTwins |= persons[i]->zygosity;
98 }
99
100 if (problem)
101 error("Please correct problems with pedigree structure\n");
102
103 MakeSibships();
104 MakeFamilies();
105}

◆ TranslateSexCode()

int Pedigree::TranslateSexCode ( const char *  code,
bool &  failure 
)

Definition at line 619 of file PedigreeLoader.cpp.

620{
621 failure = false;
622
623 switch (code[0])
624 {
625 case 'x' :
626 case 'X' :
627 case '?' :
628 return 0;
629 case '1' :
630 case 'm' :
631 case 'M' :
632 return 1;
633 case '2' :
634 case 'f' :
635 case 'F' :
636 return 2;
637 default :
638 {
639 int result = atoi(code);
640
641 if (result != 0 && result != 1 && result != 2)
642 {
643 failure = true;
644 result = 0;
645 }
646
647 return result;
648 }
649 };
650}

◆ Trim()

void Pedigree::Trim ( bool  quiet = false,
int *  informative = NULL 
)

Definition at line 29 of file PedigreeTrim.cpp.

30{
31 int newCount = 0;
32 Person ** newPersons = new Person * [count];
33
34 // This function applies the following filters to reduce complexity
35 // of pedigree
36 //
37 // RULE 1: Remove all pedigrees no genotype or phenotype data
38 // RULE 2: Remove leaf individuals with no data
39 // RULE 3: Remove founder couples with <2 offspring and no data
40
41 bool showHeader = true;
42 IntArray discardable, offspring, mates, haveData;
43
44 for (int f = 0; f < familyCount; f++)
45 {
46 Family * fam = families[f];
47
48 // Cache for storing indicators about whether each family member is
49 // informative
50 haveData.Dimension(fam->count);
51
52 // Check that some data is available in the family
53 int hasData = false;
54 for (int i = fam->first; i <= fam->last; i++)
55 if (informative == NULL)
56 hasData |= haveData[persons[i]->traverse] = persons[i]->haveData();
57 else
58 hasData |= haveData[persons[i]->traverse] = informative[i];
59
60 if (!hasData)
61 {
62 if (!quiet)
63 {
64 ShowTrimHeader(showHeader);
65 printf(" Removing family %s: No data\n", (const char *) fam->famid);
66 }
67
68 for (int i = fam->first; i <= fam->last; i++)
69 delete persons[i];
70
71 continue;
72 }
73
74 // Assume that we need everyone in the family
75 discardable.Dimension(fam->count);
76 discardable.Set(0);
77
78 bool trimming = true;
79
80 while (trimming)
81 {
82 trimming = false;
83
84 // Tally the number of offspring for each individual
85 offspring.Dimension(fam->count);
86 offspring.Zero();
87
88 // Tally the number of mates for each individual
89 mates.Dimension(fam->count);
90 mates.Set(-1);
91
92 // In the first round, we count the number of offspring
93 // for each individual in the current trimmed version of the
94 // pedigree
95 for (int i = fam->count - 1; i >= fam->founders; i--)
96 {
97 if (discardable[i]) continue;
98
99 Person & p = *(persons[fam->path[i]]);
100
101 if (discardable[p.father->traverse])
102 continue;
103
104 if (offspring[i] == 0 && !haveData[p.traverse])
105 {
106 trimming = true;
107 discardable[i] = true;
108 continue;
109 }
110
111 int father = p.father->traverse;
112 int mother = p.mother->traverse;
113
114 if (mates[father] == -1 && mates[mother] == -1)
115 {
116 mates[father] = mother,
117 mates[mother] = father;
118 }
119 else if (mates[father] != mother)
120 {
121 if (mates[father] >= 0)
122 mates[mates[father]] = -2;
123
124 if (mates[mother] >= 0)
125 mates[mates[mother]] = -2;
126
127 mates[mother] = -2;
128 mates[father] = -2;
129 }
130
131 offspring[father]++;
132 offspring[mother]++;
133 }
134
135 // In the second pass, we remove individuals with no
136 // data who are founders with a single offspring (and
137 // no multiple matings) or who have no descendants
138 for (int i = fam->count - 1; i >= 0; i--)
139 {
140 if (discardable[i]) continue;
141
142 Person & p = *(persons[fam->path[i]]);
143
144 if (p.isFounder() || discardable[p.father->traverse])
145 {
146 if (mates[i] == -2 ||
147 offspring[i] > 1 ||
148 (mates[i] >= fam->founders &&
149 !discardable[persons[fam->path[mates[i]]]->father->traverse]) ||
150 haveData[p.traverse] ||
151 (mates[i] != -1 && haveData[mates[i]]))
152 continue;
153
154 trimming = true;
155 discardable[i] = true;
156 continue;
157 }
158 }
159 }
160
161 for (int i = fam->count - 1; i >= 0; i--)
162 if (discardable[i])
163 {
164 if (!quiet)
165 {
166 ShowTrimHeader(showHeader);
167 printf(" Removing person %s->%s: No data\n",
168 (const char *) fam->famid,
169 (const char *) persons[fam->path[i]]->pid);
170 }
171 delete persons[fam->path[i]];
172 }
173 else
174 newPersons[newCount++] = persons[fam->path[i]];
175 }
176
177 if (!showHeader)
178 printf("\n");
179
180 delete [] persons;
181
182 persons = newPersons;
183 count = newCount;
184 Sort();
185}

◆ TwinCheck()

bool Pedigree::TwinCheck ( )

Definition at line 23 of file PedigreeTwin.cpp.

24{
25 bool fail = false;
26 IntArray mzTwins;
27
28 for (int f = 0; f < familyCount; f++)
29 {
30 mzTwins.Clear();
31
32 for (int i = families[f]->first, j; i <= families[f]->last; i++)
33 // Is this person an identical twin?
34 if (persons[i]->isMzTwin(*persons[i]))
35 {
36 // Have we got another identical sib yet?
37 for (j = 0; j < mzTwins.Length(); j++)
38 if (persons[i]->isMzTwin(*persons[mzTwins[j]]))
39 break;
40
41 // If not, add to list of twins
42 if (j == mzTwins.Length())
43 {
44 mzTwins.Push(i);
45 continue;
46 }
47
48 // Check that their genotypes are compatible and
49 // merge new twin's genotypes into original twin...
50 Person * original = persons[mzTwins[j]];
51 Person * twin = persons[i];
52
53 for (int m = 0; m < Person::markerCount; m++)
54 {
55 if (!original->markers[m].isKnown())
56 original->markers[m] = twin->markers[m];
57 else if (twin->markers[m].isKnown() &&
58 twin->markers[m] != original->markers[m])
59 printf("MZ Twins %s and %s in family %s have "
60 "different %s genotypes\n",
61 (const char *) original->pid,
62 (const char *) twin->pid,
63 (const char *) original->famid,
64 (const char *) Person::markerNames[m]),
65 fail = true;
66
67 if (twin->sex != original->sex)
68 printf("MZ Twins %s and %s in family %s have "
69 "different sexes\n",
70 (const char *) original->pid,
71 (const char *) twin->pid,
72 (const char *) original->famid),
73 fail = true;
74 }
75 }
76
77 if (mzTwins.Length() == 0) continue;
78
79 // In the second pass we copy merged twin genotypes
80 // from original twin to other twins
81 for (int i = families[f]->first, j; i <= families[f]->last; i++)
82 if (persons[i]->isMzTwin(*persons[i]))
83 {
84 for (j = 0; j < mzTwins.Length(); j++)
85 if (persons[i]->isMzTwin(*persons[mzTwins[j]]))
86 break;
87
88 if (mzTwins[j] == i) continue;
89
90 Person * original = persons[mzTwins[j]];
91 Person * twin = persons[i];
92
93 for (int m = 0; m < Person::markerCount; m++)
94 twin->markers[m] = original->markers[m];
95 }
96 }
97 return fail;
98}

◆ WriteDataFile() [1/2]

void Pedigree::WriteDataFile ( const char *  output)

Definition at line 434 of file Pedigree.cpp.

435{
436 FILE * f = fopen(output, "wt");
437 if (f == NULL) error("Couldn't open data file %s", output);
438 WriteDataFile(f);
439 fclose(f);
440}

◆ WriteDataFile() [2/2]

void Pedigree::WriteDataFile ( FILE *  output)

Definition at line 324 of file Pedigree.cpp.

325{
326 // write in the following order:
327 // markers, traits, affections, covariates
328
329 if (haveTwins)
330 fprintf(output, " Z Zygosity\n");
331
332 for (int m = 0; m < markerCount; m++)
333 fprintf(output, " M %s\n", (const char *) markerNames[m]);
334
335 for (int t = 0; t < traitCount; t++)
336 fprintf(output, " T %s\n", (const char *) traitNames[t]);
337
338 for (int a = 0; a < affectionCount; a++)
339 fprintf(output, " A %s\n", (const char *) affectionNames[a]);
340
341 for (int c = 0; c < covariateCount; c++)
342 fprintf(output, " C %s\n", (const char *) covariateNames[c]);
343
344 for (int s = 0; s < stringCount; s++)
345 fprintf(output, " $ %s\n", (const char *) stringNames[s]);
346
347 fprintf(output, " E END-OF-DATA \n");
348}

◆ WritePedigreeFile() [1/2]

void Pedigree::WritePedigreeFile ( const char *  output)

Definition at line 442 of file Pedigree.cpp.

443{
444 FILE * f = fopen(output, "wt");
445 if (f == NULL) error("Couldn't open pedigree file %s", output);
446 WritePedigreeFile(f);
447 fclose(f);
448}

◆ WritePedigreeFile() [2/2]

void Pedigree::WritePedigreeFile ( FILE *  output)

Definition at line 350 of file Pedigree.cpp.

351{
352 MarkerInfo ** info = new MarkerInfo * [markerCount];
353
354 for (int i = 0; i < markerCount; i++)
355 info[i] = GetMarkerInfo(i);
356
357 for (int i = 0; i < count; i++)
358 WriteRecodedPerson(output, i, info);
359 fprintf(output, "end\n");
360
361 delete [] info;
362}

◆ WritePerson()

void Pedigree::WritePerson ( FILE *  output,
int  who,
const char *  famid = NULL,
const char *  pid = NULL,
const char *  fatid = NULL,
const char *  motid = NULL 
)

Definition at line 364 of file Pedigree.cpp.

366{
367 WriteRecodedPerson(output, person, NULL, famid, pid, fatid, motid);
368}

◆ WriteRecodedPerson()

void Pedigree::WriteRecodedPerson ( FILE *  output,
int  who,
MarkerInfo **  markerInfo,
const char *  famid = NULL,
const char *  pid = NULL,
const char *  fatid = NULL,
const char *  motid = NULL 
)

Definition at line 370 of file Pedigree.cpp.

374{
375 Person * p = persons[person];
376
377 if (famid == NULL) famid = p->famid;
378 if (pid == NULL) pid = p->pid;
379 if (fatid == NULL) fatid = p->fatid;
380 if (motid == NULL) motid = p->motid;
381
382 // write in the following order:
383 // markers, traits, affections, covariates
384
385 fprintf(output, "%s\t%s\t%s\t%s\t%d\t",
386 famid, pid, fatid, motid, p->sex);
387
388 const char * twinCodes[] = {"0", "MZ", "DZ"};
389
390 if (haveTwins)
391 {
392 if (p->zygosity <= 2)
393 fprintf(output, "%s\t", twinCodes[p->zygosity]);
394 else
395 fprintf(output, "%d\t", p->zygosity);
396 }
397
398 for (int m = 0; m < markerCount; m++)
399 if (markerInfo == NULL)
400 fprintf(output, markerCount < 20 ? "%3d/%3d\t" : "%d/%d\t",
401 p->markers[m][0], p->markers[m][1]);
402 else
403 fprintf(output, markerCount < 20 ? "%3s/%3s\t" : "%s/%s\t",
404 (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][0]),
405 (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][1]));
406
407 for (int t = 0; t < traitCount; t++)
408 if (p->isPhenotyped(t))
409 fprintf(output, "%.3f\t", p->traits[t]);
410 else
411 fprintf(output, "x\t");
412
413 for (int a = 0; a < affectionCount; a++)
414 if (p->isDiagnosed(a))
415 fprintf(output, "%d\t", p->affections[a]);
416 else
417 fprintf(output, "x\t");
418
419 for (int c = 0; c < covariateCount; c++)
420 if (p->isControlled(c))
421 fprintf(output, "%.3f\t", p->covariates[c]);
422 else
423 fprintf(output, "x\t");
424
425 for (int s = 0; s < stringCount; s++)
426 if (!p->strings[s].IsEmpty())
427 fprintf(output, "%s\t", (const char *) p->strings[s]);
428 else
429 fprintf(output, ".\t");
430
431 fprintf(output, "\n");
432}

Member Data Documentation

◆ count

int Pedigree::count

Definition at line 39 of file Pedigree.h.

◆ families

Family** Pedigree::families

Definition at line 42 of file Pedigree.h.

◆ familyCount

int Pedigree::familyCount

Definition at line 41 of file Pedigree.h.

◆ haveTwins

int Pedigree::haveTwins

Definition at line 43 of file Pedigree.h.

◆ missing

String Pedigree::missing
static

Definition at line 36 of file Pedigree.h.

◆ multiFileCount

int Pedigree::multiFileCount

Definition at line 47 of file Pedigree.h.

◆ multiPd

PedigreeDescription* Pedigree::multiPd

Definition at line 46 of file Pedigree.h.

◆ pd

PedigreeDescription Pedigree::pd

Definition at line 45 of file Pedigree.h.

◆ persons

Person** Pedigree::persons

Definition at line 40 of file Pedigree.h.

◆ sexAsCovariate

bool Pedigree::sexAsCovariate = false
static

Definition at line 35 of file Pedigree.h.

◆ size

int Pedigree::size

Definition at line 38 of file Pedigree.h.


The documentation for this class was generated from the following files: