libStatGen Software  1
Pedigree.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "Pedigree.h"
19 #include "GenotypeLists.h"
20 #include "MemoryInfo.h"
21 #include "Constant.h"
22 #include "Error.h"
23 #include "Sort.h"
24 
25 #include <stdlib.h>
26 
27 bool Pedigree::sexAsCovariate = false;
28 String Pedigree::missing("-99.999");
29 
30 Pedigree::Pedigree() : pd()
31 {
32  haveTwins = count = 0;
33  size = 10000;
34  persons = new Person *[size];
35  familyCount = 0;
36  families = new Family * [1];
37  multiPd = NULL;
38  multiFileCount = 0;
39 }
40 
41 Pedigree::~Pedigree()
42 {
43  for (int i = 0; i < count; i++)
44  delete persons[i];
45 
46  for (int i = 0; i < familyCount; i++)
47  delete families[i];
48 
49  delete [] families;
50  delete [] persons;
51 
52  if (multiPd != NULL)
53  delete [] multiPd;
54 }
55 
56 void Pedigree::Sort()
57 {
58  QuickSort(persons, count, sizeof(Person *),
59  COMPAREFUNC Pedigree::ComparePersons);
60 
61  haveTwins = 0;
62 
63  // Check for structural problems in input pedigree
64  bool problem = false;
65 
66  // Check that we have no duplicates...
67  for (int i = 1; i < count; i++)
68  if (ComparePersons((const Person **) &persons[i-1],
69  (const Person **) &persons[i]) == 0)
70  {
71  printf("Family %s: Person %s is duplicated\n",
72  (const char *) persons[i]->famid,
73  (const char *) persons[i]->pid);
74  problem = true;
75 
76  do
77  {
78  i++;
79  }
80  while (i < count &&
81  ComparePersons((const Person **) &persons[i-1],
82  (const Person **) &persons[i]) == 0);
83  }
84 
85  // Assign parents...
86  for (int i = 0; i < count; i++)
87  {
88  persons[i]->serial = i;
89  persons[i]->father = FindPerson(persons[i]->famid, persons[i]->fatid);
90  persons[i]->mother = FindPerson(persons[i]->famid, persons[i]->motid);
91 
92  problem |= !persons[i]->CheckParents();
93 
94  persons[i]->AssessStatus();
95 
96  // Check if we have any twins...
97  haveTwins |= persons[i]->zygosity;
98  }
99 
100  if (problem)
101  error("Please correct problems with pedigree structure\n");
102 
103  MakeSibships();
104  MakeFamilies();
105 }
106 
107 void Pedigree::MakeSibships()
108 {
109  Person ** sibs = new Person * [count];
110  for (int i = 0; i < count; i++)
111  sibs[i] = persons[i];
112 
113  QuickSort(sibs, count, sizeof(Person *),
114  COMPAREFUNC Pedigree::CompareParents);
115 
116  for (int first = 0; first < count; first++)
117  if (!sibs[first]->isFounder())
118  {
119  int last = first + 1;
120  while (last < count)
121  if (sibs[first]-> mother != sibs[last]->mother ||
122  sibs[first]-> father != sibs[last]->father)
123  break;
124  else last++;
125  last --;
126 
127  for (int j = first; j <= last; j++)
128  {
129  if (sibs[j]->sibCount) delete [] sibs[j]->sibs;
130  sibs[j]->sibCount = last - first + 1;
131  sibs[j]->sibs = new Person * [sibs[j]->sibCount];
132  for (int k = first; k <= last; k++)
133  sibs[j]->sibs[k - first] = sibs[k];
134  }
135  first = last;
136  }
137  delete [] sibs;
138 }
139 
140 void Pedigree::MakeFamilies()
141 {
142  for (int i = 0; i < familyCount; i++)
143  delete families[i];
144  delete [] families;
145 
146  familyCount = 0;
147  families = new Family * [count];
148 
149  for (int first=0; first < count; first++)
150  {
151  int last = first;
152  while (last < count)
153  if (SlowCompare(persons[first]->famid, persons[last]->famid) == 0)
154  last++;
155  else break;
156 
157  families[familyCount] = new Family(*this, first, --last, familyCount);
158 
159  first = last;
160  familyCount++;
161  }
162 }
163 
164 // Utility functions for finding a person in a pedigree
165 
167 {
168  const char * famid;
169  const char * pid;
170 };
171 
172 int CompareKeyToPerson(PedigreeKey * key, Person ** p)
173 {
174  int result = SlowCompare(key->famid, (**p).famid);
175 
176  if (result != 0)
177  return result;
178 
179  return SlowCompare(key->pid, (**p).pid);
180 }
181 
182 int CompareKeyToFamily(PedigreeKey * key, Family ** f)
183 {
184  return SlowCompare(key->famid, (**f).famid);
185 }
186 
187 Person * Pedigree::FindPerson(const char * famid, const char * pid)
188 {
189  PedigreeKey key;
190  key.famid = famid;
191  key.pid = pid;
192 
193  Person ** result = (Person **) BinarySearch
194  (&key, persons, count, sizeof(Person *),
195  COMPAREFUNC CompareKeyToPerson);
196 
197  return (result == NULL) ? (Person *) NULL : *result;
198 }
199 
200 Person * Pedigree::FindPerson(const char *famid, const char *pid, int universe)
201 {
202  PedigreeKey key;
203  key.famid = famid;
204  key.pid = pid;
205 
206  Person ** result = (Person **) BinarySearch
207  (&key, persons, universe, sizeof(Person *),
208  COMPAREFUNC CompareKeyToPerson);
209 
210  return (result == NULL) ? (Person *) NULL : *result;
211 }
212 
213 Family * Pedigree::FindFamily(const char * famid)
214 {
215  PedigreeKey key;
216  key.famid = famid;
217 
218  Family ** result = (Family **) BinarySearch
219  (&key, families, familyCount, sizeof(Family *),
220  COMPAREFUNC CompareKeyToFamily);
221 
222  return (result == NULL) ? (Family *) NULL : *result;
223 }
224 
225 int Pedigree::CountAlleles(int marker)
226 {
227  return ::CountAlleles(*this, marker);
228 }
229 
230 void Pedigree::LumpAlleles(double min, bool reorder)
231 {
232  if (min > 0.0)
233  printf("Lumping alleles with frequencies of %.2f or less...\n\n", min);
234 
235  for (int m=0; m < markerCount; m++)
236  ::LumpAlleles(*this, m, min, reorder);
237 }
238 
239 void Pedigree::EstimateFrequencies(int estimator, bool quiet)
240 {
241  bool estimated = false;
242  int line = 3;
243 
244  const char * estimators[] =
245  { "using all genotypes", "using founder genotypes", "assumed equal" };
246 
247  bool condensed = markerCount > 100;
248  int grain = markerCount / 50, estimates = 0;
249 
250  for (int m=0; m < markerCount; m++)
251  if (::EstimateFrequencies(*this, m, estimator))
252  if (!quiet)
253  {
254  if (!estimated)
255  printf("Estimating allele frequencies... [%s]\n ",
256  estimators[estimator]), estimated = true;
257 
258  if (condensed)
259  {
260  if (estimates++ % grain == 0)
261  {
262  printf(".");
263  fflush(stdout);
264  }
265  continue;
266  }
267 
268  if (line + markerNames[m].Length() + 1 > 79)
269  printf("\n "), line = 3;
270 
271  printf("%s ", (const char *) markerNames[m]);
272  line += markerNames[m].Length() + 1;
273  }
274 
275  if (estimated)
276  printf(condensed ? "\nDone estimating frequencies for %d markers\n\n" : "\n\n", estimates);
277 }
278 
279 int Pedigree::ComparePersons(const Person ** p1, const Person ** p2)
280 {
281  int result = SlowCompare((**p1).famid, (**p2).famid);
282 
283  if (result != 0) return result;
284 
285  return SlowCompare((**p1).pid, (**p2).pid);
286 }
287 
288 int Pedigree::CompareParents(const Person ** p1, const Person ** p2)
289 {
290  int result = SlowCompare((**p1).famid, (**p2).famid);
291 
292  if (result) return result;
293 
294  result = SlowCompare((**p1).fatid, (**p2).fatid);
295 
296  if (result) return result;
297 
298  return SlowCompare((**p1).motid, (**p2).motid);
299 }
300 
301 void Pedigree::Grow()
302 {
303  size *= 2;
304 
305  Person ** temp = new Person * [size];
306  if (temp == NULL) error("Out of memory");
307 
308  for (int i=0; i<count; i++)
309  temp[i] = persons[i];
310 
311  delete [] persons;
312  persons = temp;
313 }
314 
315 void Pedigree::Add(Person & rhs)
316 {
317  if (count == size)
318  Grow();
319 
320  persons[count] = new Person();
321  persons[count++]->Copy(rhs);
322 }
323 
324 void Pedigree::WriteDataFile(FILE * output)
325 {
326  // write in the following order:
327  // markers, traits, affections, covariates
328 
329  if (haveTwins)
330  fprintf(output, " Z Zygosity\n");
331 
332  for (int m = 0; m < markerCount; m++)
333  fprintf(output, " M %s\n", (const char *) markerNames[m]);
334 
335  for (int t = 0; t < traitCount; t++)
336  fprintf(output, " T %s\n", (const char *) traitNames[t]);
337 
338  for (int a = 0; a < affectionCount; a++)
339  fprintf(output, " A %s\n", (const char *) affectionNames[a]);
340 
341  for (int c = 0; c < covariateCount; c++)
342  fprintf(output, " C %s\n", (const char *) covariateNames[c]);
343 
344  for (int s = 0; s < stringCount; s++)
345  fprintf(output, " $ %s\n", (const char *) stringNames[s]);
346 
347  fprintf(output, " E END-OF-DATA \n");
348 }
349 
350 void Pedigree::WritePedigreeFile(FILE * output)
351 {
352  MarkerInfo ** info = new MarkerInfo * [markerCount];
353 
354  for (int i = 0; i < markerCount; i++)
355  info[i] = GetMarkerInfo(i);
356 
357  for (int i = 0; i < count; i++)
358  WriteRecodedPerson(output, i, info);
359  fprintf(output, "end\n");
360 
361  delete [] info;
362 }
363 
364 void Pedigree::WritePerson(FILE * output, int person, const char * famid,
365  const char * pid, const char * fatid, const char * motid)
366 {
367  WriteRecodedPerson(output, person, NULL, famid, pid, fatid, motid);
368 }
369 
370 void Pedigree::WriteRecodedPerson(
371  FILE * output, int person, MarkerInfo ** markerInfo,
372  const char * famid, const char * pid, const char * fatid,
373  const char * motid)
374 {
375  Person * p = persons[person];
376 
377  if (famid == NULL) famid = p->famid;
378  if (pid == NULL) pid = p->pid;
379  if (fatid == NULL) fatid = p->fatid;
380  if (motid == NULL) motid = p->motid;
381 
382  // write in the following order:
383  // markers, traits, affections, covariates
384 
385  fprintf(output, "%s\t%s\t%s\t%s\t%d\t",
386  famid, pid, fatid, motid, p->sex);
387 
388  const char * twinCodes[] = {"0", "MZ", "DZ"};
389 
390  if (haveTwins)
391  {
392  if (p->zygosity <= 2)
393  fprintf(output, "%s\t", twinCodes[p->zygosity]);
394  else
395  fprintf(output, "%d\t", p->zygosity);
396  }
397 
398  for (int m = 0; m < markerCount; m++)
399  if (markerInfo == NULL)
400  fprintf(output, markerCount < 20 ? "%3d/%3d\t" : "%d/%d\t",
401  p->markers[m][0], p->markers[m][1]);
402  else
403  fprintf(output, markerCount < 20 ? "%3s/%3s\t" : "%s/%s\t",
404  (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][0]),
405  (const char *) markerInfo[m]->GetAlleleLabel(p->markers[m][1]));
406 
407  for (int t = 0; t < traitCount; t++)
408  if (p->isPhenotyped(t))
409  fprintf(output, "%.3f\t", p->traits[t]);
410  else
411  fprintf(output, "x\t");
412 
413  for (int a = 0; a < affectionCount; a++)
414  if (p->isDiagnosed(a))
415  fprintf(output, "%d\t", p->affections[a]);
416  else
417  fprintf(output, "x\t");
418 
419  for (int c = 0; c < covariateCount; c++)
420  if (p->isControlled(c))
421  fprintf(output, "%.3f\t", p->covariates[c]);
422  else
423  fprintf(output, "x\t");
424 
425  for (int s = 0; s < stringCount; s++)
426  if (!p->strings[s].IsEmpty())
427  fprintf(output, "%s\t", (const char *) p->strings[s]);
428  else
429  fprintf(output, ".\t");
430 
431  fprintf(output, "\n");
432 }
433 
434 void Pedigree::WriteDataFile(const char * output)
435 {
436  FILE * f = fopen(output, "wt");
437  if (f == NULL) error("Couldn't open data file %s", output);
438  WriteDataFile(f);
439  fclose(f);
440 }
441 
442 void Pedigree::WritePedigreeFile(const char * output)
443 {
444  FILE * f = fopen(output, "wt");
445  if (f == NULL) error("Couldn't open pedigree file %s", output);
446  WritePedigreeFile(f);
447  fclose(f);
448 }
449 
450 void Pedigree::PrepareDichotomization()
451 {
452 
453  for (int t = 0; t < traitCount; t++)
454  {
455  String new_affection = traitNames[t] + "*";
456  GetAffectionID(new_affection);
457  }
458 }
459 
460 int Pedigree::Dichotomize(int t, double mean)
461 {
462  String new_affection = traitNames[t] + "*";
463 
464  int af = GetAffectionID(new_affection);
465 
466  if (mean == _NAN_)
467  {
468  mean = 0.0;
469  double dcount = 0;
470  for (int i = 0; i < count; i++)
471  if (persons[i]->isPhenotyped(t) &&
472  !persons[i]->isFounder())
473  {
474  mean += persons[i]->traits[t];
475  dcount ++;
476  }
477 
478  if (!dcount) return af;
479 
480  mean /= dcount;
481  }
482 
483  printf("Dichotomizing %s around mean of %.3f ...\n",
484  (const char *) traitNames[t], mean);
485 
486  for (int i = 0; i < count; i++)
487  if (persons[i]->isPhenotyped(t) && !persons[i]->isFounder())
488  persons[i]->affections[af] = persons[i]->traits[t] > mean ? 2 : 1;
489  else
490  persons[i]->affections[af] = 0;
491 
492  Sort();
493 
494  return af;
495 }
496 
497 void Pedigree::DichotomizeAll(double mean)
498 {
499  for (int t = 0; t < traitCount; t++)
500  Dichotomize(t, mean);
501 }
502 
503 bool Pedigree::InheritanceCheck(bool abortIfInconsistent)
504 {
505  bool fail = false;
506 
507  if (haveTwins) fail |= TwinCheck();
508 
509  if (chromosomeX)
510  fail |= SexLinkedCheck();
511  else
512  fail |= AutosomalCheck();
513 
514  if (fail && abortIfInconsistent)
515  error("Mendelian inheritance errors detected\n");
516 
517  return !fail;
518 }
519 
520 bool Pedigree::AutosomalCheck()
521 {
522  // Arrays indicating which alleles and homozygotes occur
523  IntArray haplos, genos, counts, failedFamilies;
524 
525  bool fail = false;
526 
527  // For each marker ...
528  for (int m = 0; m < markerCount; m++)
529  {
530  MarkerInfo * info = GetMarkerInfo(m);
531 
532  // Summary for marker
533  int alleleCount = CountAlleles(m);
534  int genoCount = alleleCount * (alleleCount + 1) / 2;
535 
536  // Initialize arrays
537  haplos.Dimension(alleleCount + 1);
538  haplos.Set(-1);
539 
540  genos.Dimension(genoCount + 1);
541  genos.Set(-1);
542 
543  failedFamilies.Dimension(familyCount);
544  failedFamilies.Zero();
545 
546  counts.Dimension(alleleCount + 1);
547 
548  for (int f = 0; f < familyCount; f++)
549  for (int i = families[f]->first; i <= families[f]->last; i++)
550  if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
551  {
552  // This loop runs once per sibship
553  Alleles fat = persons[i]->father->markers[m];
554  Alleles mot = persons[i]->mother->markers[m];
555  bool fgeno = fat.isKnown();
556  bool mgeno = mot.isKnown();
557 
558  // Number of alleles, homozygotes and genotypes in this sibship
559  int haplo = 0, homo = 0, diplo = 0;
560 
561  // No. of different genotypes per allele
562  counts.Zero();
563 
564  // In general, there should be no more than 3 genotypes per allele
565  bool too_many_genos = false;
566 
567  for (int j = 0; j < persons[i]->sibCount; j++)
568  if (persons[i]->sibs[j]->isGenotyped(m))
569  {
570  Alleles geno = persons[i]->sibs[j]->markers[m];
571 
572  int fat1 = fat.hasAllele(geno.one);
573  int fat2 = fat.hasAllele(geno.two);
574  int mot1 = mot.hasAllele(geno.one);
575  int mot2 = mot.hasAllele(geno.two);
576 
577  if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
578  (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
579  {
580  printf("%s - Fam %s: Child %s [%s/%s] has ",
581  (const char *) markerNames[m],
582  (const char *) persons[i]->sibs[j]->famid,
583  (const char *) persons[i]->sibs[j]->pid,
584  (const char *) info->GetAlleleLabel(geno.one),
585  (const char *) info->GetAlleleLabel(geno.two));
586 
587  if (!fgeno || !mgeno)
588  printf("%s [%s/%s]\n",
589  fgeno ? "father" : "mother",
590  (const char *) info->GetAlleleLabel(fgeno ? fat.one : mot.one),
591  (const char *) info->GetAlleleLabel(fgeno ? fat.two : mot.two));
592  else
593  printf("parents [%s/%s]*[%s/%s]\n",
594  (const char *) info->GetAlleleLabel(fat.one),
595  (const char *) info->GetAlleleLabel(fat.two),
596  (const char *) info->GetAlleleLabel(mot.one),
597  (const char *) info->GetAlleleLabel(mot.two));
598 
599  fail = true;
600  failedFamilies[f] = true;
601  }
602  else
603  {
604  if (haplos[geno.one] != i)
605  {
606  haplo++;
607  haplos[geno.one] = i;
608  };
609  if (haplos[geno.two] != i)
610  {
611  haplo++;
612  haplos[geno.two] = i;
613  };
614 
615  int index = geno.SequenceCoded();
616 
617  if (genos[index] != i)
618  {
619  genos[index] = i;
620  diplo++;
621  counts[geno.one]++;
622  if (geno.isHomozygous())
623  homo++;
624  else
625  counts[geno.two]++;
626  if (counts[geno.one] > 2) too_many_genos = true;
627  if (counts[geno.two] > 2) too_many_genos = true;
628  }
629  }
630  }
631 
632  if (fgeno)
633  {
634  if (haplos[fat.one] != i)
635  {
636  haplo++;
637  haplos[fat.one] = i;
638  }
639  if (haplos[fat.two] != i)
640  {
641  haplo++;
642  haplos[fat.two] = i;
643  }
644  homo += fat.isHomozygous();
645  }
646 
647  if (mgeno)
648  {
649  if (haplos[mot.one] != i)
650  {
651  haplo++;
652  haplos[mot.one] = i;
653  }
654  if (haplos[mot.two] != i)
655  {
656  haplo++;
657  haplos[mot.two] = i;
658  }
659  homo += mot.isHomozygous();
660  }
661 
662  if (diplo > 4 || haplo + homo > 4 || (haplo == 4 && too_many_genos))
663  {
664  printf("%s - Fam %s: ",
665  (const char *) markerNames[m],
666  (const char *) persons[i]->famid);
667  if (persons[i]->father->markers[m].isKnown())
668  printf("Father %s [%s/%s] has children [",
669  (const char *) persons[i]->father->pid,
670  (const char *) info->GetAlleleLabel(fat.one),
671  (const char *) info->GetAlleleLabel(fat.two));
672  else if (persons[i]->mother->markers[m].isKnown())
673  printf("Mother %s [%s/%s] has children [",
674  (const char *) persons[i]->mother->pid,
675  (const char *) info->GetAlleleLabel(mot.one),
676  (const char *) info->GetAlleleLabel(mot.two));
677  else
678  printf("Couple %s * %s has children [",
679  (const char *) persons[i]->mother->pid,
680  (const char *) persons[i]->father->pid);
681 
682  for (int j = 0; j < persons[i]->sibCount; j++)
683  printf("%s%s/%s", j == 0 ? "" : " ",
684  (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
685  (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
686  printf("]\n");
687 
688  fail = true;
689  failedFamilies[f] = true;
690  }
691  }
692 
693  for (int f = 0; f < familyCount; f++)
694  if (!failedFamilies[f] &&
695  (families[f]->count > families[f]->founders + 1) &&
696  !families[f]->isNuclear())
697  fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m);
698  }
699 
700  if (fail)
701  printf("\nMendelian inheritance errors detected\n");
702 
703  return fail;
704 }
705 
706 bool Pedigree::SexLinkedCheck()
707 {
708  bool fail = false;
709 
710  // Keep track of what families fail the basic inheritance check,
711  // so that we can run later run genotype elimination check on the remainder
712  IntArray failedFamilies(familyCount);
713 
714  // For each marker ...
715  for (int m = 0; m < markerCount; m++)
716  {
717  MarkerInfo * info = GetMarkerInfo(m);
718 
719  failedFamilies.Zero();
720 
721  // Check for homozygous males
722  for (int f = 0; f < familyCount; f++)
723  for (int i = families[f]->first; i <= families[f]->last; i++)
724  if (persons[i]->sex == SEX_MALE && persons[i]->markers[m].isKnown() &&
725  !persons[i]->markers[m].isHomozygous())
726  {
727  printf("%s - Fam %s: Male %s has two X alleles [%s/%s]\n",
728  (const char *) markerNames[m],
729  (const char *) persons[i]->famid, (const char *) persons[i]->pid,
730  (const char *) info->GetAlleleLabel(persons[i]->markers[m].one),
731  (const char *) info->GetAlleleLabel(persons[i]->markers[m].two));
732 
733  // Wipe this genotype so we don't get cascading errors below
734  persons[i]->markers[m][0] = persons[i]->markers[m][1] = 0;
735 
736  fail = true;
737  failedFamilies[f] = true;
738  }
739 
740  // Check full sibships for errors
741  // TODO -- We could do better by grouping male half-sibs
742  for (int f = 0; f < familyCount; f++)
743  for (int i = families[f]->first; i <= families[f]->last; i++)
744  if (!persons[i]->isFounder() && persons[i]->sibs[0] == persons[i])
745  {
746  // This loop runs once per sibship
747  Alleles fat = persons[i]->father->markers[m];
748  Alleles mot = persons[i]->mother->markers[m];
749 
750  bool fgeno = fat.isKnown();
751  bool mgeno = mot.isKnown();
752 
753  Alleles inferred_mother = mot;
754  Alleles first_sister;
755  Alleles inferred_father;
756 
757  bool mother_ok = true;
758 
759  int sisters = 0;
760 
761  for (int j = 0; j < persons[i]->sibCount; j++)
762  if (persons[i]->sibs[j]->isGenotyped(m))
763  {
764  Alleles geno = persons[i]->sibs[j]->markers[m];
765 
766  bool fat1 = fat.hasAllele(geno.one);
767  bool fat2 = fat.hasAllele(geno.two);
768  bool mot1 = mot.hasAllele(geno.one);
769  bool mot2 = mot.hasAllele(geno.two);
770 
771  int sex = persons[i]->sibs[j]->sex;
772 
773  if (sex == SEX_MALE)
774  {
775  if (mgeno && !mot1)
776  {
777  printf("%s - Fam %s: Child %s [%s/Y] has mother [%s/%s]\n",
778  (const char *) markerNames[m],
779  (const char *) persons[i]->famid,
780  (const char *) persons[i]->sibs[j]->pid,
781  (const char *) info->GetAlleleLabel(geno.one),
782  (const char *) info->GetAlleleLabel(mot.one),
783  (const char *) info->GetAlleleLabel(mot.two));
784  fail = true;
785  failedFamilies[f] = true;
786  }
787  else
788  mother_ok &= inferred_mother.AddAllele(geno.one);
789  }
790  if (sex == SEX_FEMALE)
791  {
792  if ((fgeno && mgeno && !((fat1 && mot2) || (fat2 && mot1))) ||
793  (fgeno && !(fat1 || fat2)) || (mgeno && !(mot1 || mot2)))
794  {
795  printf("%s - Fam %s: Child %s [%s/%s] has ",
796  (const char *) markerNames[m],
797  (const char *) persons[i]->famid,
798  (const char *) persons[i]->sibs[j]->pid,
799  (const char *) info->GetAlleleLabel(geno.one),
800  (const char *) info->GetAlleleLabel(geno.two));
801 
802  if (!fgeno)
803  printf("mother [%s/%s]\n",
804  (const char *) info->GetAlleleLabel(mot.one),
805  (const char *) info->GetAlleleLabel(mot.two));
806  else if (!mgeno)
807  printf("father [%s/Y]\n",
808  (const char *) info->GetAlleleLabel(fat.one));
809  else
810  printf("parents [%s/Y]*[%s/%s]\n",
811  (const char *) info->GetAlleleLabel(fat.one),
812  (const char *) info->GetAlleleLabel(mot.one),
813  (const char *) info->GetAlleleLabel(mot.two));
814 
815  fail = true;
816  failedFamilies[f] = true;
817  }
818  else
819  {
820  if (!sisters++)
821  inferred_father = first_sister = geno;
822  else if (first_sister != geno)
823  {
824  inferred_father.Intersect(geno);
825 
826  mother_ok &= inferred_mother.AddAllele(
827  geno.otherAllele(inferred_father.one));
828  mother_ok &= inferred_mother.AddAllele(
829  first_sister.otherAllele(inferred_father.one));
830  }
831 
832  if (!fgeno && (mot1 ^ mot2))
833  inferred_father.Intersect(mot1 ? geno.two : geno.one);
834 
835  if (!mgeno && (fat1 ^ fat2))
836  mother_ok &= inferred_mother.AddAllele(fat1 ? geno.two : geno.one);
837  }
838  }
839  }
840 
841  if (!mother_ok || (sisters && !inferred_father.isKnown()))
842  {
843  printf("%s - Fam %s: ",
844  (const char *) markerNames[m],
845  (const char *) persons[i]->famid);
846  if (fgeno)
847  printf("Father %s [%s/Y] has children [",
848  (const char *) persons[i]->father->pid,
849  (const char *) info->GetAlleleLabel(fat.one));
850  else if (mgeno)
851  printf("Mother %s [%s/%s] has children [",
852  (const char *) persons[i]->mother->pid,
853  (const char *) info->GetAlleleLabel(mot.one),
854  (const char *) info->GetAlleleLabel(mot.two));
855  else
856  printf("Couple %s * %s has children [",
857  (const char *) persons[i]->mother->pid,
858  (const char *) persons[i]->father->pid);
859 
860  for (int j = 0; j < persons[i]->sibCount; j++)
861  printf(
862  persons[i]->sibs[j]->sex == SEX_MALE ? "%s%s/Y" : "%s%s/%s",
863  j == 0 ? "" : " ",
864  (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].one),
865  (const char *) info->GetAlleleLabel(persons[i]->sibs[j]->markers[m].two));
866  printf("]\n");
867  fail = true;
868  failedFamilies[f] = true;
869  }
870  }
871 
872  for (int f = 0; f < familyCount; f++)
873  if (!failedFamilies[f] &&
874  (families[f]->count > families[f]->founders + 1) &&
875  !families[f]->isNuclear())
876  fail |= !GenotypeList::EliminateGenotypes(*this, families[f], m);
877  }
878 
879  if (fail)
880  printf("\nMendelian inheritance errors detected\n");
881 
882  return fail;
883 }
884 
885 void Pedigree::ExtractFamily(int id, Pedigree & single_fam_ped)
886 {
887  for (int i = families[id]->first; i <= families[id]->last; i++)
888  single_fam_ped.Add(*persons[i]);
889 
890  single_fam_ped.Sort();
891 }
892 
893 void Pedigree::ExtractOnAffection(int a, Pedigree & new_ped, int target_status)
894 {
895  for (int i = 0; i < count; i++)
896  if (persons[i]->affections[a] == target_status)
897  new_ped.Add(*persons[i]);
898  else
899  {
900  Person blank_person;
901  blank_person.CopyIDs(*persons[i]);
902  new_ped.Add(blank_person);
903  }
904 
905  new_ped.Sort();
906 }
907 
908 void Pedigree::Filter(IntArray & filter)
909 {
910  if (filter.Length() != count)
911  error("Pedigree:Size of pedigree filter doesn't match number of persons in pedigree");
912 
913  for (int i = 0; i < count; i++)
914  if (filter[i] == 1)
915  {
916  persons[i]->WipePhenotypes();
917  persons[i]->filter = true;
918  }
919 }
920 
921 void Pedigree::AddPerson(const char * famid, const char * pid,
922  const char * fatid, const char * motid,
923  int sex, bool delay_sort)
924 {
925  if (count == size) Grow();
926 
927  persons[count] = new Person;
928 
929  persons[count]->famid = famid;
930  persons[count]->pid = pid;
931  persons[count]->fatid = fatid;
932  persons[count]->motid = motid;
933  persons[count]->sex = sex;
934 
935  count++;
936 
937  if (!delay_sort) Sort();
938 }
939 
940 void Pedigree::ShowMemoryInfo()
941 {
942  unsigned int bytes = 0;
943 
944  for (int i = 0; i < count; i++)
945  bytes += persons[i]->famid.BufferSize() + persons[i]->pid.BufferSize() +
946  persons[i]->fatid.BufferSize() + persons[i]->motid.BufferSize();
947 
948  bytes += count * (markerCount * sizeof(Alleles) + traitCount * sizeof(double) +
949  covariateCount * sizeof(double) + affectionCount * sizeof(char) +
950  sizeof(Person));
951 
952  printf(" %40s %s\n", "Pedigree file ...", (const char *) MemoryInfo(bytes));
953 }
954 
955 
Alleles
Definition: PedigreeAlleles.h:24
String
Definition: StringBasics.h:39
PedigreeKey
Definition: Pedigree.cpp:167
Pedigree
Definition: Pedigree.h:33
MarkerInfo
Definition: PedigreeGlobals.h:30
IntArray
Definition: IntArray.h:24
Family
Definition: PedigreeFamily.h:28
Person
Definition: PedigreePerson.h:32