libStatGen Software  1
GlfRecord.h
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __GLF_RECORD_H__
19 #define __GLF_RECORD_H__
20 
21 #include <map>
22 #include <stdint.h>
23 
24 #include "InputFile.h"
25 #include "CharBuffer.h"
26 
27 /// This class allows a user to easily get/set the fields in a GLF record.
28 class GlfRecord
29 {
30 public:
31  /// Constructor
32  GlfRecord();
33 
34  /// Destructor
35  ~GlfRecord();
36 
37 // // Copy Constructor
38 // GlfRecord(const GlfRecord& record);
39 
40 // // Overload operator = to copy the passed in record into this record.
41 // GlfRecord & operator = (const GlfRecord& record);
42 
43 // // Copy the passed in record into this record.
44 // bool copy(const GlfRecord& record);
45 
46  /// Clear this record back to the default setting.
47  void reset();
48 
49  /// Read the record from the specified file (file MUST be in
50  /// the correct position for reading a record).
51  /// \param filePtr file to read from that is in the correct position.
52  /// \return true if the record was successfully read from the file (even
53  /// if it is an endMarker), false if it was not successfully read.
54  bool read(IFILE filePtr);
55 
56  /// Write the record to the specified file.
57  /// \param filePtr file to write to that is in the correct position.
58  /// \return true if the record was successfully written to the
59  /// file, false if not.
60  bool write(IFILE filePtr) const;
61 
62  /// Print the reference section in a readable format.
63  void print() const;
64 
65  /// @name Generic Accessors for Record Types 1 & 2
66  //@{
67  /// Set the record type and reference base.
68  /// \param rtypeRef record type & reference base. Formatted as:
69  /// record_type<<4|numeric_ref_base.
70  /// \return true if the record type and reference base were successfully
71  /// set, false if not.
72  bool setRtypeRef(uint8_t rtypeRef);
73 
74  /// Set the record type.
75  /// \param recType record type: 1 - simple likelihood record,
76  /// 2 - indel likelihood record, 0 - end marker
77  /// \return true if the record type was successfully set, false if not.
78  bool setRecordType(uint8_t recType);
79 
80  /// Set the reference base from an integer value.
81  /// \param refBase integer representation of the reference base.
82  /// \anchor BaseCharacterIntMap
83  /// <table>
84  /// <tr><th>Int Value</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td><td>10</td><td>11</td><td>12</td><td>13</td><td>14</td><td>15</td></tr>
85  /// <tr><th>Character Base</th><td>X</td><td>A</td><td>C</td><td>M</td><td>G</td><td>R</td><td>S</td><td>V</td><td>T</td><td>W</td><td>Y</td><td>H</td><td>K</td><td>D</td><td>B</td><td>N</td></tr>
86  /// </table>
87  /// \return true if the reference base was successfully set, false if not.
88  bool setRefBaseInt(uint8_t refBase);
89 
90  // TODO bool setRefBaseChar(char refBase);
91 
92  /// Set the offset from the precedent record.
93  /// 0-based coordinate of the record minus the coordinate of the
94  /// precedent record. For the first record in a reference sequence,
95  /// the previous coordinate is 0.
96  /// For insertions between x & x+1, the coordinate is x.
97  /// For deletions between x & y, the coordinate is x.
98  /// \param offset offset from the precedent record.
99  /// \return true if successfully set, false if not.
100  bool setOffset(uint32_t offset);
101 
102  /// Set the minimum likelihood and the read depth.
103  /// \param minDepth minimum likelihood and read depth. Formatted as:
104  /// min_lk<<24|read_depth. (min_lk capped at 255)
105  /// \return true if successfully set, false if not.
106  bool setMinDepth(uint32_t minDepth);
107 
108  /// Set the minimum likelihood.
109  /// \param minLk minimum likelihood (capped at 255).
110  /// \return true if successfully set, false if not.
111  bool setMinLk(uint8_t minLk);
112 
113  /// Set the read depth.
114  /// \param readDepth read depth.
115  /// \return true if successfully set, false if not.
116  bool setReadDepth(uint32_t readDepth);
117 
118  /// Set the RMS of mapping qualities of reads covering the site.
119  /// \param rmsMapQ RMS of mapping qualities
120  /// \return true if successfully set, false if not.
121  bool setRmsMapQ(uint8_t rmsMapQ);
122 
123  /// Return the record type.
124  /// \return record type for this record: 0 - endMarker,
125  /// 1 - simple likelihood, 2 - indel likelihood
126  inline int getRecordType() const
127  {
128  return(myRecTypeRefBase >> REC_TYPE_SHIFT);
129  }
130 
131  /// Return the reference base as an integer.
132  /// \return integer representation of the reference base.
133  /// See: \ref BaseCharacterIntMap
134  inline int getRefBase() const
135  {
136  return(myRecTypeRefBase & REF_BASE_MASK);
137  }
138 
139  /// Return the reference base as a character.
140  /// \return character representation of the reference base.
141  char getRefBaseChar() const;
142 
143  /// Return the offset from the precedent record.
144  /// \return offset from the precedent record.
145  uint32_t getOffset() const;
146 
147  /// Return the minimum likelihood and read depth. Formatted as:
148  /// min_lk<<24|read_depth. (min_lk capped at 255)
149  /// \return minimum likelihood and read depth
150  uint32_t getMinDepth() const;
151 
152  /// Return the minimum likelihood
153  /// \return minimum likelihood
154  uint8_t getMinLk() const;
155 
156  /// Return the read depth.
157  /// \return read depth
158  uint32_t getReadDepth() const;
159 
160  /// Return the RMS of mapping qualities of reads covering the site.
161  /// \return RMS of mapping qualities.
162  uint8_t getRmsMapQ() const;
163 
164  //@}
165 
166  /// @name Record Type 1 Accessors
167  /// Record Type 1: Simple Likelihood Record
168  //@{
169  //bool setType1(all fields for type 1);
170 
171  /// Set the likelihood for the specified genotype.
172  /// Throws an exception if index is out of range.
173  /// \param index index for the genotype for which the likelihood is
174  /// being set.
175  /// \anchor GenotypeIndexTable
176  /// <table>
177  /// <tr><th>Index</th><td>0</td><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td><td>9</td></tr>
178  /// <tr><th>Genotype</th><td>AA</td><td>AC</td><td>AG</td><td>AT</td><td>CC</td><td>CG</td><td>CT</td><td>GG</td><td>GT</td><td>TT</td></tr>
179  /// </table>
180  /// \param value likelihood for the genotype at the specified index.
181  /// \return true if successfully set, false if not.
182  bool setLk(int index, uint8_t value);
183 
184  //bool getType1(all fields for type 1);
185 
186  /// Get the likelihood for the specified genotype index.
187  /// Throws an exception if index is out of range.
188  /// \param index index of the genotype for which the likelihood should
189  /// be returned. See: \ref GenotypeIndexTable
190  /// \return likelihood of the specified index.
191  uint8_t getLk(int index);
192  //@}
193 
194  /// @name Record Type 2 Accessors
195  /// Record Type2: Indel Likelihood Record
196  //@{
197 // bool setType2(all fields for type 2);
198 
199  /// Set the likelihood of the first homozygous indel allele.
200  /// \param lk likelihood of the 1st homozygous indel allele (capped at 255)
201  /// \return true if successfully set, false if not.
202  bool setLkHom1(uint8_t lk);
203 
204  /// Set the likelihood of the 2nd homozygous indel allele.
205  /// \param lk likelihood of the 2nd homozygous indel allele (capped at 255)
206  /// \return true if successfully set, false if not.
207  bool setLkHom2(uint8_t lk);
208 
209  /// Set the likelihood of a heterozygote.
210  /// \param lk likelihood of a heterozygote (capped at 255)
211  /// \return true if successfully set, false if not.
212  bool setLkHet(uint8_t lk);
213 
214  /// Set the sequence of the first indel allele if the
215  /// first indel is an insertion.
216  /// \param indelSeq sequence of the first indel allele (insertion).
217  /// \return true if successfully set, false if not.
218  bool setInsertionIndel1(const std::string& indelSeq);
219 
220  /// Set the sequence of the first indel allele if the
221  /// first indel is a deletion.
222  /// \param indelSeq sequence of the first indel allele (deletion).
223  /// \return true if successfully set, false if not.
224  bool setDeletionIndel1(const std::string& indelSeq);
225 
226  /// Set the sequence of the 2nd indel allele if the
227  /// 2nd indel is an insertion.
228  /// \param indelSeq sequence of the 2nd indel allele (insertion).
229  /// \return true if successfully set, false if not.
230  bool setInsertionIndel2(const std::string& indelSeq);
231 
232  /// Set the sequence of the 2nd indel allele if the
233  /// 2nd indel is a deletion.
234  /// \param indelSeq sequence of the 2nd indel allele (deletion).
235  /// \return true if successfully set, false if not.
236  bool setDeletionIndel2(const std::string& indelSeq);
237 
238  // bool setType2(all fields for type 2);
239 
240  /// Return the likelihood of the 1st homozygous indel allele.
241  /// \return likelihood of the 1st homozygous indel allele.
242  uint8_t getLkHom1();
243 
244  /// Return the likelihood of the 2nd homozygous indel allele.
245  /// \return likelihood of the 2nd homozygous indel allele.
246  uint8_t getLkHom2();
247 
248  /// Return the likelihood of a heterozygote.
249  /// \return likelihood of a heterozygote.
250  uint8_t getLkHet();
251 
252  /// Get the sequence and length (+:ins, -:del) of the 1st indel allele.
253  /// \param indelSeq string to set with the sequence of the 1st indel allele
254  /// \return length of the 1st indel allele
255  /// (positive=insertion; negative=deletion; 0=no-indel)
256  int16_t getIndel1(std::string& indelSeq);
257 
258  /// Get the sequence and length (+:ins, -:del) of the 2nd indel allele.
259  /// \param indelSeq string to set with the sequence of the 2nd indel allele
260  /// \return length of the 2nd indel allele
261  /// (positive=insertion; negative=deletion; 0=no-indel)
262  int16_t getIndel2(std::string& indelSeq);
263  //@}
264 
265 private:
266  // Read a record of record type 1.
267  void readType1(IFILE filePtr);
268 
269  // Read a record of record type 2.
270  void readType2(IFILE filePtr);
271 
272 
273  // Write the rtyperef field.
274  void writeRtypeRef(IFILE filePtr) const;
275 
276 
277  // Write a record of record type 1.
278  void writeType1(IFILE filePtr) const;
279 
280  // Write a record of record type 2.
281  void writeType2(IFILE filePtr) const;
282 
283  // Packed byte: record_type in the upper 4 bits (>> REC_TYPE_SHIFT), ref_base in the lower 4 bits (& REF_BASE_MASK).
284  uint8_t myRecTypeRefBase;
285 
286  static const uint8_t REC_TYPE_SHIFT = 4;
287  static const uint8_t REF_BASE_MASK = 0xF;
288  static const uint8_t REC_TYPE_MASK = 0xF0;
289 
290  static const uint32_t MIN_LK_SHIFT = 24;
291  static const uint32_t READ_DEPTH_MASK = 0xFFFFFF;
292  static const uint32_t MIN_LK_MASK = 0xFF000000;
293 
294  static const char REF_BASE_MAX = 15;
295  static std::string REF_BASE_CHAR;
296 
297  static const int NUM_REC1_LIKELIHOOD = 10;
298 
299  struct
300  {
301  uint32_t offset;
302  uint32_t min_depth;
303  uint8_t rmsMapQ;
304  uint8_t lk[GlfRecord::NUM_REC1_LIKELIHOOD];
305  } myRec1Base;
306 
307  static const int REC1_BASE_SIZE = 19; // 4 + 4 + 1 + NUM_REC1_LIKELIHOOD: summed field sizes of myRec1Base (sizeof may be larger if the compiler pads)
308 
309  struct
310  {
311  uint32_t offset;
312  uint32_t min_depth;
313  uint8_t rmsMapQ;
314  uint8_t lkHom1;
315  uint8_t lkHom2;
316  uint8_t lkHet;
317  int16_t indelLen1;
318  int16_t indelLen2;
319  } myRec2Base;
320 
321  // TODO rest of rec 2.
322  CharBuffer myIndelSeq1;
323  CharBuffer myIndelSeq2;
324 
325  static const int REC2_BASE_SIZE = 16; // 4 + 4 + 1 + 1 + 1 + 1 + 2 + 2: summed field sizes of myRec2Base
326 
327 };
328 
329 #endif
GlfRecord::print
void print() const
Print the reference section in a readable format.
Definition: GlfRecord.cpp:143
GlfRecord::getRefBaseChar
char getRefBaseChar() const
Return the reference base as a character.
Definition: GlfRecord.cpp:251
GlfRecord::setRmsMapQ
bool setRmsMapQ(uint8_t rmsMapQ)
Set the RMS of mapping qualities of reads covering the site.
Definition: GlfRecord.cpp:243
GlfRecord::setOffset
bool setOffset(uint32_t offset)
Set the offset from the precedent record.
Definition: GlfRecord.cpp:215
GlfRecord::setLkHom2
bool setLkHom2(uint8_t lk)
Set the likelihood of the 2nd homozygous indel allele.
Definition: GlfRecord.cpp:380
GlfRecord::getLkHet
uint8_t getLkHet()
Return the likelihood of a heterozygote.
Definition: GlfRecord.cpp:442
GlfRecord::getRefBase
int getRefBase() const
Return the reference base as an integer.
Definition: GlfRecord.h:134
GlfRecord::read
bool read(IFILE filePtr)
Read the record from the specified file (file MUST be in the correct position for reading a record).
Definition: GlfRecord.cpp:65
GlfRecord::setMinDepth
bool setMinDepth(uint32_t minDepth)
Set the minimum likelihood and the read depth.
Definition: GlfRecord.cpp:222
GlfRecord::getLkHom2
uint8_t getLkHom2()
Return the likelihood of the 2nd homozygous indel allele.
Definition: GlfRecord.cpp:431
GlfRecord::reset
void reset()
Clear this record back to the default setting.
Definition: GlfRecord.cpp:38
GlfRecord
This class allows a user to easily get/set the fields in a GLF record.
Definition: GlfRecord.h:29
GlfRecord::setInsertionIndel1
bool setInsertionIndel1(const std::string &indelSeq)
Set the sequence of the first indel allele if the first indel is an insertion.
Definition: GlfRecord.cpp:392
GlfRecord::~GlfRecord
~GlfRecord()
Destructor.
Definition: GlfRecord.cpp:31
GlfRecord::getRecordType
int getRecordType() const
Return the record type.
Definition: GlfRecord.h:126
GlfRecord::setDeletionIndel1
bool setDeletionIndel1(const std::string &indelSeq)
Set the sequence of the first indel allele if the first indel is a deletion.
Definition: GlfRecord.cpp:399
GlfRecord::setDeletionIndel2
bool setDeletionIndel2(const std::string &indelSeq)
Set the sequence of the 2nd indel allele if the 2nd indel is a deletion.
Definition: GlfRecord.cpp:413
GlfRecord::getOffset
uint32_t getOffset() const
Return the offset from the precedent record.
Definition: GlfRecord.cpp:263
GlfRecord::getLkHom1
uint8_t getLkHom1()
Return the likelihood of the 1st homozygous indel allele.
Definition: GlfRecord.cpp:420
GlfRecord::getIndel2
int16_t getIndel2(std::string &indelSeq)
Get the sequence and length (+:ins, -:del) of the 2nd indel allele.
Definition: GlfRecord.cpp:465
InputFile.h
GlfRecord::setMinLk
bool setMinLk(uint8_t minLk)
Set the minimum likelihood.
Definition: GlfRecord.cpp:229
GlfRecord::getIndel1
int16_t getIndel1(std::string &indelSeq)
Get the sequence and length (+:ins, -:del) of the 1st indel allele.
Definition: GlfRecord.cpp:453
CharBuffer
Definition: CharBuffer.h:25
GlfRecord::GlfRecord
GlfRecord()
Constructor.
Definition: GlfRecord.cpp:25
InputFile
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition: InputFile.h:37
GlfRecord::getReadDepth
uint32_t getReadDepth() const
Return the read depth.
Definition: GlfRecord.cpp:308
GlfRecord::setReadDepth
bool setReadDepth(uint32_t readDepth)
Set the read depth.
Definition: GlfRecord.cpp:236
GlfRecord::setInsertionIndel2
bool setInsertionIndel2(const std::string &indelSeq)
Set the sequence of the 2nd indel allele if the 2nd indel is an insertion.
Definition: GlfRecord.cpp:406
GlfRecord::getMinLk
uint8_t getMinLk() const
Return the minimum likelihood.
Definition: GlfRecord.cpp:293
GlfRecord::setLkHet
bool setLkHet(uint8_t lk)
Set the likelihood of a heterozygote.
Definition: GlfRecord.cpp:386
GlfRecord::write
bool write(IFILE filePtr) const
Write the record to the specified file.
Definition: GlfRecord.cpp:113
GlfRecord::setLkHom1
bool setLkHom1(uint8_t lk)
Set the likelihood of the first homozygous indel allele.
Definition: GlfRecord.cpp:374
GlfRecord::setLk
bool setLk(int index, uint8_t value)
Set the likelihood for the specified genotype.
Definition: GlfRecord.cpp:340
GlfRecord::getLk
uint8_t getLk(int index)
Get the likelihood for the specified genotype index.
Definition: GlfRecord.cpp:355
GlfRecord::setRtypeRef
bool setRtypeRef(uint8_t rtypeRef)
Set the record type and reference base.
Definition: GlfRecord.cpp:188
GlfRecord::getRmsMapQ
uint8_t getRmsMapQ() const
Return the RMS of mapping qualities of reads covering the site.
Definition: GlfRecord.cpp:323
GlfRecord::getMinDepth
uint32_t getMinDepth() const
Return the minimum likelihood and read depth.
Definition: GlfRecord.cpp:278
GlfRecord::setRecordType
bool setRecordType(uint8_t recType)
Set the record type.
Definition: GlfRecord.cpp:194
GlfRecord::setRefBaseInt
bool setRefBaseInt(uint8_t refBase)
Set the reference base from an integer value.
Definition: GlfRecord.cpp:201