The class Alphabet implements an alphabet and alphabet utility functions.
These utility functions can be used to remap characters to more (bit-)efficient representations, check whether a string is valid, compute histograms, etc.
Currently supported alphabets are DNA, RAWDNA, RNA, PROTEIN, ALPHANUM, CUBE, RAW, IUPAC_NUCLEIC_ACID and IUPAC_AMINO_ACID.
Definition at line 67 of file Alphabet.h.
Public Member Functions | |
CAlphabet (char *alpha, int32_t len) | |
CAlphabet (EAlphabet alpha) | |
CAlphabet (CAlphabet *alpha) | |
virtual | ~CAlphabet () |
bool | set_alphabet (EAlphabet alpha) |
EAlphabet | get_alphabet () |
int32_t | get_num_symbols () |
int32_t | get_num_bits () |
uint8_t | remap_to_bin (uint8_t c) |
uint8_t | remap_to_char (uint8_t c) |
void | clear_histogram () |
clear histogram | |
template<class T > | |
void | add_string_to_histogram (T *p, int64_t len) |
void | add_byte_to_histogram (uint8_t p) |
void | print_histogram () |
print histogram | |
void | get_hist (int64_t **h, int32_t *len) |
const int64_t * | get_histogram () |
get pointer to histogram | |
bool | check_alphabet (bool print_error=true) |
bool | is_valid (uint8_t c) |
bool | check_alphabet_size (bool print_error=true) |
int32_t | get_num_symbols_in_histogram () |
int32_t | get_max_value_in_histogram () |
int32_t | get_num_bits_in_histogram () |
virtual const char * | get_name () const |
Static Public Member Functions | |
static const char * | get_alphabet_name (EAlphabet alphabet) |
Static Public Attributes | |
static const uint8_t | B_A = 0 |
static const uint8_t | B_C = 1 |
static const uint8_t | B_G = 2 |
static const uint8_t | B_T = 3 |
static const uint8_t | MAPTABLE_UNDEF = 0xff |
static const char * | alphabet_names [11] = {"DNA", "RAWDNA", "RNA", "PROTEIN", "ALPHANUM", "CUBE", "RAW", "IUPAC_NUCLEIC_ACID", "IUPAC_AMINO_ACID", "NONE", "UNKNOWN"} |
Protected Member Functions | |
void | init_map_table () |
void | copy_histogram (CAlphabet *src) |
Protected Attributes | |
EAlphabet | alphabet |
int32_t | num_symbols |
int32_t | num_bits |
bool | valid_chars [1<< (sizeof(uint8_t)*8)] |
uint8_t | maptable_to_bin [1<< (sizeof(uint8_t)*8)] |
uint8_t | maptable_to_char [1<< (sizeof(uint8_t)*8)] |
int64_t | histogram [1<< (sizeof(uint8_t)*8)] |
CAlphabet::CAlphabet | ( | char * | alpha, | |
int32_t | len | |||
) |
CAlphabet::CAlphabet | ( | EAlphabet | alpha | ) |
CAlphabet::CAlphabet | ( | CAlphabet * | alpha | ) |
CAlphabet::~CAlphabet | ( | ) | [virtual] |
Definition at line 70 of file Alphabet.cpp.
void CAlphabet::add_byte_to_histogram | ( | uint8_t | p | ) |
void CAlphabet::add_string_to_histogram | ( | T * | p, | |
int64_t | len | |||
) |
make histogram for whole string
p | string | |
len | length of string |
Definition at line 153 of file Alphabet.h.
bool CAlphabet::check_alphabet | ( | bool | print_error = true |
) |
check whether the symbols in the histogram are valid in the alphabet, e.g. for DNA, whether only the letters A, C, G and T appear
print_error | if errors shall be printed |
Definition at line 430 of file Alphabet.cpp.
bool CAlphabet::check_alphabet_size | ( | bool | print_error = true |
) |
check whether all symbols in the histogram fit in the alphabet
print_error | if errors shall be printed |
Definition at line 452 of file Alphabet.cpp.
void CAlphabet::clear_histogram | ( | ) |
clear histogram
Definition at line 379 of file Alphabet.cpp.
void CAlphabet::copy_histogram | ( | CAlphabet * | src | ) | [protected] |
copy histogram
src | alphabet to copy histogram from |
Definition at line 469 of file Alphabet.cpp.
EAlphabet CAlphabet::get_alphabet | ( | ) |
const char * CAlphabet::get_alphabet_name | ( | EAlphabet | alphabet | ) | [static] |
return alphabet name
alphabet | alphabet type to get name from |
Definition at line 474 of file Alphabet.cpp.
void CAlphabet::get_hist | ( | int64_t ** | h, | |
int32_t * | len | |||
) |
get histogram
h | where the histogram will be stored | |
len | length of histogram |
Definition at line 176 of file Alphabet.h.
const int64_t* CAlphabet::get_histogram | ( | ) |
get pointer to histogram
Definition at line 188 of file Alphabet.h.
int32_t CAlphabet::get_max_value_in_histogram | ( | ) |
return maximum value in histogram
Definition at line 385 of file Alphabet.cpp.
virtual const char* CAlphabet::get_name | ( | ) | const [virtual] |
int32_t CAlphabet::get_num_bits | ( | ) |
get number of bits necessary to store all symbols in alphabet
Definition at line 119 of file Alphabet.h.
int32_t CAlphabet::get_num_bits_in_histogram | ( | ) |
return number of bits required to store all symbols in histogram
Definition at line 412 of file Alphabet.cpp.
int32_t CAlphabet::get_num_symbols | ( | ) |
get number of symbols in alphabet
Definition at line 109 of file Alphabet.h.
int32_t CAlphabet::get_num_symbols_in_histogram | ( | ) |
return number of symbols in histogram
Definition at line 400 of file Alphabet.cpp.
void CAlphabet::init_map_table | ( | ) | [protected] |
init map table
Definition at line 124 of file Alphabet.cpp.
bool CAlphabet::is_valid | ( | uint8_t | c | ) |
check whether a symbol is valid in the alphabet, e.g. for DNA, whether the symbol is one of A, C, G or T
c | symbol |
Definition at line 207 of file Alphabet.h.
void CAlphabet::print_histogram | ( | ) |
print histogram
Definition at line 421 of file Alphabet.cpp.
uint8_t CAlphabet::remap_to_bin | ( | uint8_t | c | ) |
remap element, e.g. translate ACGT to 0123
c | element to remap |
Definition at line 129 of file Alphabet.h.
uint8_t CAlphabet::remap_to_char | ( | uint8_t | c | ) |
remap element, e.g. translate 0123 to ACGT
c | element to remap |
Definition at line 139 of file Alphabet.h.
bool CAlphabet::set_alphabet | ( | EAlphabet | alpha | ) |
set alphabet and initialize mapping table (for remap)
alpha | new alphabet |
Definition at line 74 of file Alphabet.cpp.
EAlphabet CAlphabet::alphabet [protected] |
alphabet
Definition at line 276 of file Alphabet.h.
const char * CAlphabet::alphabet_names[11] = {"DNA", "RAWDNA", "RNA", "PROTEIN", "ALPHANUM", "CUBE", "RAW", "IUPAC_NUCLEIC_ACID", "IUPAC_AMINO_ACID", "NONE", "UNKNOWN"} [static] |
alphabet names
Definition at line 272 of file Alphabet.h.
const uint8_t CAlphabet::B_A = 0 [static] |
B_A
Definition at line 262 of file Alphabet.h.
const uint8_t CAlphabet::B_C = 1 [static] |
B_C
Definition at line 264 of file Alphabet.h.
const uint8_t CAlphabet::B_G = 2 [static] |
B_G
Definition at line 266 of file Alphabet.h.
const uint8_t CAlphabet::B_T = 3 [static] |
B_T
Definition at line 268 of file Alphabet.h.
int64_t CAlphabet::histogram[1<< (sizeof(uint8_t)*8)] [protected] |
histogram
Definition at line 288 of file Alphabet.h.
uint8_t CAlphabet::maptable_to_bin[1<< (sizeof(uint8_t)*8)] [protected] |
maptable to bin
Definition at line 284 of file Alphabet.h.
uint8_t CAlphabet::maptable_to_char[1<< (sizeof(uint8_t)*8)] [protected] |
maptable to char
Definition at line 286 of file Alphabet.h.
const uint8_t CAlphabet::MAPTABLE_UNDEF = 0xff [static] |
MAPTABLE UNDEF
Definition at line 270 of file Alphabet.h.
int32_t CAlphabet::num_bits [protected] |
number of bits
Definition at line 280 of file Alphabet.h.
int32_t CAlphabet::num_symbols [protected] |
number of symbols
Definition at line 278 of file Alphabet.h.
bool CAlphabet::valid_chars[1<< (sizeof(uint8_t)*8)] [protected] |
valid chars
Definition at line 282 of file Alphabet.h.