Compressor.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Berlin Institute of Technology
00009  */
00010 #include "lib/Compressor.h"
00011 #include "lib/Mathematics.h"
00012 #include <string.h>
00013 
00014 #ifdef USE_LZO
00015 #include <lzo/lzoconf.h>
00016 #include <lzo/lzoutil.h>
00017 #include <lzo/lzo1x.h>
00018 #endif
00019 
00020 #ifdef USE_GZIP
00021 #include <zlib.h>
00022 #endif
00023 
00024 #ifdef USE_BZIP2
00025 #include <bzlib.h>
00026 #endif
00027 
00028 #ifdef USE_LZMA
00029 #include <lzma.h>
00030 #endif
00031 
00032 using namespace shogun;
00033 
00034 void CCompressor::compress(uint8_t* uncompressed, uint64_t uncompressed_size,
00035         uint8_t* &compressed, uint64_t &compressed_size, int32_t level)
00036 {
00037     uint64_t initial_buffer_size=0;
00038 
00039     if (uncompressed_size==0)
00040     {
00041         compressed=NULL;
00042         compressed_size=0;
00043         return;
00044     }
00045 
00046     switch (compression_type)
00047     {
00048         case UNCOMPRESSED:
00049             {
00050                 initial_buffer_size=uncompressed_size;
00051                 compressed_size=uncompressed_size;
00052                 compressed=new uint8_t[compressed_size];
00053                 memcpy(compressed, uncompressed, uncompressed_size);
00054                 break;
00055             }
00056 #ifdef USE_LZO
00057         case LZO:
00058             {
00059                 if (lzo_init() != LZO_E_OK)
00060                     SG_ERROR("Error initializing LZO Compression\n");
00061 
00062                 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
00063                 if (!lzo_wrkmem)
00064                     SG_ERROR("Error allocating LZO workmem\n");
00065 
00066                 initial_buffer_size=uncompressed_size +
00067                     uncompressed_size / 16+ 64 + 3;
00068 
00069                 compressed_size=initial_buffer_size;
00070                 compressed=new uint8_t[initial_buffer_size];
00071 
00072                 lzo_uint lzo_size=compressed_size;
00073 
00074                 int ret;
00075                 if (level<9)
00076                 {
00077                     ret=lzo1x_1_15_compress(uncompressed, uncompressed_size,
00078                                 compressed, &lzo_size, lzo_wrkmem);
00079                 }
00080                 else
00081                 {
00082                     ret=lzo1x_999_compress(uncompressed, uncompressed_size,
00083                                 compressed, &lzo_size, lzo_wrkmem);
00084                 }
00085 
00086                 compressed_size=lzo_size;
00087                 lzo_free(lzo_wrkmem);
00088 
00089                 if (ret!= LZO_E_OK)
00090                     SG_ERROR("Error lzo-compressing data\n");
00091 
00092                 break;
00093             }
00094 #endif
00095 #ifdef USE_GZIP
00096         case GZIP:
00097             {
00098                 initial_buffer_size=1.001*uncompressed_size + 12;
00099                 compressed_size=initial_buffer_size;
00100                 compressed=new uint8_t[initial_buffer_size];
00101                 uLongf gz_size=compressed_size;
00102 
00103                 if (compress2(compressed, &gz_size, uncompressed,
00104                             uncompressed_size, level) != Z_OK)
00105                 {
00106                     SG_ERROR("Error gzip-compressing data\n");
00107                 }
00108                 compressed_size=gz_size;
00109                 break;
00110             }
00111 #endif
00112 #ifdef USE_BZIP2
00113         case BZIP2:
00114             {
00115                 bz_stream strm;
00116                 strm.bzalloc=NULL;
00117                 strm.bzfree=NULL;
00118                 strm.opaque=NULL;
00119                 initial_buffer_size=1.01*uncompressed_size + 600;
00120                 compressed_size=initial_buffer_size;
00121                 compressed=new uint8_t[initial_buffer_size];
00122                 if (BZ2_bzCompressInit(&strm, level, 0, 0)!=BZ_OK)
00123                     SG_ERROR("Error initializing bzip2 compressor\n");
00124 
00125                 strm.next_in=(char*) uncompressed;
00126                 strm.avail_in=(unsigned int) uncompressed_size;
00127                 strm.next_out=(char*) compressed;
00128                 strm.avail_out=(unsigned int) compressed_size;
00129                 if (BZ2_bzCompress(&strm, BZ_RUN) != BZ_RUN_OK)
00130                     SG_ERROR("Error bzip2-compressing data (BZ_RUN)\n");
00131 
00132                 int ret=0;
00133                 while (true)
00134                 {
00135                     ret=BZ2_bzCompress(&strm, BZ_FINISH);
00136                     if (ret==BZ_FINISH_OK)
00137                         continue;
00138                     if (ret==BZ_STREAM_END)
00139                         break;
00140                     else
00141                         SG_ERROR("Error bzip2-compressing data (BZ_FINISH)\n");
00142                 }
00143                 BZ2_bzCompressEnd(&strm);
00144                 compressed_size=(((uint64_t) strm.total_out_hi32) << 32) + strm.total_out_lo32;
00145                 break;
00146             }
00147 #endif
00148 #ifdef USE_LZMA
00149         case LZMA:
00150             {
00151                 lzma_stream strm = LZMA_STREAM_INIT;
00152                 initial_buffer_size = lzma_stream_buffer_bound(uncompressed_size);
00153                 compressed_size=initial_buffer_size;
00154                 compressed=new uint8_t[initial_buffer_size];
00155                 strm.next_in=uncompressed;
00156                 strm.avail_in=(size_t) uncompressed_size;
00157                 strm.next_out=compressed;
00158                 strm.avail_out=(size_t) compressed_size;
00159 
00160                 if (lzma_easy_encoder(&strm, level, LZMA_CHECK_CRC32) != LZMA_OK)
00161                     SG_ERROR("Error initializing lzma compressor\n");
00162                 if (lzma_code(&strm, LZMA_RUN) != LZMA_OK)
00163                     SG_ERROR("Error lzma-compressing data (LZMA_RUN)\n");
00164 
00165                 lzma_ret ret;
00166                 while (true)
00167                 {
00168                     ret=lzma_code(&strm, LZMA_FINISH);
00169                     if (ret==LZMA_OK)
00170                         continue;
00171                     if (ret==LZMA_STREAM_END)
00172                         break;
00173                     else
00174                         SG_ERROR("Error lzma-compressing data (LZMA_FINISH)\n");
00175                 }
00176                 lzma_end(&strm);
00177                 compressed_size=strm.total_out;
00178                 break;
00179             }
00180 #endif
00181         default:
00182             SG_ERROR("Unknown compression type\n");
00183     }
00184 
00185     if (compressed)
00186         CMath::resize(compressed, initial_buffer_size, compressed_size);
00187 }
00188 
00189 void CCompressor::decompress(uint8_t* compressed, uint64_t compressed_size,
00190         uint8_t* uncompressed, uint64_t& uncompressed_size)
00191 {
00192     if (compressed_size==0)
00193     {
00194         uncompressed_size=0;
00195         return;
00196     }
00197 
00198     switch (compression_type)
00199     {
00200         case UNCOMPRESSED:
00201             {
00202                 ASSERT(uncompressed_size>=compressed_size);
00203                 uncompressed_size=compressed_size;
00204                 memcpy(uncompressed, compressed, uncompressed_size);
00205                 break;
00206             }
00207 #ifdef USE_LZO
00208         case LZO:
00209             {
00210                 if (lzo_init() != LZO_E_OK)
00211                     SG_ERROR("Error initializing LZO Compression\n");
00212 
00213                 lzo_bytep lzo_wrkmem = (lzo_bytep) lzo_malloc(LZO1X_999_MEM_COMPRESS);
00214                 if (!lzo_wrkmem)
00215                     SG_ERROR("Error allocating LZO workmem\n");
00216 
00217                 lzo_uint lzo_size=uncompressed_size;
00218                 if (lzo1x_decompress(compressed, compressed_size, uncompressed,
00219                             &lzo_size, NULL) != LZO_E_OK)
00220                 {
00221                     SG_ERROR("Error uncompressing lzo-data\n");
00222                 }
00223                 uncompressed_size=lzo_size;
00224 
00225                 lzo_free(lzo_wrkmem);
00226                 break;
00227             }
00228 #endif
00229 #ifdef USE_GZIP
00230         case GZIP:
00231             {
00232                 uLongf gz_size=uncompressed_size;
00233                 if (uncompress(uncompressed, &gz_size, compressed,
00234                             compressed_size) != Z_OK)
00235                 {
00236                     SG_ERROR("Error uncompressing gzip-data\n");
00237                 }
00238                 uncompressed_size=gz_size;
00239                 break;
00240             }
00241 #endif
00242 #ifdef USE_BZIP2
00243         case BZIP2:
00244             {
00245                 bz_stream strm;
00246                 strm.bzalloc=NULL;
00247                 strm.bzfree=NULL;
00248                 strm.opaque=NULL;
00249                 if (BZ2_bzDecompressInit(&strm, 0, 0)!=BZ_OK)
00250                     SG_ERROR("Error initializing bzip2 decompressor\n");
00251                 strm.next_in=(char*) compressed;
00252                 strm.avail_in=(unsigned int) compressed_size;
00253                 strm.next_out=(char*) uncompressed;
00254                 strm.avail_out=(unsigned int) uncompressed_size;
00255                 if (BZ2_bzDecompress(&strm) != BZ_STREAM_END || strm.avail_in!=0)
00256                     SG_ERROR("Error uncompressing bzip2-data\n");
00257                 BZ2_bzDecompressEnd(&strm);
00258                 break;
00259             }
00260 #endif
00261 #ifdef USE_LZMA
00262         case LZMA:
00263             {
00264                 lzma_stream strm = LZMA_STREAM_INIT;
00265                 strm.next_in=compressed;
00266                 strm.avail_in=(size_t) compressed_size;
00267                 strm.next_out=uncompressed;
00268                 strm.avail_out=(size_t) uncompressed_size;
00269 
00270                 uint64_t memory_limit=lzma_easy_decoder_memusage(9);
00271 
00272                 if (lzma_stream_decoder(&strm, memory_limit, 0)!= LZMA_OK)
00273                     SG_ERROR("Error initializing lzma decompressor\n");
00274                 if (lzma_code(&strm, LZMA_RUN) != LZMA_STREAM_END)
00275                     SG_ERROR("Error decompressing lzma data\n");
00276                 lzma_end(&strm);
00277                 break;
00278             }
00279 #endif
00280         default:
00281             SG_ERROR("Unknown compression type\n");
00282     }
00283 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation