dlvhex  2.5.0
vs12/bm/encoding.h
Go to the documentation of this file.
00001 #ifndef BMENCODING_H__INCLUDED__
00002 #define BMENCODING_H__INCLUDED__
00003 /*
00004 Copyright(c) 2002-2009 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
00005 
00006 
00007 Permission is hereby granted, free of charge, to any person 
00008 obtaining a copy of this software and associated documentation 
00009 files (the "Software"), to deal in the Software without restriction, 
00010 including without limitation the rights to use, copy, modify, merge, 
00011 publish, distribute, sublicense, and/or sell copies of the Software, 
00012 and to permit persons to whom the Software is furnished to do so, 
00013 subject to the following conditions:
00014 
00015 The above copyright notice and this permission notice shall be included 
00016 in all copies or substantial portions of the Software.
00017 
00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
00019 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
00020 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
00021 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
00022 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
00023 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
00024 OTHER DEALINGS IN THE SOFTWARE.
00025 
00026 
00027 For more information please visit:   http://bmagic.sourceforge.net
00028 
00029 */
00030 
00031 #include <memory.h>
00032 #include "bmutil.h"
00033 
00034 namespace bm
00035 {
00036 
00037 
00038 // ----------------------------------------------------------------
00045 class encoder
00046 {
00047 public:
00048     typedef unsigned char* position_type;
00049 public:
00050     encoder(unsigned char* buf, unsigned size);
00051     void put_8(unsigned char c);
00052     void put_16(bm::short_t  s);
00053     void put_16(const bm::short_t* s, unsigned count);
00054     void put_32(bm::word_t  w);
00055     void put_32(const bm::word_t* w, unsigned count);
00056     void put_prefixed_array_32(unsigned char c, 
00057                                const bm::word_t* w, unsigned count);
00058     void put_prefixed_array_16(unsigned char c, 
00059                                const bm::short_t* s, unsigned count,
00060                                bool encode_count);
00061     unsigned size() const;
00062     unsigned char* get_pos() const;
00063     void set_pos(unsigned char* buf_pos);
00064 private:
00065     unsigned char*  buf_;
00066     unsigned char*  start_;
00067     unsigned int    size_;
00068 };
00069 
00070 // ----------------------------------------------------------------
00074 class decoder_base
00075 {
00076 public:
00077     decoder_base(const unsigned char* buf) { buf_ = start_ = buf; }
00079     BMFORCEINLINE unsigned char get_8() { return *buf_++; }
00081     BMFORCEINLINE 
00082     unsigned size() const { return (unsigned)(buf_ - start_); }
00084     BMFORCEINLINE
00085     void seek(int delta) { buf_ += delta; }
00086 protected:
00087    const unsigned char*   buf_;
00088    const unsigned char*   start_;
00089 };
00090 
00091 
00092 // ----------------------------------------------------------------
00097 class decoder : public decoder_base
00098 {
00099 public:
00100     decoder(const unsigned char* buf);
00101     bm::short_t get_16();
00102     bm::word_t get_32();
00103     void get_32(bm::word_t* w, unsigned count);
00104     void get_16(bm::short_t* s, unsigned count);
00105 };
00106 
00107 // ----------------------------------------------------------------
00114 typedef decoder decoder_big_endian;
00115 
00116 
00117 // ----------------------------------------------------------------
00124 class decoder_little_endian : public decoder_base
00125 {
00126 public:
00127     decoder_little_endian(const unsigned char* buf);
00128     bm::short_t get_16();
00129     bm::word_t get_32();
00130     void get_32(bm::word_t* w, unsigned count);
00131     void get_16(bm::short_t* s, unsigned count);
00132 };
00133 
00134 
00140 template<class TEncoder>
00141 class bit_out
00142 {
00143 public:
00144     bit_out(TEncoder& dest)
00145         : dest_(dest), used_bits_(0), accum_(0)
00146     {}
00147 
00148     ~bit_out()
00149     {
00150         if (used_bits_)
00151             dest_.put_32(accum_);
00152     }
00153 
00154     void put_bit(unsigned value)
00155     {
00156         BM_ASSERT(value <= 1);
00157         accum_ |= (value << used_bits_);
00158         if (++used_bits_ == (sizeof(accum_) * 8))
00159             flush_accum();
00160     }
00161 
00162     void put_bits(unsigned value, unsigned count)
00163     {
00164         unsigned used = used_bits_;
00165         unsigned acc = accum_;
00166 
00167         {
00168             unsigned mask = ~0;
00169             mask >>= (sizeof(accum_) * 8) - count;
00170             value &= mask;
00171         }
00172         for (;count;)
00173         {  
00174             acc |= value << used;
00175 
00176             unsigned free_bits = (sizeof(accum_) * 8) - used;
00177             if (count <= free_bits)
00178             {
00179                 used += count;
00180                 break;
00181             }
00182             else
00183             {
00184                 value >>= free_bits;
00185                 count -= free_bits;
00186                 dest_.put_32(acc);
00187                 acc = used = 0;
00188                 continue;
00189             }
00190         }
00191         used_bits_ = used;
00192         accum_ = acc;
00193     }
00194 
00195     void put_zero_bit()
00196     {
00197         if (++used_bits_ == (sizeof(accum_) * 8))
00198             flush_accum();        
00199     }
00200 
00201     void put_zero_bits(register unsigned count)
00202     {
00203         register unsigned used = used_bits_;
00204         unsigned free_bits = (sizeof(accum_) * 8) - used;
00205         if (count >= free_bits)
00206         {
00207             flush_accum();
00208             count -= free_bits;
00209             used = 0;
00210 
00211             for ( ;count >= sizeof(accum_) * 8; count -= sizeof(accum_) * 8)
00212             {
00213                 dest_.put_32(0);
00214             }
00215             used += count; 
00216         }
00217         else
00218         {
00219             used += count;
00220         }
00221         accum_ |= (1 << used);
00222         if (++used == (sizeof(accum_) * 8))
00223             flush_accum();
00224         else
00225             used_bits_ = used;
00226     }
00227 
00228 
00229     void gamma(unsigned value)
00230     {
00231         BM_ASSERT(value);
00232 
00233         unsigned logv = 
00234         #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
00235             bm::bsr_asm32(value);
00236         #else
00237             bm::ilog2_LUT(value);
00238         #endif
00239 
00240         // Put zeroes + 1 bit
00241 
00242         unsigned used = used_bits_;
00243         unsigned acc = accum_;
00244         const unsigned acc_bits = (sizeof(acc) * 8);
00245         unsigned free_bits = acc_bits - used;
00246 
00247         {
00248         unsigned count = logv;
00249         if (count >= free_bits)
00250         {
00251             dest_.put_32(acc);
00252             acc = used ^= used;
00253             count -= free_bits;
00254 
00255             for ( ;count >= acc_bits; count -= acc_bits)
00256             {
00257                 dest_.put_32(0);
00258             }
00259             used += count; 
00260         }
00261         else
00262         {
00263             used += count;
00264         }
00265         acc |= (1 << used);
00266         if (++used == acc_bits)
00267         {
00268             dest_.put_32(acc);
00269             acc = used ^= used;
00270         }
00271         }
00272 
00273         // Put the value bits
00274         //
00275         {
00276             unsigned mask = (~0u);
00277             mask >>= acc_bits - logv;
00278             value &= mask;
00279         }
00280         for (;logv;)
00281         {  
00282             acc |= value << used;
00283             free_bits = acc_bits - used;
00284             if (logv <= free_bits)
00285             {
00286                 used += logv;
00287                 break;
00288             }
00289             else
00290             {
00291                 value >>= free_bits;
00292                 logv -= free_bits;
00293                 dest_.put_32(acc);
00294                 acc = used ^= used;
00295                 continue;
00296             }
00297         } // for
00298 
00299         used_bits_ = used;
00300         accum_ = acc;
00301     }
00302 
00303 
00304     void flush()
00305     {
00306         if (used_bits_)
00307             flush_accum();
00308     }
00309 
00310 private:
00311     void flush_accum()
00312     {
00313         dest_.put_32(accum_);
00314         used_bits_ = accum_ = 0;
00315     }
00316 private:
00317     bit_out(const bit_out&);
00318     bit_out& operator=(const bit_out&);
00319 
00320 private:
00321     TEncoder&      dest_;      
00322     unsigned       used_bits_; 
00323     unsigned       accum_;     
00324 };
00325 
00326 
00332 template<class TDecoder>
00333 class bit_in
00334 {
00335 public:
00336     bit_in(TDecoder& decoder)
00337         : src_(decoder),
00338           used_bits_(sizeof(accum_) * 8),
00339           accum_(0)
00340     {
00341     }
00342 
00343     unsigned gamma()
00344     {
00345         unsigned acc = accum_;
00346         unsigned used = used_bits_;
00347 
00348         if (used == (sizeof(acc) * 8))
00349         {
00350             acc = src_.get_32();
00351             used ^= used;
00352         }
00353         unsigned zero_bits = 0;
00354         while (true)
00355         {
00356             if (acc == 0)
00357             {
00358                 zero_bits += (sizeof(acc) * 8) - used;
00359                 used = 0;
00360                 acc = src_.get_32();
00361                 continue;
00362             }
00363             unsigned first_bit_idx = 
00364                 #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER))
00365                     bm::bsf_asm32(acc);
00366                 #else
00367                     bm::bit_scan_fwd(acc);
00368                 #endif
00369             acc >>= first_bit_idx;
00370             zero_bits += first_bit_idx;
00371             used += first_bit_idx;
00372             break;
00373         } // while
00374 
00375         // eat the border bit
00376         //
00377         if (used == (sizeof(acc) * 8))
00378         {
00379             acc = src_.get_32();
00380             used = 1;
00381         }
00382         else
00383         {
00384             ++used;
00385         }
00386         acc >>= 1;
00387 
00388         // get the value
00389         unsigned current;
00390         
00391         unsigned free_bits = (sizeof(acc) * 8) - used;
00392         if (zero_bits <= free_bits)
00393         {
00394         take_accum:
00395             current = 
00396                 (acc & block_set_table<true>::_left[zero_bits]) | (1 << zero_bits);
00397             acc >>= zero_bits;
00398             used += zero_bits;
00399             goto ret;
00400         }
00401 
00402         if (used == (sizeof(acc) * 8))
00403         {
00404             acc = src_.get_32();
00405             used ^= used;
00406             goto take_accum;
00407         }
00408 
00409         // take the part
00410         current = acc;
00411         // read the next word
00412         acc = src_.get_32();
00413         used = zero_bits - free_bits;
00414         current |= 
00415             ((acc & block_set_table<true>::_left[used]) << free_bits) | 
00416             (1 << zero_bits);
00417 
00418         acc >>= used;
00419     ret:
00420         accum_ = acc;
00421         used_bits_ = used;
00422         return current;
00423     }
00424 
00425 
00426 private:
00427     bit_in(const bit_in&);
00428     bit_in& operator=(const bit_in&);
00429 private:
00430     TDecoder&           src_;        
00431     unsigned            used_bits_;  
00432     unsigned            accum_;      
00433 };
00434 
00435 
00439 template<typename T, typename TBitIO>
00440 class gamma_encoder
00441 {
00442 public:
00443     gamma_encoder(TBitIO& bout) : bout_(bout) 
00444     {}
00445         
00449     BMFORCEINLINE
00450     void operator()(T value)
00451     {
00452         bout_.gamma(value);
00453     }
00454 private:
00455     gamma_encoder(const gamma_encoder&);
00456     gamma_encoder& operator=(const gamma_encoder&);
00457 private:
00458     TBitIO&  bout_;
00459 };
00460 
00461 
00465 template<typename T, typename TBitIO>
00466 class gamma_decoder
00467 {
00468 public:
00469     gamma_decoder(TBitIO& bin) : bin_(bin) 
00470     {}
00471     
00475     void start()
00476     {}
00477     
00481     void stop()
00482     {}
00483     
00487     T operator()(void)
00488     {
00489         return (T)bin_.gamma();
00490     }
00491 private:
00492     gamma_decoder(const gamma_decoder&);
00493     gamma_decoder& operator=(const gamma_decoder&);
00494 private:
00495     TBitIO&  bin_;
00496 };
00497 
00498 
00499 // ----------------------------------------------------------------
00500 // Implementation details. 
00501 // ----------------------------------------------------------------
00502 
00509 inline encoder::encoder(unsigned char* buf, unsigned size)
00510 : buf_(buf), start_(buf), size_(size)
00511 {
00512 }
00516 inline void encoder::put_prefixed_array_32(unsigned char c, 
00517                                            const bm::word_t* w, 
00518                                            unsigned count)
00519 {
00520     put_8(c);
00521     put_32(w, count);
00522 }
00523 
00527 inline void encoder::put_prefixed_array_16(unsigned char c, 
00528                                            const bm::short_t* s, 
00529                                            unsigned count,
00530                                            bool encode_count)
00531 {
00532     put_8(c);
00533     if (encode_count)
00534         put_16((bm::short_t) count);
00535     put_16(s, count);
00536 }
00537 
00538 
00544 BMFORCEINLINE void encoder::put_8(unsigned char c)
00545 {
00546     *buf_++ = c;
00547 }
00548 
00554 BMFORCEINLINE void encoder::put_16(bm::short_t s)
00555 {
00556 #if (BM_UNALIGNED_ACCESS_OK == 1)
00557     *((bm::short_t*)buf_) = s;
00558     buf_ += sizeof(s);
00559 #else
00560     *buf_++ = (unsigned char) s;
00561     s >>= 8;
00562     *buf_++ = (unsigned char) s;
00563 #endif
00564 }
00565 
00569 inline void encoder::put_16(const bm::short_t* s, unsigned count)
00570 {
00571 #if (BM_UNALIGNED_ACCESS_OK == 1)
00572     unsigned short* buf = (unsigned short*)buf_;
00573     const bm::short_t* s_end = s + count;
00574     do 
00575     {
00576         *buf++ = *s++;
00577     } while (s < s_end);
00578         
00579     buf_ = (unsigned char*)buf;
00580 #else
00581     unsigned char* buf = buf_;
00582     const bm::short_t* s_end = s + count;
00583     do 
00584     {
00585         bm::short_t w16 = *s++;
00586         unsigned char a = (unsigned char)  w16;
00587         unsigned char b = (unsigned char) (w16 >> 8);
00588         
00589         *buf++ = a;
00590         *buf++ = b;
00591                 
00592     } while (s < s_end);
00593     
00594     buf_ = (unsigned char*)buf;
00595 #endif
00596 }
00597 
00598 
00603 inline unsigned encoder::size() const
00604 {
00605     return (unsigned)(buf_ - start_);
00606 }
00607 
00611 inline encoder::position_type encoder::get_pos() const
00612 {
00613     return buf_;
00614 }
00615 
00619 inline void encoder::set_pos(encoder::position_type buf_pos)
00620 {
00621     buf_ = buf_pos;
00622 }
00623 
00624 
00630 BMFORCEINLINE void encoder::put_32(bm::word_t w)
00631 {
00632 #if (BM_UNALIGNED_ACCESS_OK == 1)
00633     *((bm::word_t*) buf_) = w;
00634     buf_ += sizeof(w);
00635 #else
00636     *buf_++ = (unsigned char) w;
00637     *buf_++ = (unsigned char) (w >> 8);
00638     *buf_++ = (unsigned char) (w >> 16);
00639     *buf_++ = (unsigned char) (w >> 24);
00640 #endif
00641 }
00642 
00646 inline 
00647 void encoder::put_32(const bm::word_t* w, unsigned count)
00648 {
00649 #if (BM_UNALIGNED_ACCESS_OK == 1)
00650     bm::word_t* buf = (bm::word_t*)buf_;
00651     const bm::word_t* w_end = w + count;
00652     do 
00653     {
00654         *buf++ = *w++;
00655     } while (w < w_end);
00656     
00657     buf_ = (unsigned char*)buf;
00658 #else
00659     unsigned char* buf = buf_;
00660     const bm::word_t* w_end = w + count;
00661     do 
00662     {
00663         bm::word_t w32 = *w++;
00664         unsigned char a = (unsigned char) w32;
00665         unsigned char b = (unsigned char) (w32 >> 8);
00666         unsigned char c = (unsigned char) (w32 >> 16);
00667         unsigned char d = (unsigned char) (w32 >> 24);
00668 
00669         *buf++ = a;
00670         *buf++ = b;
00671         *buf++ = c;
00672         *buf++ = d;
00673     } while (w < w_end);
00674     
00675     buf_ = (unsigned char*)buf;
00676 #endif
00677 }
00678 
00679 
00680 // ---------------------------------------------------------------------
00681 
00687 inline decoder::decoder(const unsigned char* buf) 
00688 : decoder_base(buf)
00689 {
00690 }
00691 
00696 BMFORCEINLINE bm::short_t decoder::get_16() 
00697 {
00698 #if (BM_UNALIGNED_ACCESS_OK == 1)
00699     bm::short_t a = *((bm::short_t*)buf_);
00700 #else
00701     bm::short_t a = (bm::short_t)(buf_[0] + ((bm::short_t)buf_[1] << 8));
00702 #endif
00703     buf_ += sizeof(a);
00704     return a;
00705 }
00706 
00711 BMFORCEINLINE bm::word_t decoder::get_32() 
00712 {
00713 #if (BM_UNALIGNED_ACCESS_OK == 1)
00714     bm::word_t a = *((bm::word_t*)buf_);
00715 #else
00716     bm::word_t a = buf_[0]+ ((unsigned)buf_[1] << 8) +
00717                    ((unsigned)buf_[2] << 16) + ((unsigned)buf_[3] << 24);
00718 #endif
00719     buf_+=sizeof(a);
00720     return a;
00721 }
00722 
00723 
00730 inline void decoder::get_32(bm::word_t* w, unsigned count)
00731 {
00732     if (!w) 
00733     {
00734         seek(count * 4);
00735         return;
00736     }
00737 #if (BM_UNALIGNED_ACCESS_OK == 1)
00738     memcpy(w, buf_, count * sizeof(bm::word_t));
00739     seek(count * 4);
00740     return;
00741 #else
00742     const unsigned char* buf = buf_;
00743     const bm::word_t* w_end = w + count;
00744     do 
00745     {
00746         bm::word_t a = buf[0]+ ((unsigned)buf[1] << 8) +
00747                    ((unsigned)buf[2] << 16) + ((unsigned)buf[3] << 24);
00748         *w++ = a;
00749         buf += sizeof(a);
00750     } while (w < w_end);
00751     buf_ = (unsigned char*)buf;
00752 #endif
00753 }
00754 
00761 inline void decoder::get_16(bm::short_t* s, unsigned count)
00762 {
00763     if (!s) 
00764     {
00765         seek(count * 2);
00766         return;
00767     }
00768 #if (BM_UNALIGNED_ACCESS_OK == 1)
00769     const bm::short_t* buf = (bm::short_t*)buf_;
00770     const bm::short_t* s_end = s + count;
00771     do 
00772     {
00773         *s++ = *buf++;
00774     } while (s < s_end);
00775 #else
00776     const unsigned char* buf = buf_;
00777     const bm::short_t* s_end = s + count;
00778     do 
00779     {
00780         bm::short_t a = (bm::short_t)(buf[0] + ((bm::short_t)buf[1] << 8));
00781         *s++ = a;
00782         buf += sizeof(a);
00783     } while (s < s_end);
00784 #endif
00785     buf_ = (unsigned char*)buf;
00786 }
00787 
00788 
00789 
00790 // ---------------------------------------------------------------------
00791 
00792 inline decoder_little_endian::decoder_little_endian(const unsigned char* buf)
00793 : decoder_base(buf)
00794 {
00795 }
00796 
00797 BMFORCEINLINE bm::short_t decoder_little_endian::get_16()
00798 {
00799     bm::short_t a = ((bm::short_t)buf_[0] << 8) + ((bm::short_t)buf_[1]);
00800     buf_ += sizeof(a);
00801     return a;
00802 }
00803 
00804 BMFORCEINLINE bm::word_t decoder_little_endian::get_32() 
00805 {
00806     bm::word_t a = ((unsigned)buf_[0] << 24)+ ((unsigned)buf_[1] << 16) +
00807                    ((unsigned)buf_[2] << 8) + ((unsigned)buf_[3]);
00808     buf_+=sizeof(a);
00809     return a;
00810 }
00811 
00812 inline void decoder_little_endian::get_32(bm::word_t* w, unsigned count)
00813 {
00814     if (!w) 
00815     {
00816         seek(count * 4);
00817         return;
00818     }
00819 
00820     const unsigned char* buf = buf_;
00821     const bm::word_t* w_end = w + count;
00822     do 
00823     {
00824         bm::word_t a = ((unsigned)buf[0] << 24)+ ((unsigned)buf[1] << 16) +
00825                        ((unsigned)buf[2] << 8) + ((unsigned)buf[3]);
00826         *w++ = a;
00827         buf += sizeof(a);
00828     } while (w < w_end);
00829     buf_ = (unsigned char*)buf;
00830 }
00831 
00832 inline void decoder_little_endian::get_16(bm::short_t* s, unsigned count)
00833 {
00834     if (!s) 
00835     {
00836         seek(count * 2);
00837         return;
00838     }
00839 
00840     const unsigned char* buf = buf_;
00841     const bm::short_t* s_end = s + count;
00842     do 
00843     {
00844         bm::short_t a = ((bm::short_t)buf[0] << 8) + ((bm::short_t)buf[1]);
00845         *s++ = a;
00846         buf += sizeof(a);
00847     } while (s < s_end);
00848     buf_ = (unsigned char*)buf;
00849 }
00850 
00851 
00852 } // namespace bm
00853 
00854 #endif