dlvhex  2.5.0
vs10/bm/bmserial.h
Go to the documentation of this file.
00001 #ifndef BMSERIAL__H__INCLUDED__
00002 #define BMSERIAL__H__INCLUDED__
00003 /*
00004 Copyright(c) 2002-2010 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
00005 
00006 Permission is hereby granted, free of charge, to any person 
00007 obtaining a copy of this software and associated documentation 
00008 files (the "Software"), to deal in the Software without restriction, 
00009 including without limitation the rights to use, copy, modify, merge, 
00010 publish, distribute, sublicense, and/or sell copies of the Software, 
00011 and to permit persons to whom the Software is furnished to do so, 
00012 subject to the following conditions:
00013 
00014 The above copyright notice and this permission notice shall be included 
00015 in all copies or substantial portions of the Software.
00016 
00017 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
00018 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
00019 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
00020 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
00021 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
00022 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
00023 OTHER DEALINGS IN THE SOFTWARE.
00024 
00025 For more information please visit:  http://bmagic.sourceforge.net
00026 
00027 */
00028 
00029 
00036 #ifndef BM__H__INCLUDED__
00037 #define BM__H__INCLUDED__
00038 
00039 #include "bm.h"
00040 
00041 #endif
00042 
00043 #ifdef _MSC_VER
00044 #pragma warning( push )
00045 #pragma warning( disable : 4311 4312 4127)
00046 #endif
00047 
00048 
00049 
00050 #include "encoding.h"
00051 #include "bmdef.h"
00052 #include "bmfunc.h"
00053 #include "bmtrans.h"
00054 #include "bmalgo_impl.h"
00055 #include "bmutil.h"
00056 
00057 //#include "bmgamma.h"
00058 
00059 
00060 namespace bm
00061 {
00062 
00063 
00064 // Serialization stream markup constants
00065 
00066 
00067 const unsigned char set_block_end               = 0;   
00068 const unsigned char set_block_1zero             = 1;   
00069 const unsigned char set_block_1one              = 2;   
00070 const unsigned char set_block_8zero             = 3;   
00071 const unsigned char set_block_8one              = 4;   
00072 const unsigned char set_block_16zero            = 5;   
00073 const unsigned char set_block_16one             = 6;   
00074 const unsigned char set_block_32zero            = 7;   
00075 const unsigned char set_block_32one             = 8;   
00076 const unsigned char set_block_azero             = 9;   
00077 const unsigned char set_block_aone              = 10;  
00078 const unsigned char set_block_bit               = 11;  
00079 const unsigned char set_block_sgapbit           = 12;  
00080 const unsigned char set_block_sgapgap           = 13;  
00081 const unsigned char set_block_gap               = 14;  
00082 const unsigned char set_block_gapbit            = 15;  
00083 const unsigned char set_block_arrbit            = 16;  
00084 const unsigned char set_block_bit_interval      = 17; 
00085 const unsigned char set_block_arrgap            = 18;  
00086 const unsigned char set_block_bit_1bit          = 19; 
00087 const unsigned char set_block_gap_egamma        = 20; 
00088 const unsigned char set_block_arrgap_egamma     = 21; 
00089 const unsigned char set_block_bit_0runs         = 22; 
00090 const unsigned char set_block_arrgap_egamma_inv = 23; 
00091 const unsigned char set_block_arrgap_inv        = 24;  
00092 
00093 
00096 enum serialization_header_mask {
00097     BM_HM_DEFAULT = 1,
00098     BM_HM_RESIZE  = (1 << 1), 
00099     BM_HM_ID_LIST = (1 << 2), 
00100     BM_HM_NO_BO   = (1 << 3), 
00101     BM_HM_NO_GAPL = (1 << 4)  
00102 };
00103 
00104 
00105 
00106 #define SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO) \
00107    if (nb == 1) \
00108       enc.put_8(B_1ZERO); \
00109    else if (nb < 256) \
00110    { \
00111       enc.put_8(B_8ZERO); \
00112       enc.put_8((unsigned char)nb); \
00113    } \
00114    else if (nb < 65536) \
00115    { \
00116       enc.put_8(B_16ZERO); \
00117       enc.put_16((unsigned short)nb); \
00118    } \
00119    else \
00120    {\
00121       enc.put_8(B_32ZERO); \
00122       enc.put_32(nb); \
00123    }
00124 
00125 
00126 #define BM_SET_ONE_BLOCKS(x) \
00127     {\
00128          unsigned end_block = i + x; \
00129          for (;i < end_block; ++i) \
00130             bman.set_block_all_set(i); \
00131     } \
00132     --i
00133 
00134 
00135 
00136 
00145 template<class BV>
00146 class serializer
00147 {
00148 public:
00149     typedef typename BV::allocator_type      allocator_type;
00150     typedef typename BV::blocks_manager_type blocks_manager_type;
00151 public:
00152     serializer(const allocator_type&   alloc  = allocator_type());
00153     ~serializer();
00154 
00160     void set_compression_level(unsigned clevel);
00161 
00165     unsigned get_compression_level() const;
00166     
00181     unsigned serialize(const BV& bv, 
00182                        unsigned char* buf, unsigned buf_size);
00183 
00184     
00190     void gap_length_serialization(bool value);
00196     void byte_order_serialization(bool value);
00197 
00198 protected:
00202     void encode_header(const BV& bv, bm::encoder& enc);
00203     
00207     void encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
00208 
00212     void gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
00213 
00217     void gamma_gap_array(const bm::gap_word_t* gap_block, 
00218                          unsigned              arr_len, 
00219                          bm::encoder&          enc,
00220                          bool                  inverted = false);
00221 
00225     void encode_bit_interval(const bm::word_t* blk, 
00226                              bm::encoder&      enc,
00227                              unsigned          size_control);
00228 
00229 private:
00230     serializer(const serializer&);
00231     serializer& operator=(const serializer&);
00232 
00233 private:
00234 
00235     typedef bm::bit_out<bm::encoder>  bit_out_type;
00236     typedef bm::gamma_encoder<bm::gap_word_t, bit_out_type> gamma_encoder_func;
00237 
00238 private:
00239     allocator_type alloc_;
00240     bool           gap_serial_;
00241     bool           byte_order_serial_;
00242     bm::word_t*    temp_block_;
00243     unsigned       compression_level_;
00244 };
00245 
00250 template<class DEC>
00251 class deseriaizer_base
00252 {
00253 public:
00254     typedef DEC decoder_type;
00255 protected:
00256     deseriaizer_base(){}
00257 
00262     unsigned read_gap_block(decoder_type&   decoder, 
00263                             unsigned        block_type, 
00264                             bm::gap_word_t* dst_block,
00265                             bm::gap_word_t& gap_head);
00266 
00270     unsigned read_id_list(decoder_type&   decoder, 
00271                           unsigned        block_type, 
00272                           bm::gap_word_t* dst_arr);
00273 
00274 protected:
00275     bm::gap_word_t   id_array_[bm::gap_equiv_len * 2];
00276 };
00277 
00282 template<class BV, class DEC>
00283 class deserializer : protected deseriaizer_base<DEC>
00284 {
00285 public:
00286     typedef BV bvector_type;
00287     typedef typename deseriaizer_base<DEC>::decoder_type decoder_type;
00288 //    typedef DEC decoder_type;
00289 public:
00290     deserializer() : temp_block_(0) {}
00291     
00292     unsigned deserialize(bvector_type&        bv, 
00293                          const unsigned char* buf, 
00294                          bm::word_t*          temp_block);
00295 protected:
00296    typedef typename BV::blocks_manager_type blocks_manager_type;
00297    typedef typename BV::allocator_type allocator_type;
00298 
00299 protected:
00300    void deserialize_gap(unsigned char btype, decoder_type& dec, 
00301                         bvector_type&  bv, blocks_manager_type& bman,
00302                         unsigned i,
00303                         bm::word_t* blk);
00304 protected:
00305     bm::gap_word_t   gap_temp_block_[bm::gap_equiv_len * 4];
00306     bm::word_t*      temp_block_;
00307 };
00308 
00309 
00316 template<class BV, class SerialIterator>
00317 class iterator_deserializer
00318 {
00319 public:
00320     typedef BV              bvector_type;
00321     typedef SerialIterator  serial_iterator_type;
00322 public:
00323     static
00324     unsigned deserialize(bvector_type&         bv, 
00325                          serial_iterator_type& sit, 
00326                          bm::word_t*           temp_block,
00327                          set_operation         op = bm::set_OR);
00328 
00332     static
00333     void deserialize(bvector_type&         bv_target,
00334                      const bvector_type&   bv_mask,
00335                      serial_iterator_type& sit, 
00336                      bm::word_t*           temp_block,
00337                      set_operation         op);
00338 
00339 private:
00340     typedef typename BV::blocks_manager_type blocks_manager_type;
00341 
00343     static
00344     void load_id_list(bvector_type&         bv, 
00345                       serial_iterator_type& sit,
00346                       unsigned              id_count,
00347                       bool                  set_clear);
00348 
00350     static
00351     unsigned finalize_target_vector(blocks_manager_type& bman,
00352                                     set_operation        op,
00353                                     unsigned             bv_block_idx);
00354 
00356     static
00357     unsigned process_id_list(bvector_type&         bv, 
00358                              serial_iterator_type& sit,
00359                              set_operation         op);
00360 
00361 
00362 };
00363 
00370 template<class DEC>
00371 class serial_stream_iterator : protected deseriaizer_base<DEC>
00372 {
00373 public:
00374     typedef typename deseriaizer_base<DEC>::decoder_type decoder_type;
00375 public:
00376     serial_stream_iterator(const unsigned char* buf);
00377 
00379     unsigned bv_size() const { return bv_size_; }
00380 
00382     bool is_eof() const { return end_of_stream_; }
00383 
00385     void next();
00386 
00388     void skip_mono_blocks();
00389 
00391     unsigned get_bit_block(bm::word_t*       dst_block, 
00392                            bm::word_t*       tmp_block,
00393                            set_operation     op);
00394 
00395 
00397     void get_gap_block(bm::gap_word_t* dst_block);
00398 
00400     unsigned dec_size() const { return decoder_.size(); }
00401 
00403     decoder_type& decoder() { return decoder_; }
00404 
00408     enum iterator_state 
00409     {
00410         e_unknown = 0,
00411         e_list_ids,     
00412         e_blocks,       
00413         e_zero_blocks,  
00414         e_one_blocks,   
00415         e_bit_block,    
00416         e_gap_block     
00417 
00418     };
00419 
00421     iterator_state state() const { return this->state_; }
00422 
00423     iterator_state get_state() const { return this->state_; }
00425     unsigned get_id_count() const { return this->id_cnt_; }
00426 
00428     bm::id_t get_id() const { return this->last_id_; }
00429 
00431     unsigned block_idx() const { return this->block_idx_; }
00432 
00433 public:
00436     typedef 
00437         unsigned (serial_stream_iterator<DEC>::*get_bit_func_type)
00438                                                 (bm::word_t*,bm::word_t*);
00439 
00440     unsigned 
00441     get_bit_block_ASSIGN(bm::word_t* dst_block, bm::word_t* tmp_block);
00442     unsigned 
00443     get_bit_block_OR    (bm::word_t* dst_block, bm::word_t* tmp_block);
00444     unsigned 
00445     get_bit_block_AND   (bm::word_t* dst_block, bm::word_t* tmp_block);
00446     unsigned 
00447     get_bit_block_SUB   (bm::word_t* dst_block, bm::word_t* tmp_block);
00448     unsigned 
00449     get_bit_block_XOR   (bm::word_t* dst_block, bm::word_t* tmp_block);
00450     unsigned 
00451     get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block);
00452     unsigned 
00453     get_bit_block_COUNT_AND(bm::word_t* dst_block, bm::word_t* tmp_block);
00454     unsigned 
00455     get_bit_block_COUNT_OR(bm::word_t* dst_block, bm::word_t* tmp_block);
00456     unsigned 
00457     get_bit_block_COUNT_XOR(bm::word_t* dst_block, bm::word_t* tmp_block);
00458     unsigned 
00459     get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, bm::word_t* tmp_block);
00460     unsigned 
00461     get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, bm::word_t* tmp_block);
00462     unsigned 
00463     get_bit_block_COUNT_A(bm::word_t* dst_block, bm::word_t* tmp_block);
00464     unsigned 
00465     get_bit_block_COUNT_B(bm::word_t* dst_block, bm::word_t* tmp_block)
00466     {
00467         return get_bit_block_COUNT(dst_block, tmp_block);
00468     }
00469 
00473     unsigned get_arr_bit(bm::word_t* dst_block, 
00474                          bool clear_target=true);
00475 
00477     unsigned get_block_type() const { return block_type_; }
00478 
00479     unsigned get_bit();
00480 
00481 protected:
00482     get_bit_func_type  bit_func_table_[bm::set_END];
00483 
00484     decoder_type       decoder_;
00485     bool               end_of_stream_;
00486     unsigned           bv_size_;
00487     iterator_state     state_;
00488     unsigned           id_cnt_;  
00489     bm::id_t           last_id_; 
00490     gap_word_t         glevels_[bm::gap_levels]; 
00491 
00492     unsigned           block_type_;     
00493     unsigned           block_idx_;      
00494     unsigned           mono_block_cnt_; 
00495 
00496     gap_word_t         gap_head_;
00497 };
00498 
00505 template<class BV>
00506 class operation_deserializer
00507 {
00508 public:
00509     typedef BV bvector_type;
00510 public:
00511     static
00512     unsigned deserialize(bvector_type&        bv, 
00513                          const unsigned char* buf, 
00514                          bm::word_t*          temp_block,
00515                          set_operation        op = bm::set_OR);
00516 private:
00518     static
00519     void deserialize(bvector_type&        bv_target,
00520                      const bvector_type&  bv_mask,
00521                      const unsigned char* buf, 
00522                      bm::word_t*          temp_block,
00523                      set_operation        op);
00524 
00525 private:
00526     typedef 
00527         typename BV::blocks_manager_type               blocks_manager_type;
00528     typedef 
00529         serial_stream_iterator<bm::decoder>            serial_stream_current;
00530     typedef 
00531         serial_stream_iterator<bm::decoder_big_endian> serial_stream_be;
00532     typedef 
00533         serial_stream_iterator<bm::decoder_little_endian> serial_stream_le;
00534 
00535 };
00536 
00537 
00538 
00539 
00540 
00541 //---------------------------------------------------------------------
00542 
00543 template<class BV>
00544 serializer<BV>::serializer(const allocator_type&   alloc)
00545 : alloc_(alloc),
00546   gap_serial_(false),
00547   byte_order_serial_(true),
00548   temp_block_(0),
00549   compression_level_(3)
00550 {
00551     temp_block_ = alloc_.alloc_bit_block();
00552 }
00553 
00554 template<class BV>
00555 void serializer<BV>::set_compression_level(unsigned clevel)
00556 {
00557     compression_level_ = clevel;
00558 }
00559 
00560 template<class BV>
00561 unsigned serializer<BV>::get_compression_level() const
00562 {
00563     return compression_level_;
00564 }
00565 
00566 template<class BV>
00567 serializer<BV>::~serializer()
00568 {
00569     alloc_.free_bit_block(temp_block_);
00570 }
00571 
00572 
00573 template<class BV>
00574 void serializer<BV>::gap_length_serialization(bool value)
00575 {
00576     gap_serial_ = value;
00577 }
00578 
00579 template<class BV>
00580 void serializer<BV>::byte_order_serialization(bool value)
00581 {
00582     byte_order_serial_ = value;
00583 }
00584 
00585 template<class BV>
00586 void serializer<BV>::encode_header(const BV& bv, bm::encoder& enc)
00587 {
00588     const blocks_manager_type& bman = bv.get_blocks_manager();
00589 
00590     unsigned char header_flag = 0;
00591     if (bv.size() == bm::id_max) // no dynamic resize
00592         header_flag |= BM_HM_DEFAULT;
00593     else 
00594         header_flag |= BM_HM_RESIZE;
00595 
00596     if (!byte_order_serial_) 
00597         header_flag |= BM_HM_NO_BO;
00598 
00599     if (!gap_serial_) 
00600         header_flag |= BM_HM_NO_GAPL;
00601 
00602     enc.put_8(header_flag);
00603 
00604     if (byte_order_serial_)
00605     {
00606         ByteOrder bo = globals<true>::byte_order();
00607         enc.put_8((unsigned char)bo);
00608     }
00609 
00610     // keep GAP levels information
00611     if (gap_serial_)
00612     {
00613         enc.put_16(bman.glen(), bm::gap_levels);
00614     }
00615 
00616     // save size (only if bvector has been down-sized)
00617     if (header_flag & BM_HM_RESIZE) 
00618     {
00619         enc.put_32(bv.size());
00620     }
00621     
00622 }
00623 
00624 template<class BV>
00625 void serializer<BV>::gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc)
00626 {
00627     unsigned len = gap_length(gap_block);
00628 
00629     // Use Elias Gamma encoding 
00630     if (len > 6 && (compression_level_ > 3)) 
00631     {
00632         encoder::position_type enc_pos0 = enc.get_pos();
00633         {
00634             bit_out_type bout(enc);
00635             gamma_encoder_func gamma(bout);
00636 
00637             enc.put_8(set_block_gap_egamma);        
00638             enc.put_16(gap_block[0]);
00639 
00640             for_each_dgap(gap_block, gamma);        
00641         }
00642 
00643         // evaluate gamma coding efficiency
00644         encoder::position_type enc_pos1 = enc.get_pos();
00645         unsigned gamma_size = enc_pos1 - enc_pos0;        
00646         if (gamma_size > (len-1)*sizeof(gap_word_t))
00647         {
00648             enc.set_pos(enc_pos0);
00649         }
00650         else
00651         {
00652             return;
00653         }
00654     }
00655 
00656     // save as plain GAP block 
00657     enc.put_8(set_block_gap);
00658     enc.put_16(gap_block, len-1);
00659 }
00660 
00661 template<class BV>
00662 void serializer<BV>::gamma_gap_array(const bm::gap_word_t* gap_array, 
00663                                      unsigned              arr_len, 
00664                                      bm::encoder&          enc,
00665                                      bool                  inverted)
00666 {
00667     if (compression_level_ > 3 && arr_len > 25)
00668     {        
00669         encoder::position_type enc_pos0 = enc.get_pos();
00670         {
00671             bit_out_type bout(enc);
00672 
00673             enc.put_8(
00674                 inverted ? set_block_arrgap_egamma_inv 
00675                          : set_block_arrgap_egamma);
00676 
00677             bout.gamma(arr_len);
00678 
00679             gap_word_t prev = gap_array[0];
00680             bout.gamma(prev + 1);
00681 
00682             for (unsigned i = 1; i < arr_len; ++i)
00683             {
00684                 gap_word_t curr = gap_array[i];
00685                 bout.gamma(curr - prev);
00686                 prev = curr;
00687             }
00688         }
00689 
00690         encoder::position_type enc_pos1 = enc.get_pos();
00691         unsigned gamma_size = enc_pos1 - enc_pos0;            
00692         if (gamma_size > (arr_len)*sizeof(gap_word_t))
00693         {
00694             enc.set_pos(enc_pos0);
00695         }
00696         else
00697         {
00698             return;
00699         }
00700     }
00701 
00702     // save as an plain array
00703     enc.put_prefixed_array_16(inverted ? set_block_arrgap_inv : set_block_arrgap, 
00704                               gap_array, arr_len, true);
00705 }
00706 
00707 
00708 template<class BV>
00709 void serializer<BV>::encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc)
00710 {
00711     if (compression_level_ > 2)
00712     {
00713         gap_word_t*  gap_temp_block = (gap_word_t*) temp_block_;    
00714         gap_word_t arr_len;
00715 
00716         unsigned bc = gap_bit_count(gap_block);
00717         if (bc == 1)
00718         {
00719             arr_len = gap_convert_to_arr(gap_temp_block,
00720                                          gap_block,
00721                                          bm::gap_equiv_len-10);
00722             BM_ASSERT(arr_len == 1);
00723             enc.put_8(set_block_bit_1bit);              
00724             enc.put_16(gap_temp_block[0]);
00725             return;
00726         }
00727 
00728         unsigned len = gap_length(gap_block);
00729         bool invert, use_array;
00730         invert = use_array = false;
00731         
00732         if (bc < len-1) 
00733         {
00734             use_array = true;
00735         }
00736         else  // inverted array is a better alternative?
00737         {
00738             unsigned inverted_bc = bm::gap_max_bits - bc;
00739             if (inverted_bc < len-1)
00740             {
00741                 use_array = invert = true;
00742             }
00743         }
00744         if (use_array)
00745         {
00746             arr_len = gap_convert_to_arr(gap_temp_block,
00747                                          gap_block,
00748                                          bm::gap_equiv_len-10,
00749                                          invert);
00750             if (arr_len)
00751             {
00752                 gamma_gap_array(gap_temp_block, arr_len, enc, invert);
00753                 return;
00754             }
00755         }
00756     }
00757 
00758     gamma_gap_block(gap_block, enc);
00759 }
00760 
00761 template<class BV>
00762 void serializer<BV>::encode_bit_interval(const bm::word_t* blk, 
00763                                          bm::encoder&      enc,
00764                                          unsigned          //size_control
00765                                          )
00766 {
00767     enc.put_8(set_block_bit_0runs);
00768     enc.put_8((blk[0]==0) ? 0 : 1); // encode start 
00769 
00770     unsigned i,j;//,k;
00771 
00772     for (i = 0; i < bm::set_block_size; ++i)
00773     {
00774         if (blk[i] == 0)
00775         {
00776             // scan fwd to find 0 island length
00777             for (j = i+1; j < bm::set_block_size; ++j)
00778             {
00779                 if (blk[j] != 0)
00780                     break;
00781             }
00782             enc.put_16((gap_word_t)(j-i)); 
00783             i = j - 1;
00784         }
00785         else
00786         {
00787             // scan fwd to find non-0 island length
00788             for (j = i+1; j < bm::set_block_size; ++j)
00789             {
00790                 if (blk[j] == 0)
00791                 {
00792                     // look ahead to identify and ignore short 0-run
00793                     if (((j+1 < bm::set_block_size) && blk[j+1]) ||
00794                         ((j+2 < bm::set_block_size) && blk[j+2])
00795                        )
00796                     {
00797                         ++j; // skip zero word
00798                         continue;
00799                     }
00800                     break;
00801                 }
00802             }
00803             enc.put_16((gap_word_t)(j-i)); 
00804             // stream all bit-words now
00805             BM_ASSERT(i < j);
00806             enc.put_32(blk + i, j - i);
00807             i = j - 1;
00808         }
00809     }
00810 }
00811 
00812 
00813 template<class BV>
00814 unsigned serializer<BV>::serialize(const BV& bv, 
00815                                    unsigned char* buf, unsigned buf_size)
00816 {
00817     BM_ASSERT(temp_block_);
00818     
00819     const blocks_manager_type& bman = bv.get_blocks_manager();
00820 
00821     gap_word_t*  gap_temp_block = (gap_word_t*) temp_block_;
00822     
00823     bm::encoder enc(buf, buf_size);  // create the encoder
00824     encode_header(bv, enc);
00825 
00826     unsigned i,j;
00827 
00828 
00829     // save blocks.
00830     for (i = 0; i < bm::set_total_blocks; ++i)
00831     {
00832         bm::word_t* blk = bman.get_block(i);
00833         // -----------------------------------------
00834         // Empty or ONE block serialization
00835 
00836         bool flag;
00837         flag = bman.is_block_zero(i, blk, false);
00838         if (flag)
00839         {
00840         zero_block:
00841             unsigned next_nb = bman.find_next_nz_block(i+1, false);
00842             if (next_nb == bm::set_total_blocks) // no more blocks
00843             {
00844                 enc.put_8(set_block_azero);
00845                 return enc.size();
00846             }
00847             unsigned nb = next_nb - i;
00848             
00849             if (nb > 1 && nb < 128)
00850             {
00851                 // special (but frequent) case -- GAP delta fits in 7bits
00852                 unsigned char c = (unsigned char)((1u << 7) | nb);
00853                 enc.put_8(c);
00854             }
00855             else 
00856             {
00857                 SER_NEXT_GRP(enc, nb, set_block_1zero, 
00858                                       set_block_8zero, 
00859                                       set_block_16zero, 
00860                                       set_block_32zero) 
00861             }
00862             i = next_nb - 1;
00863             continue;
00864         }
00865         else
00866         {
00867             flag = bman.is_block_one(i, blk, false);
00868             if (flag)
00869             {
00870                 // Look ahead for similar blocks
00871                 for(j = i+1; j < bm::set_total_blocks; ++j)
00872                 {
00873                    bm::word_t* blk_next = bman.get_block(j);
00874                    if (flag != bman.is_block_one(j, blk_next, false))
00875                        break;
00876                 }
00877                 if (j == bm::set_total_blocks)
00878                 {
00879                     enc.put_8(set_block_aone);
00880                     break;
00881                 }
00882                 else
00883                 {
00884                    unsigned nb = j - i;
00885                    SER_NEXT_GRP(enc, nb, set_block_1one, 
00886                                          set_block_8one, 
00887                                          set_block_16one, 
00888                                          set_block_32one) 
00889                 }
00890                 i = j - 1;
00891                 continue;
00892             }
00893         }
00894 
00895         // ------------------------------
00896         // GAP serialization
00897 
00898         if (BM_IS_GAP(blk))
00899         {
00900             gap_word_t* gblk = BMGAP_PTR(blk);
00901             encode_gap_block(gblk, enc);
00902             continue;
00903         }
00904                 
00905         // ----------------------------------------------
00906         // BIT BLOCK serialization
00907 
00908         {
00909         if (compression_level_ <= 1)
00910         {
00911             enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
00912             continue;            
00913         }
00914 
00915         // compute bit-block statistics: bit-count and number of GAPS
00916         unsigned block_bc = 0;
00917         bm::id_t bit_gaps = 
00918             bit_block_calc_count_change(blk, blk + bm::set_block_size,
00919                                         &block_bc);
00920         unsigned block_bc_inv = bm::gap_max_bits - block_bc;
00921         switch (block_bc)
00922         {
00923         case 1: // corner case: only 1 bit on
00924         {
00925             bm::id_t bit_idx = 0;
00926             bit_find_in_block(blk, bit_idx, &bit_idx);
00927             enc.put_8(set_block_bit_1bit); enc.put_16((short)bit_idx);
00928             continue;
00929         }
00930         case 0: goto zero_block; // empty block
00931         }
00932        
00933        
00934         // compute alternative representation sizes
00935         //
00936         unsigned arr_block_size = sizeof(gap_word_t) + (block_bc * sizeof(gap_word_t));
00937         unsigned arr_block_size_inv = sizeof(gap_word_t) + (block_bc_inv * sizeof(gap_word_t));
00938         unsigned gap_block_size = sizeof(gap_word_t) + ((bit_gaps+1) * sizeof(gap_word_t)); 
00939         unsigned interval_block_size;
00940         interval_block_size = bit_count_nonzero_size(blk, bm::set_block_size);
00941         bool inverted = false;
00942 
00943         if (arr_block_size_inv < arr_block_size &&
00944             arr_block_size_inv < gap_block_size &&
00945             arr_block_size_inv < interval_block_size)
00946         {
00947             inverted = true;
00948             goto bit_as_array;
00949         }
00950 
00951         // if interval representation is not a good alternative
00952         if ((interval_block_size > arr_block_size) || 
00953             (interval_block_size > gap_block_size))
00954         {
00955             if (gap_block_size < (bm::gap_equiv_len-64) &&
00956                 gap_block_size < arr_block_size)
00957             {
00958                 unsigned len = bit_convert_to_gap(gap_temp_block, 
00959                                                   blk, 
00960                                                   bm::gap_max_bits, 
00961                                                   bm::gap_equiv_len-64);
00962                 if (len) // save as GAP
00963                 {
00964                     gamma_gap_block(gap_temp_block, enc);
00965                     continue;
00966                 }
00967             }
00968             
00969             if (arr_block_size < ((bm::gap_equiv_len-64) * sizeof(gap_word_t)))
00970             {
00971             bit_as_array:
00972                 gap_word_t arr_len;
00973                 unsigned mask = inverted ? ~0 : 0;
00974                 arr_len = bit_convert_to_arr(gap_temp_block, 
00975                                              blk, 
00976                                              bm::gap_max_bits, 
00977                                              bm::gap_equiv_len-64,
00978                                              mask);
00979                 if (arr_len)
00980                 {
00981                     gamma_gap_array(gap_temp_block, arr_len, enc, inverted);
00982                     continue;
00983                 }
00984                 
00985             }
00986             // full bit-block
00987             enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
00988             continue;            
00989         }
00990         
00991         // if interval block is a winner
00992         if (interval_block_size < arr_block_size &&
00993             interval_block_size < gap_block_size)
00994         {
00995             encode_bit_interval(blk, enc, interval_block_size);
00996             continue;
00997         }
00998         
00999         if (gap_block_size < bm::gap_equiv_len &&
01000             gap_block_size < arr_block_size)
01001         {
01002             unsigned len = bit_convert_to_gap(gap_temp_block, 
01003                                               blk, 
01004                                               bm::gap_max_bits, 
01005                                               bm::gap_equiv_len-64);
01006             if (len) // save as GAP
01007             {
01008                 gamma_gap_block(gap_temp_block, enc);
01009                 continue;
01010             }
01011         }
01012         
01013          
01014         // if array is best
01015         if (arr_block_size < bm::gap_equiv_len-64)
01016         {
01017             goto bit_as_array;
01018         }
01019         
01020         // full bit-block
01021         enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
01022         continue;            
01023         }
01024     }
01025 
01026     enc.put_8(set_block_end);
01027 
01028     unsigned encoded_size = enc.size();
01029     return encoded_size;
01030 
01031 }
01032 
01033 
01034 
01037 enum serialization_flags {
01038     BM_NO_BYTE_ORDER = 1,       
01039     BM_NO_GAP_LENGTH = (1 << 1) 
01040 };
01041 
01069 /*
01070  Serialization format:
01071  <pre>
01072 
01073  | HEADER | BLOCKS |
01074 
01075  Header structure:
01076    BYTE : Serialization header (bit mask of BM_HM_*)
01077    BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
01078    INT16: Reserved (0)
01079    INT16: Reserved Flags (0)
01080 
01081  </pre>
01082 */
01083 template<class BV>
01084 unsigned serialize(const BV& bv, 
01085                    unsigned char* buf, 
01086                    bm::word_t*    temp_block,
01087                    unsigned       serialization_flags = 0)
01088 {
01089     bm::serializer<BV> bv_serial;
01090     if (serialization_flags & BM_NO_BYTE_ORDER) 
01091         bv_serial.byte_order_serialization(false);
01092         
01093     if (serialization_flags & BM_NO_GAP_LENGTH) 
01094         bv_serial.gap_length_serialization(false);
01095     else
01096         bv_serial.gap_length_serialization(true);
01097 
01098     bv_serial.set_compression_level(4);
01099     
01100     return bv_serial.serialize(bv, buf, 0);
01101 }
01102 
01109 template<class BV>
01110 unsigned serialize(BV& bv, 
01111                    unsigned char* buf, 
01112                    unsigned  serialization_flags=0)
01113 {
01114     bm::serializer<BV> bv_serial;
01115     if (serialization_flags & BM_NO_BYTE_ORDER) 
01116         bv_serial.byte_order_serialization(false);
01117         
01118     if (serialization_flags & BM_NO_GAP_LENGTH) 
01119         bv_serial.gap_length_serialization(false);
01120     else
01121         bv_serial.gap_length_serialization(true);
01122 
01123     bv_serial.set_compression_level(4);
01124     
01125     return bv_serial.serialize(bv, buf, 0);
01126 }
01127 
01128 
01129 
01145 template<class BV>
01146 unsigned deserialize(BV& bv, 
01147                      const unsigned char* buf, 
01148                      bm::word_t* temp_block=0)
01149 {
01150     ByteOrder bo_current = globals<true>::byte_order();
01151 
01152     bm::decoder dec(buf);
01153     unsigned char header_flag = dec.get_8();
01154     ByteOrder bo = bo_current;
01155     if (!(header_flag & BM_HM_NO_BO))
01156     {
01157         bo = (bm::ByteOrder) dec.get_8();
01158     }
01159 
01160     if (bo_current == bo)
01161     {
01162         deserializer<BV, bm::decoder> deserial;
01163         return deserial.deserialize(bv, buf, temp_block);
01164     }
01165     switch (bo_current) 
01166     {
01167     case BigEndian:
01168         {
01169         deserializer<BV, bm::decoder_big_endian> deserial;
01170         return deserial.deserialize(bv, buf, temp_block);
01171         }
01172     case LittleEndian:
01173         {
01174         deserializer<BV, bm::decoder_little_endian> deserial;
01175         return deserial.deserialize(bv, buf, temp_block);
01176         }
01177     default:
01178         BM_ASSERT(0);
01179     };
01180     return 0;
01181 }
01182 
01183 template<class DEC>
01184 unsigned deseriaizer_base<DEC>::read_id_list(decoder_type&   decoder, 
01185                                              unsigned        block_type, 
01186                                              bm::gap_word_t* dst_arr)
01187 {
01188     typedef bit_in<DEC> bit_in_type;
01189 
01190     gap_word_t len = 0;
01191 
01192     switch (block_type)
01193     {
01194     case set_block_bit_1bit:
01195         dst_arr[0] = decoder.get_16();
01196         len = 1;
01197         break;
01198     case set_block_arrgap:
01199     case set_block_arrgap_inv:
01200         len = decoder.get_16();
01201         decoder.get_16(dst_arr, len);
01202         break;
01203     case set_block_arrgap_egamma:
01204     case set_block_arrgap_egamma_inv:
01205         {
01206             bit_in_type bin(decoder);
01207             len = (gap_word_t)bin.gamma();
01208             gap_word_t prev, bit_idx;
01209             prev = 0;
01210             for (gap_word_t k = 0; k < len; ++k)
01211             {
01212                 bit_idx = (gap_word_t)bin.gamma();
01213                 bit_idx -= (k == 0);
01214                 bit_idx += prev;
01215                 dst_arr[k] = prev = bit_idx;
01216             } // for
01217         }
01218         break;
01219     default:
01220         BM_ASSERT(0);
01221     }
01222     return len;
01223 }
01224 
01225 
01226 template<class DEC>
01227 unsigned deseriaizer_base<DEC>::read_gap_block(decoder_type&   decoder, 
01228                                                unsigned        block_type, 
01229                                                bm::gap_word_t* dst_block,
01230                                                bm::gap_word_t& gap_head)
01231 {
01232     typedef bit_in<DEC> bit_in_type;
01233     unsigned gap_len = 0;
01234 
01235     switch (block_type)
01236     {
01237     case set_block_gap:
01238         {
01239             gap_len = gap_length(&gap_head);
01240             --gap_len;
01241             *dst_block = gap_head;
01242             decoder.get_16(dst_block+1, gap_len - 1);
01243             dst_block[gap_len] = gap_max_bits - 1;
01244             ++gap_len;
01245         }
01246         break;
01247     case set_block_bit_1bit:
01248         {
01249             gap_set_all(dst_block, bm::gap_max_bits, 0);
01250             gap_word_t bit_idx = decoder.get_16();
01251             gap_len = gap_add_value(dst_block, bit_idx);
01252             ++gap_len;
01253         }
01254         break;
01255     case set_block_arrgap:
01256     case set_block_arrgap_inv:
01257         {
01258             gap_set_all(dst_block, bm::gap_max_bits, 0);
01259             gap_word_t len = decoder.get_16();
01260 
01261             for (gap_word_t k = 0; k < len; ++k)
01262             {
01263                 gap_word_t bit_idx = decoder.get_16();
01264                 gap_len = gap_add_value(dst_block, bit_idx);
01265             } // for
01266             ++gap_len;
01267         }
01268         break;
01269     case set_block_arrgap_egamma:
01270     case set_block_arrgap_egamma_inv:
01271         {
01272             unsigned arr_len = read_id_list(decoder, block_type, id_array_);
01273             dst_block[0] = 0;
01274             gap_len = gap_set_array(dst_block, id_array_, arr_len);
01275         }
01276         break;
01277     case set_block_gap_egamma:
01278         {
01279         gap_len = (gap_head >> 3);
01280         --gap_len;
01281         // read gamma GAP block into a dest block
01282         {
01283             *dst_block = gap_head;
01284             gap_word_t* gap_data_ptr = dst_block + 1;
01285 
01286             bit_in_type bin(decoder);
01287             {
01288                 gap_word_t v = (gap_word_t)bin.gamma();
01289                 gap_word_t gap_sum = *gap_data_ptr = v - 1;
01290                 for (unsigned i = 1; i < gap_len; ++i)
01291                 {                   
01292                     v = (gap_word_t)bin.gamma();
01293                     *(++gap_data_ptr) = gap_sum += v;
01294                 } 
01295                 dst_block[++gap_len] = bm::gap_max_bits - 1;
01296             }
01297         }
01298 
01299         }
01300         break;        
01301     default:
01302         BM_ASSERT(0);
01303     }
01304 
01305     if (block_type == set_block_arrgap_egamma_inv || 
01306         block_type == set_block_arrgap_inv)
01307     {
01308         gap_invert(dst_block);
01309     }
01310     return gap_len;
01311 }
01312 
01313 
01314 template<class BV, class DEC>
01315 void 
01316 deserializer<BV, DEC>::deserialize_gap(unsigned char btype, decoder_type& dec, 
01317                                        bvector_type&  bv, blocks_manager_type& bman,
01318                                        unsigned i,
01319                                        bm::word_t* blk)
01320 {
01321     typedef bit_in<DEC> bit_in_type;
01322     gap_word_t gap_head; 
01323     gap_word_t gap_len = 0;
01324 
01325     switch (btype)
01326     {
01327     case set_block_gap: 
01328     case set_block_gapbit:
01329     {
01330         gap_word_t gap_head = (gap_word_t)
01331             (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
01332 
01333         gap_len = gap_length(&gap_head);
01334         int level = gap_calc_level(gap_len, bman.glen());
01335         --gap_len;
01336         if (level == -1)  // Too big to be GAP: convert to BIT block
01337         {
01338             *gap_temp_block_ = gap_head;
01339             dec.get_16(gap_temp_block_+1, gap_len - 1);
01340             gap_temp_block_[gap_len] = gap_max_bits - 1;
01341 
01342             if (blk == 0)  // block does not exist yet
01343             {
01344                 blk = bman.get_allocator().alloc_bit_block();
01345                 bman.set_block(i, blk);
01346                 gap_convert_to_bitset(blk, gap_temp_block_);
01347             }
01348             else  // We have some data already here. Apply OR operation.
01349             {
01350                 blk = bman.deoptimize_block(i);
01351                 gap_add_to_bitset(blk, gap_temp_block_);
01352             }
01353             return;
01354         } // level == -1
01355 
01356         set_gap_level(&gap_head, level);
01357 
01358         if (blk == 0)
01359         {
01360             gap_word_t* gap_blk = 
01361               bman.get_allocator().alloc_gap_block(level, bman.glen());
01362             gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
01363             *gap_blk_ptr = gap_head;
01364             set_gap_level(gap_blk_ptr, level);
01365             bman.set_block(i, (bm::word_t*)gap_blk);
01366             bman.set_block_gap(i);
01367             dec.get_16(gap_blk + 1, gap_len - 1);
01368             gap_blk[gap_len] = bm::gap_max_bits - 1;
01369         }
01370         else // target block exists
01371         {
01372             // read GAP block into a temp memory and perform OR
01373             *gap_temp_block_ = gap_head;
01374             dec.get_16(gap_temp_block_ + 1, gap_len - 1);
01375             gap_temp_block_[gap_len] = bm::gap_max_bits - 1;
01376             ++gap_len;
01377             break;
01378         }
01379         return;
01380     }
01381     case set_block_arrgap: 
01382     case set_block_arrgap_egamma:
01383         {
01384             unsigned arr_len = read_id_list(dec, btype, this->id_array_);
01385             gap_len = gap_set_array(gap_temp_block_, this->id_array_, arr_len);
01386             break;
01387         }
01388     case set_block_gap_egamma:            
01389         gap_head = (gap_word_t)
01390             (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
01391     case set_block_arrgap_egamma_inv:
01392     case set_block_arrgap_inv:
01393         gap_len = read_gap_block(dec, btype, gap_temp_block_, gap_head);
01394         break;
01395     default:
01396         BM_ASSERT(0);
01397     }
01398 
01399     // check if encoded GAP block length is too high and use bit-block instead
01400 
01401     if (gap_len > bm::gap_length_threashold)
01402     {
01403         blk = bman.deoptimize_block(i);
01404         if (!blk)
01405         {
01406             blk = bman.get_allocator().alloc_bit_block();
01407             bman.set_block(i, blk);
01408             bit_block_set(blk, 0);
01409         }
01410         gap_add_to_bitset_l(blk, gap_temp_block_, gap_len-1);
01411     } 
01412     else 
01413     {
01414         bv.combine_operation_with_block(i, 
01415                                        (bm::word_t*)gap_temp_block_, 
01416                                         1, 
01417                                         BM_OR);
01418     }
01419 
01420 }
01421 
01422 
01423 template<class BV, class DEC>
01424 unsigned deserializer<BV, DEC>::deserialize(bvector_type&        bv, 
01425                                             const unsigned char* buf,
01426                                             bm::word_t*          temp_block)
01427 {
01428     blocks_manager_type& bman = bv.get_blocks_manager();
01429 
01430     bm::wordop_t* tmp_buf = 
01431         temp_block ? (bm::wordop_t*) temp_block 
01432                    : (bm::wordop_t*)bman.check_allocate_tempblock();
01433 
01434     temp_block_ = temp_block = (word_t*)tmp_buf;
01435     bm::strategy  strat = bv.get_new_blocks_strat();
01436     bv.set_new_blocks_strat(BM_GAP);
01437 
01438     decoder_type dec(buf);
01439 
01440     BM_SET_MMX_GUARD
01441 
01442     // Reading header
01443 
01444     unsigned char header_flag =  dec.get_8();
01445     if (!(header_flag & BM_HM_NO_BO))
01446     {
01447         /*ByteOrder bo = (bm::ByteOrder)*/dec.get_8();
01448     }
01449 
01450     if (header_flag & BM_HM_ID_LIST)
01451     {
01452         // special case: the next comes plain list of integers
01453         if (header_flag & BM_HM_RESIZE)
01454         {
01455             unsigned bv_size = dec.get_32();
01456             if (bv_size > bv.size())
01457             {
01458                 bv.resize(bv_size);
01459             }
01460         }
01461         
01462 
01463         for (unsigned cnt = dec.get_32(); cnt; --cnt) {
01464             bm::id_t id = dec.get_32();
01465             bv.set(id);
01466         } // for
01467         // -1 for compatibility with other deserialization branches
01468         return dec.size()-1;
01469     }
01470 
01471     unsigned i;
01472 
01473     if (!(header_flag & BM_HM_NO_GAPL)) 
01474     {
01475         gap_word_t glevels[bm::gap_levels];
01476         // read GAP levels information
01477         for (i = 0; i < bm::gap_levels; ++i)
01478         {
01479             glevels[i] = dec.get_16();
01480         }
01481     }
01482 
01483     if (header_flag & (1 << 1))
01484     {
01485         unsigned bv_size = dec.get_32();
01486         if (bv_size > bv.size())
01487         {
01488             bv.resize(bv_size);
01489         }
01490     }
01491 
01492     unsigned char btype;
01493     unsigned nb;
01494 
01495     for (i = 0; i < bm::set_total_blocks; ++i)
01496     {
01497         btype = dec.get_8();
01498         bm::word_t* blk = bman.get_block(i);
01499         // pre-check if we have short zero-run packaging here
01500         //
01501         if (btype & (1 << 7))
01502         {
01503             nb = btype & ~(1 << 7);
01504             i += nb-1;
01505             continue;
01506         }        
01507 
01508         switch (btype)
01509         {
01510         case set_block_azero: 
01511         case set_block_end:
01512             i = bm::set_total_blocks;
01513             break;
01514         case set_block_1zero:
01515             continue;
01516         case set_block_8zero:
01517             nb = dec.get_8();
01518             i += nb-1;
01519             continue;
01520         case set_block_16zero:
01521             nb = dec.get_16();
01522             i += nb-1;
01523             continue;
01524         case set_block_32zero:
01525             nb = dec.get_32();
01526             i += nb-1;
01527             continue;
01528         case set_block_aone:
01529             for (;i < bm::set_total_blocks; ++i)
01530             {
01531                 bman.set_block_all_set(i);
01532             }
01533             break;
01534         case set_block_1one:
01535             bman.set_block_all_set(i);
01536             continue;
01537         case set_block_8one:
01538             BM_SET_ONE_BLOCKS(dec.get_8());
01539             continue;
01540         case set_block_16one:
01541             BM_SET_ONE_BLOCKS(dec.get_16());
01542             continue;
01543         case set_block_32one:
01544             BM_SET_ONE_BLOCKS(dec.get_32());
01545             continue;
01546         case set_block_bit: 
01547         {
01548             if (blk == 0)
01549             {
01550                 blk = bman.get_allocator().alloc_bit_block();
01551                 bman.set_block(i, blk);
01552                 dec.get_32(blk, bm::set_block_size);
01553                 continue;                
01554             }
01555             
01556             dec.get_32(temp_block, bm::set_block_size);
01557             bv.combine_operation_with_block(i, 
01558                                             temp_block, 
01559                                             0, BM_OR);
01560             
01561             continue;
01562         }
01563         case set_block_bit_1bit:
01564         {
01565             unsigned bit_idx = dec.get_16();
01566             bit_idx += i * bm::bits_in_block; 
01567             bv.set_bit(bit_idx);
01568             continue;
01569         }
01570         case set_block_bit_0runs:
01571         {
01572             //TODO: optimization if block exists
01573             bit_block_set(temp_block, 0);
01574 
01575             unsigned char run_type = dec.get_8();
01576             for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
01577             {
01578                 unsigned run_length = dec.get_16();
01579                 if (run_type)
01580                 {
01581                     unsigned run_end = j + run_length;
01582                     for (;j < run_end; ++j)
01583                     {
01584                         BM_ASSERT(j < bm::set_block_size);
01585                         temp_block[j] = dec.get_32();
01586                     }
01587                 }
01588                 else
01589                 {
01590                     j += run_length;
01591                 }
01592             } // for
01593 
01594             bv.combine_operation_with_block(i, 
01595                                             temp_block,
01596                                             0, BM_OR);            
01597             continue;
01598         }
01599         case set_block_bit_interval: 
01600         {
01601             unsigned head_idx, tail_idx;
01602             head_idx = dec.get_16();
01603             tail_idx = dec.get_16();
01604 
01605             if (blk == 0)
01606             {
01607                 blk = bman.get_allocator().alloc_bit_block();
01608                 bman.set_block(i, blk);
01609                 for (unsigned i = 0; i < head_idx; ++i)
01610                 {
01611                     blk[i] = 0;
01612                 }
01613                 dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
01614                 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
01615                 {
01616                     blk[j] = 0;
01617                 }
01618                 continue;
01619             }
01620             bit_block_set(temp_block, 0);
01621             dec.get_32(temp_block + head_idx, tail_idx - head_idx + 1);
01622 
01623             bv.combine_operation_with_block(i, 
01624                                             temp_block,
01625                                             0, BM_OR);
01626             continue;
01627         }
01628         case set_block_gap: 
01629         case set_block_gapbit:
01630         case set_block_arrgap:
01631         case set_block_gap_egamma:
01632         case set_block_arrgap_egamma:
01633         case set_block_arrgap_egamma_inv:
01634         case set_block_arrgap_inv:    
01635             deserialize_gap(btype, dec, bv, bman, i, blk);
01636             continue;
01637         case set_block_arrbit:
01638         {
01639             gap_word_t len = (gap_word_t)
01640                 (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
01641 
01642             if (bman.is_block_gap(i))
01643             {
01644                 // Here we most probably does not want to keep
01645                 // the block GAP since generic bitblock offers better
01646                 // performance.
01647                 blk = bman.convert_gap2bitset(i);
01648             }
01649             else
01650             {
01651                 if (blk == 0)  // block does not exists yet
01652                 {
01653                     blk = bman.get_allocator().alloc_bit_block();
01654                     bit_block_set(blk, 0);
01655                     bman.set_block(i, blk);
01656                 }
01657             }
01658 
01659             // Get the array one by one and set the bits.
01660             for (unsigned k = 0; k < len; ++k)
01661             {
01662                 gap_word_t bit_idx = dec.get_16();
01663                 set_bit(blk, bit_idx);
01664             }
01665             continue;
01666         }
01667         default:
01668             BM_ASSERT(0); // unknown block type
01669         } // switch
01670     } // for i
01671 
01672     bv.forget_count();
01673     bv.set_new_blocks_strat(strat);
01674 
01675     return dec.size();
01676 }
01677 
01678 
01679 
01680 template<class DEC>
01681 serial_stream_iterator<DEC>::serial_stream_iterator(const unsigned char* buf)
01682   : decoder_(buf),
01683     end_of_stream_(false),
01684     bv_size_(0),
01685     state_(e_unknown),
01686     id_cnt_(0),
01687     block_idx_(0),
01688     mono_block_cnt_(0)
01689 {
01690     ::memset(bit_func_table_, 0, sizeof(bit_func_table_));
01691 
01692     bit_func_table_[bm::set_AND] = 
01693         &serial_stream_iterator<DEC>::get_bit_block_AND;
01694     bit_func_table_[bm::set_ASSIGN] = 
01695         &serial_stream_iterator<DEC>::get_bit_block_ASSIGN;
01696     bit_func_table_[bm::set_OR]     = 
01697         &serial_stream_iterator<DEC>::get_bit_block_OR;
01698     bit_func_table_[bm::set_SUB] = 
01699         &serial_stream_iterator<DEC>::get_bit_block_SUB;
01700     bit_func_table_[bm::set_XOR] = 
01701         &serial_stream_iterator<DEC>::get_bit_block_XOR;
01702     bit_func_table_[bm::set_COUNT] = 
01703         &serial_stream_iterator<DEC>::get_bit_block_COUNT;
01704     bit_func_table_[bm::set_COUNT_AND] = 
01705         &serial_stream_iterator<DEC>::get_bit_block_COUNT_AND;
01706     bit_func_table_[bm::set_COUNT_XOR] = 
01707         &serial_stream_iterator<DEC>::get_bit_block_COUNT_XOR;
01708     bit_func_table_[bm::set_COUNT_OR] = 
01709         &serial_stream_iterator<DEC>::get_bit_block_COUNT_OR;
01710     bit_func_table_[bm::set_COUNT_SUB_AB] = 
01711         &serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_AB;
01712     bit_func_table_[bm::set_COUNT_SUB_BA] = 
01713         &serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_BA;
01714     bit_func_table_[bm::set_COUNT_A] = 
01715         &serial_stream_iterator<DEC>::get_bit_block_COUNT_A;
01716     bit_func_table_[bm::set_COUNT_B] = 
01717         &serial_stream_iterator<DEC>::get_bit_block_COUNT;
01718 
01719 
01720     // reading stream header
01721     unsigned char header_flag =  decoder_.get_8();
01722     if (!(header_flag & BM_HM_NO_BO))
01723     {
01724         /*ByteOrder bo = (bm::ByteOrder)*/decoder_.get_8();
01725     }
01726 
01727     // check if bitvector comes as an inverted, sorted list of ints
01728     //
01729     if (header_flag & BM_HM_ID_LIST)
01730     {
01731         // special case: the next comes plain list of unsigned integers
01732         if (header_flag & BM_HM_RESIZE)
01733         {
01734             bv_size_ = decoder_.get_32();
01735         }
01736 
01737         state_ = e_list_ids;
01738         id_cnt_ = decoder_.get_32();
01739         next(); // read first id
01740     }
01741     else
01742     {
01743         if (!(header_flag & BM_HM_NO_GAPL)) 
01744         {
01745             unsigned i;
01746             // keep GAP levels info
01747             for (i = 0; i < bm::gap_levels; ++i)
01748             {
01749                 glevels_[i] = decoder_.get_16();
01750             }
01751         }
01752 
01753         if (header_flag & (1 << 1))
01754         {
01755             bv_size_ = decoder_.get_32();
01756         }
01757         state_ = e_blocks;
01758     }
01759 }
01760 
01761 template<class DEC>
01762 void serial_stream_iterator<DEC>::next()
01763 {
01764     if (is_eof())
01765     {
01766         ++block_idx_;
01767         return;
01768     }
01769 
01770     switch (state_) 
01771     {
01772     case e_list_ids:
01773         // read inverted ids one by one
01774         if (id_cnt_ == 0)
01775         {
01776             end_of_stream_ = true;
01777             state_ = e_unknown;
01778             break;
01779         }
01780         last_id_ = decoder_.get_32();
01781         --id_cnt_;
01782         break;
01783 
01784     case e_blocks:
01785         if (block_idx_ == bm::set_total_blocks)
01786         {
01787             end_of_stream_ = true;
01788             state_ = e_unknown;
01789             break;
01790         }
01791 
01792         block_type_ = decoder_.get_8();
01793 
01794         // pre-check for 7-bit zero block
01795         //
01796         if (block_type_ & (1 << 7))
01797         {
01798             mono_block_cnt_ = (block_type_ & ~(1 << 7)) - 1;
01799             state_ = e_zero_blocks;
01800             break;
01801         }
01802 
01803         switch (block_type_)
01804         {
01805         case set_block_azero:
01806         case set_block_end:
01807             end_of_stream_ = true; state_ = e_unknown;
01808             break;
01809         case set_block_1zero:
01810             state_ = e_zero_blocks;
01811             mono_block_cnt_ = 0;
01812             break;
01813         case set_block_8zero:
01814             state_ = e_zero_blocks;
01815             mono_block_cnt_ = decoder_.get_8()-1;
01816             break;
01817         case set_block_16zero:
01818             state_ = e_zero_blocks;
01819             mono_block_cnt_ = decoder_.get_16()-1;
01820             break;
01821         case set_block_32zero:
01822             state_ = e_zero_blocks;
01823             mono_block_cnt_ = decoder_.get_32()-1;
01824             break;
01825         case set_block_aone:
01826             state_ = e_one_blocks;
01827             mono_block_cnt_ = bm::set_total_blocks - block_idx_;
01828             break;
01829         case set_block_1one:
01830             state_ = e_one_blocks;
01831             mono_block_cnt_ = 0;
01832             break;
01833         case set_block_8one:
01834             state_ = e_one_blocks;
01835             mono_block_cnt_ = decoder_.get_8()-1;
01836             break;
01837         case set_block_16one:
01838             state_ = e_one_blocks;
01839             mono_block_cnt_ = decoder_.get_16()-1;
01840             break;
01841         case set_block_32one:
01842             state_ = e_one_blocks;
01843             mono_block_cnt_ = decoder_.get_32()-1;
01844             break;
01845 
01846         case set_block_bit:
01847         case set_block_bit_interval:
01848         case set_block_bit_0runs:
01849         case set_block_arrbit:
01850             state_ = e_bit_block;
01851             break;
01852 
01853         case set_block_gap:
01854         case set_block_gap_egamma:
01855             gap_head_ = (gap_word_t)
01856                 (sizeof(gap_word_t) == 2 ? 
01857                     decoder_.get_16() : decoder_.get_32());
01858         case set_block_arrgap:
01859         case set_block_arrgap_egamma:
01860         case set_block_arrgap_egamma_inv:
01861         case set_block_arrgap_inv:
01862         case set_block_bit_1bit:
01863             state_ = e_gap_block;
01864             break;        
01865         case set_block_gapbit:
01866             state_ = e_gap_block; //e_bit_block; // TODO: make a better decision here
01867             break;
01868         
01869         default:
01870             BM_ASSERT(0);
01871         }// switch
01872 
01873         break;
01874 
01875     case e_zero_blocks:
01876     case e_one_blocks:
01877         ++block_idx_;
01878         if (!mono_block_cnt_)
01879         {
01880             state_ = e_blocks; // get new block token
01881             break;
01882         }
01883         --mono_block_cnt_;
01884         break;
01885 
01886     case e_unknown:
01887     default:
01888         BM_ASSERT(0);
01889     } // switch
01890 }
01891 
01892 template<class DEC>
01893 void serial_stream_iterator<DEC>::skip_mono_blocks()
01894 {
01895     BM_ASSERT(state_ == e_zero_blocks || state_ == e_one_blocks);
01896     if (!mono_block_cnt_)
01897     {
01898         ++block_idx_;
01899     }
01900     else
01901     {
01902         block_idx_ += mono_block_cnt_+1;
01903         mono_block_cnt_ = 0;
01904     }
01905     state_ = e_blocks;
01906 }
01907 
01908 template<class DEC>
01909 unsigned 
01910 serial_stream_iterator<DEC>::get_bit_block_ASSIGN(
01911                                             bm::word_t*  dst_block,
01912                                             bm::word_t*  /*tmp_block*/)
01913 {
01914     BM_ASSERT(this->state_ == e_bit_block);
01915     unsigned count = 0;
01916 
01917     switch (this->block_type_)
01918     {
01919     case set_block_bit:
01920         decoder_.get_32(dst_block, bm::set_block_size);
01921         break;
01922     case set_block_bit_0runs: 
01923         {
01924         if (dst_block)
01925             bit_block_set(dst_block, 0);
01926         unsigned char run_type = decoder_.get_8();
01927         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
01928         {
01929             unsigned run_length = decoder_.get_16();
01930             if (run_type)
01931             {
01932                 decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
01933             }
01934             j += run_length;
01935         } // for
01936         }
01937         break;
01938     case set_block_bit_interval:
01939         {
01940             unsigned head_idx = decoder_.get_16();
01941             unsigned tail_idx = decoder_.get_16();
01942             if (dst_block) 
01943             {
01944                 for (unsigned i = 0; i < head_idx; ++i)
01945                     dst_block[i] = 0;
01946                 decoder_.get_32(dst_block + head_idx, 
01947                                 tail_idx - head_idx + 1);
01948                 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
01949                     dst_block[j] = 0;
01950             }
01951             else
01952             {
01953                 decoder_.seek((tail_idx - head_idx + 1) * 4);
01954             }
01955         }
01956         break;
01957     case set_block_arrbit:
01958     case set_block_bit_1bit:
01959         get_arr_bit(dst_block, true /*clear target*/);
01960         break;
01961     case set_block_gapbit:
01962         BM_ASSERT(0);
01963         break;
01964     default:
01965         BM_ASSERT(0);
01966     } // switch
01967     return count;
01968 }
01969 
01970 template<class DEC>
01971 unsigned 
01972 serial_stream_iterator<DEC>::get_bit_block_OR(bm::word_t*  dst_block,
01973                                               bm::word_t*  /*tmp_block*/)
01974 {
01975     BM_ASSERT(this->state_ == e_bit_block);
01976     unsigned count = 0;
01977     switch (block_type_)
01978     {
01979     case set_block_bit:
01980         {
01981         bitblock_get_adapter ga(dst_block);
01982         bit_OR<bm::word_t> func;
01983         bitblock_store_adapter sa(dst_block);
01984 
01985         bit_recomb(ga,
01986                    decoder_,
01987                    func,
01988                    sa
01989                   );
01990         }
01991         break;
01992     case set_block_bit_interval:
01993         {
01994         unsigned head_idx = decoder_.get_16();
01995         unsigned tail_idx = decoder_.get_16();
01996         for (unsigned i = head_idx; i <= tail_idx; ++i)
01997             dst_block[i] |= decoder_.get_32();
01998         }
01999         break;
02000     case set_block_bit_0runs:
02001         {
02002         unsigned char run_type = decoder_.get_8();
02003         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02004         {
02005             unsigned run_length = decoder_.get_16();
02006             if (run_type)
02007             {
02008                 unsigned run_end = j + run_length;
02009                 for (;j < run_end; ++j)
02010                 {
02011                     BM_ASSERT(j < bm::set_block_size);
02012                     dst_block[j] |= decoder_.get_32();
02013                 }
02014             }
02015             else
02016             {
02017                 j += run_length;
02018             }
02019         } // for
02020         }
02021         break;
02022     case set_block_bit_1bit:
02023     case set_block_arrbit:
02024         get_arr_bit(dst_block, false /*don't clear target*/);
02025         break;      
02026     default:
02027         BM_ASSERT(0);
02028     } // switch
02029     return count;
02030 }
02031 
02032 template<class DEC>
02033 unsigned 
02034 serial_stream_iterator<DEC>::get_bit_block_AND(bm::word_t*  dst_block,
02035                                                bm::word_t*  tmp_block)
02036 {
02037     BM_ASSERT(this->state_ == e_bit_block);
02038     BM_ASSERT(dst_block != tmp_block);
02039 
02040     unsigned count = 0;
02041     switch (block_type_)
02042     {
02043     case set_block_bit:
02044         for (unsigned i = 0; i < bm::set_block_size; i+=4)
02045         {
02046             dst_block[i]   &= decoder_.get_32();
02047             dst_block[i+1] &= decoder_.get_32();
02048             dst_block[i+2] &= decoder_.get_32();
02049             dst_block[i+3] &= decoder_.get_32();
02050         }
02051         break;
02052     case set_block_bit_0runs: 
02053         {
02054         unsigned char run_type = decoder_.get_8();
02055         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02056         {
02057             unsigned run_length = decoder_.get_16();
02058             unsigned run_end = j + run_length;
02059             if (run_type)
02060             {
02061                 for (;j < run_end; ++j)
02062                 {
02063                     BM_ASSERT(j < bm::set_block_size);
02064                     dst_block[j] &= decoder_.get_32();
02065                 }
02066             }
02067             else
02068             {
02069                 for (;j < run_end; ++j)
02070                 {
02071                     BM_ASSERT(j < bm::set_block_size);
02072                     dst_block[j] = 0;
02073                 }
02074             }
02075         } // for
02076         }
02077         break;
02078     case set_block_bit_interval:
02079         {
02080             unsigned head_idx = decoder_.get_16();
02081             unsigned tail_idx = decoder_.get_16();
02082             unsigned i;
02083             for ( i = 0; i < head_idx; ++i)
02084                 dst_block[i] = 0;
02085             for ( i = head_idx; i <= tail_idx; ++i)
02086                 dst_block[i] &= decoder_.get_32();
02087             for ( i = tail_idx + 1; i < bm::set_block_size; ++i)
02088                 dst_block[i] = 0;
02089         }
02090         break;
02091     case set_block_bit_1bit:
02092     case set_block_arrbit:
02093         get_arr_bit(tmp_block, true /*clear target*/);
02094         if (dst_block)
02095             bit_block_and(dst_block, tmp_block);
02096         break;      
02097     default:
02098         BM_ASSERT(0);
02099     } // switch
02100     return count;
02101 }
02102 
02103 template<class DEC>
02104 unsigned 
02105 serial_stream_iterator<DEC>::get_bit_block_XOR(bm::word_t*  dst_block,
02106                                                bm::word_t*  tmp_block)
02107 {
02108     BM_ASSERT(this->state_ == e_bit_block);
02109     BM_ASSERT(dst_block != tmp_block);
02110 
02111     unsigned count = 0;
02112     switch (block_type_)
02113     {
02114     case set_block_bit:
02115         for (unsigned i = 0; i < bm::set_block_size; ++i)
02116             dst_block[i] ^= decoder_.get_32();
02117         break;
02118     case set_block_bit_0runs:
02119         {
02120         unsigned char run_type = decoder_.get_8();
02121         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02122         {
02123             unsigned run_length = decoder_.get_16();
02124             if (run_type)
02125             {
02126                 unsigned run_end = j + run_length;
02127                 for (;j < run_end; ++j)
02128                 {
02129                     BM_ASSERT(j < bm::set_block_size);
02130                     dst_block[j] ^= decoder_.get_32();
02131                 }
02132             }
02133             else
02134             {
02135                 j += run_length;
02136             }
02137         } // for
02138         }
02139         break;
02140     case set_block_bit_interval:
02141         {
02142             unsigned head_idx = decoder_.get_16();
02143             unsigned tail_idx = decoder_.get_16();
02144             for (unsigned i = head_idx; i <= tail_idx; ++i)
02145                 dst_block[i] ^= decoder_.get_32();
02146         }
02147         break;
02148     case set_block_bit_1bit:
02149         // TODO: optimization
02150     case set_block_arrbit:
02151         get_arr_bit(tmp_block, true /*clear target*/);
02152         if (dst_block)
02153         {
02154             bit_block_xor(dst_block, tmp_block);
02155         }
02156         break;
02157     default:
02158         BM_ASSERT(0);
02159     } // switch
02160     return count;
02161 }
02162 
02163 template<class DEC>
02164 unsigned 
02165 serial_stream_iterator<DEC>::get_bit_block_SUB(bm::word_t*  dst_block,
02166                                                bm::word_t*  tmp_block)
02167 {
02168     BM_ASSERT(this->state_ == e_bit_block);
02169     BM_ASSERT(dst_block != tmp_block);
02170 
02171     unsigned count = 0;
02172     switch (block_type_)
02173     {
02174     case set_block_bit:
02175         for (unsigned i = 0; i < bm::set_block_size; ++i)
02176             dst_block[i] &= ~decoder_.get_32();
02177         break;
02178     case set_block_bit_0runs:
02179         {
02180         unsigned char run_type = decoder_.get_8();
02181         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02182         {
02183             unsigned run_length = decoder_.get_16();
02184             if (run_type)
02185             {
02186                 unsigned run_end = j + run_length;
02187                 for (;j < run_end; ++j)
02188                 {
02189                     BM_ASSERT(j < bm::set_block_size);
02190                     dst_block[j] &= ~decoder_.get_32();
02191                 }
02192             }
02193             else
02194             {
02195                 j += run_length;
02196             }
02197         } // for
02198         }
02199         break;
02200     case set_block_bit_interval:
02201         {
02202             unsigned head_idx = decoder_.get_16();
02203             unsigned tail_idx = decoder_.get_16();
02204             for (unsigned i = head_idx; i <= tail_idx; ++i)
02205                 dst_block[i] &= ~decoder_.get_32();
02206         }
02207         break;
02208     case set_block_bit_1bit:
02209         // TODO: optimization
02210     case set_block_arrbit:
02211         get_arr_bit(tmp_block, true /*clear target*/);
02212         if (dst_block)
02213             bit_block_sub(dst_block, tmp_block);
02214         break;
02215     default:
02216         BM_ASSERT(0);
02217     } // switch
02218     return count;
02219 }
02220 
02221 
02222 template<class DEC>
02223 unsigned 
02224 serial_stream_iterator<DEC>::get_bit_block_COUNT(bm::word_t*  /*dst_block*/,
02225                                                  bm::word_t*  /*tmp_block*/)
02226 {
02227     BM_ASSERT(this->state_ == e_bit_block);
02228 
02229     unsigned count = 0;
02230     switch (block_type_)
02231     {
02232     case set_block_bit:
02233         for (unsigned i = 0; i < bm::set_block_size; ++i)
02234             count += word_bitcount(decoder_.get_32());
02235         break;
02236     case set_block_bit_0runs:
02237         {
02238         unsigned count = 0;
02239         unsigned char run_type = decoder_.get_8();
02240         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02241         {
02242             unsigned run_length = decoder_.get_16();
02243             if (run_type)
02244             {
02245                 unsigned run_end = j + run_length;
02246                 for (;j < run_end; ++j)
02247                 {
02248                     count += word_bitcount(decoder_.get_32());
02249                 }
02250             }
02251             else
02252             {
02253                 j += run_length;
02254             }
02255         } // for
02256         return count;
02257         }
02258     case set_block_bit_interval:
02259         {
02260             unsigned head_idx = decoder_.get_16();
02261             unsigned tail_idx = decoder_.get_16();
02262             for (unsigned i = head_idx; i <= tail_idx; ++i)
02263                 count += word_bitcount(decoder_.get_32());
02264         }
02265         break;
02266     case set_block_arrbit:
02267         count += get_arr_bit(0);
02268         break;
02269     case set_block_bit_1bit:
02270         ++count;
02271         decoder_.get_16();
02272         break;
02273     default:
02274         BM_ASSERT(0);
02275     } // switch
02276     return count;
02277 }
02278 
02279 template<class DEC>
02280 unsigned 
02281 serial_stream_iterator<DEC>::get_bit_block_COUNT_A(bm::word_t*  dst_block,
02282                                                    bm::word_t*  /*tmp_block*/)
02283 {
02284     BM_ASSERT(this->state_ == e_bit_block);
02285     unsigned count = 0;
02286     if (dst_block)
02287     {
02288         // count the block bitcount
02289         count = 
02290             bit_block_calc_count(dst_block, 
02291                                  dst_block + bm::set_block_size);
02292     }
02293 
02294     switch (block_type_)
02295     {
02296     case set_block_bit:
02297         decoder_.get_32(0, bm::set_block_size);
02298         break;
02299     case set_block_bit_0runs:
02300         {
02301         unsigned char run_type = decoder_.get_8();
02302         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02303         {
02304             unsigned run_length = decoder_.get_16();
02305             if (run_type)
02306             {
02307                 unsigned run_end = j + run_length;
02308                 for (;j < run_end; ++j)
02309                 {
02310                     decoder_.get_32();
02311                 }
02312             }
02313             else
02314             {
02315                 j += run_length;
02316             }
02317         } // for
02318         }
02319         break;
02320 
02321     case set_block_bit_interval:
02322         {
02323             unsigned head_idx = decoder_.get_16();
02324             unsigned tail_idx = decoder_.get_16();
02325             for (unsigned i = head_idx; i <= tail_idx; ++i)
02326                 decoder_.get_32();
02327         }
02328         break;
02329     case set_block_arrbit:
02330         get_arr_bit(0);
02331         break;
02332     case set_block_bit_1bit:
02333         decoder_.get_16();
02334         break;
02335     default:
02336         BM_ASSERT(0);
02337     } // switch
02338     return count;
02339 }
02340 
02341 
02342 template<class DEC>
02343 unsigned 
02344 serial_stream_iterator<DEC>::get_bit_block_COUNT_AND(bm::word_t*  dst_block,
02345                                                      bm::word_t*  tmp_block)
02346 {
02347     BM_ASSERT(this->state_ == e_bit_block);
02348     BM_ASSERT(dst_block);
02349 
02350     unsigned count = 0;
02351     switch (block_type_)
02352     {
02353     case set_block_bit:
02354         for (unsigned i = 0; i < bm::set_block_size; ++i)
02355             count += word_bitcount(dst_block[i] & decoder_.get_32());
02356         break;
02357     case set_block_bit_0runs:
02358         {
02359         unsigned count = 0;
02360         unsigned char run_type = decoder_.get_8();
02361         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02362         {
02363             unsigned run_length = decoder_.get_16();
02364             if (run_type)
02365             {
02366                 unsigned run_end = j + run_length;
02367                 for (;j < run_end; ++j)
02368                 {
02369                     count += word_bitcount(dst_block[j] & decoder_.get_32());
02370                 }
02371             }
02372             else
02373             {
02374                 j += run_length;
02375             }
02376         } // for
02377         return count;
02378         }
02379     case set_block_bit_interval:
02380         {
02381         unsigned head_idx = decoder_.get_16();
02382         unsigned tail_idx = decoder_.get_16();
02383         for (unsigned i = head_idx; i <= tail_idx; ++i)
02384             count += word_bitcount(dst_block[i] & decoder_.get_32());
02385         }
02386         break;
02387     case set_block_bit_1bit:
02388         // TODO: optimization
02389     case set_block_arrbit:
02390         get_arr_bit(tmp_block, true /*clear target*/);
02391         count += 
02392             bit_operation_and_count(dst_block, dst_block + bm::set_block_size, 
02393                                     tmp_block);
02394         break;
02395     default:
02396         BM_ASSERT(0);
02397     } // switch
02398     return count;
02399 }
02400 
02401 template<class DEC>
02402 unsigned 
02403 serial_stream_iterator<DEC>::get_bit_block_COUNT_OR(bm::word_t*  dst_block,
02404                                                     bm::word_t*  tmp_block)
02405 {
02406     BM_ASSERT(this->state_ == e_bit_block);
02407     BM_ASSERT(dst_block);
02408 
02409     bitblock_sum_adapter count_adapter;
02410     switch (block_type_)
02411     {
02412     case set_block_bit:
02413         {
02414         bitblock_get_adapter ga(dst_block);
02415         bit_COUNT_OR<bm::word_t> func;
02416         
02417         bit_recomb(ga,
02418                    decoder_,
02419                    func,
02420                    count_adapter
02421                   );
02422         }
02423         break;
02424     case set_block_bit_0runs: 
02425         {
02426         unsigned count = 0;
02427         unsigned char run_type = decoder_.get_8();
02428         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02429         {
02430             unsigned run_length = decoder_.get_16();
02431             unsigned run_end = j + run_length;
02432             if (run_type)
02433             {
02434                 for (;j < run_end; ++j)
02435                 {
02436                     BM_ASSERT(j < bm::set_block_size);
02437                     count += word_bitcount(dst_block[j] | decoder_.get_32());
02438                 }
02439             }
02440             else
02441             {
02442                 for (;j < run_end; ++j)
02443                 {
02444                     BM_ASSERT(j < bm::set_block_size);
02445                     count += word_bitcount(dst_block[j]);
02446                 }
02447             }
02448         } // for
02449         return count;
02450         }
02451     case set_block_bit_interval:
02452         {
02453         unsigned head_idx = decoder_.get_16();
02454         unsigned tail_idx = decoder_.get_16();
02455         unsigned count = 0;
02456         unsigned i;
02457         for (i = 0; i < head_idx; ++i)
02458             count += word_bitcount(dst_block[i]);
02459         for (i = head_idx; i <= tail_idx; ++i)
02460             count += word_bitcount(dst_block[i] | decoder_.get_32());
02461         for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02462             count += word_bitcount(dst_block[i]);
02463         return count;
02464         }
02465     case set_block_bit_1bit:
02466         // TODO: optimization
02467     case set_block_arrbit:
02468         get_arr_bit(tmp_block, true /* clear target*/);
02469         return 
02470             bit_operation_or_count(dst_block, 
02471                                    dst_block + bm::set_block_size,
02472                                    tmp_block);
02473     default:
02474         BM_ASSERT(0);
02475     } // switch
02476     return count_adapter.sum();
02477 }
02478 
02479 template<class DEC>
02480 unsigned 
02481 serial_stream_iterator<DEC>::get_bit_block_COUNT_XOR(bm::word_t*  dst_block,
02482                                                      bm::word_t*  tmp_block)
02483 {
02484     BM_ASSERT(this->state_ == e_bit_block);
02485     BM_ASSERT(dst_block);
02486 
02487     bitblock_sum_adapter count_adapter;
02488     switch (block_type_)
02489     {
02490     case set_block_bit:
02491         {
02492         bitblock_get_adapter ga(dst_block);
02493         bit_COUNT_XOR<bm::word_t> func;
02494         
02495         bit_recomb(ga,
02496                    decoder_,
02497                    func,
02498                    count_adapter
02499                   );
02500         }
02501         break;
02502     case set_block_bit_0runs: 
02503         {
02504         unsigned count = 0;
02505         unsigned char run_type = decoder_.get_8();
02506         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02507         {
02508             unsigned run_length = decoder_.get_16();
02509             unsigned run_end = j + run_length;
02510             if (run_type)
02511             {
02512                 for (;j < run_end; ++j)
02513                 {
02514                     BM_ASSERT(j < bm::set_block_size);
02515                     count += word_bitcount(dst_block[j] ^ decoder_.get_32());
02516                 }
02517             }
02518             else
02519             {
02520                 for (;j < run_end; ++j)
02521                 {
02522                     BM_ASSERT(j < bm::set_block_size);
02523                     count += word_bitcount(dst_block[j]);
02524                 }
02525             }
02526         } // for
02527         return count;
02528         }
02529     case set_block_bit_interval:
02530         {
02531         unsigned head_idx = decoder_.get_16();
02532         unsigned tail_idx = decoder_.get_16();
02533         unsigned count = 0;
02534         unsigned i;
02535         for (i = 0; i < head_idx; ++i)
02536             count += word_bitcount(dst_block[i]);
02537         for (i = head_idx; i <= tail_idx; ++i)
02538             count += word_bitcount(dst_block[i] ^ decoder_.get_32());
02539         for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02540             count += word_bitcount(dst_block[i]);
02541         return count;
02542         }
02543     case set_block_bit_1bit:
02544         // TODO: optimization
02545     case set_block_arrbit:
02546         get_arr_bit(tmp_block, true /* clear target*/);
02547         return 
02548             bit_operation_xor_count(dst_block, 
02549                                     dst_block + bm::set_block_size,
02550                                     tmp_block);
02551     default:
02552         BM_ASSERT(0);
02553     } // switch
02554     return count_adapter.sum();
02555 }
02556 
02557 template<class DEC>
02558 unsigned 
02559 serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_AB(bm::word_t*  dst_block,
02560                                                         bm::word_t*  tmp_block)
02561 {
02562     BM_ASSERT(this->state_ == e_bit_block);
02563     BM_ASSERT(dst_block);
02564 
02565     bitblock_sum_adapter count_adapter;
02566     switch (block_type_)
02567     {
02568     case set_block_bit:
02569         {
02570         bitblock_get_adapter ga(dst_block);
02571         bit_COUNT_SUB_AB<bm::word_t> func;
02572         
02573         bit_recomb(ga, 
02574                    decoder_,
02575                    func,
02576                    count_adapter
02577                   );
02578         }
02579         break;
02580     case set_block_bit_0runs: 
02581         {
02582         unsigned count = 0;
02583         unsigned char run_type = decoder_.get_8();
02584         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02585         {
02586             unsigned run_length = decoder_.get_16();
02587             unsigned run_end = j + run_length;
02588             if (run_type)
02589             {
02590                 for (;j < run_end; ++j)
02591                 {
02592                     BM_ASSERT(j < bm::set_block_size);
02593                     count += word_bitcount(dst_block[j] & ~decoder_.get_32());
02594                 }
02595             }
02596             else
02597             {
02598                 for (;j < run_end; ++j)
02599                 {
02600                     BM_ASSERT(j < bm::set_block_size);
02601                     count += word_bitcount(dst_block[j]);
02602                 }
02603             }
02604         } // for
02605         return count;
02606         }
02607     case set_block_bit_interval:
02608         {
02609         unsigned head_idx = decoder_.get_16();
02610         unsigned tail_idx = decoder_.get_16();
02611         unsigned count = 0;
02612         unsigned i;
02613         for (i = 0; i < head_idx; ++i)
02614             count += word_bitcount(dst_block[i]);
02615         for (i = head_idx; i <= tail_idx; ++i)
02616             count += word_bitcount(dst_block[i] & (~decoder_.get_32()));
02617         for (i = tail_idx + 1; i < bm::set_block_size; ++i)
02618             count += word_bitcount(dst_block[i]);
02619         return count;
02620         }
02621         break;
02622     case set_block_bit_1bit:
02623         //TODO: optimization
02624     case set_block_arrbit:
02625         get_arr_bit(tmp_block, true /* clear target*/);
02626         return 
02627             bit_operation_sub_count(dst_block, 
02628                                     dst_block + bm::set_block_size,
02629                                     tmp_block);
02630     default:
02631         BM_ASSERT(0);
02632     } // switch
02633     return count_adapter.sum();
02634 }
02635 
02636 template<class DEC>
02637 unsigned 
02638 serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_BA(bm::word_t*  dst_block,
02639                                                         bm::word_t*  tmp_block)
02640 {
02641     BM_ASSERT(this->state_ == e_bit_block);
02642     BM_ASSERT(dst_block);
02643 
02644     bitblock_sum_adapter count_adapter;
02645     switch (block_type_)
02646     {
02647     case set_block_bit:
02648         {
02649         bitblock_get_adapter ga(dst_block);
02650         bit_COUNT_SUB_BA<bm::word_t> func;
02651 
02652         bit_recomb(ga,
02653                    decoder_,
02654                    func,
02655                    count_adapter
02656                   );
02657         }
02658         break;
02659     case set_block_bit_0runs: 
02660         {
02661         unsigned count = 0;
02662         unsigned char run_type = decoder_.get_8();
02663         for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
02664         {
02665             unsigned run_length = decoder_.get_16();
02666             unsigned run_end = j + run_length;
02667             if (run_type)
02668             {
02669                 for (;j < run_end; ++j)
02670                 {
02671                     BM_ASSERT(j < bm::set_block_size);
02672                     count += word_bitcount(decoder_.get_32() & (~dst_block[j]));
02673                 }
02674             }
02675             else
02676             {
02677                 j += run_length;
02678             }
02679         } // for
02680         return count;
02681         }
02682 
02683     case set_block_bit_interval:
02684         {
02685         unsigned head_idx = decoder_.get_16();
02686         unsigned tail_idx = decoder_.get_16();
02687         unsigned count = 0;
02688         unsigned i;
02689         for (i = head_idx; i <= tail_idx; ++i)
02690             count += word_bitcount(decoder_.get_32() & (~dst_block[i]));
02691         return count;
02692         }
02693         break;
02694     case set_block_bit_1bit:
02695         // TODO: optimization
02696     case set_block_arrbit:
02697         get_arr_bit(tmp_block, true /* clear target*/);
02698         return 
02699             bit_operation_sub_count(tmp_block,
02700                                     tmp_block + bm::set_block_size,
02701                                     dst_block);
02702     default:
02703         BM_ASSERT(0);
02704     } // switch
02705     return count_adapter.sum();
02706 }
02707 
02708 
02709 
02710 template<class DEC>
02711 unsigned serial_stream_iterator<DEC>::get_arr_bit(bm::word_t* dst_block, 
02712                                                   bool        clear_target)
02713 {
02714     BM_ASSERT(this->block_type_ == set_block_arrbit || 
02715               this->block_type_ == set_block_bit_1bit);
02716     
02717     gap_word_t len = decoder_.get_16(); // array length / 1bit_idx
02718     if (dst_block)
02719     {
02720         if (clear_target)
02721             bit_block_set(dst_block, 0);
02722 
02723         if (this->block_type_ == set_block_bit_1bit)
02724         {
02725             // len contains idx of 1 bit set
02726             set_bit(dst_block, len);
02727             return 1;
02728         }
02729 
02730         for (unsigned k = 0; k < len; ++k)
02731         {
02732             gap_word_t bit_idx = decoder_.get_16();
02733             set_bit(dst_block, bit_idx);
02734         }
02735     }
02736     else
02737     {
02738         if (this->block_type_ == set_block_bit_1bit)
02739         {
02740             return 1; // nothing to do: len var already consumed 16bits
02741         }
02742         // fwd the decocing stream
02743         decoder_.seek(len * 2);
02744     }
02745     return len;
02746 }
02747 
02748 template<class DEC>
02749 unsigned serial_stream_iterator<DEC>::get_bit()
02750 {
02751     BM_ASSERT(this->block_type_ == set_block_bit_1bit);
02752     ++(this->block_idx_);
02753     this->state_ = e_blocks;
02754 
02755     return decoder_.get_16(); // 1bit_idx   
02756 }
02757 
02758 template<class DEC>
02759 void 
02760 serial_stream_iterator<DEC>::get_gap_block(bm::gap_word_t* dst_block)
02761 {
02762     BM_ASSERT(this->state_ == e_gap_block || 
02763               this->block_type_ == set_block_bit_1bit);
02764     BM_ASSERT(dst_block);
02765 
02766     read_gap_block(this->decoder_,
02767                    this->block_type_,
02768                    dst_block,
02769                    this->gap_head_);
02770 
02771     ++(this->block_idx_);
02772     this->state_ = e_blocks;
02773 }
02774 
02775 
02776 template<class DEC>
02777 unsigned 
02778 serial_stream_iterator<DEC>::get_bit_block(bm::word_t*    dst_block,
02779                                            bm::word_t*    tmp_block,
02780                                            set_operation  op)
02781 {
02782     BM_ASSERT(this->state_ == e_bit_block);
02783     get_bit_func_type bit_func = bit_func_table_[op];
02784     BM_ASSERT(bit_func);
02785     unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
02786     this->state_ = e_blocks;
02787     ++(this->block_idx_);
02788     return cnt;
02789 }
02790 
02791 
02792 
02793 template<class BV>
02794 unsigned operation_deserializer<BV>::deserialize(
02795                                         bvector_type&        bv, 
02796                                         const unsigned char* buf, 
02797                                         bm::word_t*          temp_block,
02798                                         set_operation        op)
02799 {
02800     ByteOrder bo_current = globals<true>::byte_order();
02801 
02802     bm::decoder dec(buf);
02803     unsigned char header_flag = dec.get_8();
02804     ByteOrder bo = bo_current;
02805     if (!(header_flag & BM_HM_NO_BO))
02806     {
02807         bo = (bm::ByteOrder) dec.get_8();
02808     }
02809 
02810     blocks_manager_type& bman = bv.get_blocks_manager();
02811     bit_block_guard<blocks_manager_type> bg(bman);
02812     if (temp_block == 0)
02813     {
02814         temp_block = bg.allocate();
02815     }
02816 
02817     if (bo_current == bo)
02818     {
02819         serial_stream_current ss(buf);
02820         return 
02821             iterator_deserializer<BV, serial_stream_current>::
02822                 deserialize(bv, ss, temp_block, op);
02823     }
02824     switch (bo_current) 
02825     {
02826     case BigEndian:
02827         {
02828         serial_stream_be ss(buf);
02829         return 
02830             iterator_deserializer<BV, serial_stream_be>::
02831                 deserialize(bv, ss, temp_block, op);
02832         }
02833     case LittleEndian:
02834         {
02835         serial_stream_le ss(buf);
02836         return 
02837             iterator_deserializer<BV, serial_stream_le>::
02838                 deserialize(bv, ss, temp_block, op);
02839         }
02840     default:
02841         BM_ASSERT(0);
02842     };
02843     return 0;
02844 }
02845 
02846 
02847 template<class BV>
02848 void operation_deserializer<BV>::deserialize(
02849                      bvector_type&        bv_target,
02850                      const bvector_type&  bv_mask,
02851                      const unsigned char* buf, 
02852                      bm::word_t*          temp_block,
02853                      set_operation        op)
02854 {
02855     ByteOrder bo_current = globals<true>::byte_order();
02856 
02857     bm::decoder dec(buf);
02858     unsigned char header_flag = dec.get_8();
02859     ByteOrder bo = bo_current;
02860     if (!(header_flag & BM_HM_NO_BO))
02861     {
02862         bo = (bm::ByteOrder) dec.get_8();
02863     }
02864 
02865     blocks_manager_type& bman = bv_target.get_blocks_manager();
02866     bit_block_guard<blocks_manager_type> bg(bman);
02867     if (temp_block == 0)
02868     {
02869         temp_block = bg.allocate();
02870     }
02871 
02872     if (bo_current == bo)
02873     {
02874         serial_stream_current ss(buf);
02875         iterator_deserializer<BV, serial_stream_current>::
02876             deserialize(bv_target, bv_mask, ss, temp_block, op);
02877         return;
02878     }
02879     switch (bo_current) 
02880     {
02881     case BigEndian:
02882         {
02883             serial_stream_be ss(buf);
02884             iterator_deserializer<BV, serial_stream_be>::
02885                 deserialize(bv_target, bv_mask, ss, temp_block, op);
02886         }
02887     case LittleEndian:
02888         {
02889             serial_stream_le ss(buf);
02890             iterator_deserializer<BV, serial_stream_le>::
02891                 deserialize(bv_target, bv_mask, ss, temp_block, op);
02892         }
02893     default:
02894         BM_ASSERT(0);
02895     };
02896 }
02897 
02898 
02899 template<class BV, class SerialIterator>
02900 void iterator_deserializer<BV, SerialIterator>::load_id_list(
02901                                             bvector_type&         bv, 
02902                                             serial_iterator_type& sit,
02903                                             unsigned              id_count,
02904                                             bool                  set_clear)
02905 {
02906     const unsigned win_size = 64;
02907     bm::id_t id_buffer[win_size+1];
02908 
02909     if (set_clear)  // set bits
02910     {
02911         for (unsigned i = 0; i <= id_count;)
02912         {
02913             unsigned j;
02914             for (j = 0; j < win_size && i <= id_count; ++j, ++i) 
02915             {
02916                 id_buffer[j] = sit.get_id();
02917                 sit.next();
02918             } // for j
02919             bm::combine_or(bv, id_buffer, id_buffer + j);
02920         } // for i
02921     } 
02922     else // clear bits
02923     {
02924         for (unsigned i = 0; i <= id_count;)
02925         {
02926             unsigned j;
02927             for (j = 0; j < win_size && i <= id_count; ++j, ++i) 
02928             {
02929                 id_buffer[j] = sit.get_id();
02930                 sit.next();
02931             } // for j
02932             bm::combine_sub(bv, id_buffer, id_buffer + j);
02933         } // for i
02934     }
02935 }
02936 
02937 template<class BV, class SerialIterator>
02938 unsigned 
02939 iterator_deserializer<BV, SerialIterator>::finalize_target_vector(
02940                                                 blocks_manager_type& bman,
02941                                                 set_operation        op,
02942                                                 unsigned             bv_block_idx)
02943 {
02944     unsigned count = 0;
02945     switch (op)
02946     {
02947     case set_OR:    case set_SUB:     case set_XOR:
02948     case set_COUNT: case set_COUNT_B: case set_COUNT_AND:
02949     case set_COUNT_SUB_BA:
02950         // nothing to do
02951         break;
02952     case set_AND: case set_ASSIGN:
02953         // clear the rest of the target vector
02954         {
02955             unsigned i, j;
02956             bman.get_block_coord(bv_block_idx, &i, &j);
02957             bm::word_t*** blk_root = bman.get_rootblock();
02958             unsigned effective_top_size = 
02959                 bman.effective_top_block_size();
02960             for (;i < effective_top_size; ++i) 
02961             {
02962                 bm::word_t** blk_blk = blk_root[i];
02963                 if (blk_blk == 0) 
02964                 {
02965                     bv_block_idx+=bm::set_array_size-j;
02966                     j = 0;
02967                     continue;
02968                 }
02969                 for (;j < bm::set_array_size; ++j, ++bv_block_idx)
02970                 {
02971                     if (blk_blk[j])
02972                         bman.zero_block(bv_block_idx);
02973                 } // for j
02974                 j = 0;
02975             } // for i
02976 
02977         }
02978         break;
02979     case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR:
02980     case set_COUNT_SUB_AB:
02981         // count bits in the target vector
02982         {
02983             unsigned i, j;
02984             bman.get_block_coord(bv_block_idx, &i, &j);
02985             bm::word_t*** blk_root = bman.get_rootblock();
02986             unsigned effective_top_size = 
02987                 bman.effective_top_block_size();
02988             for (;i < effective_top_size; ++i) 
02989             {
02990                 bm::word_t** blk_blk = blk_root[i];
02991                 if (blk_blk == 0) 
02992                 {
02993                     bv_block_idx+=bm::set_array_size-j;
02994                     j = 0;
02995                     continue;
02996                 }
02997                 for (;j < bm::set_array_size; ++j, ++bv_block_idx)
02998                 {
02999                     if (blk_blk[j])
03000                         count += bman.block_bitcount(blk_blk[j]);//, bv_block_idx);
03001                 } // for j
03002                 j = 0;
03003             } // for i
03004 
03005         }
03006         break;
03007     default:
03008         BM_ASSERT(0);
03009     }
03010     return count;
03011 }
03012 
03013 template<class BV, class SerialIterator>
03014 unsigned 
03015 iterator_deserializer<BV, SerialIterator>::process_id_list(
03016                                     bvector_type&         bv, 
03017                                     serial_iterator_type& sit,
03018                                     set_operation         op)
03019 {
03020     unsigned count = 0;
03021     unsigned id_count = sit.get_id_count();
03022     bool set_clear = true;
03023     switch (op)
03024     {
03025     case set_AND:
03026         {
03027             // TODO: use some more optimal algorithm without temp vector
03028             BV bv_tmp(BM_GAP);
03029             load_id_list(bv_tmp, sit, id_count, true);
03030             bv &= bv_tmp;
03031         }
03032         break;
03033     case set_ASSIGN:
03034         bv.clear(true);
03035         // intentional case fall through here (not a bug)
03036     case set_OR:
03037         set_clear = true;
03038         // fall to SUB
03039     case set_SUB:
03040         load_id_list(bv, sit, id_count, set_clear);
03041         break;
03042     case set_XOR:
03043         for (unsigned i = 0; i < id_count; ++i)
03044         {
03045             bm::id_t id = sit.get_id();
03046             bv[id] ^= true;
03047             sit.next();
03048         } // for
03049         break;
03050     case set_COUNT: case set_COUNT_B:
03051         for (unsigned i = 0; i < id_count; ++i)
03052         {
03053             /* bm::id_t id = */ sit.get_id();
03054             ++count;
03055             sit.next();
03056         } // for
03057         break;
03058     case set_COUNT_A:
03059         return bv.count();
03060     case set_COUNT_AND:
03061         for (unsigned i = 0; i < id_count; ++i)
03062         {
03063             bm::id_t id = sit.get_id();
03064             count += bv.get_bit(id);
03065             sit.next();
03066         } // for
03067         break;
03068     case set_COUNT_XOR:
03069         {
03070             // TODO: get rid of the temp vector
03071             BV bv_tmp(BM_GAP);
03072             load_id_list(bv_tmp, sit, id_count, true);
03073             count += count_xor(bv, bv_tmp);
03074         }
03075         break;
03076     case set_COUNT_OR:
03077         {
03078             // TODO: get rid of the temp. vector
03079             BV bv_tmp(BM_GAP);
03080             load_id_list(bv_tmp, sit, id_count, true);
03081             count += count_or(bv, bv_tmp);
03082         }
03083         break;
03084     case set_COUNT_SUB_AB:
03085         {
03086             // TODO: get rid of the temp. vector
03087             BV bv_tmp(bv);
03088             load_id_list(bv_tmp, sit, id_count, false);
03089             count += bv_tmp.count();
03090         }
03091         break;
03092     default:
03093         BM_ASSERT(0);
03094     } // switch
03095 
03096     return count;
03097 }
03098 
03099 
03100 template<class BV, class SerialIterator>
03101 void iterator_deserializer<BV, SerialIterator>::deserialize(
03102                      bvector_type&         bv_target,
03103                      const bvector_type&   bv_mask,
03104                      serial_iterator_type& sit, 
03105                      bm::word_t*           temp_block,
03106                      set_operation         op)
03107 {
03108     BM_ASSERT(temp_block);
03109     BM_ASSERT(op == bm::set_AND ||
03110               op == bm::set_OR || op == bm::set_XOR || op == bm::set_SUB);
03111 
03112     gap_word_t   gap_temp_block[bm::gap_equiv_len*3];
03113     gap_temp_block[0] = 0;
03114 
03115     bv_target.clear(true); // clear and free the memory
03116 
03117     const blocks_manager_type& bman_mask = bv_mask.get_blocks_manager();
03118           blocks_manager_type& bman_target = bv_target.get_blocks_manager();
03119 
03120     unsigned bv_size = sit.bv_size();
03121     if (bv_mask.size() > bv_size) 
03122     {
03123         bv_size = bv_mask.size();    
03124     }
03125     if (bv_target.size() < bv_size)
03126     {
03127         bv_target.resize(bv_size);
03128     }
03129 
03130     unsigned top_blocks = bman_mask.effective_top_block_size();
03131 
03132     BM_SET_MMX_GUARD
03133 
03134     typename serial_iterator_type::iterator_state state;
03135     state = sit.get_state();
03136     if (state == serial_iterator_type::e_list_ids)
03137     {
03138         bv_target = bv_mask;
03139         process_id_list(bv_target, sit, op);
03140         return;
03141     }
03142 
03143     unsigned bv_block_idx = 0;
03144     for (;1;)
03145     {
03146         bv_block_idx = sit.block_idx();
03147         // early exit check to avoid over-scan
03148         {
03149             unsigned tb_idx = bv_block_idx >> bm::set_array_shift; // current top block
03150             if (tb_idx > top_blocks)
03151             {
03152                 if (op == bm::set_AND)
03153                     break;
03154                 if (sit.is_eof())
03155                     break;
03156             }
03157         }
03158         
03159         if (sit.is_eof()) // argument stream ended
03160         {
03161             if (op == bm::set_AND)
03162                 break;
03163             // (OR, SUB, XOR) need to scan fwd until mask vector ends
03164             state = serial_iterator_type::e_zero_blocks;
03165         }
03166         else 
03167         {
03168             state = sit.state();
03169         }
03170 
03171         switch (state)
03172         {
03173         case serial_iterator_type::e_blocks:
03174             sit.next();
03175             continue;
03176         case serial_iterator_type::e_bit_block:
03177             {
03178                 bm::set_operation sop = op;
03179                 const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
03180                 bm::word_t* blk_target = 0;
03181                 if (!blk_mask) 
03182                 {
03183                     switch (op)
03184                     {
03185                     case set_AND: case set_SUB: 
03186                         // first arg is 0, so the operation gives us zero
03187                         // all we need to do is to seek the input stream
03188                         sop = set_ASSIGN;
03189                         break;
03190                     case set_OR: case set_XOR:
03191                         blk_target = bman_target.make_bit_block(bv_block_idx);
03192                         break;
03193                     default:
03194                         BM_ASSERT(0);
03195                     }
03196                 }
03197                 else // block exists
03198                 {
03199                     int is_gap = BM_IS_GAP(blk_mask);
03200                     blk_target = bman_target.copy_bit_block(bv_block_idx, blk_mask, is_gap);
03201                 }
03202 
03203                 // 2 bit blocks recombination
03204                 sit.get_bit_block(blk_target, temp_block, sop);
03205             }
03206             break;
03207 
03208         case serial_iterator_type::e_zero_blocks:
03209             {
03210                 if (op == set_AND)
03211                 {
03212                     sit.skip_mono_blocks();
03213                     break;
03214                 }
03215                 sit.next();
03216                 // set_SUB: set_OR: set_XOR: 
03217                 bman_target.copy_block(bv_block_idx, bman_mask);
03218             }
03219             break;
03220 
03221         case serial_iterator_type::e_one_blocks:
03222             {
03223                 BM_ASSERT(bv_block_idx == sit.block_idx());
03224                 const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
03225                 sit.next();
03226 
03227                 switch (op)
03228                 {
03229                 case set_OR:
03230                     bman_target.set_block_all_set(bv_block_idx);
03231                     break;
03232                 case set_SUB:
03233                     break;
03234                 case set_AND:
03235                     bman_target.copy_block(bv_block_idx, bman_mask);
03236                     break;
03237                 case set_XOR:
03238                     if (blk_mask)
03239                     {
03240                         int is_gap = BM_IS_GAP(blk_mask);
03241                         bm::word_t* blk_target = 
03242                             bman_target.copy_bit_block(bv_block_idx, blk_mask, is_gap);
03243                         bit_block_xor(blk_target, FULL_BLOCK_ADDR);
03244                     }
03245                     else
03246                     {
03247                         // 0 XOR 1 = 1
03248                         bman_target.set_block_all_set(bv_block_idx);
03249                     }
03250                     break;
03251                 default:
03252                     BM_ASSERT(0);
03253                 } // switch
03254 
03255             }
03256             break;
03257 
03258         case serial_iterator_type::e_gap_block:
03259             {
03260                 // Single bit-in-block optimization             
03261                 if (sit.get_block_type() == set_block_bit_1bit)
03262                 {
03263                     if (op == set_AND)
03264                     {
03265                         unsigned bit_idx = sit.get_bit();
03266                         unsigned bn = (bv_block_idx << bm::set_block_shift) | bit_idx;
03267                         bool bval_mask = bv_mask.test(bn);
03268                         bv_target.set_bit(bn, bval_mask);                       
03269                         break;
03270                     }
03271                 }
03272                 
03273                 const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
03274 
03275                 sit.get_gap_block(gap_temp_block);
03276                 // gap_word_t   gap_head = gap_temp_block[0];
03277 
03278                 unsigned len = gap_length(gap_temp_block);
03279                 int level = gap_calc_level(len, bman_target.glen());
03280                 --len;
03281 
03282                 
03283                 if (!blk_mask)
03284                 {
03285                     if (op == set_OR || op == set_XOR)
03286                     {
03287                         bman_target.set_gap_block(bv_block_idx, gap_temp_block, level);
03288                     }
03289                 }
03290                 else  // mask block exists
03291                 {
03292 
03293                     bm::operation bop = bm::setop2op(op);
03294                     bman_target.copy_block(bv_block_idx, bman_mask);
03295                     bv_target.combine_operation_with_block(
03296                                         bv_block_idx, 
03297                                         (bm::word_t*)gap_temp_block, 
03298                                         1,  // GAP
03299                                         bop);
03300                 }
03301                 
03302             }
03303             break;
03304 
03305         default:
03306             BM_ASSERT(0);
03307         } // switch
03308 
03309 
03310     } // for (deserialization)
03311 
03312     bv_target.forget_count();
03313 }
03314 
03315 
03316 
03317 template<class BV, class SerialIterator>
03318 unsigned 
03319 iterator_deserializer<BV, SerialIterator>::deserialize(
03320                                        bvector_type&         bv, 
03321                                        serial_iterator_type& sit, 
03322                                        bm::word_t*           temp_block,
03323                                        set_operation         op)
03324 {
03325     BM_ASSERT(temp_block);
03326 
03327     unsigned count = 0;
03328     gap_word_t   gap_temp_block[bm::gap_equiv_len*3];
03329     gap_temp_block[0] = 0;
03330 
03331     blocks_manager_type& bman = bv.get_blocks_manager();
03332 
03333     bv.forget_count();
03334     if (sit.bv_size() && (sit.bv_size() > bv.size())) 
03335     {
03336         bv.resize(sit.bv_size());
03337     }
03338 
03339     BM_SET_MMX_GUARD
03340 
03341     typename serial_iterator_type::iterator_state state;
03342     state = sit.get_state();
03343     if (state == serial_iterator_type::e_list_ids)
03344     {
03345         count = process_id_list(bv, sit, op);
03346         return count;
03347     }
03348 
03349     unsigned bv_block_idx = 0;
03350 
03351     for (;1;)
03352     {
03353         bm::set_operation sop = op;
03354         if (sit.is_eof()) // argument stream ended
03355         {
03356             count += finalize_target_vector(bman, op, bv_block_idx);
03357             return count;
03358         }
03359 
03360         state = sit.state();
03361         switch (state)
03362         {
03363         case serial_iterator_type::e_blocks:
03364             sit.next();
03365             continue;
03366         case serial_iterator_type::e_bit_block:
03367             {
03368             BM_ASSERT(sit.block_idx() == bv_block_idx);
03369             bm::word_t* blk = bman.get_block(bv_block_idx);
03370 
03371             if (!blk) 
03372             {
03373                 switch (op)
03374                 {
03375                 case set_AND:          case set_SUB: case set_COUNT_AND:
03376                 case set_COUNT_SUB_AB: case set_COUNT_A:
03377                     // one arg is 0, so the operation gives us zero
03378                     // all we need to do is to seek the input stream
03379                     sop = set_ASSIGN;
03380                     break;
03381 
03382                 case set_OR: case set_XOR: case set_ASSIGN:
03383                     blk = bman.make_bit_block(bv_block_idx);
03384                     break;
03385 
03386                 case set_COUNT:        case set_COUNT_XOR: case set_COUNT_OR:
03387                 case set_COUNT_SUB_BA: case set_COUNT_B:
03388                     // first arg is not required (should work as is)
03389                     sop = set_COUNT;
03390                     break;
03391 
03392                 case set_END:
03393                 default:
03394                     BM_ASSERT(0);
03395                 }
03396             }
03397             else // block exists
03398             {
03399                 int is_gap = BM_IS_GAP(blk);
03400                 if (is_gap || IS_FULL_BLOCK(blk))
03401                 {
03402                     if (IS_FULL_BLOCK(blk) && is_const_set_operation(op))
03403                     {
03404                     }
03405                     else
03406                     {
03407                         // TODO: make sure const operations do not 
03408                         // deoptimize GAP blocks
03409                         blk = bman.deoptimize_block(bv_block_idx);
03410                     }
03411                 }
03412             }
03413 
03414             // 2 bit blocks recombination
03415             unsigned c = sit.get_bit_block(blk, temp_block, sop);
03416             count += c;
03417             }
03418             break;
03419 
03420         case serial_iterator_type::e_zero_blocks:
03421             {
03422             BM_ASSERT(bv_block_idx == sit.block_idx());
03423             bm::word_t* blk = bman.get_block(bv_block_idx);
03424             sit.next();
03425 
03426             if (blk)
03427             {
03428                 switch (op)
03429                 {
03430                 case set_AND: case set_ASSIGN:
03431                     // the result is 0
03432                     blk = bman.zero_block(bv_block_idx);
03433                     break;
03434 
03435                 case set_SUB: case set_COUNT_AND:    case set_OR:
03436                 case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
03437                     // nothing to do
03438                     break;
03439                 
03440                 case set_COUNT_SUB_AB: case set_COUNT_A: case set_COUNT_OR:
03441                 case set_COUNT:        case set_COUNT_XOR:
03442                     // valid bit block recombined with 0 block
03443                     // results with same block data
03444                     // all we need is to bitcount bv block
03445                     {
03446                     unsigned c = bman.block_bitcount(blk);//, bv_block_idx);
03447                     count += c;
03448                     }
03449                     break;
03450                 case set_END:
03451                 default:
03452                     BM_ASSERT(0);
03453                 } // switch op
03454             } // if blk
03455             }
03456             break;
03457 
03458         case serial_iterator_type::e_one_blocks:
03459             {
03460             BM_ASSERT(bv_block_idx == sit.block_idx());
03461             bm::word_t* blk = bman.get_block(bv_block_idx);
03462             sit.next();
03463 
03464             switch (op)
03465             {
03466             case set_OR: case set_ASSIGN:
03467                 bman.set_block_all_set(bv_block_idx);
03468                 break;
03469             case set_COUNT_OR: case set_COUNT_B: case set_COUNT:
03470                 // block becomes all set
03471                 count += bm::bits_in_block;
03472                 break;
03473             case set_SUB:
03474                 blk = bman.zero_block(bv_block_idx);
03475                 break;
03476             case set_COUNT_SUB_AB: case set_AND:
03477                 // nothing to do
03478                 break;
03479             case set_COUNT_AND: case set_COUNT_A:
03480                 count += bman.block_bitcount(blk);//, bv_block_idx);
03481                 break;
03482             default:
03483                 if (blk)
03484                 {
03485                     switch (op)
03486                     {
03487                     case set_XOR:
03488                         blk = bman.deoptimize_block(bv_block_idx);
03489                         bit_block_xor(blk, FULL_BLOCK_ADDR);
03490                         break;
03491                     case set_COUNT_XOR:
03492                         {
03493                         count += 
03494                             combine_count_operation_with_block(
03495                                                 blk,
03496                                                 FULL_BLOCK_ADDR,
03497                                                 bm::COUNT_XOR);
03498                         }
03499                         break;
03500                     case set_COUNT_SUB_BA:
03501                         {
03502                         count += 
03503                             combine_count_operation_with_block(
03504                                                 blk,
03505                                                 FULL_BLOCK_ADDR,
03506                                                 bm::COUNT_SUB_BA);
03507                         }
03508                         break;
03509                     default:
03510                         BM_ASSERT(0);
03511                     } // switch
03512                 }
03513                 else // blk == 0 
03514                 {
03515                     switch (op)
03516                     {
03517                     case set_XOR:
03518                         // 0 XOR 1 = 1
03519                         bman.set_block_all_set(bv_block_idx);
03520                         break;
03521                     case set_COUNT_XOR:
03522                         count += bm::bits_in_block;
03523                         break;
03524                     case set_COUNT_SUB_BA:
03525                         // 1 - 0 = 1
03526                         count += bm::bits_in_block;
03527                         break;
03528                     default:
03529                         break;
03530                     } // switch
03531                 } // else
03532             } // switch
03533 
03534             }
03535             break;
03536 
03537         case serial_iterator_type::e_gap_block:
03538             {
03539             BM_ASSERT(bv_block_idx == sit.block_idx());
03540             bm::word_t* blk = bman.get_block(bv_block_idx);
03541 
03542             sit.get_gap_block(gap_temp_block);
03543 
03544             unsigned len = gap_length(gap_temp_block);
03545             int level = gap_calc_level(len, bman.glen());
03546             --len;
03547 
03548             bool const_op = is_const_set_operation(op);
03549             if (const_op)
03550             {
03551                 distance_metric metric = operation2metric(op);
03552                 bm::word_t* gptr = (bm::word_t*)gap_temp_block;
03553                 BMSET_PTRGAP(gptr);
03554                 unsigned c = 
03555                     combine_count_operation_with_block(
03556                                         blk,
03557                                         gptr,
03558                                         metric);
03559                 count += c;
03560             }
03561             else // non-const set operation
03562             {
03563                 if ((sop == set_ASSIGN) && blk) // target block override
03564                 {
03565                     blk = bman.zero_block(bv_block_idx);
03566                     sop = set_OR;
03567                 }
03568                 if (blk == 0) // target block not found
03569                 {
03570                     switch (sop)
03571                     {
03572                     case set_AND: case set_SUB:
03573                         break;
03574                     case set_OR: case set_XOR: case set_ASSIGN:
03575                         bman.set_gap_block(
03576                             bv_block_idx, gap_temp_block, level);
03577                         break;
03578                     default:
03579                         BM_ASSERT(0);
03580                     } // switch
03581                 }
03582                 else  // target block exists
03583                 {
03584                     bm::operation bop = bm::setop2op(op);
03585                     if (level == -1) // too big to GAP
03586                     {
03587                         gap_convert_to_bitset(temp_block, gap_temp_block);
03588                         bv.combine_operation_with_block(bv_block_idx, 
03589                                                         temp_block, 
03590                                                         0, // BIT
03591                                                         bop);
03592                     }
03593                     else // GAP fits
03594                     {
03595                         set_gap_level(gap_temp_block, level);
03596                         bv.combine_operation_with_block(
03597                                                 bv_block_idx, 
03598                                                 (bm::word_t*)gap_temp_block, 
03599                                                 1,  // GAP
03600                                                 bop);
03601                     }
03602                 }
03603             }
03604             }
03605             break;
03606 
03607         default:
03608             BM_ASSERT(0);
03609         } // switch
03610 
03611         ++bv_block_idx;
03612 
03613     } // for (deserialization)
03614 
03615     return count;
03616 }
03617 
03618 
03619 
03620 
03621 } // namespace bm
03622 
03623 #include "bmundef.h"
03624 
03625 #ifdef _MSC_VER
03626 #pragma warning( pop )
03627 #endif
03628 
03629 
03630 #endif