dlvhex  2.5.0
vs12/bm/bmalgo_impl.h
Go to the documentation of this file.
00001 #ifndef BMALGO_IMPL__H__INCLUDED__
00002 #define BMALGO_IMPL__H__INCLUDED__
00003 /*
00004 Copyright(c) 2002-2010 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
00005 
00006 Permission is hereby granted, free of charge, to any person 
00007 obtaining a copy of this software and associated documentation 
00008 files (the "Software"), to deal in the Software without restriction, 
00009 including without limitation the rights to use, copy, modify, merge, 
00010 publish, distribute, sublicense, and/or sell copies of the Software, 
00011 and to permit persons to whom the Software is furnished to do so, 
00012 subject to the following conditions:
00013 
00014 The above copyright notice and this permission notice shall be included 
00015 in all copies or substantial portions of the Software.
00016 
00017 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
00018 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
00019 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
00020 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
00021 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
00022 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
00023 OTHER DEALINGS IN THE SOFTWARE.
00024 
00025 For more information please visit:  http://bmagic.sourceforge.net
00026 
00027 */
00028 
00029 #ifdef _MSC_VER
00030 #pragma warning( push )
00031 #pragma warning( disable : 4311 4312 4127)
00032 #endif
00033 
00034 #include "bmdef.h"
00035 #include "bmutil.h"
00036 
00037 namespace bm
00038 {
00039 
00055 enum distance_metric
00056 {
00057     COUNT_AND = set_COUNT_AND,          
00058     COUNT_XOR = set_COUNT_XOR,          
00059     COUNT_OR  = set_COUNT_OR,           
00060     COUNT_SUB_AB = set_COUNT_SUB_AB,    
00061     COUNT_SUB_BA = set_COUNT_SUB_BA,    
00062     COUNT_A      = set_COUNT_A,         
00063     COUNT_B      = set_COUNT_B          
00064 };
00065 
00070 inline
00071 distance_metric operation2metric(set_operation op)
00072 {
00073     BM_ASSERT(is_const_set_operation(op));
00074     if (op == set_COUNT) op = set_COUNT_B;
00075     // distance metric is created as a set operation sub-class
00076     // simple cast will work to convert
00077     return (distance_metric) op;
00078 }
00079 
00085 struct distance_metric_descriptor
00086 {
00087      distance_metric   metric;
00088      bm::id_t          result;
00089      
00090      distance_metric_descriptor(distance_metric m)
00091      : metric(m),
00092        result(0)
00093     {}
00094     distance_metric_descriptor()
00095     : metric(bm::COUNT_XOR),
00096       result(0)
00097     {}
00098     
00102     void reset()
00103     {
00104         result = 0;
00105     }
00106 };
00107 
00108 
00115 inline
00116 void combine_count_operation_with_block(const bm::word_t* blk,
00117                                         const bm::word_t* arg_blk,
00118                                         distance_metric_descriptor* dmit,
00119                                         distance_metric_descriptor* dmit_end)
00120                                             
00121 {
00122      gap_word_t* g1 = BMGAP_PTR(blk);
00123      gap_word_t* g2 = BMGAP_PTR(arg_blk);
00124 
00125      unsigned gap = BM_IS_GAP(blk);
00126      unsigned arg_gap = BM_IS_GAP(arg_blk);
00127      
00128      if (gap) // first block GAP-type
00129      {
00130          if (arg_gap)  // both blocks GAP-type
00131          {
00132              for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00133              {
00134                  distance_metric_descriptor& dmd = *it;
00135                  
00136                  switch (dmd.metric)
00137                  {
00138                  case bm::COUNT_AND:
00139                      dmd.result += gap_count_and(g1, g2);
00140                      break;
00141                  case bm::COUNT_OR:
00142                      dmd.result += gap_count_or(g1, g2);
00143                      break;
00144                  case bm::COUNT_SUB_AB:
00145                      dmd.result += gap_count_sub(g1, g2);
00146                      break;
00147                  case bm::COUNT_SUB_BA:
00148                      dmd.result += gap_count_sub(g2, g1);
00149                      break;
00150                  case bm::COUNT_XOR:
00151                      dmd.result += gap_count_xor(g1, g2);
00152                     break;
00153                  case bm::COUNT_A:
00154                     dmd.result += gap_bit_count(g1);
00155                     break;
00156                  case bm::COUNT_B:
00157                     dmd.result += gap_bit_count(g2);
00158                     break;
00159                  } // switch
00160                                      
00161              } // for it
00162              
00163              return;
00164 
00165          }
00166          else // first block - GAP, argument - BITSET
00167          {
00168              for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00169              {
00170                  distance_metric_descriptor& dmd = *it;
00171                  
00172                  switch (dmd.metric)
00173                  {
00174                  case bm::COUNT_AND:
00175                      if (arg_blk)
00176                         dmd.result += gap_bitset_and_count(arg_blk, g1);
00177                      break;
00178                  case bm::COUNT_OR:
00179                      if (!arg_blk)
00180                         dmd.result += gap_bit_count(g1);
00181                      else
00182                         dmd.result += gap_bitset_or_count(arg_blk, g1); 
00183                      break;
00184                  case bm::COUNT_SUB_AB:
00185                      {
00186                      bm::word_t  BM_ALIGN16 temp_bit_blk[bm::set_block_size] BM_ALIGN16ATTR;
00187 
00188                      gap_convert_to_bitset((bm::word_t*) temp_bit_blk, g1);
00189                      dmd.result += 
00190                        bit_operation_sub_count((bm::word_t*)temp_bit_blk, 
00191                           ((bm::word_t*)temp_bit_blk) + bm::set_block_size,
00192                            arg_blk);
00193                      }
00194                      break;
00195                  case bm::COUNT_SUB_BA:
00196                      dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
00197                      combine_count_operation_with_block(arg_blk,
00198                                                         blk,
00199                                                         it, it+1);
00200                      dmd.metric = bm::COUNT_SUB_BA; // restore status quo
00201                      break;
00202                  case bm::COUNT_XOR:
00203                      if (!arg_blk)
00204                         dmd.result += gap_bit_count(g1);
00205                      else
00206                         dmd.result += gap_bitset_xor_count(arg_blk, g1);
00207                      break;
00208                  case bm::COUNT_A:
00209                     if (g1)
00210                         dmd.result += gap_bit_count(g1);
00211                     break;
00212                  case bm::COUNT_B:
00213                     if (arg_blk)
00214                     {
00215                         dmd.result += 
00216                           bit_block_calc_count(arg_blk, 
00217                                                arg_blk + bm::set_block_size);
00218                     }
00219                     break;
00220                  } // switch
00221                                      
00222              } // for it
00223              
00224              return;
00225          
00226          }
00227      } 
00228      else // first block is BITSET-type
00229      {     
00230          if (arg_gap) // second argument block is GAP-type
00231          {
00232              for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00233              {
00234                  distance_metric_descriptor& dmd = *it;
00235                  
00236                  switch (dmd.metric)
00237                  {
00238                  case bm::COUNT_AND:
00239                      if (blk) 
00240                         dmd.result += gap_bitset_and_count(blk, g2);                         
00241                      break;
00242                  case bm::COUNT_OR:
00243                      if (!blk)
00244                         dmd.result += gap_bit_count(g2);
00245                      else
00246                         dmd.result += gap_bitset_or_count(blk, g2);
00247                      break;
00248                  case bm::COUNT_SUB_AB:
00249                      if (blk)
00250                         dmd.result += gap_bitset_sub_count(blk, g2);
00251                      break;
00252                  case bm::COUNT_SUB_BA:
00253                      dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
00254                      combine_count_operation_with_block(arg_blk,
00255                                                         //arg_gap,
00256                                                         blk,
00257                                                         //gap,
00258                                                         it, it+1);
00259                      dmd.metric = bm::COUNT_SUB_BA; // restore status quo
00260                      break;
00261                  case bm::COUNT_XOR:
00262                      if (!blk)
00263                         dmd.result += gap_bit_count(g2);
00264                      else
00265                         dmd.result += gap_bitset_xor_count(blk, g2); 
00266                     break;
00267                  case bm::COUNT_A:
00268                     if (blk)
00269                     {
00270                         dmd.result += 
00271                             bit_block_calc_count(blk, 
00272                                                  blk + bm::set_block_size);
00273                     }
00274                     break;
00275                  case bm::COUNT_B:
00276                     if (g2)
00277                         dmd.result += gap_bit_count(g2);
00278                     break;
00279                  } // switch
00280                                      
00281              } // for it
00282              
00283              return;
00284          }
00285      }
00286 
00287      // --------------------------------------------
00288      //
00289      // Here we combine two plain bitblocks 
00290 
00291 
00292      for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
00293      {
00294          distance_metric_descriptor& dmd = *it;
00295         bit_operation_count_func_type gfunc = 
00296             operation_functions<true>::bit_operation_count(dmd.metric);
00297         if (gfunc)
00298         {
00299             dmd.result += (*gfunc)(blk, blk + bm::set_block_size, arg_blk);
00300         }
00301         else
00302         {
00303             switch (dmd.metric)
00304             {
00305             case bm::COUNT_A:
00306                 if (blk)
00307                     dmd.result += bit_block_calc_count(blk, blk + bm::set_block_size);
00308                 break;
00309             case bm::COUNT_B:
00310                 if (arg_blk)
00311                     dmd.result += 
00312                         bit_block_calc_count(arg_blk, 
00313                                              arg_blk + bm::set_block_size);
00314                 break;
00315             default:
00316                 BM_ASSERT(0);
00317             } // switch
00318         }
00319 
00320      } // for it
00321 }
00322 
00328 inline
00329 unsigned combine_count_and_operation_with_block(const bm::word_t* blk,
00330                                                 const bm::word_t* arg_blk)
00331 {
00332     unsigned gap = BM_IS_GAP(blk);
00333     unsigned arg_gap = BM_IS_GAP(arg_blk);
00334     if (gap) // first block GAP-type
00335     {
00336         if (arg_gap)  // both blocks GAP-type
00337         {
00338             return gap_count_and(BMGAP_PTR(blk), BMGAP_PTR(arg_blk));
00339         }
00340         else // first block - GAP, argument - BITSET
00341         {
00342             return gap_bitset_and_count(arg_blk, BMGAP_PTR(blk));
00343         }
00344     } 
00345     else // first block is BITSET-type
00346     {     
00347         if (arg_gap) // second argument block is GAP-type
00348         {
00349             return gap_bitset_and_count(blk, BMGAP_PTR(arg_blk));
00350         }
00351     }
00352 
00353     // --------------------------------------------
00354     // Here we combine two plain bitblocks 
00355 
00356     return bit_operation_and_count(blk, blk + (bm::set_block_size), arg_blk);
00357 }
00358 
00359 
00365 inline
00366 void combine_any_operation_with_block(const bm::word_t* blk,
00367                                       unsigned gap,
00368                                       const bm::word_t* arg_blk,
00369                                       int arg_gap,
00370                                       distance_metric_descriptor* dmit,
00371                                       distance_metric_descriptor* dmit_end)
00372                                             
00373 {
00374      gap_word_t* res=0;
00375      
00376      gap_word_t* g1 = BMGAP_PTR(blk);
00377      gap_word_t* g2 = BMGAP_PTR(arg_blk);
00378      
00379      if (gap) // first block GAP-type
00380      {
00381          if (arg_gap)  // both blocks GAP-type
00382          {
00383              gap_word_t tmp_buf[bm::gap_max_buff_len * 3]; // temporary result
00384              
00385              for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00386              {
00387                  distance_metric_descriptor& dmd = *it;
00388                  if (dmd.result)
00389                  {
00390                      continue;
00391                  }
00392                  res = 0;
00393                  unsigned dsize = 0;
00394                  switch (dmd.metric)
00395                  {
00396                  case bm::COUNT_AND:
00397                      dmd.result += gap_operation_any_and(g1, g2);
00398                      break;
00399                  case bm::COUNT_OR:
00400                      res = gap_operation_or(g1, g2, tmp_buf, dsize);
00401                      break;
00402                  case bm::COUNT_SUB_AB:
00403                      dmd.result += gap_operation_any_sub(g1, g2); 
00404                      break;
00405                  case bm::COUNT_SUB_BA:
00406                      dmd.result += gap_operation_any_sub(g2, g1); 
00407                      break;
00408                  case bm::COUNT_XOR:
00409                     dmd.result += gap_operation_any_xor(g1, g2); 
00410                     break;
00411                  case bm::COUNT_A:
00412                     res = g1;
00413                     break;
00414                  case bm::COUNT_B:
00415                     res = g2;
00416                     break;
00417                  } // switch
00418                 if (res)
00419                     dmd.result += !gap_is_all_zero(res, bm::gap_max_bits);
00420                                      
00421              } // for it
00422              
00423              return;
00424 
00425          }
00426          else // first block - GAP, argument - BITSET
00427          {
00428              for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00429              {
00430                  distance_metric_descriptor& dmd = *it;
00431                  if (dmd.result)
00432                  {
00433                      continue;
00434                  }
00435                  
00436                  switch (dmd.metric)
00437                  {
00438                  case bm::COUNT_AND:
00439                      if (arg_blk)
00440                         dmd.result += gap_bitset_and_any(arg_blk, g1);
00441                      break;
00442                  case bm::COUNT_OR:
00443                      if (!arg_blk)
00444                         dmd.result += !gap_is_all_zero(g1, bm::gap_max_bits);
00445                      else
00446                         dmd.result += gap_bitset_or_any(arg_blk, g1); 
00447                      break;
00448                  case bm::COUNT_SUB_AB:
00449                      {
00450                      bm::word_t  BM_ALIGN16 temp_blk[bm::set_block_size] BM_ALIGN16ATTR;
00451                      gap_convert_to_bitset((bm::word_t*) temp_blk, g1);
00452                      dmd.result += 
00453                        bit_operation_sub_any((bm::word_t*)temp_blk, 
00454                           ((bm::word_t*)temp_blk) + bm::set_block_size,
00455                            arg_blk);
00456                      }
00457                      break;
00458                  case bm::COUNT_SUB_BA:
00459                      dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
00460                      combine_any_operation_with_block(arg_blk,
00461                                                       arg_gap,
00462                                                       blk,
00463                                                       gap,
00464                                                       it, it+1);
00465                      dmd.metric = bm::COUNT_SUB_BA; // restore status quo
00466                      break;
00467                  case bm::COUNT_XOR:
00468                      if (!arg_blk)
00469                         dmd.result += !gap_is_all_zero(g1, bm::gap_max_bits);
00470                      else
00471                         dmd.result += gap_bitset_xor_any(arg_blk, g1);
00472                      break;
00473                  case bm::COUNT_A:
00474                     if (g1)
00475                         dmd.result += !gap_is_all_zero(g1, bm::gap_max_bits);
00476                     break;
00477                  case bm::COUNT_B:
00478                     if (arg_blk)
00479                     {
00480                         dmd.result += 
00481                           !bit_is_all_zero((bm::wordop_t*)arg_blk, 
00482                                            (bm::wordop_t*)(arg_blk + bm::set_block_size));
00483                     }
00484                     break;
00485                  } // switch
00486                                      
00487              } // for it
00488              
00489              return;
00490          
00491          }
00492      } 
00493      else // first block is BITSET-type
00494      {     
00495          if (arg_gap) // second argument block is GAP-type
00496          {
00497              for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
00498              {
00499                  distance_metric_descriptor& dmd = *it;
00500                  if (dmd.result)
00501                  {
00502                      continue;
00503                  }
00504                  
00505                  switch (dmd.metric)
00506                  {
00507                  case bm::COUNT_AND:
00508                      if (blk) 
00509                         dmd.result += gap_bitset_and_any(blk, g2);                         
00510                      break;
00511                  case bm::COUNT_OR:
00512                      if (!blk)
00513                         dmd.result += !gap_is_all_zero(g2, bm::gap_max_bits);
00514                      else
00515                         dmd.result += gap_bitset_or_any(blk, g2);
00516                      break;
00517                  case bm::COUNT_SUB_AB:
00518                      if (blk)
00519                         dmd.result += gap_bitset_sub_any(blk, g2);
00520                      break;
00521                  case bm::COUNT_SUB_BA:
00522                      dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
00523                      combine_any_operation_with_block(arg_blk,
00524                                                       arg_gap,
00525                                                       blk,
00526                                                       gap,
00527                                                       it, it+1);
00528                      dmd.metric = bm::COUNT_SUB_BA; // restore status quo
00529                      break;
00530                  case bm::COUNT_XOR:
00531                      if (!blk)
00532                         dmd.result += !gap_is_all_zero(g2, bm::gap_max_bits);
00533                      else
00534                         dmd.result += gap_bitset_xor_any(blk, g2); 
00535                     break;
00536                  case bm::COUNT_A:
00537                     if (blk)
00538                     {
00539                         dmd.result+=
00540                             !bit_is_all_zero((bm::wordop_t*)blk, 
00541                                               (bm::wordop_t*)blk + bm::set_block_size);
00542                     }
00543                     break;
00544                  case bm::COUNT_B:
00545                     if (g2)
00546                         dmd.result += !gap_is_all_zero(g2, bm::gap_max_bits);
00547                     break;
00548                  } // switch
00549                                      
00550              } // for it
00551              
00552              return;
00553          }
00554      }
00555 
00556      // --------------------------------------------
00557      //
00558      // Here we combine two plain bitblocks 
00559 
00560      const bm::word_t* blk_end;
00561      const bm::word_t* arg_end;
00562 
00563      blk_end = blk + (bm::set_block_size);
00564      arg_end = arg_blk + (bm::set_block_size);
00565 
00566      for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
00567      {
00568         distance_metric_descriptor& dmd = *it;
00569         if (dmd.result)
00570         {
00571             continue;
00572         }
00573 
00574         switch (dmd.metric)
00575         {
00576         case bm::COUNT_AND:
00577             dmd.result += 
00578             bit_operation_and_any(blk, blk_end, arg_blk);
00579             break;
00580         case bm::COUNT_OR:
00581             dmd.result += 
00582             bit_operation_or_any(blk, blk_end, arg_blk);
00583             break;
00584         case bm::COUNT_SUB_AB:
00585             dmd.result += 
00586             bit_operation_sub_any(blk, blk_end, arg_blk);
00587             break;
00588         case bm::COUNT_SUB_BA:
00589             dmd.result += 
00590             bit_operation_sub_any(arg_blk, arg_end, blk);
00591             break;
00592         case bm::COUNT_XOR:
00593             dmd.result += 
00594             bit_operation_xor_any(blk, blk_end, arg_blk);
00595             break;
00596         case bm::COUNT_A:
00597             if (blk)
00598                 dmd.result += !bit_is_all_zero((bm::wordop_t*)blk, 
00599                                                (bm::wordop_t*)blk_end);
00600             break;
00601         case bm::COUNT_B:
00602             if (arg_blk)
00603                 dmd.result += !bit_is_all_zero((bm::wordop_t*)arg_blk, 
00604                                                (bm::wordop_t*)arg_end);
00605             break;
00606         } // switch
00607 
00608      } // for it
00609 }
00610 
00611 
00612 
00618 inline
00619 unsigned combine_count_operation_with_block(const bm::word_t* blk,
00620                                             const bm::word_t* arg_blk,
00621                                             distance_metric metric)
00622 {
00623     distance_metric_descriptor dmd(metric);
00624     combine_count_operation_with_block(blk, //gap, 
00625                                        arg_blk, //arg_gap, 
00626                                        //temp_blk,
00627                                        &dmd, &dmd+1);
00628     return dmd.result;
00629 }
00630 
00631 
00637 inline
00638 unsigned combine_any_operation_with_block(const bm::word_t* blk,
00639                                           unsigned gap,
00640                                           const bm::word_t* arg_blk,
00641                                           int arg_gap,
00642                                           distance_metric metric)
00643 {
00644     distance_metric_descriptor dmd(metric);
00645     combine_any_operation_with_block(blk, gap, 
00646                                      arg_blk, arg_gap, 
00647                                      &dmd, &dmd+1);
00648     return dmd.result;
00649 }
00650 
00658 inline
00659 void distance_stage(const distance_metric_descriptor* dmit,
00660                     const distance_metric_descriptor* dmit_end,
00661                     bool*                             is_all_and)
00662 {
00663     for (const distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
00664     {
00665         if (it->metric != bm::COUNT_AND)
00666         {
00667             *is_all_and = false;
00668         } 
00669     } // for
00670 }
00671 
00692 template<class BV>
00693 void distance_operation(const BV& bv1, 
00694                         const BV& bv2, 
00695                         distance_metric_descriptor* dmit,
00696                         distance_metric_descriptor* dmit_end)
00697 {
00698     const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
00699     const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
00700 
00701     bool is_all_and = true; // flag is distance operation is just COUNT_AND
00702     distance_stage(dmit, dmit_end, &is_all_and);
00703 
00704     bm::word_t*** blk_root = bman1.get_rootblock();
00705     unsigned block_idx = 0;
00706     unsigned i, j;
00707     
00708     const bm::word_t* blk;
00709     const bm::word_t* arg_blk;
00710 
00711     BM_SET_MMX_GUARD
00712 
00713     unsigned effective_top_block_size = bman1.effective_top_block_size();
00714     unsigned ebs2 = bman2.effective_top_block_size();
00715     if (ebs2 > effective_top_block_size)
00716         effective_top_block_size = ebs2;
00717 
00718     for (i = 0; i < effective_top_block_size; ++i)
00719     {
00720         bm::word_t** blk_blk = blk_root[i];
00721 
00722         if (blk_blk == 0) // not allocated
00723         {
00724             // AND operation requested - we can skip this portion here 
00725             if (is_all_and)
00726             {
00727                 block_idx += bm::set_array_size;
00728                 continue;
00729             }
00730             const bm::word_t* const* bvbb = bman2.get_topblock(i);
00731             if (bvbb == 0) 
00732             {
00733                 block_idx += bm::set_array_size;
00734                 continue;
00735             }
00736 
00737             blk = 0;
00738             for (j = 0; j < bm::set_array_size; ++j,++block_idx)
00739             {                
00740                 arg_blk = bman2.get_block(i, j);
00741                 if (!arg_blk) 
00742                     continue;
00743                 combine_count_operation_with_block(blk, 
00744                                                    arg_blk, 
00745                                                    dmit, dmit_end);
00746             } // for j
00747             continue;
00748         }
00749 
00750         for (j = 0; j < bm::set_array_size; ++j, ++block_idx)
00751         {
00752             blk = blk_blk[j];
00753             if (blk == 0 && is_all_and)
00754                 continue;
00755 
00756             arg_blk = bman2.get_block(i, j);
00757 
00758             if (blk == 0 && arg_blk == 0)
00759                 continue;
00760                 
00761             combine_count_operation_with_block(blk, 
00762                                                arg_blk, 
00763                                                dmit, dmit_end);
00764             
00765 
00766         } // for j
00767 
00768     } // for i
00769 }
00770 
00771 
00782 template<class BV>
00783 unsigned distance_and_operation(const BV& bv1, 
00784                                 const BV& bv2)
00785 {
00786     const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
00787     const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
00788 
00789     bm::word_t*** blk_root = bman1.get_rootblock();
00790     bm::word_t*** blk_root_arg = bman2.get_rootblock();
00791 
00792     unsigned count = 0;
00793 
00794     BM_SET_MMX_GUARD
00795 
00796     unsigned effective_top_block_size = 
00797         bm::min_value(bman1.effective_top_block_size(), 
00798                       bman2.effective_top_block_size());
00799 
00800     for (unsigned i = 0; i < effective_top_block_size; ++i)
00801     {
00802         bm::word_t** blk_blk;
00803         bm::word_t** blk_blk_arg;
00804         if ((blk_blk = blk_root[i]) == 0 || (blk_blk_arg= blk_root_arg[i]) == 0)
00805         {
00806             continue;
00807         }
00808         for (unsigned j = 0; j < bm::set_array_size; j+=4)
00809         {
00810             (blk_blk[j] && blk_blk_arg[j]) ? 
00811                 count += combine_count_and_operation_with_block(blk_blk[j],blk_blk_arg[j])
00812                 :0;
00813             (blk_blk[j+1] && blk_blk_arg[j+1]) ? 
00814                 count += combine_count_and_operation_with_block(blk_blk[j+1],blk_blk_arg[j+1])
00815                 :0;
00816             (blk_blk[j+2] && blk_blk_arg[j+2]) ? 
00817                 count += combine_count_and_operation_with_block(blk_blk[j+2],blk_blk_arg[j+2])
00818                 :0;
00819             (blk_blk[j+3] && blk_blk_arg[j+3]) ? 
00820                 count += combine_count_and_operation_with_block(blk_blk[j+3],blk_blk_arg[j+3])
00821                 :0;
00822         } // for j
00823 
00824     } // for i
00825     return count;
00826 }
00827 
00828 
00829 
00830 
00852 template<class BV>
00853 void distance_operation_any(const BV& bv1, 
00854                             const BV& bv2, 
00855                             distance_metric_descriptor* dmit,
00856                             distance_metric_descriptor* dmit_end)
00857 {
00858     const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
00859     const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
00860     
00861     bool is_all_and = true; // flag is distance operation is just COUNT_AND
00862     //bm::word_t* temp_blk = 
00863     distance_stage(dmit, dmit_end, &is_all_and);
00864   
00865     bm::word_t*** blk_root = bman1.get_rootblock();
00866     unsigned block_idx = 0;
00867     unsigned i, j;
00868     
00869     const bm::word_t* blk;
00870     const bm::word_t* arg_blk;
00871     bool  blk_gap;
00872     bool  arg_gap;
00873 
00874     BM_SET_MMX_GUARD
00875 
00876     unsigned effective_top_block_size = bman1.effective_top_block_size();
00877     unsigned ebs2 = bman2.effective_top_block_size();
00878     if (ebs2 > effective_top_block_size)
00879         effective_top_block_size = ebs2;
00880 
00881     for (i = 0; i < effective_top_block_size; ++i)
00882     {
00883         bm::word_t** blk_blk = blk_root[i];
00884 
00885         if (blk_blk == 0) // not allocated
00886         {
00887             // AND operation requested - we can skip this portion here 
00888             if (is_all_and)
00889             {
00890                 block_idx += bm::set_array_size;
00891                 continue;
00892             }
00893 
00894             const bm::word_t* const* bvbb = bman2.get_topblock(i);
00895             if (bvbb == 0) 
00896             {
00897                 block_idx += bm::set_array_size;
00898                 continue;
00899             }
00900 
00901             blk = 0;
00902             blk_gap = false;
00903 
00904             for (j = 0; j < bm::set_array_size; ++j,++block_idx)
00905             {                
00906                 arg_blk = bman2.get_block(i, j);
00907                 arg_gap = BM_IS_GAP(arg_blk);
00908 
00909                 if (!arg_blk) 
00910                     continue;
00911                 combine_any_operation_with_block(blk, blk_gap,
00912                                                  arg_blk, arg_gap,
00913                                                  dmit, dmit_end);
00914 
00915                 // check if all distance requests alredy resolved
00916                 bool all_resolved = false;
00917                 distance_metric_descriptor* it=dmit;
00918                 do
00919                 {
00920                     if (!it->result)
00921                     {
00922                         all_resolved = false;
00923                         break;
00924                     }
00925                     ++it;
00926                 } while (it < dmit_end);
00927                 if (all_resolved)
00928                     return;
00929             } // for j
00930 
00931             continue;
00932         }
00933 
00934         for (j = 0; j < bm::set_array_size; ++j, ++block_idx)
00935         {
00936             blk = blk_blk[j];
00937             if (blk == 0 && is_all_and)
00938                 continue;
00939 
00940             arg_blk = bman2.get_block(i, j);
00941 
00942             if (blk == 0 && arg_blk == 0)
00943                 continue;
00944                 
00945             arg_gap = BM_IS_GAP(arg_blk);
00946             blk_gap = BM_IS_GAP(blk);
00947             
00948             combine_any_operation_with_block(blk, blk_gap,
00949                                              arg_blk, arg_gap,
00950                                              dmit, dmit_end);
00951             
00952             // check if all distance requests alredy resolved
00953             bool all_resolved = false;
00954             distance_metric_descriptor* it=dmit;
00955             do
00956             {
00957                 if (!it->result)
00958                 {
00959                     all_resolved = false;
00960                     break;
00961                 }
00962                 ++it;
00963             } while (it < dmit_end);
00964             if (all_resolved)
00965                 return;
00966 
00967         } // for j
00968 
00969     } // for i
00970 }
00971 
00972 
00973 
00981 template<class BV>
00982 bm::id_t count_and(const BV& bv1, const BV& bv2)
00983 {
00984     return distance_and_operation(bv1, bv2);
00985 }
00986 
00994 template<class BV>
00995 bm::id_t any_and(const BV& bv1, const BV& bv2)
00996 {
00997     distance_metric_descriptor dmd(bm::COUNT_AND);
00998     
00999     distance_operation_any(bv1, bv2, &dmd, &dmd+1);
01000     return dmd.result;
01001 }
01002 
01003 
01004 
01012 template<class BV>
01013 bm::id_t count_xor(const BV& bv1, const BV& bv2)
01014 {
01015     distance_metric_descriptor dmd(bm::COUNT_XOR);
01016     
01017     distance_operation(bv1, bv2, &dmd, &dmd+1);
01018     return dmd.result;
01019 }
01020 
01028 template<class BV>
01029 bm::id_t any_xor(const BV& bv1, const BV& bv2)
01030 {
01031     distance_metric_descriptor dmd(bm::COUNT_XOR);
01032     
01033     distance_operation_any(bv1, bv2, &dmd, &dmd+1);
01034     return dmd.result;
01035 }
01036 
01037 
01038 
01046 template<class BV>
01047 bm::id_t count_sub(const BV& bv1, const BV& bv2)
01048 {
01049     distance_metric_descriptor dmd(bm::COUNT_SUB_AB);
01050     
01051     distance_operation(bv1, bv2, &dmd, &dmd+1);
01052     return dmd.result;
01053 }
01054 
01055 
01063 template<class BV>
01064 bm::id_t any_sub(const BV& bv1, const BV& bv2)
01065 {
01066     distance_metric_descriptor dmd(bm::COUNT_SUB_AB);
01067     
01068     distance_operation_any(bv1, bv2, &dmd, &dmd+1);
01069     return dmd.result;
01070 }
01071 
01072 
01080 template<class BV>
01081 bm::id_t count_or(const BV& bv1, const BV& bv2)
01082 {
01083     distance_metric_descriptor dmd(bm::COUNT_OR);
01084     
01085     distance_operation(bv1, bv2, &dmd, &dmd+1);
01086     return dmd.result;
01087 }
01088 
01096 template<class BV>
01097 bm::id_t any_or(const BV& bv1, const BV& bv2)
01098 {
01099     distance_metric_descriptor dmd(bm::COUNT_OR);
01100     
01101     distance_operation_any(bv1, bv2, &dmd, &dmd+1);
01102     return dmd.result;
01103 }
01104 
01105 
01110 template<class It>
01111 It block_range_scan(It  first, It last, unsigned nblock, unsigned* max_id)
01112 {
01113     It right;
01114     for (right = first; right != last; ++right)
01115     {
01116         unsigned v = unsigned(*right);
01117         BM_ASSERT(v < bm::id_max);
01118         if (v >= *max_id)
01119             *max_id = v;
01120         unsigned nb = v >> bm::set_block_shift;
01121         if (nb != nblock)
01122             break;
01123     }
01124     return right;
01125 }
01126 
01140 template<class BV, class It>
01141 void combine_or(BV& bv, It  first, It last)
01142 {
01143     typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
01144     unsigned max_id = 0;
01145 
01146     while (first < last)
01147     {
01148         unsigned nblock = unsigned((*first) >> bm::set_block_shift);     
01149         It right = block_range_scan(first, last, nblock, &max_id);
01150 
01151         if (max_id >= bv.size())
01152         {
01153             BM_ASSERT(max_id < bm::id_max);
01154             bv.resize(max_id + 1);
01155         }
01156 
01157         // now we have one in-block array of bits to set
01158         
01159         label1:
01160         
01161         int block_type;
01162         bm::word_t* blk = 
01163             bman.check_allocate_block(nblock, 
01164                                       true, 
01165                                       bv.get_new_blocks_strat(), 
01166                                       &block_type);
01167         if (!blk) 
01168             continue;
01169                         
01170         if (block_type == 1) // gap
01171         {            
01172             bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
01173             unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
01174             
01175             for (; first < right; ++first)
01176             {
01177                 unsigned is_set;
01178                 unsigned nbit   = (*first) & bm::set_block_mask; 
01179                 
01180                 unsigned new_block_len =
01181                     gap_set_value(true, gap_blk, nbit, &is_set);
01182                 if (new_block_len > threshold) 
01183                 {
01184                     bman.extend_gap_block(nblock, gap_blk);
01185                     ++first;
01186                     goto label1;  // block pointer changed - goto reset
01187                 }
01188             }
01189         }
01190         else // bit block
01191         {
01192             for (; first < right; ++first)
01193             {
01194                 unsigned nbit   = unsigned(*first & bm::set_block_mask); 
01195                 unsigned nword  = unsigned(nbit >> bm::set_word_shift); 
01196                 nbit &= bm::set_word_mask;
01197                 blk[nword] |= (bm::word_t)1 << nbit;
01198             } // for
01199         }
01200     } // while
01201     
01202     bv.forget_count();
01203 }
01204 
01205 
01219 template<class BV, class It>
01220 void combine_xor(BV& bv, It  first, It last)
01221 {
01222     typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
01223     unsigned max_id = 0;
01224 
01225     while (first < last)
01226     {
01227         unsigned nblock = unsigned((*first) >> bm::set_block_shift);     
01228         It right = block_range_scan(first, last, nblock, &max_id);
01229 
01230         if (max_id >= bv.size())
01231         {
01232             BM_ASSERT(max_id < bm::id_max);
01233             bv.resize(max_id + 1);
01234         }
01235 
01236         // now we have one in-block array of bits to set
01237         
01238         label1:
01239         
01240         int block_type;
01241         bm::word_t* blk = 
01242             bman.check_allocate_block(nblock, 
01243                                       true, 
01244                                       bv.get_new_blocks_strat(), 
01245                                       &block_type,
01246                                       false /* no null return */);
01247         BM_ASSERT(blk); 
01248                         
01249         if (block_type == 1) // gap
01250         {
01251             bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
01252             unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
01253             
01254             for (; first < right; ++first)
01255             {
01256                 unsigned is_set;
01257                 unsigned nbit   = (*first) & bm::set_block_mask; 
01258                 
01259                 is_set = gap_test(gap_blk, nbit);
01260                 BM_ASSERT(is_set <= 1);
01261                 is_set ^= 1; 
01262                 
01263                 unsigned new_block_len =
01264                     gap_set_value(is_set, gap_blk, nbit, &is_set);
01265                 if (new_block_len > threshold) 
01266                 {
01267                     bman.extend_gap_block(nblock, gap_blk);
01268                     ++first;
01269                     goto label1;  // block pointer changed - goto reset
01270                 }
01271             }
01272         }
01273         else // bit block
01274         {
01275             for (; first < right; ++first)
01276             {
01277                 unsigned nbit   = unsigned(*first & bm::set_block_mask); 
01278                 unsigned nword  = unsigned(nbit >> bm::set_word_shift); 
01279                 nbit &= bm::set_word_mask;
01280                 blk[nword] ^= (bm::word_t)1 << nbit;
01281             } // for
01282         }
01283     } // while
01284     
01285     bv.forget_count();
01286 }
01287 
01288 
01289 
01303 template<class BV, class It>
01304 void combine_sub(BV& bv, It  first, It last)
01305 {
01306     typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
01307     unsigned max_id = 0;
01308 
01309     while (first < last)
01310     {
01311         unsigned nblock = unsigned((*first) >> bm::set_block_shift);     
01312         It right = block_range_scan(first, last, nblock, &max_id);
01313 
01314         if (max_id >= bv.size())
01315         {
01316             BM_ASSERT(max_id < bm::id_max);
01317             bv.resize(max_id + 1);
01318         }
01319 
01320         // now we have one in-block array of bits to set
01321         
01322         label1:
01323         
01324         int block_type;
01325         bm::word_t* blk = 
01326             bman.check_allocate_block(nblock, 
01327                                       false, 
01328                                       bv.get_new_blocks_strat(), 
01329                                       &block_type);
01330 
01331         if (!blk)
01332             continue;
01333                         
01334         if (block_type == 1) // gap
01335         {
01336             bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
01337             unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
01338             
01339             for (; first < right; ++first)
01340             {
01341                 unsigned is_set;
01342                 unsigned nbit   = (*first) & bm::set_block_mask; 
01343                 
01344                 is_set = gap_test(gap_blk, nbit);
01345                 if (!is_set)
01346                     continue;
01347                 
01348                 unsigned new_block_len =
01349                     gap_set_value(false, gap_blk, nbit, &is_set);
01350                 if (new_block_len > threshold) 
01351                 {
01352                     bman.extend_gap_block(nblock, gap_blk);
01353                     ++first;
01354                     goto label1;  // block pointer changed - goto reset
01355                 }
01356             }
01357         }
01358         else // bit block
01359         {
01360             for (; first < right; ++first)
01361             {
01362                 unsigned nbit   = unsigned(*first & bm::set_block_mask); 
01363                 unsigned nword  = unsigned(nbit >> bm::set_word_shift); 
01364                 nbit &= bm::set_word_mask;
01365                 blk[nword] &= ~((bm::word_t)1 << nbit);
01366             } // for
01367         }
01368     } // while
01369     
01370     bv.forget_count();
01371 }
01372 
01385 template<class BV, class It>
01386 void combine_and_sorted(BV& bv, It  first, It last)
01387 {
01388     bm::id_t prev = 0;
01389     for ( ;first < last; ++first)
01390     {
01391         bm::id_t id = *first;
01392         BM_ASSERT(id >= prev); // make sure it's sorted
01393         bv.set_bit_and(id, true);
01394         if (++prev < id) 
01395         {
01396             bv.set_range(prev, id-1, false);
01397         }
01398         prev = id;
01399     }
01400 }
01401 
01402 
01417 template<class BV, class It>
01418 void combine_and(BV& bv, It  first, It last)
01419 {
01420     BV bv_tmp;
01421     combine_or(bv_tmp, first, last);
01422     bv &= bv_tmp;
01423 }
01424 
01440 template<class BV>
01441 bm::id_t count_intervals(const BV& bv)
01442 {
01443     const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
01444 
01445     bm::word_t*** blk_root = bman.get_rootblock();
01446     typename BV::blocks_manager_type::block_count_change_func func(bman);
01447     for_each_block(blk_root, bman.top_block_size(), func);
01448 
01449     return func.count();        
01450 }
01451 
01466 template<class BV, class It>
01467 void export_array(BV& bv, It first, It last)
01468 {
01469     typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
01470     unsigned inp_word_size = sizeof(*first);
01471     size_t array_size = last - first;
01472     size_t bit_cnt = array_size * inp_word_size * 8;
01473     int block_type;
01474     bm::word_t tmp;
01475     unsigned b1, b2, b3, b4;
01476 
01477     if (bit_cnt >= bv.size())
01478         bv.resize((bm::id_t)bit_cnt + 1);
01479     else 
01480         bv.set_range((bm::id_t)bit_cnt, bv.size() - 1, false);
01481     
01482     switch (inp_word_size)
01483     {
01484     case 1:
01485         {
01486             size_t word_cnt = array_size / 4;
01487             for (unsigned i = 0; i < bm::set_total_blocks; ++i)
01488             {
01489                 bm::word_t* blk = 
01490                     bman.check_allocate_block(i, 
01491                                               false, 
01492                                               BM_BIT, 
01493                                               &block_type,
01494                                               false /*no NULL ret*/);
01495                 if (block_type == 1) // gap
01496                 {
01497                     blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
01498                 }
01499                 
01500                 bm::word_t* wrd_ptr = blk;
01501                 if (word_cnt >= bm::set_block_size) {
01502                     bm::word_t* wrd_end = blk + bm::set_block_size;
01503                     do {
01504                         b1 = *first++; b2 = *first++;
01505                         b3 = *first++; b4 = *first++;
01506                         tmp = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);
01507                         *wrd_ptr++ = tmp;
01508                     } while (wrd_ptr < wrd_end);
01509                     word_cnt -= bm::set_block_size;
01510                 } 
01511                 else 
01512                 {
01513                     size_t to_convert = last - first;
01514                     for (size_t j = 0; j < to_convert / 4; ++j)
01515                     {
01516                         b1 = *first++; b2 = *first++;
01517                         b3 = *first++; b4 = *first++;
01518                         tmp = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);
01519                         *wrd_ptr++ = tmp;
01520                     }
01521                     while (1)
01522                     {
01523                         if (first == last) break;
01524                         *wrd_ptr = *first++;
01525                         if (first == last) break;
01526                         *wrd_ptr |= (*first++) << 8;
01527                         if (first == last) break;
01528                         *wrd_ptr |= (*first++) << 16;
01529                         if (first == last) break;
01530                         *wrd_ptr |= (*first++) << 24;
01531                         ++wrd_ptr;
01532                     }
01533                 }
01534                 if (first == last) break;
01535             } // for
01536         }
01537         break;
01538     case 2:
01539         {
01540             size_t word_cnt = array_size / 2;
01541             for (unsigned i = 0; i < bm::set_total_blocks; ++i)
01542             {
01543                 bm::word_t* blk = 
01544                     bman.check_allocate_block(i, 
01545                                               false, 
01546                                               BM_BIT, 
01547                                               &block_type,
01548                                               false /*no NULL ret*/);
01549                 if (block_type == 1) // gap
01550                 {
01551                     blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
01552                 }
01553                 
01554                 bm::word_t* wrd_ptr = blk;
01555                 if (word_cnt >= bm::set_block_size) {
01556                     bm::word_t* wrd_end = blk + bm::set_block_size;
01557                     do {
01558                         b1 = *first++; b2 = *first++;
01559                         tmp = b1 | (b2 << 16);
01560                         *wrd_ptr++ = tmp;
01561                     } while (wrd_ptr < wrd_end);
01562                     word_cnt -= bm::set_block_size;
01563                 } 
01564                 else 
01565                 {
01566                     size_t to_convert = last - first;
01567                     for (unsigned j = 0; j < to_convert / 2; ++j)
01568                     {
01569                         b1 = *first++; b2 = *first++;
01570                         tmp = b1 | (b2 << 16);
01571                         *wrd_ptr++ = tmp;
01572                     }
01573                     while (1)
01574                     {
01575                         if (first == last) break;
01576                         *wrd_ptr = *first++;
01577                         if (first == last) break;
01578                         *wrd_ptr |= (*first++) << 16;
01579                         ++wrd_ptr;
01580                     }
01581                 }
01582                 if (first == last) break;
01583             } // for
01584         }
01585         break;
01586     case 4:
01587         {
01588             size_t word_cnt = array_size;
01589             for (unsigned i = 0; i < bm::set_total_blocks; ++i)
01590             {
01591                 bm::word_t* blk = 
01592                     bman.check_allocate_block(i, 
01593                                               false, 
01594                                               BM_BIT, 
01595                                               &block_type,
01596                                               false /*no NULL ret*/);
01597                 if (block_type == 1) // gap
01598                 {
01599                     blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
01600                 }
01601                 
01602                 bm::word_t* wrd_ptr = blk;
01603                 if (word_cnt >= bm::set_block_size) {
01604                     bm::word_t* wrd_end = blk + bm::set_block_size;
01605                     do {
01606                         *wrd_ptr++ = *first++;
01607                     } while (wrd_ptr < wrd_end);
01608                     word_cnt -= bm::set_block_size;
01609                 } 
01610                 else 
01611                 {
01612                     while (1)
01613                     {
01614                         if (first == last) break;
01615                         *wrd_ptr = *first++;
01616                         ++wrd_ptr;
01617                     }
01618                 }
01619                 if (first == last) break;
01620             } // for
01621         }
01622         break;
01623     default:
01624         BM_ASSERT(0);
01625     } // switch
01626 
01627 }
01628 
01629 } // namespace bm
01630 
01631 #ifdef _MSC_VER
01632 #pragma warning( pop )
01633 #endif
01634 
01635 #endif