dlvhex  2.5.0
vs10/bm/bmtrans.h
Go to the documentation of this file.
00001 #ifndef BMTRANS__H__INCLUDED__
00002 #define BMTRANS__H__INCLUDED__
00003 
00004 /*
00005 Copyright(c) 2002-2009 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
00006 
00007 Permission is hereby granted, free of charge, to any person 
00008 obtaining a copy of this software and associated documentation 
00009 files (the "Software"), to deal in the Software without restriction, 
00010 including without limitation the rights to use, copy, modify, merge, 
00011 publish, distribute, sublicense, and/or sell copies of the Software, 
00012 and to permit persons to whom the Software is furnished to do so, 
00013 subject to the following conditions:
00014 
00015 The above copyright notice and this permission notice shall be included 
00016 in all copies or substantial portions of the Software.
00017 
00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
00019 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
00020 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
00021 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
00022 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
00023 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
00024 OTHER DEALINGS IN THE SOFTWARE.
00025 
00026 For more information please visit:  http://bmagic.sourceforge.net
00027 
00028 */
00029 #ifdef _MSC_VER
00030 #pragma warning( push )
00031 #pragma warning( disable : 4311 4312 4127)
00032 #endif
00033 
00034 namespace bm
00035 {
00036 
00041 template<typename T, unsigned ROWS, unsigned COLS>
00042 struct tmatrix
00043 {
00044     typedef T value_type;
00045     
00046     T BM_ALIGN16 value[ROWS][COLS] BM_ALIGN16ATTR;
00047 
00048     enum params
00049     {
00050         n_rows = ROWS,
00051         n_columns = COLS
00052     };
00053 
00054 
00056     struct rstat
00057     {
00058         unsigned               bit_count;
00059         unsigned               gap_count;
00060         bm::set_representation best_rep;
00061     };
00062 
00063     static unsigned rows() { return ROWS; }
00064     static unsigned cols() { return COLS; }
00065 
00066     const T* row(unsigned row_idx) const { return value[row_idx]; }
00067           T* row(unsigned row_idx)       { return value[row_idx]; }
00068 };
00069 
00070 
00075 template<typename T, unsigned BPC>
00076 struct bit_grabber
00077 {
00078     static
00079     unsigned get(const T*, unsigned)
00080     {
00081         BM_ASSERT(0); return 0;
00082     }
00083 };
00084 
00085 template<>
00086 struct bit_grabber<unsigned, 32>
00087 {
00088     static
00089     unsigned get(const unsigned* arr, unsigned j)
00090     {
00091         return  (((arr[0] >> j) & 1) << 0) |
00092                 (((arr[1] >> j) & 1) << 1) |
00093                 (((arr[2] >> j) & 1) << 2) |
00094                 (((arr[3] >> j) & 1) << 3) |
00095                 (((arr[4] >> j) & 1) << 4) |
00096                 (((arr[5] >> j) & 1) << 5) |
00097                 (((arr[6] >> j) & 1) << 6) |
00098                 (((arr[7] >> j) & 1) << 7) |
00099                 (((arr[8] >> j) & 1) << 8) |
00100                 (((arr[9] >> j) & 1) << 9) |
00101                 (((arr[10]>> j) & 1) << 10)|
00102                 (((arr[11]>> j) & 1) << 11)|
00103                 (((arr[12]>> j) & 1) << 12)|
00104                 (((arr[13]>> j) & 1) << 13)|
00105                 (((arr[14]>> j) & 1) << 14)|
00106                 (((arr[15]>> j) & 1) << 15)|
00107                 (((arr[16]>> j) & 1) << 16)|
00108                 (((arr[17]>> j) & 1) << 17)|
00109                 (((arr[18]>> j) & 1) << 18)|
00110                 (((arr[19]>> j) & 1) << 19)|
00111                 (((arr[20]>> j) & 1) << 20)|
00112                 (((arr[21]>> j) & 1) << 21)|
00113                 (((arr[22]>> j) & 1) << 22)|
00114                 (((arr[23]>> j) & 1) << 23)|
00115                 (((arr[24]>> j) & 1) << 24)|
00116                 (((arr[25]>> j) & 1) << 25)|
00117                 (((arr[26]>> j) & 1) << 26)|
00118                 (((arr[27]>> j) & 1) << 27)|
00119                 (((arr[28]>> j) & 1) << 28)|
00120                 (((arr[29]>> j) & 1) << 29)|
00121                 (((arr[30]>> j) & 1) << 30)|
00122                 (((arr[31]>> j) & 1) << 31);    
00123     }
00124 };
00125 
00126 template<>
00127 struct bit_grabber<unsigned short, 16>
00128 {
00129     static
00130     unsigned get(const unsigned short* arr, unsigned j)
00131     {
00132         return  (((arr[0] >> j) & 1) << 0) |
00133                 (((arr[1] >> j) & 1) << 1) |
00134                 (((arr[2] >> j) & 1) << 2) |
00135                 (((arr[3] >> j) & 1) << 3) |
00136                 (((arr[4] >> j) & 1) << 4) |
00137                 (((arr[5] >> j) & 1) << 5) |
00138                 (((arr[6] >> j) & 1) << 6) |
00139                 (((arr[7] >> j) & 1) << 7) |
00140                 (((arr[8] >> j) & 1) << 8) |
00141                 (((arr[9] >> j) & 1) << 9) |
00142                 (((arr[10]>> j) & 1) << 10)|
00143                 (((arr[11]>> j) & 1) << 11)|
00144                 (((arr[12]>> j) & 1) << 12)|
00145                 (((arr[13]>> j) & 1) << 13)|
00146                 (((arr[14]>> j) & 1) << 14)|
00147                 (((arr[15]>> j) & 1) << 15);
00148     }
00149 };
00150 
00151 
00152 template<>
00153 struct bit_grabber<unsigned char, 8>
00154 {
00155     static
00156     unsigned get(const unsigned char* arr, unsigned j)
00157     {
00158         return  (((arr[0] >> j) & 1) << 0) |
00159                 (((arr[1] >> j) & 1) << 1) |
00160                 (((arr[2] >> j) & 1) << 2) |
00161                 (((arr[3] >> j) & 1) << 3) |
00162                 (((arr[4] >> j) & 1) << 4) |
00163                 (((arr[5] >> j) & 1) << 5) |
00164                 (((arr[6] >> j) & 1) << 6) |
00165                 (((arr[7] >> j) & 1) << 7);
00166     }
00167 };
00168 
00169 
00174 template<typename T, unsigned BPC, unsigned BPS>
00175 struct bit_trans_grabber
00176 {
00177     static
00178     T get(const T  tmatrix[BPC][BPS], unsigned i, unsigned j)
00179     {
00180         T w = 0;
00181         
00182             // Next code hopes that compiler will completely
00183             // eliminate ifs (all conditions are known at compile time)
00184             //    ( typically C++ compilers are smart to do that)
00185             
00186             // 8-bit (minimum)
00187             w |=
00188                 (((tmatrix[0][i] >> j) & 1) << 0) |
00189                 (((tmatrix[1][i] >> j) & 1) << 1) |
00190                 (((tmatrix[2][i] >> j) & 1) << 2) |
00191                 (((tmatrix[3][i] >> j) & 1) << 3) |
00192                 (((tmatrix[4][i] >> j) & 1) << 4) |
00193                 (((tmatrix[5][i] >> j) & 1) << 5) |
00194                 (((tmatrix[6][i] >> j) & 1) << 6) |
00195                 (((tmatrix[7][i] >> j) & 1) << 7);
00196                 
00197             // 16-bit
00198             if (BPC > 8) 
00199             {
00200                 w |=
00201                 (((tmatrix[8][i] >> j) & 1) << 8) |
00202                 (((tmatrix[9][i] >> j) & 1) << 9) |
00203                 (((tmatrix[10][i] >> j) & 1) << 10) |
00204                 (((tmatrix[11][i] >> j) & 1) << 11) |
00205                 (((tmatrix[12][i] >> j) & 1) << 12) |
00206                 (((tmatrix[13][i] >> j) & 1) << 13) |
00207                 (((tmatrix[14][i] >> j) & 1) << 14) |
00208                 (((tmatrix[15][i] >> j) & 1) << 15);
00209             }
00210             
00211             // 32-bit
00212             if (BPC > 16)
00213             {
00214                 w |= 
00215                 (((tmatrix[16][i] >> j) & 1) << 16) |                
00216                 (((tmatrix[17][i] >> j) & 1) << 17) |
00217                 (((tmatrix[18][i] >> j) & 1) << 18) |
00218                 (((tmatrix[19][i] >> j) & 1) << 19) |
00219                 (((tmatrix[20][i] >> j) & 1) << 20) |
00220                 (((tmatrix[21][i] >> j) & 1) << 21) |
00221                 (((tmatrix[22][i] >> j) & 1) << 22) |
00222                 (((tmatrix[23][i] >> j) & 1) << 23) |
00223                 (((tmatrix[24][i] >> j) & 1) << 24) |
00224                 (((tmatrix[25][i] >> j) & 1) << 25) |
00225                 (((tmatrix[26][i] >> j) & 1) << 26) |
00226                 (((tmatrix[27][i] >> j) & 1) << 27) |
00227                 (((tmatrix[28][i] >> j) & 1) << 28) |
00228                 (((tmatrix[29][i] >> j) & 1) << 29) |
00229                 (((tmatrix[30][i] >> j) & 1) << 30) |
00230                 (((tmatrix[31][i] >> j) & 1) << 31); 
00231             }   
00232         return w;
00233     }
00234 };
00235 
00236 /*
00237 template<>
00238 struct bit_trans_grabber<unsigned, 32, bm::set_block_plain_size>
00239 {
00240     static
00241     unsigned get(const unsigned tmatrix[32][bm::set_block_plain_size], unsigned i, unsigned j)
00242     {
00243         return
00244                 (((tmatrix[0][i] >> j) & 1) << 0) |
00245                 (((tmatrix[1][i] >> j) & 1) << 1) |
00246                 (((tmatrix[2][i] >> j) & 1) << 2) |
00247                 (((tmatrix[3][i] >> j) & 1) << 3) |
00248                 (((tmatrix[4][i] >> j) & 1) << 4) |
00249                 (((tmatrix[5][i] >> j) & 1) << 5) |
00250                 (((tmatrix[6][i] >> j) & 1) << 6) |
00251                 (((tmatrix[7][i] >> j) & 1) << 7) |
00252                 (((tmatrix[8][i] >> j) & 1) << 8) |
00253                 (((tmatrix[9][i] >> j) & 1) << 9) |
00254                 (((tmatrix[10][i] >> j) & 1) << 10) |
00255                 (((tmatrix[11][i] >> j) & 1) << 11) |
00256                 (((tmatrix[12][i] >> j) & 1) << 12) |
00257                 (((tmatrix[13][i] >> j) & 1) << 13) |
00258                 (((tmatrix[14][i] >> j) & 1) << 14) |
00259                 (((tmatrix[15][i] >> j) & 1) << 15) |
00260                 (((tmatrix[16][i] >> j) & 1) << 16) |
00261                 (((tmatrix[17][i] >> j) & 1) << 17) |
00262                 (((tmatrix[18][i] >> j) & 1) << 18) |
00263                 (((tmatrix[19][i] >> j) & 1) << 19) |
00264                 (((tmatrix[20][i] >> j) & 1) << 20) |
00265                 (((tmatrix[21][i] >> j) & 1) << 21) |
00266                 (((tmatrix[22][i] >> j) & 1) << 22) |
00267                 (((tmatrix[23][i] >> j) & 1) << 23) |
00268                 (((tmatrix[24][i] >> j) & 1) << 24) |
00269                 (((tmatrix[25][i] >> j) & 1) << 25) |
00270                 (((tmatrix[26][i] >> j) & 1) << 26) |
00271                 (((tmatrix[27][i] >> j) & 1) << 27) |
00272                 (((tmatrix[28][i] >> j) & 1) << 28) |
00273                 (((tmatrix[29][i] >> j) & 1) << 29) |
00274                 (((tmatrix[30][i] >> j) & 1) << 30) |
00275                 (((tmatrix[31][i] >> j) & 1) << 31);    
00276     }
00277 };
00278 */
00279 
00280 
00281 
00293 template<typename T, unsigned BPC, unsigned BPS>
00294 void vect_bit_transpose(const T* arr, 
00295                         unsigned arr_size,
00296                         T        tmatrix[BPC][BPS])
00297 {
00298     BM_ASSERT(sizeof(T)*8 == BPC);
00299 
00300     unsigned col = 0;
00301     for (unsigned i = 0; i < arr_size; 
00302                          i+=BPC, arr+=BPC, 
00303                          ++col)
00304     {
00305         for (unsigned j = 0; j < BPC; ++j)
00306         {
00307             unsigned w = 
00308                 bm::bit_grabber<T, BPC>::get(arr, j);
00309             T* row = tmatrix[j];
00310             row[col] = (T)w;
00311         } // for j
00312     } // for i
00313 }
00314 
00315 
00326 template<typename T, unsigned BPC, unsigned BPS>
00327 void vect_bit_trestore(const T  tmatrix[BPC][BPS], 
00328                              T* arr)
00329 {
00330     unsigned col = 0;
00331     for (unsigned i = 0; i < BPS; ++i)
00332     {
00333         for (unsigned j = 0; j < BPC; ++j, ++col) 
00334         {
00335             arr[col] = 
00336                 bm::bit_trans_grabber<T, BPC, BPS>::get(tmatrix, i, j);
00337         } // for j
00338     } // for i    
00339 }
00340 
00341 
00342 
00351 template<typename T, unsigned BPC, unsigned BPS>
00352 void tmatrix_distance(const T  tmatrix[BPC][BPS], 
00353                       unsigned distance[BPC][BPC])
00354 {                      
00355     for (unsigned i = 0; i < BPC; ++i)
00356     {
00357         const T* r1 = tmatrix[i];
00358         const T* r1_end = r1 + BPS;
00359         distance[i][i] = 
00360             bm::bit_block_calc_count((bm::word_t*)r1, (bm::word_t*)r1_end);
00361 
00362         for (unsigned j = i + 1; j < BPC; ++j)
00363         {
00364             r1 = tmatrix[i];
00365             r1_end = r1 + BPS;
00366             unsigned count = 0;
00367 
00368             {
00369                 const T* r2 = tmatrix[i];
00370                 const T* r2_end = r2 + BPS;
00371                 const bm::word_t* r3 = (bm::word_t*)(tmatrix[j]);
00372                 do {
00373                     BM_INCWORD_BITCOUNT(count, r2[0] ^ r3[0]);
00374                     BM_INCWORD_BITCOUNT(count, r2[1] ^ r3[1]);
00375                     BM_INCWORD_BITCOUNT(count, r2[2] ^ r3[2]);
00376                     BM_INCWORD_BITCOUNT(count, r2[3] ^ r3[3]);
00377                     r2 += 4;
00378                     r3 += 4;
00379                 } while (r2 < r2_end);
00380             }
00381             distance[i][j] = count;
00382         } // for j
00383     } // for i
00384 }
00385 
00386 
00387 
00388 const unsigned char ibpc_uncompr = 0; 
00389 const unsigned char ibpc_all_zero= 1; 
00390 const unsigned char ibpc_all_one = 2; 
00391 const unsigned char ibpc_equiv   = 3; 
00392 const unsigned char ibpc_close   = 4; 
00393 
00394 const unsigned char ibpc_end = 8; 
00395 
00396 
00417 template<typename T, unsigned BPC, unsigned BPS>
00418 void bit_iblock_make_pcv(
00419       const unsigned  distance[BPC][BPC],
00420       unsigned char*  pc_vector)
00421 {
00422     BM_ASSERT(pc_vector);
00423 
00424     for (unsigned i = 0; i < BPC; ++i)
00425     {
00426         unsigned char pc = ibpc_uncompr; 
00427         unsigned row_bitcount = distance[i][i];
00428         
00429         const unsigned total_possible_max = sizeof(T)*8*BPS;
00430         switch (row_bitcount)
00431         {
00432         case 0:
00433             pc_vector[i] = ibpc_all_zero; 
00434             continue;
00435         case total_possible_max:
00436             pc_vector[i] = ibpc_all_one; 
00437             continue;
00438         }
00439         
00440         // Dense-populated set, leave it as is
00441         if (row_bitcount >  total_possible_max/2)
00442         {
00443             pc_vector[i] = ibpc_uncompr;
00444             continue;
00445         }
00446         
00447         // scan for the closest neighbor
00448         //
00449         unsigned rmin = ~0u;
00450         unsigned rmin_idx = 0;
00451         for (unsigned j = i + 1; j < BPC; ++j)
00452         {
00453             unsigned d = distance[i][j];
00454             if (d < rmin) // new minimum - closest plain
00455             {
00456                 if (d == 0) // plain is complete duplicate of j
00457                 {
00458                     pc = (unsigned char)(ibpc_equiv | (j << 3));
00459                     break;
00460                 }
00461                 rmin = d; rmin_idx = j;
00462             }
00463         } // for j
00464         
00465         if ((pc == 0) && rmin_idx && (rmin < row_bitcount)) // neighbor found
00466         {
00467             pc = (unsigned char)(ibpc_close | (rmin_idx << 3));
00468         }
00469         pc_vector[i] = pc;
00470     } // for i
00471 }
00472 
00473 
00477 inline
00478 void bit_iblock_pcv_stat(const unsigned char* BMRESTRICT pc_vector,
00479                          const unsigned char* BMRESTRICT pc_vector_end,
00480                          unsigned* BMRESTRICT pc_vector_stat
00481                         )
00482 {
00483     BM_ASSERT(pc_vector_stat);
00484     // pc_vector_stat MUST be assigned to 0 before 
00485     do 
00486     {
00487         unsigned ibpc = *pc_vector & 7;
00488         ++(pc_vector_stat[ibpc]);
00489     } while (++pc_vector < pc_vector_end);
00490 }
00491 
00492 
00493 
00497 inline
00498 void bit_iblock_reduce(
00499     const unsigned  tmatrix[bm::set_block_plain_cnt][bm::set_block_plain_size],
00500     const unsigned char* BMRESTRICT pc_vector,
00501     const unsigned char* BMRESTRICT pc_vector_end,
00502     unsigned  tmatrix_out[bm::set_block_plain_cnt][bm::set_block_plain_size])
00503 {
00504     ::memset(tmatrix_out, 0, sizeof(tmatrix_out));
00505     
00506     unsigned row = 0;
00507     do 
00508     {
00509         unsigned ibpc = *pc_vector & 7;
00510         unsigned n_row = *pc_vector >> 3;
00511         
00512         switch(ibpc)
00513         {
00514         case bm::ibpc_uncompr:
00515             {
00516             const unsigned* r1 = tmatrix[row];
00517             unsigned* r_out = tmatrix_out[row];
00518             for (unsigned i = 0; i < bm::set_block_plain_size; ++i)
00519             {
00520                 r_out[i] = r1[i];
00521             }
00522             }
00523             break;
00524         case bm::ibpc_all_zero:
00525             break;
00526         case bm::ibpc_all_one:
00527             break;
00528         case bm::ibpc_equiv:
00529             break;
00530         case bm::ibpc_close:
00531             {
00532             const unsigned* r1 = tmatrix[row];
00533             const unsigned* r2 = tmatrix[n_row];
00534             unsigned* r_out = tmatrix_out[row];
00535             for (unsigned i = 0; i < bm::set_block_plain_size; ++i)
00536             {
00537                 r_out[i] = r1[i] ^ r2[i];
00538             } // for
00539             }
00540             break;
00541         default:
00542             BM_ASSERT(0);
00543             break;
00544         } // switch
00545         ++row;
00546     } while (++pc_vector < pc_vector_end);
00547     
00548 }
00549 
00553 template<class TMatrix>
00554 void tmatrix_reduce(TMatrix& tmatrix, 
00555                     const unsigned char* pc_vector,
00556                     const unsigned       effective_cols)
00557 {
00558     BM_ASSERT(pc_vector);
00559 
00560     typedef typename TMatrix::value_type value_type;
00561 
00562     const unsigned char* pc_vector_end = pc_vector + tmatrix.rows();
00563     unsigned row = 0;
00564     unsigned cols = effective_cols ? effective_cols : tmatrix.cols();
00565 
00566     do
00567     {
00568         unsigned ibpc = *pc_vector & 7;        
00569         switch(ibpc)
00570         {
00571         case bm::ibpc_uncompr:
00572         case bm::ibpc_all_zero:
00573         case bm::ibpc_all_one:
00574         case bm::ibpc_equiv:
00575             break;
00576         case bm::ibpc_close:
00577             {
00578             unsigned n_row = *pc_vector >> 3;
00579             BM_ASSERT(n_row > row);
00580 
00581             value_type* r1 = tmatrix.row(row);
00582             const value_type* r2 = tmatrix.row(n_row);
00583             for (unsigned i = 0; i < cols; ++i)
00584             {
00585                 r1[i] ^= r2[i];
00586             } // for
00587             }
00588             break;
00589         default:
00590             BM_ASSERT(0);
00591             break;
00592         } // switch
00593         ++row;
00594     } while (++pc_vector < pc_vector_end);
00595 }
00596 
00600 template<class TMatrix>
00601 void tmatrix_restore(TMatrix& tmatrix, 
00602                      const unsigned char* pc_vector,
00603                      const unsigned effective_cols)
00604 {
00605     BM_ASSERT(pc_vector);
00606 
00607     typedef typename TMatrix::value_type value_type;
00608 
00609     unsigned cols = effective_cols ? effective_cols : tmatrix.cols();
00610     for (int row = tmatrix.rows()-1; row >= 0; --row)
00611     {
00612         unsigned ibpc = pc_vector[row] & 7;  
00613         int n_row = pc_vector[row] >> 3;
00614 
00615         value_type* r1 = tmatrix.row(row);
00616 
00617         switch(ibpc)
00618         {
00619         case bm::ibpc_uncompr:
00620             break;
00621         case bm::ibpc_all_zero:
00622             for (unsigned i = 0; i < cols; ++i)
00623                 r1[i] = 0;
00624              break;
00625         case bm::ibpc_all_one:
00626             for (unsigned i = 0; i < cols; ++i)
00627                 r1[i] = (value_type)(~0);
00628             break;
00629         case bm::ibpc_equiv:
00630             {
00631             BM_ASSERT(n_row > row);
00632             const value_type* r2 = tmatrix.row(n_row);
00633             for (unsigned i = 0; i < cols; ++i)
00634                 r1[i] = r2[i];
00635             }
00636             break;
00637         case bm::ibpc_close:
00638             {      
00639             BM_ASSERT(n_row > row);
00640             const value_type* r2 = tmatrix.row(n_row);
00641             for (unsigned i = 0; i < cols; ++i)
00642                 r1[i] ^= r2[i];
00643             }
00644             break;
00645         default:
00646             BM_ASSERT(0);
00647             break;
00648         } // switch
00649     }  // for
00650 
00651 }
00652 
00653 
00654 
00659 template<typename GT>//, typename BT>
00660 void gap_2_bitblock(const GT* BMRESTRICT gap_buf, 
00661                           GT* BMRESTRICT block, 
00662                           unsigned       block_size)
00663 {
00664     GT* dgap_buf = block;
00665     GT* block_end = block + block_size;
00666 
00667     GT* dgap_end = gap_2_dgap<GT>(gap_buf, dgap_buf, false);
00668 //    GT* block_end2 = (GT*) block_end;
00669     
00670     // zero the tail memory
00671     for ( ;dgap_end < block_end; ++dgap_end)
00672     {
00673         *dgap_end = 0;
00674     }
00675 }
00676 
00686 template<class TMatrix>
00687 void compute_tmatrix_rstat(const TMatrix& tmatrix, 
00688                            const unsigned char* pc_vector,
00689                            typename TMatrix::rstat* rstat,
00690                            unsigned effective_cols)
00691 {
00692     BM_ASSERT(rstat);
00693     typedef typename TMatrix::value_type value_type;
00694 
00695     unsigned cols = effective_cols ? effective_cols : tmatrix.cols();
00696     //unsigned cols = tmatrix.cols();
00697     unsigned rows = tmatrix.rows();
00698 
00699     for (unsigned i = 0; i < rows; ++i)
00700     {
00701         unsigned ibpc = pc_vector[i] & 7;        
00702         switch(ibpc)
00703         {
00704         case bm::ibpc_all_zero:
00705         case bm::ibpc_all_one:
00706         case bm::ibpc_equiv:
00707             rstat[i].bit_count = rstat[i].gap_count = 0;
00708             rstat[i].best_rep = bm::set_bitset;
00709             break;
00710         case bm::ibpc_uncompr:
00711         case bm::ibpc_close:
00712             {
00713             const value_type* r1 = tmatrix.row(i);
00714             const value_type* r1_end = r1 + cols;
00715             // TODO: find how to deal with the potentially incorrect type-cast
00716             bm::bit_count_change32((bm::word_t*)r1, (bm::word_t*)r1_end, 
00717                                     &rstat[i].bit_count, &rstat[i].gap_count);
00718 
00719             const unsigned bitset_size = sizeof(value_type) * cols;
00720             const unsigned total_possible_max_bits = sizeof(value_type)*8*cols;
00721 
00722             rstat[i].best_rep = 
00723                 bm::best_representation(rstat[i].bit_count,
00724                                         total_possible_max_bits,
00725                                         rstat[i].gap_count,
00726                                         bitset_size);
00727 
00728             }
00729             break;
00730         default:
00731             BM_ASSERT(0);
00732             break;
00733         } // switch
00734 
00735     } // for 
00736 }
00737 
00738 
00739 
00744 template<typename TM>
00745 unsigned find_effective_columns(const TM& tmatrix)
00746 {
00747     // TODO: need optimization in order not to scan the whole space
00748     unsigned col = 1;
00749     for (unsigned i = 0; i < tmatrix.rows(); ++i)
00750     {
00751         const typename TM::value_type* row = tmatrix.value[i];
00752         for (unsigned j = 0; j < tmatrix.cols(); ++j)
00753         {
00754             if (row[j] != 0 && j > col)
00755             {
00756                 col = j;
00757             }
00758         }
00759     }
00760     return col;
00761 }
00762 
00763 
00773 template<typename GT, typename BT, unsigned BLOCK_SIZE>
00774 class gap_transpose_engine
00775 {
00776 public:
00781     typedef 
00782     tmatrix<GT, sizeof(GT)*8, 
00783                 (((BLOCK_SIZE * sizeof(unsigned)) / (sizeof(GT))) / (sizeof(GT) * 8))>
00784                 tmatrix_type;
00785                 
00786     gap_transpose_engine() : eff_cols_(0)
00787     {}            
00788     
00791     void transpose(const GT* BMRESTRICT gap_buf)
00792     {
00793         const unsigned arr_size = BLOCK_SIZE * sizeof(unsigned) / sizeof(GT);
00794 
00795         BM_ASSERT(sizeof(tmatrix_.value) == tmatrix_type::n_columns * 
00796                                             tmatrix_type::n_rows * sizeof(GT));
00797         // load all GAP as D-GAP(but not head word) into aligned bit-block
00798         gap_2_bitblock(gap_buf, tmp_gap_block_, BLOCK_SIZE * 2);
00799         
00800         // transpose
00801         vect_bit_transpose<GT, tmatrix_type::n_rows, tmatrix_type::n_columns>
00802                            (tmp_gap_block_, arr_size, tmatrix_.value);
00803 
00804         // calculate number of non-zero columns
00805         eff_cols_ = find_effective_columns(tmatrix_);        
00806     }
00807 
00810     void transpose(const GT* BMRESTRICT garr,
00811                    unsigned garr_size)
00812     {
00813         BM_ASSERT(garr_size);
00814 
00815         bit_block_set(tmp_gap_block_, 0);
00816         ::memcpy(tmp_gap_block_, garr, sizeof(GT)*garr_size);
00817 
00818         const unsigned arr_size = BLOCK_SIZE * sizeof(unsigned) / sizeof(GT);
00819         BM_ASSERT(sizeof(tmatrix_.value) == tmatrix_type::n_columns * 
00820                                             tmatrix_type::n_rows * sizeof(GT));
00821         // transpose
00822         vect_bit_transpose<GT, tmatrix_type::n_rows, tmatrix_type::n_columns>
00823                            (tmp_gap_block_, arr_size, tmatrix_.value);
00824 
00825         // calculate number of non-zero columns
00826         eff_cols_ = find_effective_columns(tmatrix_);        
00827 
00828     }
00829 
00830     void compute_distance_matrix()
00831     {
00832         tmatrix_distance<typename tmatrix_type::value_type, 
00833                          tmatrix_type::n_rows, tmatrix_type::n_columns>
00834                          (tmatrix_.value, distance_);
00835 
00836         // make compression descriptor vector and statistics vector
00837         bit_iblock_make_pcv<unsigned char, 
00838                             tmatrix_type::n_rows, tmatrix_type::n_columns>
00839                             (distance_, pc_vector_);
00840 
00841         bit_iblock_pcv_stat(pc_vector_, 
00842                             pc_vector_ + tmatrix_type::n_rows, 
00843                             pc_vector_stat_);
00844     }
00845 
00846     void reduce()
00847     {
00848         tmatrix_reduce(tmatrix_, pc_vector_, eff_cols_);
00849         compute_tmatrix_rstat(tmatrix_, pc_vector_, rstat_vector_, eff_cols_);
00850     }
00851 
00852     void restore()
00853     {
00854         tmatrix_restore(tmatrix_, pc_vector_, eff_cols_);
00855     }
00856     
00857     
00860     void trestore(GT             gap_head, 
00861                   GT* BMRESTRICT gap_buf)
00862     {
00863         BM_ASSERT(sizeof(tmatrix_.value) == tmatrix_type::n_columns * 
00864                                             tmatrix_type::n_rows * sizeof(GT));
00865   
00866         // restore into a temp buffer
00867         GT* gap_tmp = tmp_gap_block_;
00868        
00869         vect_bit_trestore<GT, tmatrix_type::n_rows, tmatrix_type::n_columns>(tmatrix_.value, gap_tmp);
00870         
00871         // D-Gap to GAP block recalculation
00872         gap_tmp = tmp_gap_block_;
00873         dgap_2_gap<GT>(gap_tmp, gap_buf, gap_head);
00874     }
00875     
00876 public:
00877 //    GT            gap_head_;
00878     tmatrix_type                  tmatrix_;    
00879     unsigned                      eff_cols_;
00880     unsigned                      distance_[tmatrix_type::n_rows][tmatrix_type::n_rows];
00881     unsigned char                 pc_vector_[tmatrix_type::n_rows];
00882     unsigned                      pc_vector_stat_[bm::ibpc_end];
00883     typename tmatrix_type::rstat  rstat_vector_[tmatrix_type::n_rows];
00884 
00885     GT  BM_ALIGN16                tmp_gap_block_[BLOCK_SIZE*2] BM_ALIGN16ATTR;
00886 };
00887 
00888 
00889 } // namespace bm
00890 
00891 #ifdef _MSC_VER
00892 #pragma warning( pop )
00893 #endif
00894 
00895 
00896 #endif