M4RIE  0.20120415
 All Data Structures Files Functions Variables Defines
mzed.h
Go to the documentation of this file.
00001 
00026 #ifndef M4RIE_MZED_H
00027 #define M4RIE_MZED_H
00028 
00029 /******************************************************************************
00030 *
00031 *            M4RIE: Linear Algebra over GF(2^e)
00032 *
00033 *    Copyright (C) 2010,2011 Martin Albrecht <martinralbrecht@googlemail.com>
00034 *
00035 *  Distributed under the terms of the GNU General Public License (GEL)
00036 *  version 2 or higher.
00037 *
00038 *    This code is distributed in the hope that it will be useful,
00039 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00040 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00041 *    General Public License for more details.
00042 *
00043 *  The full text of the GPL is available at:
00044 *
00045 *                  http://www.gnu.org/licenses/
00046 ******************************************************************************/
00047 
00048 #include <m4ri/m4ri.h>
00049 #include "gf2e.h"
00050 #include "m4ri_functions.h"
00051 
00058 typedef struct {
00059   mzd_t *x; 
00060   const gf2e *finite_field; 
00061   rci_t nrows; 
00062   rci_t ncols; 
00063   wi_t w;   
00064 } mzed_t;
00065 
00066 
00079 mzed_t *mzed_init(const gf2e *ff, const rci_t m, const rci_t n);
00080 
00089 void mzed_free(mzed_t *A);
00090 
00091 
00110 static inline mzed_t *mzed_concat(mzed_t *C, const mzed_t *A, const mzed_t *B) {
00111   if(C==NULL)
00112     C = mzed_init(A->finite_field, A->nrows, A->ncols + B->ncols);
00113   mzd_concat(C->x, A->x, B->x);
00114   return C;
00115 }
00116 
00134 static inline mzed_t *mzed_stack(mzed_t *C, const mzed_t *A, const mzed_t *B) {
00135   if(C==NULL)
00136     C = mzed_init(A->finite_field, A->nrows + B->nrows, A->ncols);
00137   mzd_stack(C->x, A->x, B->x);
00138   return C;
00139 }
00140 
00141 
00156 static inline mzed_t *mzed_submatrix(mzed_t *S, const mzed_t *M, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {
00157   if(S==NULL)
00158     S = mzed_init(M->finite_field, highr - lowr, highc - lowc);
00159 
00160   mzd_submatrix(S->x, M->x, lowr, lowc*M->w, highr, highc*M->w);
00161   return S;
00162 }
00163 
00186 static inline mzed_t *mzed_init_window(const mzed_t *A, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {
00187   mzed_t *B = (mzed_t *)m4ri_mm_malloc(sizeof(mzed_t));
00188   B->finite_field = A->finite_field;
00189   B->w = gf2e_degree_to_w(A->finite_field);
00190   B->nrows = highr - lowr;
00191   B->ncols = highc - lowc;
00192   B->x = mzd_init_window(A->x, lowr, B->w*lowc, highr, B->w*highc);
00193   return B;
00194 }
00195 
00204 static inline void mzed_free_window(mzed_t *A) {
00205   mzd_free_window(A->x);
00206   m4ri_mm_free(A);
00207 }
00208 
00222 mzed_t *mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);
00223 
00234 mzed_t *_mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);
00235 
00246 #define mzed_sub mzed_add
00247 
00258 #define _mzed_sub _mzed_add
00259 
00270 mzed_t *mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);
00271 
00282 mzed_t *mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);
00283 
00294 mzed_t *_mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);
00295 
00306 mzed_t *_mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);
00307 
00308 
00322 mzed_t *mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
00323 
00337 mzed_t *mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
00338 
00349 mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
00350 
00361 mzed_t *mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B);
00362 
00373 mzed_t *_mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear);
00374 
00385 void mzed_randomize(mzed_t *A);
00386 
00396 mzed_t *mzed_copy(mzed_t *B, const mzed_t *A);
00397 
00410 void mzed_set_ui(mzed_t *A, word value);
00411 
00412 
00423 static inline word mzed_read_elem(const mzed_t *A, const rci_t row, const rci_t col) {
00424   return __mzd_read_bits(A->x, row, A->w*col, A->w);
00425 }
00426 
00438 static inline void mzed_add_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
00439   __mzd_xor_bits(A->x, row, A->w*col, A->w, elem);
00440 }
00441 
00453 static inline void mzed_write_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
00454   __mzd_clear_bits(A->x, row, A->w*col, A->w);
00455   __mzd_xor_bits(A->x, row, A->w*col, A->w, elem);
00456 }
00457 
00471 static inline int mzed_cmp(mzed_t *A, mzed_t *B) {
00472   return mzd_cmp(A->x,B->x);
00473 }
00474 
00475 
00483 static inline int mzed_is_zero(const mzed_t *A) {
00484   return mzd_is_zero(A->x);
00485 }
00486 
00500 void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word *X, rci_t start_col);
00501 
00514 static inline void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, rci_t start_col) {
00515   assert(A->ncols == B->ncols && A->finite_field == B->finite_field);
00516   assert(A->x->offset == B->x->offset);
00517   assert(start_col < A->ncols);
00518 
00519   const rci_t start = A->x->offset + A->w*start_col;
00520   const wi_t startblock = start/m4ri_radix;
00521   const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
00522   const word bitmask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols) % m4ri_radix);
00523 
00524   word *_a = A->x->rows[ar];
00525   const word *_b = B->x->rows[br];
00526   wi_t j;
00527 
00528   if (A->x->width - startblock > 1) {
00529     _a[startblock] ^= _b[startblock] & bitmask_begin;
00530     for(j=startblock+1; j<A->x->width-1; j++)
00531       _a[j] ^= _b[j];
00532     _a[j] ^= _b[j] & bitmask_end;
00533   } else {
00534     _a[startblock] ^= _b[startblock] & (bitmask_begin & bitmask_end);
00535   }
00536 }
00537 
00549 static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word *X) {
00550   assert(start_col < A->ncols);
00551 
00552   const rci_t start = A->x->offset + A->w*start_col;
00553   const wi_t startblock = start/m4ri_radix;
00554   word *_a = A->x->rows[r];
00555   const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
00556   const word bitmask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols) % m4ri_radix);
00557   register word __a = _a[startblock]>>(start%m4ri_radix);
00558   register word __t = 0;
00559   int j;
00560 
00561   if(A->w == 2) {
00562     switch( (start/2) % 32 ) {
00563     case  0:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<0;   __a >>= 2;
00564     case  1:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<2;   __a >>= 2;
00565     case  2:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<4;   __a >>= 2;
00566     case  3:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<6;   __a >>= 2;
00567     case  4:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<8;   __a >>= 2;
00568     case  5:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<10;  __a >>= 2;
00569     case  6:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<12;  __a >>= 2;
00570     case  7:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<14;  __a >>= 2;
00571     case  8:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<16;  __a >>= 2;
00572     case  9:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<18;  __a >>= 2;
00573     case 10:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<20;  __a >>= 2;
00574     case 11:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<22;  __a >>= 2;
00575     case 12:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<24;  __a >>= 2;
00576     case 13:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<26;  __a >>= 2;
00577     case 14:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<28;  __a >>= 2;
00578     case 15:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<30;  __a >>= 2;
00579     case 16:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<32;  __a >>= 2;
00580     case 17:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<34;  __a >>= 2;
00581     case 18:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<36;  __a >>= 2;
00582     case 19:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<38;  __a >>= 2;
00583     case 20:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<40;  __a >>= 2;
00584     case 21:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<42;  __a >>= 2;
00585     case 22:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<44;  __a >>= 2;
00586     case 23:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<46;  __a >>= 2;
00587     case 24:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<48;  __a >>= 2;
00588     case 25:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<50;  __a >>= 2;
00589     case 26:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<52;  __a >>= 2;
00590     case 27:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<54;  __a >>= 2;
00591     case 28:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<56;  __a >>= 2;
00592     case 29:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<58;  __a >>= 2;
00593     case 30:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<60;  __a >>= 2;
00594     case 31:  __t ^= (X[((__a)& 0x0000000000000003ULL)])<<62;  break;
00595     default: m4ri_die("impossible");
00596     }
00597     if(A->x->width-startblock == 1) {
00598       _a[startblock] &= ~(bitmask_begin & bitmask_end);
00599       _a[startblock] ^= __t & bitmask_begin & bitmask_end;
00600       return;
00601     } else {
00602       _a[startblock] &= ~bitmask_begin;
00603       _a[startblock] ^= __t & bitmask_begin;
00604     }
00605 
00606     for(j=startblock+1; j<A->x->width -1; j++) {
00607       __a = _a[j], __t = 0;
00608       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<0;   __a >>= 2;
00609       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<2;   __a >>= 2;
00610       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<4;   __a >>= 2;
00611       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<6;   __a >>= 2;
00612       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<8;   __a >>= 2;
00613       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<10;  __a >>= 2;
00614       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<12;  __a >>= 2;
00615       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<14;  __a >>= 2;
00616       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<16;  __a >>= 2;
00617       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<18;  __a >>= 2;
00618       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<20;  __a >>= 2;
00619       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<22;  __a >>= 2;
00620       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<24;  __a >>= 2;
00621       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<26;  __a >>= 2;
00622       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<28;  __a >>= 2;
00623       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<30;  __a >>= 2;
00624       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<32;  __a >>= 2;
00625       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<34;  __a >>= 2;
00626       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<36;  __a >>= 2;
00627       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<38;  __a >>= 2;
00628       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<40;  __a >>= 2;
00629       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<42;  __a >>= 2;
00630       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<44;  __a >>= 2;
00631       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<46;  __a >>= 2;
00632       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<48;  __a >>= 2;
00633       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<50;  __a >>= 2;
00634       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<52;  __a >>= 2;
00635       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<54;  __a >>= 2;
00636       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<56;  __a >>= 2;
00637       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<58;  __a >>= 2;
00638       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<60;  __a >>= 2;
00639       __t ^= (X[((__a)& 0x0000000000000003ULL)])<<62;
00640       _a[j] = __t;
00641     }
00642 
00643     __t = _a[j] & ~bitmask_end;
00644     switch((A->x->offset+A->x->ncols) % m4ri_radix) {
00645     case  0: __t ^= ((word)X[(int)((_a[j] & 0xC000000000000000ULL)>>62)])<<62;
00646     case 62: __t ^= ((word)X[(int)((_a[j] & 0x3000000000000000ULL)>>60)])<<60;
00647     case 60: __t ^= ((word)X[(int)((_a[j] & 0x0C00000000000000ULL)>>58)])<<58;
00648     case 58: __t ^= ((word)X[(int)((_a[j] & 0x0300000000000000ULL)>>56)])<<56;
00649     case 56: __t ^= ((word)X[(int)((_a[j] & 0x00C0000000000000ULL)>>54)])<<54;
00650     case 54: __t ^= ((word)X[(int)((_a[j] & 0x0030000000000000ULL)>>52)])<<52;
00651     case 52: __t ^= ((word)X[(int)((_a[j] & 0x000C000000000000ULL)>>50)])<<50;
00652     case 50: __t ^= ((word)X[(int)((_a[j] & 0x0003000000000000ULL)>>48)])<<48;
00653     case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000C00000000000ULL)>>46)])<<46;
00654     case 46: __t ^= ((word)X[(int)((_a[j] & 0x0000300000000000ULL)>>44)])<<44;
00655     case 44: __t ^= ((word)X[(int)((_a[j] & 0x00000C0000000000ULL)>>42)])<<42;
00656     case 42: __t ^= ((word)X[(int)((_a[j] & 0x0000030000000000ULL)>>40)])<<40;
00657     case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000C000000000ULL)>>38)])<<38;
00658     case 38: __t ^= ((word)X[(int)((_a[j] & 0x0000003000000000ULL)>>36)])<<36;
00659     case 36: __t ^= ((word)X[(int)((_a[j] & 0x0000000C00000000ULL)>>34)])<<34;
00660     case 34: __t ^= ((word)X[(int)((_a[j] & 0x0000000300000000ULL)>>32)])<<32;
00661     case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000C0000000ULL)>>30)])<<30;
00662     case 30: __t ^= ((word)X[(int)((_a[j] & 0x0000000030000000ULL)>>28)])<<28;
00663     case 28: __t ^= ((word)X[(int)((_a[j] & 0x000000000C000000ULL)>>26)])<<26;
00664     case 26: __t ^= ((word)X[(int)((_a[j] & 0x0000000003000000ULL)>>24)])<<24;
00665     case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000C00000ULL)>>22)])<<22;
00666     case 22: __t ^= ((word)X[(int)((_a[j] & 0x0000000000300000ULL)>>20)])<<20;
00667     case 20: __t ^= ((word)X[(int)((_a[j] & 0x00000000000C0000ULL)>>18)])<<18;
00668     case 18: __t ^= ((word)X[(int)((_a[j] & 0x0000000000030000ULL)>>16)])<<16;
00669     case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000C000ULL)>>14)])<<14;
00670     case 14: __t ^= ((word)X[(int)((_a[j] & 0x0000000000003000ULL)>>12)])<<12;
00671     case 12: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000C00ULL)>>10)])<<10;
00672     case 10: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000300ULL)>> 8)])<< 8;
00673     case  8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000C0ULL)>> 6)])<< 6;
00674     case  6: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000030ULL)>> 4)])<< 4;
00675     case  4: __t ^= ((word)X[(int)((_a[j] & 0x000000000000000CULL)>> 2)])<< 2;
00676     case  2: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000003ULL)>> 0)])<< 0;
00677     };
00678     _a[j] = __t;
00679 
00680   } else if(A->w == 4) {
00681     switch( (start/4)%16 ) {
00682     case  0: __t ^= (X[((__a)& 0x000000000000000FULL)])<<0;   __a >>= 4;
00683     case  1: __t ^= (X[((__a)& 0x000000000000000FULL)])<<4;   __a >>= 4;
00684     case  2: __t ^= (X[((__a)& 0x000000000000000FULL)])<<8;   __a >>= 4;
00685     case  3: __t ^= (X[((__a)& 0x000000000000000FULL)])<<12;  __a >>= 4;
00686     case  4: __t ^= (X[((__a)& 0x000000000000000FULL)])<<16;  __a >>= 4;
00687     case  5: __t ^= (X[((__a)& 0x000000000000000FULL)])<<20;  __a >>= 4;
00688     case  6: __t ^= (X[((__a)& 0x000000000000000FULL)])<<24;  __a >>= 4;
00689     case  7: __t ^= (X[((__a)& 0x000000000000000FULL)])<<28;  __a >>= 4;
00690     case  8: __t ^= (X[((__a)& 0x000000000000000FULL)])<<32;  __a >>= 4;
00691     case  9: __t ^= (X[((__a)& 0x000000000000000FULL)])<<36;  __a >>= 4;
00692     case 10: __t ^= (X[((__a)& 0x000000000000000FULL)])<<40;  __a >>= 4;
00693     case 11: __t ^= (X[((__a)& 0x000000000000000FULL)])<<44;  __a >>= 4;
00694     case 12: __t ^= (X[((__a)& 0x000000000000000FULL)])<<48;  __a >>= 4;
00695     case 13: __t ^= (X[((__a)& 0x000000000000000FULL)])<<52;  __a >>= 4;
00696     case 14: __t ^= (X[((__a)& 0x000000000000000FULL)])<<56;  __a >>= 4;
00697     case 15: __t ^= (X[((__a)& 0x000000000000000FULL)])<<60;  break;
00698     default: m4ri_die("impossible");
00699     }
00700     if(A->x->width-startblock == 1) {
00701       _a[startblock] &= ~(bitmask_begin & bitmask_end);
00702       _a[startblock] ^= __t & bitmask_begin & bitmask_end;
00703       return;
00704     } else {
00705       _a[startblock] &= ~bitmask_begin;
00706       _a[startblock] ^= __t & bitmask_begin;
00707     }
00708 
00709     for(j=startblock+1; j<A->x->width -1; j++) {
00710       __a = _a[j], __t = 0;
00711       __t ^= (X[((__a)& 0x000000000000000FULL)])<<0;   __a >>= 4;
00712       __t ^= (X[((__a)& 0x000000000000000FULL)])<<4;   __a >>= 4;
00713       __t ^= (X[((__a)& 0x000000000000000FULL)])<<8;   __a >>= 4;
00714       __t ^= (X[((__a)& 0x000000000000000FULL)])<<12;  __a >>= 4;
00715       __t ^= (X[((__a)& 0x000000000000000FULL)])<<16;  __a >>= 4;
00716       __t ^= (X[((__a)& 0x000000000000000FULL)])<<20;  __a >>= 4;
00717       __t ^= (X[((__a)& 0x000000000000000FULL)])<<24;  __a >>= 4;
00718       __t ^= (X[((__a)& 0x000000000000000FULL)])<<28;  __a >>= 4;
00719       __t ^= (X[((__a)& 0x000000000000000FULL)])<<32;  __a >>= 4;
00720       __t ^= (X[((__a)& 0x000000000000000FULL)])<<36;  __a >>= 4;
00721       __t ^= (X[((__a)& 0x000000000000000FULL)])<<40;  __a >>= 4;
00722       __t ^= (X[((__a)& 0x000000000000000FULL)])<<44;  __a >>= 4;
00723       __t ^= (X[((__a)& 0x000000000000000FULL)])<<48;  __a >>= 4;
00724       __t ^= (X[((__a)& 0x000000000000000FULL)])<<52;  __a >>= 4;
00725       __t ^= (X[((__a)& 0x000000000000000FULL)])<<56;  __a >>= 4;
00726       __t ^= (X[((__a)& 0x000000000000000FULL)])<<60;
00727       _a[j] = __t;
00728     }
00729 
00730     __t = _a[j] & ~bitmask_end;
00731     switch( (A->x->offset + A->x->ncols) % m4ri_radix) {
00732     case  0: __t ^= ((word)X[(int)((_a[j] & 0xF000000000000000ULL)>>60)])<<60;
00733     case 60: __t ^= ((word)X[(int)((_a[j] & 0x0F00000000000000ULL)>>56)])<<56;
00734     case 56: __t ^= ((word)X[(int)((_a[j] & 0x00F0000000000000ULL)>>52)])<<52;
00735     case 52: __t ^= ((word)X[(int)((_a[j] & 0x000F000000000000ULL)>>48)])<<48;
00736     case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000F00000000000ULL)>>44)])<<44;
00737     case 44: __t ^= ((word)X[(int)((_a[j] & 0x00000F0000000000ULL)>>40)])<<40;
00738     case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000F000000000ULL)>>36)])<<36;
00739     case 36: __t ^= ((word)X[(int)((_a[j] & 0x0000000F00000000ULL)>>32)])<<32;
00740     case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000F0000000ULL)>>28)])<<28;
00741     case 28: __t ^= ((word)X[(int)((_a[j] & 0x000000000F000000ULL)>>24)])<<24;
00742     case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000F00000ULL)>>20)])<<20;
00743     case 20: __t ^= ((word)X[(int)((_a[j] & 0x00000000000F0000ULL)>>16)])<<16;
00744     case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000F000ULL)>>12)])<<12;
00745     case 12: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000F00ULL)>> 8)])<< 8;
00746     case  8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000F0ULL)>> 4)])<< 4;
00747     case  4: __t ^= ((word)X[(int)((_a[j] & 0x000000000000000FULL)>> 0)])<< 0;
00748     };
00749     _a[j] = __t;
00750 
00751   } else if (A->w == 8) {
00752 
00753     register word __a0 = _a[startblock]>>(start%m4ri_radix);
00754     register word __a1;
00755     register word __t0 = 0;
00756     register word __t1;
00757 
00758     switch( (start/8) %8 ) {
00759     case 0: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0;  __a0 >>= 8;
00760     case 1: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8;  __a0 >>= 8;
00761     case 2: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
00762     case 3: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
00763     case 4: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
00764     case 5: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
00765     case 6: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
00766     case 7: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56; break;
00767     default: m4ri_die("impossible");
00768     }
00769     if(A->x->width-startblock == 1) {
00770       _a[startblock] &= ~(bitmask_begin & bitmask_end);
00771       _a[startblock] ^= __t0 & bitmask_begin & bitmask_end;
00772       return;
00773     } else {
00774       _a[startblock] &= ~bitmask_begin;
00775       _a[startblock] ^= __t0 & bitmask_begin;
00776     }
00777 
00778     for(j=startblock+1; j+2 < A->x->width; j+=2) {
00779       __a0 = _a[j], __t0 = 0;
00780       __a1 = _a[j+1], __t1 = 0;
00781       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0;  __a0 >>= 8;
00782       __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<0;  __a1 >>= 8;
00783       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8;  __a0 >>= 8;
00784       __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<8;  __a1 >>= 8;
00785       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
00786       __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<16; __a1 >>= 8;
00787       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
00788       __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<24; __a1 >>= 8;
00789       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
00790       __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<32; __a1 >>= 8;
00791       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
00792       __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<40; __a1 >>= 8;
00793       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
00794       __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<48; __a1 >>= 8;
00795       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56; __a0 >>= 8;
00796       __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<56;
00797       _a[j] = __t0;
00798       _a[j+1] = __t1;
00799     }
00800 
00801     for(; j < A->x->width-1; j++) {
00802       __a0 = _a[j], __t0 = 0;
00803       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0;  __a0 >>= 8;
00804       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8;  __a0 >>= 8;
00805       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8;
00806       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8;
00807       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8;
00808       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8;
00809       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8;
00810       __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56;
00811       _a[j] = __t0;
00812     }
00813 
00814     __t = _a[j] & ~bitmask_end;
00815     switch( (A->x->offset + A->x->ncols) % m4ri_radix ) {
00816     case  0: __t ^= ((word)X[(int)((_a[j] & 0xFF00000000000000ULL)>>56)])<<56;
00817     case 56: __t ^= ((word)X[(int)((_a[j] & 0x00FF000000000000ULL)>>48)])<<48;
00818     case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000FF0000000000ULL)>>40)])<<40;
00819     case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000FF00000000ULL)>>32)])<<32;
00820     case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000FF000000ULL)>>24)])<<24;
00821     case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000FF0000ULL)>>16)])<<16;
00822     case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000FF00ULL)>> 8)])<< 8;
00823     case  8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000FFULL)>> 0)])<< 0;
00824     };
00825     _a[j] = __t;
00826 
00827   } else if (A->w == 16) {
00828     switch( (start/16) %4 ) {
00829     case 0: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0;  __a >>= 16;
00830     case 1: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
00831     case 2: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
00832     case 3: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48; break;
00833     default: m4ri_die("impossible");
00834     }
00835     if(A->x->width-startblock == 1) {
00836       _a[startblock] &= ~(bitmask_begin & bitmask_end);
00837       _a[startblock] ^= __t & bitmask_begin & bitmask_end;
00838       return;
00839     } else {
00840       _a[startblock] &= ~bitmask_begin;
00841       _a[startblock] ^= __t & bitmask_begin;
00842     }
00843 
00844     for(j=startblock+1; j+4<A->x->width; j+=4) {
00845       __a = _a[j], __t = 0;
00846       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0;  __a >>= 16;
00847       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
00848       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
00849       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
00850       _a[j] = __t;
00851 
00852       __a = _a[j+1], __t = 0;
00853       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0;  __a >>= 16;
00854       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
00855       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
00856       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
00857       _a[j+1] = __t;
00858 
00859 
00860       __a = _a[j+2], __t = 0;
00861       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0;  __a >>= 16;
00862       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
00863       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
00864       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
00865       _a[j+2] = __t;
00866 
00867       __a = _a[j+3], __t = 0;
00868       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0;  __a >>= 16;
00869       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
00870       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
00871       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
00872       _a[j+3] = __t;
00873     }
00874     for( ; j<A->x->width-1; j++) {
00875       __a = _a[j], __t = 0;
00876       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0;  __a >>= 16;
00877       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16;
00878       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16;
00879       __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48;
00880       _a[j] = __t;
00881     }
00882 
00883     __t = _a[j] & ~bitmask_end;
00884     switch( (A->x->offset + A->x->ncols) % m4ri_radix) {
00885     case  0: __t ^= ((word)X[(int)((_a[j] & 0xFFFF000000000000ULL)>>48)])<<48;
00886     case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000FFFF00000000ULL)>>32)])<<32;
00887     case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000FFFF0000ULL)>>16)])<<16;
00888     case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000FFFFULL)>> 0)])<< 0;
00889     };
00890     _a[j] = __t;
00891 
00892   } else {
00893     for(rci_t j=start_col; j<A->ncols; j++) {
00894       mzed_write_elem(A, r, j, X[mzed_read_elem(A, r, j)]);
00895     }
00896   }
00897 }
00898 
00909 static inline void mzed_row_swap(mzed_t *M, const rci_t rowa, const rci_t rowb) {
00910   mzd_row_swap(M->x, rowa, rowb);
00911 }
00912 
00927 static inline void mzed_copy_row(mzed_t* B, rci_t i, const mzed_t* A, rci_t j) {
00928   mzd_copy_row(B->x, i, A->x, j);
00929 }
00930 
00941 static inline void mzed_col_swap(mzed_t *M, const rci_t cola, const rci_t colb) {
00942   for(rci_t i=0; i<M->w; i++)
00943     mzd_col_swap(M->x,M->w*cola+i, M->w*colb+i);
00944 }
00945 
00958 static inline void mzed_col_swap_in_rows(mzed_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row) {
00959   for(unsigned int e=0; e < A->finite_field->degree; e++) {
00960     mzd_col_swap_in_rows(A->x, A->w*cola+e, A->w*colb+e, start_row, stop_row);
00961   };
00962 }
00963 
00977 static inline void mzed_row_add(mzed_t *M, const rci_t sourcerow, const rci_t destrow) {
00978   mzd_row_add(M->x, sourcerow, destrow);
00979 }
00980 
00991 static inline rci_t mzed_first_zero_row(mzed_t *A) {
00992   return mzd_first_zero_row(A->x);
00993 }
00994 
00995 
01006 static inline void mzed_row_clear_offset(mzed_t *M, const rci_t row, const rci_t coloffset) {
01007   mzd_row_clear_offset(M->x, row, coloffset*M->w);
01008 }
01009 
01023 rci_t mzed_echelonize_naive(mzed_t *A, int full);
01024 
01033 void mzed_print(const mzed_t *M);
01034 
01035 #endif //M4RIE_MATRIX_H