M4RIE
0.20120415
|
00001 00026 #ifndef M4RIE_MZED_H 00027 #define M4RIE_MZED_H 00028 00029 /****************************************************************************** 00030 * 00031 * M4RIE: Linear Algebra over GF(2^e) 00032 * 00033 * Copyright (C) 2010,2011 Martin Albrecht <martinralbrecht@googlemail.com> 00034 * 00035 * Distributed under the terms of the GNU General Public License (GEL) 00036 * version 2 or higher. 00037 * 00038 * This code is distributed in the hope that it will be useful, 00039 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00040 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00041 * General Public License for more details. 00042 * 00043 * The full text of the GPL is available at: 00044 * 00045 * http://www.gnu.org/licenses/ 00046 ******************************************************************************/ 00047 00048 #include <m4ri/m4ri.h> 00049 #include "gf2e.h" 00050 #include "m4ri_functions.h" 00051 00058 typedef struct { 00059 mzd_t *x; 00060 const gf2e *finite_field; 00061 rci_t nrows; 00062 rci_t ncols; 00063 wi_t w; 00064 } mzed_t; 00065 00066 00079 mzed_t *mzed_init(const gf2e *ff, const rci_t m, const rci_t n); 00080 00089 void mzed_free(mzed_t *A); 00090 00091 00110 static inline mzed_t *mzed_concat(mzed_t *C, const mzed_t *A, const mzed_t *B) { 00111 if(C==NULL) 00112 C = mzed_init(A->finite_field, A->nrows, A->ncols + B->ncols); 00113 mzd_concat(C->x, A->x, B->x); 00114 return C; 00115 } 00116 00134 static inline mzed_t *mzed_stack(mzed_t *C, const mzed_t *A, const mzed_t *B) { 00135 if(C==NULL) 00136 C = mzed_init(A->finite_field, A->nrows + B->nrows, A->ncols); 00137 mzd_stack(C->x, A->x, B->x); 00138 return C; 00139 } 00140 00141 00156 static inline mzed_t *mzed_submatrix(mzed_t *S, const mzed_t *M, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) { 00157 if(S==NULL) 00158 S = mzed_init(M->finite_field, highr - lowr, highc - lowc); 00159 00160 mzd_submatrix(S->x, M->x, lowr, lowc*M->w, highr, highc*M->w); 00161 return S; 00162 } 00163 00186 static inline mzed_t *mzed_init_window(const mzed_t *A, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) { 00187 mzed_t *B = (mzed_t *)m4ri_mm_malloc(sizeof(mzed_t)); 00188 B->finite_field = A->finite_field; 00189 B->w = gf2e_degree_to_w(A->finite_field); 00190 B->nrows = highr - lowr; 00191 B->ncols = highc - lowc; 00192 B->x = mzd_init_window(A->x, lowr, B->w*lowc, highr, B->w*highc); 00193 return B; 00194 } 00195 00204 static inline void mzed_free_window(mzed_t *A) { 00205 mzd_free_window(A->x); 00206 m4ri_mm_free(A); 00207 } 00208 00222 mzed_t *mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B); 00223 00234 mzed_t *_mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B); 00235 00246 #define mzed_sub mzed_add 00247 00258 #define _mzed_sub _mzed_add 00259 00270 mzed_t *mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B); 00271 00282 mzed_t *mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B); 00283 00294 mzed_t *_mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B); 00295 00306 mzed_t *_mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B); 00307 00308 00322 mzed_t *mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B); 00323 00337 mzed_t *mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B); 00338 00349 mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B); 00350 00361 mzed_t *mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B); 00362 00373 mzed_t *_mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear); 00374 00385 void mzed_randomize(mzed_t *A); 00386 00396 mzed_t *mzed_copy(mzed_t *B, const mzed_t *A); 00397 00410 void mzed_set_ui(mzed_t *A, word value); 00411 00412 00423 static inline word mzed_read_elem(const mzed_t *A, const rci_t row, const rci_t col) { 00424 return __mzd_read_bits(A->x, row, A->w*col, A->w); 00425 } 00426 00438 static inline void mzed_add_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) { 00439 __mzd_xor_bits(A->x, row, A->w*col, A->w, elem); 00440 } 00441 00453 static inline void mzed_write_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) { 00454 __mzd_clear_bits(A->x, row, A->w*col, A->w); 00455 __mzd_xor_bits(A->x, row, A->w*col, A->w, elem); 00456 } 00457 00471 static inline int mzed_cmp(mzed_t *A, mzed_t *B) { 00472 return mzd_cmp(A->x,B->x); 00473 } 00474 00475 00483 static inline int mzed_is_zero(const mzed_t *A) { 00484 return mzd_is_zero(A->x); 00485 } 00486 00500 void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word *X, rci_t start_col); 00501 00514 static inline void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, rci_t start_col) { 00515 assert(A->ncols == B->ncols && A->finite_field == B->finite_field); 00516 assert(A->x->offset == B->x->offset); 00517 assert(start_col < A->ncols); 00518 00519 const rci_t start = A->x->offset + A->w*start_col; 00520 const wi_t startblock = start/m4ri_radix; 00521 const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix)); 00522 const word bitmask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols) % m4ri_radix); 00523 00524 word *_a = A->x->rows[ar]; 00525 const word *_b = B->x->rows[br]; 00526 wi_t j; 00527 00528 if (A->x->width - startblock > 1) { 00529 _a[startblock] ^= _b[startblock] & bitmask_begin; 00530 for(j=startblock+1; j<A->x->width-1; j++) 00531 _a[j] ^= _b[j]; 00532 _a[j] ^= _b[j] & bitmask_end; 00533 } else { 00534 _a[startblock] ^= _b[startblock] & (bitmask_begin & bitmask_end); 00535 } 00536 } 00537 00549 static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word *X) { 00550 assert(start_col < A->ncols); 00551 00552 const rci_t start = A->x->offset + A->w*start_col; 00553 const wi_t startblock = start/m4ri_radix; 00554 word *_a = A->x->rows[r]; 00555 const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix)); 00556 const word bitmask_end = __M4RI_LEFT_BITMASK((A->x->offset + A->x->ncols) % m4ri_radix); 00557 register word __a = _a[startblock]>>(start%m4ri_radix); 00558 register word __t = 0; 00559 int j; 00560 00561 if(A->w == 2) { 00562 switch( (start/2) % 32 ) { 00563 case 0: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<0; __a >>= 2; 00564 case 1: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<2; __a >>= 2; 00565 case 2: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<4; __a >>= 2; 00566 case 3: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<6; __a >>= 2; 00567 case 4: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<8; __a >>= 2; 00568 case 5: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<10; __a >>= 2; 00569 case 6: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<12; __a >>= 2; 00570 case 7: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<14; __a >>= 2; 00571 case 8: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<16; __a >>= 2; 00572 case 9: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<18; __a >>= 2; 00573 case 10: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<20; __a >>= 2; 00574 case 11: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<22; __a >>= 2; 00575 case 12: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<24; __a >>= 2; 00576 case 13: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<26; __a >>= 2; 00577 case 14: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<28; __a >>= 2; 00578 case 15: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<30; __a >>= 2; 00579 case 16: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<32; __a >>= 2; 00580 case 17: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<34; __a >>= 2; 00581 case 18: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<36; __a >>= 2; 00582 case 19: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<38; __a >>= 2; 00583 case 20: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<40; __a >>= 2; 00584 case 21: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<42; __a >>= 2; 00585 case 22: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<44; __a >>= 2; 00586 case 23: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<46; __a >>= 2; 00587 case 24: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<48; __a >>= 2; 00588 case 25: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<50; __a >>= 2; 00589 case 26: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<52; __a >>= 2; 00590 case 27: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<54; __a >>= 2; 00591 case 28: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<56; __a >>= 2; 00592 case 29: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<58; __a >>= 2; 00593 case 30: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<60; __a >>= 2; 00594 case 31: __t ^= (X[((__a)& 0x0000000000000003ULL)])<<62; break; 00595 default: m4ri_die("impossible"); 00596 } 00597 if(A->x->width-startblock == 1) { 00598 _a[startblock] &= ~(bitmask_begin & bitmask_end); 00599 _a[startblock] ^= __t & bitmask_begin & bitmask_end; 00600 return; 00601 } else { 00602 _a[startblock] &= ~bitmask_begin; 00603 _a[startblock] ^= __t & bitmask_begin; 00604 } 00605 00606 for(j=startblock+1; j<A->x->width -1; j++) { 00607 __a = _a[j], __t = 0; 00608 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<0; __a >>= 2; 00609 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<2; __a >>= 2; 00610 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<4; __a >>= 2; 00611 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<6; __a >>= 2; 00612 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<8; __a >>= 2; 00613 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<10; __a >>= 2; 00614 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<12; __a >>= 2; 00615 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<14; __a >>= 2; 00616 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<16; __a >>= 2; 00617 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<18; __a >>= 2; 00618 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<20; __a >>= 2; 00619 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<22; __a >>= 2; 00620 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<24; __a >>= 2; 00621 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<26; __a >>= 2; 00622 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<28; __a >>= 2; 00623 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<30; __a >>= 2; 00624 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<32; __a >>= 2; 00625 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<34; __a >>= 2; 00626 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<36; __a >>= 2; 00627 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<38; __a >>= 2; 00628 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<40; __a >>= 2; 00629 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<42; __a >>= 2; 00630 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<44; __a >>= 2; 00631 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<46; __a >>= 2; 00632 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<48; __a >>= 2; 00633 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<50; __a >>= 2; 00634 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<52; __a >>= 2; 00635 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<54; __a >>= 2; 00636 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<56; __a >>= 2; 00637 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<58; __a >>= 2; 00638 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<60; __a >>= 2; 00639 __t ^= (X[((__a)& 0x0000000000000003ULL)])<<62; 00640 _a[j] = __t; 00641 } 00642 00643 __t = _a[j] & ~bitmask_end; 00644 switch((A->x->offset+A->x->ncols) % m4ri_radix) { 00645 case 0: __t ^= ((word)X[(int)((_a[j] & 0xC000000000000000ULL)>>62)])<<62; 00646 case 62: __t ^= ((word)X[(int)((_a[j] & 0x3000000000000000ULL)>>60)])<<60; 00647 case 60: __t ^= ((word)X[(int)((_a[j] & 0x0C00000000000000ULL)>>58)])<<58; 00648 case 58: __t ^= ((word)X[(int)((_a[j] & 0x0300000000000000ULL)>>56)])<<56; 00649 case 56: __t ^= ((word)X[(int)((_a[j] & 0x00C0000000000000ULL)>>54)])<<54; 00650 case 54: __t ^= ((word)X[(int)((_a[j] & 0x0030000000000000ULL)>>52)])<<52; 00651 case 52: __t ^= ((word)X[(int)((_a[j] & 0x000C000000000000ULL)>>50)])<<50; 00652 case 50: __t ^= ((word)X[(int)((_a[j] & 0x0003000000000000ULL)>>48)])<<48; 00653 case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000C00000000000ULL)>>46)])<<46; 00654 case 46: __t ^= ((word)X[(int)((_a[j] & 0x0000300000000000ULL)>>44)])<<44; 00655 case 44: __t ^= ((word)X[(int)((_a[j] & 0x00000C0000000000ULL)>>42)])<<42; 00656 case 42: __t ^= ((word)X[(int)((_a[j] & 0x0000030000000000ULL)>>40)])<<40; 00657 case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000C000000000ULL)>>38)])<<38; 00658 case 38: __t ^= ((word)X[(int)((_a[j] & 0x0000003000000000ULL)>>36)])<<36; 00659 case 36: __t ^= ((word)X[(int)((_a[j] & 0x0000000C00000000ULL)>>34)])<<34; 00660 case 34: __t ^= ((word)X[(int)((_a[j] & 0x0000000300000000ULL)>>32)])<<32; 00661 case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000C0000000ULL)>>30)])<<30; 00662 case 30: __t ^= ((word)X[(int)((_a[j] & 0x0000000030000000ULL)>>28)])<<28; 00663 case 28: __t ^= ((word)X[(int)((_a[j] & 0x000000000C000000ULL)>>26)])<<26; 00664 case 26: __t ^= ((word)X[(int)((_a[j] & 0x0000000003000000ULL)>>24)])<<24; 00665 case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000C00000ULL)>>22)])<<22; 00666 case 22: __t ^= ((word)X[(int)((_a[j] & 0x0000000000300000ULL)>>20)])<<20; 00667 case 20: __t ^= ((word)X[(int)((_a[j] & 0x00000000000C0000ULL)>>18)])<<18; 00668 case 18: __t ^= ((word)X[(int)((_a[j] & 0x0000000000030000ULL)>>16)])<<16; 00669 case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000C000ULL)>>14)])<<14; 00670 case 14: __t ^= ((word)X[(int)((_a[j] & 0x0000000000003000ULL)>>12)])<<12; 00671 case 12: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000C00ULL)>>10)])<<10; 00672 case 10: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000300ULL)>> 8)])<< 8; 00673 case 8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000C0ULL)>> 6)])<< 6; 00674 case 6: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000030ULL)>> 4)])<< 4; 00675 case 4: __t ^= ((word)X[(int)((_a[j] & 0x000000000000000CULL)>> 2)])<< 2; 00676 case 2: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000003ULL)>> 0)])<< 0; 00677 }; 00678 _a[j] = __t; 00679 00680 } else if(A->w == 4) { 00681 switch( (start/4)%16 ) { 00682 case 0: __t ^= (X[((__a)& 0x000000000000000FULL)])<<0; __a >>= 4; 00683 case 1: __t ^= (X[((__a)& 0x000000000000000FULL)])<<4; __a >>= 4; 00684 case 2: __t ^= (X[((__a)& 0x000000000000000FULL)])<<8; __a >>= 4; 00685 case 3: __t ^= (X[((__a)& 0x000000000000000FULL)])<<12; __a >>= 4; 00686 case 4: __t ^= (X[((__a)& 0x000000000000000FULL)])<<16; __a >>= 4; 00687 case 5: __t ^= (X[((__a)& 0x000000000000000FULL)])<<20; __a >>= 4; 00688 case 6: __t ^= (X[((__a)& 0x000000000000000FULL)])<<24; __a >>= 4; 00689 case 7: __t ^= (X[((__a)& 0x000000000000000FULL)])<<28; __a >>= 4; 00690 case 8: __t ^= (X[((__a)& 0x000000000000000FULL)])<<32; __a >>= 4; 00691 case 9: __t ^= (X[((__a)& 0x000000000000000FULL)])<<36; __a >>= 4; 00692 case 10: __t ^= (X[((__a)& 0x000000000000000FULL)])<<40; __a >>= 4; 00693 case 11: __t ^= (X[((__a)& 0x000000000000000FULL)])<<44; __a >>= 4; 00694 case 12: __t ^= (X[((__a)& 0x000000000000000FULL)])<<48; __a >>= 4; 00695 case 13: __t ^= (X[((__a)& 0x000000000000000FULL)])<<52; __a >>= 4; 00696 case 14: __t ^= (X[((__a)& 0x000000000000000FULL)])<<56; __a >>= 4; 00697 case 15: __t ^= (X[((__a)& 0x000000000000000FULL)])<<60; break; 00698 default: m4ri_die("impossible"); 00699 } 00700 if(A->x->width-startblock == 1) { 00701 _a[startblock] &= ~(bitmask_begin & bitmask_end); 00702 _a[startblock] ^= __t & bitmask_begin & bitmask_end; 00703 return; 00704 } else { 00705 _a[startblock] &= ~bitmask_begin; 00706 _a[startblock] ^= __t & bitmask_begin; 00707 } 00708 00709 for(j=startblock+1; j<A->x->width -1; j++) { 00710 __a = _a[j], __t = 0; 00711 __t ^= (X[((__a)& 0x000000000000000FULL)])<<0; __a >>= 4; 00712 __t ^= (X[((__a)& 0x000000000000000FULL)])<<4; __a >>= 4; 00713 __t ^= (X[((__a)& 0x000000000000000FULL)])<<8; __a >>= 4; 00714 __t ^= (X[((__a)& 0x000000000000000FULL)])<<12; __a >>= 4; 00715 __t ^= (X[((__a)& 0x000000000000000FULL)])<<16; __a >>= 4; 00716 __t ^= (X[((__a)& 0x000000000000000FULL)])<<20; __a >>= 4; 00717 __t ^= (X[((__a)& 0x000000000000000FULL)])<<24; __a >>= 4; 00718 __t ^= (X[((__a)& 0x000000000000000FULL)])<<28; __a >>= 4; 00719 __t ^= (X[((__a)& 0x000000000000000FULL)])<<32; __a >>= 4; 00720 __t ^= (X[((__a)& 0x000000000000000FULL)])<<36; __a >>= 4; 00721 __t ^= (X[((__a)& 0x000000000000000FULL)])<<40; __a >>= 4; 00722 __t ^= (X[((__a)& 0x000000000000000FULL)])<<44; __a >>= 4; 00723 __t ^= (X[((__a)& 0x000000000000000FULL)])<<48; __a >>= 4; 00724 __t ^= (X[((__a)& 0x000000000000000FULL)])<<52; __a >>= 4; 00725 __t ^= (X[((__a)& 0x000000000000000FULL)])<<56; __a >>= 4; 00726 __t ^= (X[((__a)& 0x000000000000000FULL)])<<60; 00727 _a[j] = __t; 00728 } 00729 00730 __t = _a[j] & ~bitmask_end; 00731 switch( (A->x->offset + A->x->ncols) % m4ri_radix) { 00732 case 0: __t ^= ((word)X[(int)((_a[j] & 0xF000000000000000ULL)>>60)])<<60; 00733 case 60: __t ^= ((word)X[(int)((_a[j] & 0x0F00000000000000ULL)>>56)])<<56; 00734 case 56: __t ^= ((word)X[(int)((_a[j] & 0x00F0000000000000ULL)>>52)])<<52; 00735 case 52: __t ^= ((word)X[(int)((_a[j] & 0x000F000000000000ULL)>>48)])<<48; 00736 case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000F00000000000ULL)>>44)])<<44; 00737 case 44: __t ^= ((word)X[(int)((_a[j] & 0x00000F0000000000ULL)>>40)])<<40; 00738 case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000F000000000ULL)>>36)])<<36; 00739 case 36: __t ^= ((word)X[(int)((_a[j] & 0x0000000F00000000ULL)>>32)])<<32; 00740 case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000F0000000ULL)>>28)])<<28; 00741 case 28: __t ^= ((word)X[(int)((_a[j] & 0x000000000F000000ULL)>>24)])<<24; 00742 case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000F00000ULL)>>20)])<<20; 00743 case 20: __t ^= ((word)X[(int)((_a[j] & 0x00000000000F0000ULL)>>16)])<<16; 00744 case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000F000ULL)>>12)])<<12; 00745 case 12: __t ^= ((word)X[(int)((_a[j] & 0x0000000000000F00ULL)>> 8)])<< 8; 00746 case 8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000F0ULL)>> 4)])<< 4; 00747 case 4: __t ^= ((word)X[(int)((_a[j] & 0x000000000000000FULL)>> 0)])<< 0; 00748 }; 00749 _a[j] = __t; 00750 00751 } else if (A->w == 8) { 00752 00753 register word __a0 = _a[startblock]>>(start%m4ri_radix); 00754 register word __a1; 00755 register word __t0 = 0; 00756 register word __t1; 00757 00758 switch( (start/8) %8 ) { 00759 case 0: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8; 00760 case 1: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8; 00761 case 2: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8; 00762 case 3: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8; 00763 case 4: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8; 00764 case 5: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8; 00765 case 6: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8; 00766 case 7: __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56; break; 00767 default: m4ri_die("impossible"); 00768 } 00769 if(A->x->width-startblock == 1) { 00770 _a[startblock] &= ~(bitmask_begin & bitmask_end); 00771 _a[startblock] ^= __t0 & bitmask_begin & bitmask_end; 00772 return; 00773 } else { 00774 _a[startblock] &= ~bitmask_begin; 00775 _a[startblock] ^= __t0 & bitmask_begin; 00776 } 00777 00778 for(j=startblock+1; j+2 < A->x->width; j+=2) { 00779 __a0 = _a[j], __t0 = 0; 00780 __a1 = _a[j+1], __t1 = 0; 00781 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8; 00782 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<0; __a1 >>= 8; 00783 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8; 00784 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<8; __a1 >>= 8; 00785 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8; 00786 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<16; __a1 >>= 8; 00787 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8; 00788 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<24; __a1 >>= 8; 00789 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8; 00790 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<32; __a1 >>= 8; 00791 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8; 00792 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<40; __a1 >>= 8; 00793 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8; 00794 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<48; __a1 >>= 8; 00795 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56; __a0 >>= 8; 00796 __t1 ^= (X[((__a1)& 0x00000000000000FFULL)])<<56; 00797 _a[j] = __t0; 00798 _a[j+1] = __t1; 00799 } 00800 00801 for(; j < A->x->width-1; j++) { 00802 __a0 = _a[j], __t0 = 0; 00803 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<0; __a0 >>= 8; 00804 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<8; __a0 >>= 8; 00805 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<16; __a0 >>= 8; 00806 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<24; __a0 >>= 8; 00807 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<32; __a0 >>= 8; 00808 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<40; __a0 >>= 8; 00809 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<48; __a0 >>= 8; 00810 __t0 ^= (X[((__a0)& 0x00000000000000FFULL)])<<56; 00811 _a[j] = __t0; 00812 } 00813 00814 __t = _a[j] & ~bitmask_end; 00815 switch( (A->x->offset + A->x->ncols) % m4ri_radix ) { 00816 case 0: __t ^= ((word)X[(int)((_a[j] & 0xFF00000000000000ULL)>>56)])<<56; 00817 case 56: __t ^= ((word)X[(int)((_a[j] & 0x00FF000000000000ULL)>>48)])<<48; 00818 case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000FF0000000000ULL)>>40)])<<40; 00819 case 40: __t ^= ((word)X[(int)((_a[j] & 0x000000FF00000000ULL)>>32)])<<32; 00820 case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000FF000000ULL)>>24)])<<24; 00821 case 24: __t ^= ((word)X[(int)((_a[j] & 0x0000000000FF0000ULL)>>16)])<<16; 00822 case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000FF00ULL)>> 8)])<< 8; 00823 case 8: __t ^= ((word)X[(int)((_a[j] & 0x00000000000000FFULL)>> 0)])<< 0; 00824 }; 00825 _a[j] = __t; 00826 00827 } else if (A->w == 16) { 00828 switch( (start/16) %4 ) { 00829 case 0: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16; 00830 case 1: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16; 00831 case 2: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16; 00832 case 3: __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48; break; 00833 default: m4ri_die("impossible"); 00834 } 00835 if(A->x->width-startblock == 1) { 00836 _a[startblock] &= ~(bitmask_begin & bitmask_end); 00837 _a[startblock] ^= __t & bitmask_begin & bitmask_end; 00838 return; 00839 } else { 00840 _a[startblock] &= ~bitmask_begin; 00841 _a[startblock] ^= __t & bitmask_begin; 00842 } 00843 00844 for(j=startblock+1; j+4<A->x->width; j+=4) { 00845 __a = _a[j], __t = 0; 00846 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16; 00847 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16; 00848 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16; 00849 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48; 00850 _a[j] = __t; 00851 00852 __a = _a[j+1], __t = 0; 00853 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16; 00854 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16; 00855 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16; 00856 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48; 00857 _a[j+1] = __t; 00858 00859 00860 __a = _a[j+2], __t = 0; 00861 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16; 00862 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16; 00863 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16; 00864 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48; 00865 _a[j+2] = __t; 00866 00867 __a = _a[j+3], __t = 0; 00868 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16; 00869 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16; 00870 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16; 00871 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48; 00872 _a[j+3] = __t; 00873 } 00874 for( ; j<A->x->width-1; j++) { 00875 __a = _a[j], __t = 0; 00876 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<0; __a >>= 16; 00877 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<16; __a >>= 16; 00878 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<32; __a >>= 16; 00879 __t ^= (X[((__a)& 0x000000000000FFFFULL)])<<48; 00880 _a[j] = __t; 00881 } 00882 00883 __t = _a[j] & ~bitmask_end; 00884 switch( (A->x->offset + A->x->ncols) % m4ri_radix) { 00885 case 0: __t ^= ((word)X[(int)((_a[j] & 0xFFFF000000000000ULL)>>48)])<<48; 00886 case 48: __t ^= ((word)X[(int)((_a[j] & 0x0000FFFF00000000ULL)>>32)])<<32; 00887 case 32: __t ^= ((word)X[(int)((_a[j] & 0x00000000FFFF0000ULL)>>16)])<<16; 00888 case 16: __t ^= ((word)X[(int)((_a[j] & 0x000000000000FFFFULL)>> 0)])<< 0; 00889 }; 00890 _a[j] = __t; 00891 00892 } else { 00893 for(rci_t j=start_col; j<A->ncols; j++) { 00894 mzed_write_elem(A, r, j, X[mzed_read_elem(A, r, j)]); 00895 } 00896 } 00897 } 00898 00909 static inline void mzed_row_swap(mzed_t *M, const rci_t rowa, const rci_t rowb) { 00910 mzd_row_swap(M->x, rowa, rowb); 00911 } 00912 00927 static inline void mzed_copy_row(mzed_t* B, rci_t i, const mzed_t* A, rci_t j) { 00928 mzd_copy_row(B->x, i, A->x, j); 00929 } 00930 00941 static inline void mzed_col_swap(mzed_t *M, const rci_t cola, const rci_t colb) { 00942 for(rci_t i=0; i<M->w; i++) 00943 mzd_col_swap(M->x,M->w*cola+i, M->w*colb+i); 00944 } 00945 00958 static inline void mzed_col_swap_in_rows(mzed_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row) { 00959 for(unsigned int e=0; e < A->finite_field->degree; e++) { 00960 mzd_col_swap_in_rows(A->x, A->w*cola+e, A->w*colb+e, start_row, stop_row); 00961 }; 00962 } 00963 00977 static inline void mzed_row_add(mzed_t *M, const rci_t sourcerow, const rci_t destrow) { 00978 mzd_row_add(M->x, sourcerow, destrow); 00979 } 00980 00991 static inline rci_t mzed_first_zero_row(mzed_t *A) { 00992 return mzd_first_zero_row(A->x); 00993 } 00994 00995 01006 static inline void mzed_row_clear_offset(mzed_t *M, const rci_t row, const rci_t coloffset) { 01007 mzd_row_clear_offset(M->x, row, coloffset*M->w); 01008 } 01009 01023 rci_t mzed_echelonize_naive(mzed_t *A, int full); 01024 01033 void mzed_print(const mzed_t *M); 01034 01035 #endif //M4RIE_MATRIX_H