/* $NetBSD: rf_nwayxor.c,v 1.11 2006/11/16 01:33:23 christos Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. * * Author: Mark Holland, Daniel Stodolsky * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /************************************************************ * * nwayxor.c -- code to do N-way xors for reconstruction * * nWayXorN xors N input buffers into the destination buffer. * adapted from danner's longword_bxor code. * ************************************************************/ #include __KERNEL_RCSID(0, "$NetBSD: rf_nwayxor.c,v 1.11 2006/11/16 01:33:23 christos Exp $"); #include "rf_nwayxor.h" #include "rf_shutdown.h" static int callcount[10]; static void rf_ShutdownNWayXor(void *); static void rf_ShutdownNWayXor(void *ignored) { int i; if (rf_showXorCallCounts == 0) return; printf("Call counts for n-way xor routines: "); for (i = 0; i < 10; i++) printf("%d ", callcount[i]); printf("\n"); } int rf_ConfigureNWayXor(RF_ShutdownList_t **listp) { int i; for (i = 0; i < 10; i++) callcount[i] = 0; rf_ShutdownCreate(listp, rf_ShutdownNWayXor, NULL); return (0); } void rf_nWayXor1(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) { unsigned long *src = (unsigned long *) src_rbs[0]->buffer; unsigned long *dest = (unsigned long *) dest_rb->buffer; unsigned long *end = src + len; unsigned long d0, d1, d2, d3, s0, s1, s2, s3; callcount[1]++; while (len >= 4) { d0 = dest[0]; d1 = dest[1]; d2 = dest[2]; d3 = dest[3]; s0 = src[0]; s1 = src[1]; s2 = src[2]; s3 = src[3]; dest[0] = d0 ^ s0; dest[1] = d1 ^ s1; dest[2] = d2 ^ s2; dest[3] = d3 ^ s3; src += 4; dest += 4; len -= 4; } while (src < end) { *dest++ ^= *src++; } } void rf_nWayXor2(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *a = dst; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; unsigned long *c = (unsigned long *) src_rbs[1]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[2]++; /* align dest to cache line */ while ((((unsigned long) dst) & 0x1f)) { *dst++ = *a++ ^ *b++ ^ *c++; len--; } while (len > 4) { a0 = a[0]; len -= 4; a1 = a[1]; a2 = a[2]; a3 = a[3]; a += 4; b0 = b[0]; b1 = b[1]; b2 = b[2]; b3 = b[3]; /* start dual issue */ a0 ^= b0; b0 = c[0]; b += 4; a1 ^= b1; a2 ^= b2; a3 ^= b3; b1 = c[1]; a0 ^= b0; b2 = c[2]; a1 ^= b1; b3 = c[3]; a2 ^= b2; dst[0] = a0; a3 ^= b3; dst[1] = a1; c += 4; dst[2] = a2; dst[3] = a3; dst += 4; } while (len) { *dst++ = *a++ ^ *b++ ^ *c++; len--; } } /* note that first arg is not incremented but 2nd arg is */ #define LOAD_FIRST(_dst,_b) \ a0 = _dst[0]; len -= 4; \ a1 = _dst[1]; \ a2 = _dst[2]; \ a3 = _dst[3]; \ b0 = _b[0]; \ b1 = _b[1]; \ b2 = _b[2]; \ b3 = _b[3]; _b += 4; /* note: arg is incremented */ #define XOR_AND_LOAD_NEXT(_n) \ a0 ^= b0; b0 = _n[0]; \ a1 ^= b1; b1 = _n[1]; \ a2 ^= b2; b2 = _n[2]; \ a3 ^= b3; b3 = _n[3]; \ _n += 4; /* arg is incremented */ #define XOR_AND_STORE(_dst) \ a0 ^= b0; _dst[0] = a0; \ a1 ^= b1; _dst[1] = a1; \ a2 ^= b2; _dst[2] = a2; \ a3 ^= b3; _dst[3] = a3; \ _dst += 4; void rf_nWayXor3(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; unsigned long *c = (unsigned long *) src_rbs[1]->buffer; unsigned long *d = (unsigned long *) src_rbs[2]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[3]++; /* align dest to cache line */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++; len--; } while (len > 4) { LOAD_FIRST(dst, b); XOR_AND_LOAD_NEXT(c); XOR_AND_LOAD_NEXT(d); XOR_AND_STORE(dst); } while (len) { *dst++ ^= *b++ ^ *c++ ^ *d++; len--; } } void rf_nWayXor4(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; unsigned long *c = (unsigned long *) src_rbs[1]->buffer; unsigned long *d = (unsigned long *) src_rbs[2]->buffer; unsigned long *e = (unsigned long *) src_rbs[3]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[4]++; /* align dest to cache line */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; len--; } while (len > 4) { LOAD_FIRST(dst, b); XOR_AND_LOAD_NEXT(c); XOR_AND_LOAD_NEXT(d); XOR_AND_LOAD_NEXT(e); XOR_AND_STORE(dst); } while (len) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++; len--; } } void rf_nWayXor5(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; unsigned long *c = (unsigned long *) src_rbs[1]->buffer; unsigned long *d = (unsigned long *) src_rbs[2]->buffer; unsigned long *e = (unsigned long *) src_rbs[3]->buffer; unsigned long *f = (unsigned long *) src_rbs[4]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[5]++; /* align dest to cache line */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; len--; } while (len > 4) { LOAD_FIRST(dst, b); XOR_AND_LOAD_NEXT(c); XOR_AND_LOAD_NEXT(d); XOR_AND_LOAD_NEXT(e); XOR_AND_LOAD_NEXT(f); XOR_AND_STORE(dst); } while (len) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++; len--; } } void rf_nWayXor6(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; unsigned long *c = (unsigned long *) src_rbs[1]->buffer; unsigned long *d = (unsigned long *) src_rbs[2]->buffer; unsigned long *e = (unsigned long *) src_rbs[3]->buffer; unsigned long *f = (unsigned long *) src_rbs[4]->buffer; unsigned long *g = (unsigned long *) src_rbs[5]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[6]++; /* align dest to cache line */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; len--; } while (len > 4) { LOAD_FIRST(dst, b); XOR_AND_LOAD_NEXT(c); XOR_AND_LOAD_NEXT(d); XOR_AND_LOAD_NEXT(e); XOR_AND_LOAD_NEXT(f); XOR_AND_LOAD_NEXT(g); XOR_AND_STORE(dst); } while (len) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++; len--; } } void rf_nWayXor7(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; unsigned long *c = (unsigned long *) src_rbs[1]->buffer; unsigned long *d = (unsigned long *) src_rbs[2]->buffer; unsigned long *e = (unsigned long *) src_rbs[3]->buffer; unsigned long *f = (unsigned long *) src_rbs[4]->buffer; unsigned long *g = (unsigned long *) src_rbs[5]->buffer; unsigned long *h = (unsigned long *) src_rbs[6]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[7]++; /* align dest to cache line */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; len--; } while (len > 4) { LOAD_FIRST(dst, b); XOR_AND_LOAD_NEXT(c); XOR_AND_LOAD_NEXT(d); XOR_AND_LOAD_NEXT(e); XOR_AND_LOAD_NEXT(f); XOR_AND_LOAD_NEXT(g); XOR_AND_LOAD_NEXT(h); XOR_AND_STORE(dst); } while (len) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++; len--; } } void rf_nWayXor8(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; unsigned long *c = (unsigned long *) src_rbs[1]->buffer; unsigned long *d = (unsigned long *) src_rbs[2]->buffer; unsigned long *e = (unsigned long *) src_rbs[3]->buffer; unsigned long *f = (unsigned long *) src_rbs[4]->buffer; unsigned long *g = (unsigned long *) src_rbs[5]->buffer; unsigned long *h = (unsigned long *) src_rbs[6]->buffer; unsigned long *i = (unsigned long *) src_rbs[7]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[8]++; /* align dest to cache line */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; len--; } while (len > 4) { LOAD_FIRST(dst, b); XOR_AND_LOAD_NEXT(c); XOR_AND_LOAD_NEXT(d); XOR_AND_LOAD_NEXT(e); XOR_AND_LOAD_NEXT(f); XOR_AND_LOAD_NEXT(g); XOR_AND_LOAD_NEXT(h); XOR_AND_LOAD_NEXT(i); XOR_AND_STORE(dst); } while (len) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++; len--; } } void rf_nWayXor9(RF_ReconBuffer_t **src_rbs, RF_ReconBuffer_t *dest_rb, int len) { unsigned long *dst = (unsigned long *) dest_rb->buffer; unsigned long *b = (unsigned long *) src_rbs[0]->buffer; unsigned long *c = (unsigned long *) src_rbs[1]->buffer; unsigned long *d = (unsigned long *) src_rbs[2]->buffer; unsigned long *e = (unsigned long *) src_rbs[3]->buffer; unsigned long *f = (unsigned long *) src_rbs[4]->buffer; unsigned long *g = (unsigned long *) src_rbs[5]->buffer; unsigned long *h = (unsigned long *) src_rbs[6]->buffer; unsigned long *i = (unsigned long *) src_rbs[7]->buffer; unsigned long *j = (unsigned long *) src_rbs[8]->buffer; unsigned long a0, a1, a2, a3, b0, b1, b2, b3; callcount[9]++; /* align dest to cache line */ while ((((unsigned long) dst) & 0x1f)) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; len--; } while (len > 4) { LOAD_FIRST(dst, b); XOR_AND_LOAD_NEXT(c); XOR_AND_LOAD_NEXT(d); XOR_AND_LOAD_NEXT(e); XOR_AND_LOAD_NEXT(f); XOR_AND_LOAD_NEXT(g); XOR_AND_LOAD_NEXT(h); XOR_AND_LOAD_NEXT(i); XOR_AND_LOAD_NEXT(j); XOR_AND_STORE(dst); } while (len) { *dst++ ^= *b++ ^ *c++ ^ *d++ ^ *e++ ^ *f++ ^ *g++ ^ *h++ ^ *i++ ^ *j++; len--; } }