#ifndef lint
static char sccsid[] = "@(#)mem_rop.c 4.14 87/04/30 SMI";
#endif

/*
 * Copyright 1986 by Sun Microsystems,  Inc.
 */

/*
 * Memory pixrect rasterop
 *
 * 32-bit version 
 *
 * David DiGiacomo, Sun Microsystems
 */

/*
 * Compile-time options:
 *
 * define NEVER_SLOW to generate fast code only
 * define NEVER_FAST to generate slow code only
 *
 * define NO_OPS to turn off all ops except PIX_SRC for debugging
 *
 * define SHORT_ALIGN to 0 to force 32 bit alignment of 32 bit accesses,
 *		      to 1 to allow 32 bit accesses on 16 bit boundaries
 */

#include <sys/types.h>
#include <stdio.h>
#include <pixrect/pixrect.h>
#include <pixrect/memvar.h>
#include <pixrect/pr_util.h>
#include <pixrect/mem_rop_impl_ops.h>
#include <pixrect/mem_rop_impl_util.h>
#ifndef PIX_OPSRC
#include <pixrect/pr_impl_util.h>
#endif
#include "shape.h"

/*
 * Background color defined outside this file.  mem_rop copies it into
 * magicolor before replicating color bits for fills and 1 -> n ops.
 */
extern int magic_hack_bgcolor;
/*
 * Include stupid ROP chip code in Sun-2 kernel.
 * This is probably unnecessary, but who knows?
 */
#if !defined(ROP_CHIP) && defined(KERNEL) && defined(mc68010)
#define	ROP_CHIP	1
#endif

#if ROP_CHIP
/*
 * stupid rop chip initialization flag:
 * mem_rop calls mem_init() on entry whenever this is nonzero
 */
int mem_needinit;
#endif

/*
 * misaligned rop split flag
 *
 * Nonzero allows 32 bit accesses on 16 bit boundaries.  When it is zero,
 * mem_rop splits multi-line rops into two passes over alternate lines
 * if the dst (or src) linebytes has bit 1 set (linebytes & 2).
 * Defaults to the value of the mc68000 predefine; an undefined name
 * evaluates as 0 inside #if.
 */
#ifndef SHORT_ALIGN
#define	SHORT_ALIGN	mc68000
#endif

/*
 * shift count 32 valid test macro
 *
 * Used below as IFSHIFT32(, stmt) around statements that zero a value
 * when the shift count is >= 32.  It is an alias for IF68000, which is
 * defined outside this file; presumably 68000 builds select the empty
 * first argument and all others select stmt -- confirm in the header.
 */
#define	IFSHIFT32	IF68000


/*
 * basic rop data type
 * NOTE(review): the code treats this as a 32 bit word (masks, shifts by
 * 5, byte offsets of 4); assumes unsigned long is 32 bits -- confirm
 * for new targets.
 */
typedef	unsigned long	ROP_T;

/*
 * The MPR_T and UMPR_T typedefs are skipped for sparc; presumably a
 * header supplies them there -- confirm.
 */
#ifndef sparc
/* memory pixrect data type */
typedef short		MPR_T;

/* unsigned equivalent of memory pixrect data type */
typedef	u_short		UMPR_T;
#endif

/* 1 -> 8 op src data type (the 1 -> n macros read the src through this) */
typedef	u_short		XOPS_T;

#ifndef sparc
/* pseudo-integer type (kludge to use 680x0 address registers) */
#if mc68000
typedef	caddr_t		INT_T;
#else
typedef	int		INT_T;
#endif
#endif

/*
 * reverse video op tables, defined elsewhere.
 * mem_rop indexes them with the 4 bit op code to get the equivalent op
 * when the src or dst pixrect has MP_REVERSEVIDEO set.
 */
extern char pr_reversesrc[];
extern char pr_reversedst[];

/**************************************************************/

/*
 * direction macros
 *
 * Passed as the "dir" argument of CASE_OP.  ROPW applies the argument
 * as dir(IFTRUE,IFFALSE) to obtain the selector handed to _ROPW.
 * IFTRUE and IFFALSE are defined outside this file; presumably
 * IFTRUE(a,b) expands to a and IFFALSE(a,b) to b -- confirm in header.
 */
#define	L_TO_R	IFTRUE
#define	R_TO_L	IFFALSE

/*
 * do-while loop termination macro
 *
 * Supplies the "while (...)" clause that closes a do { } loop counting
 * "count" down.  Two spellings are available:
 *	while (--count != -1)
 *	while (--count >= 0)
 * isreg is IFTRUE or IFFALSE; callers in this file pass IFTRUE for
 * counters declared register short and IFFALSE otherwise.  The first
 * spelling is chosen only when IF68000 selects its first argument and
 * isreg is IFTRUE; in every other case the second spelling is used.
 * NOTE(review): the != -1 form presumably lets the 68000 compiler emit
 * a decrement-and-branch instruction -- confirm.
 */
#define	ENDLOOP(isreg, count) IF68000(isreg(IFTRUE,IFFALSE),IFFALSE)( \
	while (--count != -1), \
	while (--count >= 0))

/*
 * generic function dispatch macro
 *
 * CASE_OP(op, macro, dir, noskew, noprime, nomask, noedges)
 *
 *	op	4 bit op code (0x0 - 0xF) tested at run time
 *	macro	name of the worker macro to expand (FILLN, ROPW, ...)
 *	others	passed through to the worker macro unchanged
 *
 * The worker is invoked as
 *	macro(<op digit>, <FAST or SLOW>, dir, noskew, noprime, nomask, noedges)
 * where the op digit is the bare hex digit (1, B, C ...) so the worker
 * can paste it onto OP_xxx macro names.
 *
 * (code speed op)
 * 0 - clear   1 S ~(d|s)     2 S d & ~s     3 F ~s
 * 4 S ~d & s  5 - ~d         6 F d ^ s      7 S ~(d & s)
 * 8 F d & s   9 F ~(d ^ s)   A - d          B S d|~s
 * C F s       D F ~d|s       E F d|s        F - set
 *
 * fast/slow assignments are pretty arbitrary
 *
 * Codes 0, 5, A and F expand to nothing here: mem_rop returns early for
 * the dst-only op and rewrites clear, ~d and set as fills using other
 * op codes before it dispatches.
 *
 * With NO_OPS defined the op argument is ignored and only op C (src) is
 * generated, for debugging.
 *
 * The expansion is a single statement; the caller supplies the
 * trailing semicolon.
 */
#ifndef	NO_OPS

#define	CASE_OP(op,macro,dir,noskew,noprime,nomask,noedges) \
	do switch (op) { \
	case 0x0: break; \
	case 0x1: macro(1,SLOW,dir,noskew,noprime,nomask,noedges); break; \
	case 0x2: macro(2,SLOW,dir,noskew,noprime,nomask,noedges); break; \
	case 0x3: macro(3,FAST,dir,noskew,noprime,nomask,noedges); break; \
	case 0x4: macro(4,SLOW,dir,noskew,noprime,nomask,noedges); break; \
	case 0x5: break; \
	case 0x6: macro(6,FAST,dir,noskew,noprime,nomask,noedges); break; \
	case 0x7: macro(7,SLOW,dir,noskew,noprime,nomask,noedges); break; \
	case 0x8: macro(8,FAST,dir,noskew,noprime,nomask,noedges); break; \
	case 0x9: macro(9,FAST,dir,noskew,noprime,nomask,noedges); break; \
	case 0xA: break; \
	case 0xB: macro(B,SLOW,dir,noskew,noprime,nomask,noedges); break; \
	case 0xC: macro(C,FAST,dir,noskew,noprime,nomask,noedges); break; \
	case 0xD: macro(D,FAST,dir,noskew,noprime,nomask,noedges); break; \
	case 0xE: macro(E,FAST,dir,noskew,noprime,nomask,noedges); break; \
	case 0xF: break; } while (_ZERO_)

#else /* NO_OPS */

#define	CASE_OP(op,macro,dir,noskew,noprime,nomask,noedges) \
	do { macro(C,FAST,dir,noskew,noprime,nomask,noedges); } while (_ZERO_)

#endif /* NO_OPS */

/*
 * narrow fills
 *
 * Worker macro for CASE_OP; mem_rop uses it when the dst linebytes is 2,
 * so each line is a single 16 bit element and d++ steps to the next line.
 *
 * Only op, speed and nomask are used; the other arguments exist so all
 * worker macros share one argument list.
 *
 * Variables taken from the enclosing scope:
 *	d	UMPR_T pointer to the first dst element
 *	k	fill color
 *	m	dst mask; ~0 means every bit is written
 *	h	number of lines minus 1 (counted down to -1)
 *
 * CAT(nomask,T) and CAT(speed,T) paste the argument onto T to name
 * selector macros defined outside this file.  Presumably they decide at
 * compile time or by the run time test "m == ~0" which _FILLN variant
 * is emitted -- confirm in mem_rop_impl_util.h.
 */
#define	FILLN(op,speed,dir,noskew,noprime,nomask,noedges) do { \
	CAT(nomask,T)(CAT(speed,T), m == ~0, \
		_FILLN(op,IFTRUE), \
		_FILLN(op,IFFALSE)); \
	} while (_ZERO_)

/*
 * _FILLN(op, nomask): nomask is IFTRUE for the unmasked variant and
 * IFFALSE for the masked one.
 *
 * Before the loop k (and m when masked) is passed once through the
 * op-specific OP_ufgen / OP_mfgen / OP_mfmsk macros, which are defined
 * outside this file.  The loop then rewrites h + 1 consecutive dst
 * elements with OP_ufill or OP_mfill.
 */
#define _FILLN(op,nomask) do { \
	nomask( \
		k = CAT(OP_ufgen,op)(k); \
	, \
		k = CAT(OP_mfgen,op)(m,k); \
		m = CAT(OP_mfmsk,op)(m); \
	) \
	do { \
		nomask( \
			*d = CAT(OP_ufill,op)(*d, (UMPR_T) k); \
		, \
			*d = CAT(OP_mfill,op)(*d, (UMPR_T) m, (UMPR_T) k); \
		) \
		d++; \
	} ENDLOOP(IFTRUE, h); } while (_ZERO_)

/*
 * wide fills
 *
 * Worker macro for CASE_OP; fills a dst that is accessed as 32 bit words.
 * Uses op, speed, nomask and noedges; the other arguments are ignored.
 *
 * Variables taken from the enclosing scope:
 *	d		ROP_T pointer to the first dst word
 *	k		fill color
 *	m		plane mask; ~0 means unmasked
 *	lm, rm		left and right partial word masks; 0 means no
 *			partial word on that side
 *	lk, rk		scratch colors for the partial words
 *	edges		nonzero if either partial word exists
 *	w		number of full words per line minus 1
 *	x		scratch word counter
 *	h		number of lines minus 1
 *	rdoffset	byte offset added to d at the end of each line
 *
 * CAT(noedges,T), CAT(nomask,T) and CAT(speed,T) name selector macros
 * defined outside this file; presumably they pick one of the four
 * _FILLW variants at compile time or with the run time tests
 * "edges == 0" and "m == ~0" -- confirm in mem_rop_impl_util.h.
 */
#define	FILLW(op,speed,dir,noskew,noprime,nomask,noedges) do { \
	CAT(noedges,T)(CAT(speed,T), edges == 0, \
		CAT(nomask,T)(CAT(speed,T), m == ~0, \
			_FILLW(op,IFTRUE,IFTRUE), \
			_FILLW(op,IFFALSE,IFTRUE)), \
		CAT(nomask,T)(CAT(speed,T), m == ~0, \
			_FILLW(op,IFTRUE,IFFALSE), \
			_FILLW(op,IFFALSE,IFFALSE))); \
	} while (_ZERO_)

/*
 * _FILLW(op, nomask, noedges): one fill variant.
 *
 * Setup: the edge masks and colors, then m and k, are passed once
 * through the op-specific OP_mfgen / OP_mfmsk / OP_ufgen macros.
 * Because lm and rm have been transformed, "no partial word" is tested
 * inside the loop by comparing against OP_mfmsk applied to 0.
 *
 * Per line: optional left partial word, then w + 1 full words, then
 * optional right partial word, then d is advanced by rdoffset bytes.
 * In the noedges variant the full word loop always runs at least once;
 * in the edges variant it is skipped when w is negative.
 */
#define	_FILLW(op,nomask,noedges) do { \
	noedges(, \
		if (lm != 0) \
			lk = CAT(OP_mfgen,op)(lm, k); \
		lm = CAT(OP_mfmsk,op)(lm); \
		if (rm != 0) \
			rk = CAT(OP_mfgen,op)(rm, k); \
		rm = CAT(OP_mfmsk,op)(rm); \
	) \
	nomask( \
		k = CAT(OP_ufgen,op)(k); \
	, \
		k = CAT(OP_mfgen,op)(m, k); \
		m = CAT(OP_mfmsk,op)(m); \
	) \
	do { \
		noedges(, \
			if (lm != CAT(OP_mfmsk,op)(0)) { \
				*d = CAT(OP_mfill,op)(*d, lm, lk); \
				d++; \
			} \
		) \
		noedges( \
			x = (short) w; do \
		, \
			if ((x = (short) w) >= 0) do \
		) { \
			nomask( \
				*d = CAT(OP_ufill,op)(*d, k); \
			, \
				*d = CAT(OP_mfill,op)(*d, m, k); \
			) \
			d++; \
		} ENDLOOP(IFTRUE, x); \
		noedges(, \
			if (rm != CAT(OP_mfmsk,op)(0)) \
				*d = CAT(OP_mfill,op)(*d, rm, rk); \
		) \
		d = (ROP_T *) ((caddr_t) d + (int) rdoffset); \
	} ENDLOOP(IFTRUE, h); } while (_ZERO_)

/*
 * narrow 1 -> n rops
 *
 * Worker macro for CASE_OP; expands a 1 bit deep src into a dst whose
 * lines are a single 16 bit element.  Uses op, speed, noprime and
 * nomask; the other arguments are ignored.
 *
 * Variables taken from the enclosing scope:
 *	d		UMPR_T pointer to the first dst element
 *	s		XOPS_T pointer to the first src short
 *	k		color
 *	m		dst mask; (UMPR_T) ~0 means unmasked
 *	rsprime		src fetch mode, see _XOPN
 *	sshift		right shift that brings the wanted src bits to
 *			the bottom of scurr
 *	scurr		scratch src word
 *	smask		table indexed by 2 src bits giving a dst bit mask
 *	rsoffset	byte offset added to s at the end of each line
 *	h		number of lines minus 1
 *
 * CAT(noprime,T), CAT(nomask,T) and CAT(speed,T) name selector macros
 * defined outside this file; presumably they pick one of the four
 * _XOPN variants at compile time or with the run time tests shown --
 * confirm in mem_rop_impl_util.h.
 */
#define	XOPN(op,speed,dir,noskew,noprime,nomask,noedges) do { \
	CAT(noprime,T)(CAT(speed,T), rsprime == 0, \
		CAT(nomask,T)(CAT(speed,T), m == (UMPR_T) ~0, \
			_XOPN(op,IFTRUE,IFTRUE), \
			_XOPN(op,IFFALSE,IFTRUE)), \
		CAT(nomask,T)(CAT(speed,T), m == (UMPR_T) ~0, \
			_XOPN(op,IFTRUE,IFFALSE), \
			_XOPN(op,IFFALSE,IFFALSE))); \
	} while (_ZERO_)

/*
 * _XOPN(op, noprime, nomask): one variant.
 *
 * Src fetch per line:
 *	noprime variant		one short is read
 *	otherwise		rsprime != 0 reads a short into the high
 *				half of scurr; rsprime >= 0 then ORs a
 *				short into the low half.  So a negative
 *				rsprime reads the high half only and a
 *				positive rsprime reads both.
 * The fetched bits are shifted down by sshift and reduced to 2 bits,
 * which index smask to give the src operand of OP_uxrop / OP_mxrop.
 * d then steps one element and s is advanced by rsoffset bytes.
 */
#define	_XOPN(op,noprime,nomask) do { \
	nomask(, m = CAT(OP_mmsk,op)(m);) \
	do { \
		noprime( \
			scurr = *s++; \
		, \
			scurr = 0; \
			if (rsprime != 0) \
				scurr = *s++ << 16; \
			if (rsprime >= 0) \
				scurr |= *s++; \
		) \
		scurr >>= sshift; \
		scurr &= 3; \
		nomask( \
			*d = CAT(OP_uxrop,op)(*d, \
				(UMPR_T) k, smask[(XOPS_T) scurr]); \
		, \
			*d = CAT(OP_mxrop,op)(*d, (UMPR_T) m, \
				(UMPR_T) k, smask[(XOPS_T) scurr]); \
		) \
		d++; \
		s = (XOPS_T *) ((caddr_t) s + (int) rsoffset); \
	} ENDLOOP(IFTRUE, h); } while (_ZERO_)

/*
 * wide 1 -> n rops
 *
 * Worker macro for CASE_OP; expands a 1 bit deep src into a dst that is
 * accessed as 32 bit words.  Uses op, speed, noprime, nomask and
 * noedges.  Every variant passes IFFALSE as the noskew argument of
 * _XOPW, so the noskew-true alternatives inside _XOPW are never
 * selected from here.
 *
 * Variables taken from the enclosing scope:
 *	d		ROP_T pointer to the first dst word
 *	s		XOPS_T pointer to the first src short
 *	k		color
 *	m		plane mask; ~0 means unmasked
 *	lm, rm		left and right partial word masks
 *	edges		nonzero if either partial word exists
 *	skew		initial value of sshift for each line
 *	sshift		current right shift applied to scurr
 *	sbits		number of src bits consumed per dst word
 *	scurr		src shift register
 *	sprime		read one extra src short at the start of a line
 *	sflush		read one extra src short at the end of a line
 *	smask		table indexed by 4 src bits giving a dst bit mask
 *	w, x		full words per line minus 1, and its scratch copy
 *	h		number of lines minus 1
 *	doffset		byte offset added to d at the end of each line
 *	soffset		byte offset added to s at the end of each line
 *
 * CAT(noedges,T), CAT(noprime,T), CAT(nomask,T) and CAT(speed,T) name
 * selector macros defined outside this file; presumably they pick one
 * of the eight _XOPW variants at compile time or with the run time
 * tests shown -- confirm in mem_rop_impl_util.h.
 */
#define	XOPW(op,speed,dir,noskew,noprime,nomask,noedges) do { \
	CAT(noedges,T)(CAT(speed,T), edges == 0, \
		CAT(noprime,T)(CAT(speed,T), sprime == 0 && sflush == 0, \
			CAT(nomask,T)(CAT(speed,T), m == ~0, \
				_XOPW(op,IFFALSE,IFTRUE,IFTRUE,IFTRUE), \
				_XOPW(op,IFFALSE,IFTRUE,IFFALSE,IFTRUE)), \
			CAT(nomask,T)(CAT(speed,T), m == ~0, \
				_XOPW(op,IFFALSE,IFFALSE,IFTRUE,IFTRUE), \
				_XOPW(op,IFFALSE,IFFALSE,IFFALSE,IFTRUE))), \
		CAT(noprime,T)(CAT(speed,T), sprime == 0 && sflush == 0, \
			CAT(nomask,T)(CAT(speed,T), m == ~0, \
				_XOPW(op,IFFALSE,IFTRUE,IFTRUE,IFFALSE), \
				_XOPW(op,IFFALSE,IFTRUE,IFFALSE,IFFALSE)), \
			CAT(nomask,T)(CAT(speed,T), m == ~0, \
				_XOPW(op,IFFALSE,IFFALSE,IFTRUE,IFFALSE), \
				_XOPW(op,IFFALSE,IFFALSE,IFFALSE,IFFALSE)))); \
	} while (_ZERO_)

/*
 * _XOPW(op, noskew, noprime, nomask, noedges): one variant.
 *
 * Setup: the masks are passed once through the op-specific OP_mmsk
 * macro, so "no partial word" is tested against OP_mmsk applied to 0.
 *
 * Per line:
 *	- sshift restarts at skew and scurr at 0
 *	- an extra src short is read first when sprime is set
 *	- left partial word: read a src short, write through lm
 *	- each full word: sshift drops by sbits; when it goes negative
 *	  16 is added back and the next src short is shifted in
 *	- right partial word: same shift step, but the extra src short
 *	  is only read when sflush is set
 *	- d and s are advanced by doffset and soffset bytes
 * The smask index is (scurr >> sshift) & 15, since >> binds tighter
 * than &.
 */
#define	_XOPW(op,noskew,noprime,nomask,noedges) do { \
	noedges(, \
		lm = CAT(OP_mmsk,op)(lm); \
		rm = CAT(OP_mmsk,op)(rm); \
	) \
	nomask(, m = CAT(OP_mmsk,op)(m);) \
	do { \
		sshift = skew; \
		scurr = 0; \
		noprime(, \
			if (sprime) \
				scurr |= *s++; \
		) \
		noedges(, \
			if (lm != CAT(OP_mmsk,op)(0)) { \
				noskew(, \
					scurr <<= 16; \
				) \
				scurr |= *s++; \
				*d = CAT(OP_mxrop,op)(*d, lm, \
					k, smask[scurr >> sshift & 15]); \
				d++; \
			} \
		) \
		noedges( \
			x = w; do \
		, \
			if ((x = w) >= 0) do \
		) { \
			if ((sshift -= sbits) < 0) { \
				sshift += 16; \
				noskew( \
					scurr = *s++; \
				, \
					scurr <<= 16; \
					scurr |= *s++; \
				) \
			} \
			nomask( \
				*d = CAT(OP_uxrop,op)(*d, \
					k, smask[scurr >> sshift & 15]); \
			, \
				*d = CAT(OP_mxrop,op)(*d, m, \
					k, smask[scurr >> sshift & 15]); \
			) \
			d++; \
		} ENDLOOP(IFTRUE, x); \
		noedges(, \
			if (rm != CAT(OP_mmsk,op)(0)) { \
				if ((sshift -= sbits) < 0) { \
					sshift += 16; \
					noskew( \
						scurr = *s++; \
					, \
						scurr <<= 16; \
					) \
					noprime(, \
						if (sflush) \
							scurr |= *s++; \
					) \
				} \
				scurr >>= sshift; \
				scurr &= 15; \
				*d   = CAT(OP_mxrop,op)(*d, rm, \
					k, smask[scurr]); \
			} \
		) \
		d = (ROP_T *) ((caddr_t) d + doffset); \
		s = (XOPS_T *) ((caddr_t) s + soffset); \
	} ENDLOOP(IFFALSE, h); } while (_ZERO_)

/*
 * narrow rops
 *
 * Worker macro for CASE_OP; n -> n rop done in 16 bit units, used by
 * mem_rop when the dst or src linebytes is 2.  Each line writes at most
 * two dst shorts.  Uses op, speed, noskew and noprime; the other
 * arguments are ignored.
 *
 * Variables taken from the enclosing scope:
 *	d, s		UMPR_T pointers to the first dst and src shorts
 *	lm		mask for the first dst short of a line
 *	m		mask for the second (or only) dst short
 *	rskew		right shift applied to the current src short
 *	lskew		left shift applied to the previous src short
 *	sprev, scurr	previous and current src shorts
 *	sprime		read one extra src short at the start of a line
 *	sflush		use one more src short for the last dst short
 *	rdoffset	byte offset added to d at the end of each line
 *	rsoffset	byte offset added to s at the end of each line
 *	h		number of lines minus 1
 *
 * rskew == 0 selects the variant with neither skew nor priming.
 * CAT(noskew,T), CAT(noprime,T) and CAT(speed,T) name selector macros
 * defined outside this file; presumably they pick one of the three
 * _ROPN variants at compile time or with the run time tests shown --
 * confirm in mem_rop_impl_util.h.
 */
#define	ROPN(op,speed,dir,noskew,noprime,nomask,noedges) do { \
	CAT(noskew,T)(CAT(speed,T), rskew == 0, \
		_ROPN(op,IFTRUE,IFTRUE), \
		CAT(noprime,T)(CAT(speed,T), sprime == 0 && sflush == 0, \
			_ROPN(op,IFFALSE,IFTRUE), \
			_ROPN(op,IFFALSE,IFFALSE))); \
	} while (_ZERO_)

/*
 * _ROPN(op, noskew, noprime): one variant.
 *
 * Setup: lm and m are passed once through the op-specific OP_mmsk
 * macro, so "nothing to write" is tested against OP_mmsk applied to 0.
 *
 * In the skewed variants the src operand for a dst short is built as
 *	(sprev << lskew) | (scurr >> rskew)
 * The IFSHIFT32 wrapper supplies an explicit "sprev = 0" for a shift
 * count of 32 or more on builds where IFSHIFT32 selects its second
 * argument.
 *
 * At the end of each line d and s are advanced by rdoffset and
 * rsoffset bytes.
 */
#define	_ROPN(op,noskew,noprime) do { \
	lm = CAT(OP_mmsk,op)(lm); \
	m = CAT(OP_mmsk,op)(m); \
	do { \
		noskew(, \
			sprev = 0; \
			noprime(, \
				if (sprime) \
					sprev |= *s++; \
			) \
		) \
		if (lm != (UMPR_T) CAT(OP_mmsk,op)(0)) { \
			noskew( \
				*d = CAT(OP_mrop,op)(*d, lm, *s++); \
				d++; \
			, \
				sprev <<= lskew; \
				IFSHIFT32(, \
					if (lskew >= 32) \
						sprev = 0; \
				) \
				scurr = *s++; \
				*d = CAT(OP_mrop,op)(*d, lm, \
					sprev | scurr >> rskew); \
				d++; \
				sprev = scurr; \
			) \
		} \
		if (m != (UMPR_T) CAT(OP_mmsk,op)(0)) { \
			noskew( \
				*d = CAT(OP_mrop,op)(*d, m, *s); \
			, \
				sprev <<= lskew; \
				IFSHIFT32(, \
					if (lskew >= 32) \
						sprev = 0; \
				) \
				noprime(, \
					if (sflush) \
						sprev |= \
							*s >> rskew; \
				) \
				*d   = CAT(OP_mrop,op)(*d, m, \
					sprev); \
			) \
		} \
		d = (UMPR_T *) ((caddr_t) d + (int) rdoffset); \
		s = (UMPR_T *) ((caddr_t) s + (int) rsoffset); \
	} ENDLOOP(IFTRUE, h); } while (_ZERO_)

/* wide rops */
#define	ROPW(op,speed,dir,noskew,noprime,nomask,noedges) do { \
	CAT(noskew,T)(CAT(speed,T), rskew == 0, \
		CAT(noedges,T)(CAT(speed,T), edges == 0, \
			/* noskew && noedges implies noprime */ \
			CAT(nomask,T)(CAT(speed,T), m == ~0, \
				_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFTRUE,IFTRUE,IFTRUE,IFTRUE), \
				_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFTRUE,IFTRUE,IFFALSE,IFTRUE)), \
			/* noskew implies sprime == 0 */ \
			CAT(noprime,T)(CAT(speed,T), sflush == 0, \
				CAT(nomask,T)(CAT(speed,T), m == ~0, \
					_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFTRUE,IFTRUE,IFTRUE,IFFALSE), \
					_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFTRUE,IFTRUE,IFFALSE,IFFALSE)), \
				CAT(nomask,T)(CAT(speed,T), m == ~0, \
					_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFTRUE,IFFALSE,IFTRUE,IFFALSE), \
					_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFTRUE,IFFALSE,IFFALSE,IFFALSE)))), \
		CAT(noedges,T)(CAT(speed,T), edges == 0, \
			/* noedges implies sflush == 0 */ \
			CAT(noprime,T)(CAT(speed,T), sprime == 0, \
				CAT(nomask,T)(CAT(speed,T), m == ~0, \
					_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFFALSE,IFTRUE,IFTRUE,IFTRUE), \
					_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFFALSE,IFTRUE,IFFALSE,IFTRUE)), \
				CAT(nomask,T)(CAT(speed,T), m == ~0, \
					_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFFALSE,IFFALSE,IFTRUE,IFTRUE), \
					_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFFALSE,IFFALSE,IFFALSE,IFTRUE))), \
			CAT(nomask,T)(CAT(speed,T), m == ~0, \
				_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFFALSE,IFFALSE,IFTRUE,IFFALSE), \
				_ROPW(op,dir(IFTRUE,IFFALSE), \
					IFFALSE,IFFALSE,IFFALSE,IFFALSE)))); \
	} while (_ZERO_)

#define	_ROPW(op,dir,noskew,noprime,nomask,noedges) do { \
	noedges(, \
		lm = CAT(OP_mmsk,op)(lm); \
		rm = CAT(OP_mmsk,op)(rm); \
	) \
	nomask(, \
		m = CAT(OP_mmsk,op)(m); \
	) \
	do { \
	dir( \
		noskew(, \
			noprime(, \
				if (sprime) \
					sprev = *s++; \
			) \
		) \
		noedges(, \
			if (lm != CAT(OP_mmsk,op)(0)) { \
				noskew( \
					*d = CAT(OP_mrop,op)(*d, lm, *s++); \
					d++; \
				, \
					sprev <<= lskew; \
					IFSHIFT32(, \
						if (lskew >= 32) \
							sprev = 0; \
					) \
					scurr = *s++; \
					*d = CAT(OP_mrop,op)(*d, lm, \
						sprev | scurr >> rskew); \
					d++; \
					sprev = scurr; \
				) \
			} \
		) \
	, \
		noedges( \
			noskew(, \
				if (rskew != 0) \
					scurr = *--s; \
			) \
		, \
			if (rm != CAT(OP_mmsk,op)(0)) { \
				noskew( \
					*d = CAT(OP_mrop,op)(*d, rm, *s); \
				, \
					if (sflush) { \
						scurr = *s; \
						if (rskew != 0) { \
							scurr >>= rskew; \
							sprev = *--s; \
						} \
					} \
					else { \
						scurr = 0; \
						sprev = *--s; \
					} \
					IFSHIFT32(, \
						if (lskew >= 32) \
							sprev = 0; \
					) \
					*d = CAT(OP_mrop,op)(*d, rm, \
						sprev << lskew | scurr); \
					scurr = sprev; \
				) \
			} \
		noskew(, \
			else { \
				if (rskew != 0) \
					scurr = *--s; \
			} \
			if (rskew == 0) { \
				rskew = lskew; \
				lskew = 0; \
			} \
		) \
		) \
	) \
	noedges( \
		x = w; do \
	, \
		if ((x = w) >= 0) do \
	) { \
		noskew( \
			dir(, --s;) \
			dir(, --d;) \
			nomask( \
				*d = CAT(OP_urop,op)(*d, *s); \
			, \
				*d = CAT(OP_mrop,op)(*d, m, *s); \
			) \
			dir(s++; ,) \
			dir(d++; ,) \
		, \
			dir( \
				sprev <<= lskew; \
				IFSHIFT32(, \
					if (lskew >= 32) \
						sprev = 0; \
				) \
				scurr = *s++; \
				nomask( \
					*d = CAT(OP_urop,op)(*d, \
						sprev | scurr >> rskew); \
				, \
					*d = CAT(OP_mrop,op)(*d, m, \
						sprev | scurr >> rskew); \
				) \
				d++; \
				sprev = scurr; \
			, \
				scurr >>= rskew; \
				IFSHIFT32(, \
					if (rskew >= 32) \
						scurr = 0; \
				) \
				sprev = *--s; \
				--d; \
				nomask( \
					*d = CAT(OP_urop,op)(*d, \
						sprev << lskew | scurr); \
				, \
					*d = CAT(OP_mrop,op)(*d, m, \
						sprev << lskew | scurr); \
				) \
				scurr = sprev; \
			) \
		) \
	} ENDLOOP(IFTRUE, x); \
	dir( \
		noedges(, \
			if (rm != CAT(OP_mmsk,op)(0)) { \
				noskew( \
					*d = CAT(OP_mrop,op)(*d, rm, *s); \
				, \
					sprev <<= lskew; \
					IFSHIFT32(, \
						if (lskew >= 32) \
							sprev = 0; \
					) \
					noprime(, \
						if (sflush) \
							scurr = *s; \
					) \
					*d = CAT(OP_mrop,op)(*d, rm, \
						sprev | scurr >> rskew); \
				) \
			} \
		) \
	, \
		noedges(, \
			noskew(, \
				if (lskew == 0) { \
					lskew = rskew; \
					rskew = 0; \
				} \
			) \
			if (lm != CAT(OP_mmsk,op)(0)) { \
				noskew( \
					--d; \
					*d = CAT(OP_mrop,op)(*d, lm, *--s); \
				, \
					if (rskew == 0) \
						scurr = *--s; \
					scurr >>= rskew; \
					noprime(, \
						if (sprime) \
							sprev = *--s; \
					) \
					--d; \
					IFSHIFT32(, \
						if (lskew >= 32) \
							sprev = 0; \
					) \
					*d = CAT(OP_mrop,op)(*d, lm, \
						sprev << lskew | scurr); \
				) \
			} \
		) \
	) \
		d = (ROP_T *) ((caddr_t) d + (int) rdoffset); \
		s = (ROP_T *) ((caddr_t) s + (int) rsoffset); \
	} ENDLOOP(IFFALSE, h); } while (_ZERO_)

/**************************************************************/
/*
 * File-local copy of magic_hack_bgcolor.  mem_rop reloads it for every
 * fill or 1 -> n op and replicates it to the dst depth in step with
 * the color argument (including the half swap for a misaligned 32 bit
 * dst).
 */
static int magicolor;

mem_rop(dpr, dx, dy, dw, dh, op, spr, sx, sy)
Pixrect	*dpr;
int dx, dy, dw, dh;
int op;
Pixrect	*spr;
int sx, sy;
{
	int	color;		/* color argument */
	int	planes;		/* plane mask; ~0 means unmasked */

	MPR_T	*dimage;	/* dst image */
	ROP_T	*dfirst;	/* dst first word */
	int	dlinebytes;	/* dst linebytes */
	int	dxbits;		/* dst x, bits */
	int	dwbits;		/* dst width, bits */
	int	ddepth;		/* dst depth */
				/* -1, 0, 1, 2 -> 1, 8, 16, 32 bits */

	MPR_T	*simage;	/* src image */
	ROP_T	*sfirst;	/* src first word */
	int	slinebytes;	/* src linebytes */
	int	sxbits;		/* src x, bits */

	int	notrop;		/* not an n -> n rop */
	int	narrow;		/* dst or src is < 32 bits wide */
	int	rtolrop;	/* do rop from right to left */

	int	words;		/* complete dst words to be written */
	int	edges;		/* partial words on dst left, right */
	int	sprime;		/* src must be primed on each line */
	int	sflush;		/* src must be flushed on each line */

	int	skew;		/* src skew relative to dst */
	int	doffset;	/* dst line to line offset */
	int	soffset;	/* src line to line offset */

	ROP_T	lmask;		/* left edge dst mask */
	ROP_T	rmask;		/* right edge dst mask */
	int	origdx = dx;	/* original dx, dy before subsequent modifications */
	int	origdy = dy;
	int	origdw = dw;	/* original dw, dh before subsequent modifications */
	int	origdh = dh;
	int	origsx = sx;	/* original sx, sy before subsequent modifications */
	int	origsy = sy;

#if ROP_CHIP
	/* initialize the stupid ROP chip */
	if (mem_needinit != 0)
		mem_init();
#endif

	{
		register Pixrect *rdpr;
		register Pixrect *rspr;
		register struct mprp_data *dprd;
		register struct mpr_data *sprd;
		register int rop;
		register int rcolor;
		register int rddepth;
		register int rsdepth;

		/* 
		 * Make sure dst is a memory pixrect.
		 * (Another historical thing, probably unnecessary.)
		 */
		rdpr = dpr;
		if (MP_NOTMPR(rdpr))
    			return PIX_ERR;

		/*
		 * We support 1, 8, 16 and 32 bit dsts.
		 * Src must have the same depth as dst, or depth 1.
		 */
		rddepth = rdpr->pr_depth;

		rspr = spr;
		if (rspr != 0) {
			rsdepth = rspr->pr_depth;
			if (rddepth != rsdepth && rsdepth != 1)
				return PIX_ERR;
		}

		/* convert dst depth to pseudo-log code */
		switch (rddepth) {
		case 1:  rddepth = -1; break;
		case 8:  rddepth =  0; break;
		case 16: rddepth =  1; break;
		case 32: rddepth =  2; break;
		default: return PIX_ERR;
		}

		/* convert src depth */
		if (rsdepth == 1)
			rsdepth = -1;
		else
			rsdepth = rddepth;

		/* handle clipping */
		rop = op;
		if ((rop & PIX_DONTCLIP) == 0) {
			struct pr_subregion dst;
			struct pr_prpos src;
			extern int pr_clip();

			dst.pr = rdpr;
			dst.pos.x = dx;
			dst.pos.y = dy;
			dst.size.x = dw;
			dst.size.y = dh;

			src.pr = rspr;
			if (rspr != 0) {
				src.pos.x = sx;
				src.pos.y = sy;
			}

			(void) pr_clip(&dst, &src);

			dx = dst.pos.x;
			dy = dst.pos.y;
			dw = dst.size.x;
			dh = dst.size.y;

			if (rspr != 0) {
				sx = src.pos.x;
				sy = src.pos.y;
			}
		}

		/* check for zero-size op */
		if (dw <= 0 || dh <= 0)
			return 0;

		assert(dx>=0 && dy>=0);
		assert(dx+dw <= rdpr->pr_size.x);
		assert(dy+dh <= rdpr->pr_size.y);

		/* extract color and op code */
		rcolor = PIX_OPCOLOR(rop);
		rop = (rop >> 1) & 15;

		/* if op is PIX_DST, do nothing (quickly) */
		if (rop == PIX_OPDST)
			return 0;

		/* correct op for reverse video dst */
		dprd = mprp_d(rdpr);
		if (rddepth < 0 &&
			dprd->mpr.md_flags & MP_REVERSEVIDEO)
			rop = pr_reversedst[rop];

		/*
		 * Reduce ops which do not require source
		 * (PIX_CLR = 0, PIX_NOT(PIX_DST) = 5, PIX_SET = 15)
		 */
		switch (rop) {
		case PIX_OPCLR:
			rcolor = 0;
			rop = PIX_OPDST & PIX_OPSRC;
			rspr = 0;
			break;
		case PIX_OPNOT(PIX_OPDST):
			rcolor = ~0;
			rop = PIX_OPDST ^ PIX_OPSRC;
			rspr = 0;
			break;
		case PIX_OPSET:
			rcolor = ~0;
			rop = PIX_OPDST | PIX_OPSRC;
			rspr = 0;
			break;
		}

#ifdef undef	/* This is a curio that does nothing but shoot us in the foot -- jag */
#ifndef	KERNEL
		/*
		 * Identify weird type of fill: src is 1 x 1
		 * (Yet another historical curio.)
		 */
		if (rspr != 0 &&
			rspr->pr_size.x == 1 && rspr->pr_size.y == 1) {

			if (rsdepth < 0) {
				if (rcolor == 0)
					rcolor = ~rcolor;
				if (pr_get(rspr, 0, 0) == 0)
					rcolor = 0;
			}
			else
				rcolor = pr_get(rspr, 0, 0);

			rspr = 0;
		}
#endif	!KERNEL
#endif

		/* if it's a fill it can't be a 1 -> 8 */
		if (rspr == 0) 
			rsdepth = 0;
		/* not a fill, do some src things */
		else {
			register int rslinebytes;

			/* if spr is not an mpr, let it do the work */
			if (MP_NOTMPR(rspr))
				return (*(rspr)->pr_ops->pro_rop)(rdpr, 
					dx, dy, dw, dh, 
					op | PIX_DONTCLIP, 
					rspr, sx, sy);

			sprd = mpr_d(rspr);

			/* correct for src offset */
			sx += sprd->md_offset.x;
			sy += sprd->md_offset.y;

			if (rsdepth < 0) {
				/* save sx as bit number */
				sxbits = sx;

				/* correct op for reverse video src */
				if (sprd->md_flags & MP_REVERSEVIDEO)
					rop = pr_reversesrc[rop];

				/* clear rsdepth if 1 bit -> 1 bit op */
				if (rddepth < 0) {
					rsdepth = 0;
				}
			}
			else {
				/* convert sx to bit number */
				sxbits = sx << rsdepth + 3;

				/* clear rsdepth, not 1 bit -> n bit op */
				rsdepth = 0;
			}

			/* point to src pixrect image */
			simage = sprd->md_image;

			rslinebytes = sprd->md_linebytes;
			if (rsdepth == 0 && rslinebytes > 2) {
				/* compute first 32-bit word of src */
				sfirst = (ROP_T *) ((caddr_t) simage +
					pr_product(rslinebytes, sy));

				/* correct for misaligned pixrect data */
				if (((int) sfirst & 2) != 0) {
					sfirst = (ROP_T *) 
						((caddr_t) sfirst - 2);
					sxbits += 16;
				}

				sfirst = (ROP_T *) ((caddr_t) sfirst +
					(sxbits >> 5 << 2));

				sxbits &= 31;
			}
			else {
				/* compute first 16-bit word of src */
				sfirst = (ROP_T *) ((caddr_t) simage +
					pr_product(rslinebytes, sy) +
					(sxbits >> 4 << 1));

				sxbits &= 15;
			}

			/* save register variable */
			slinebytes = rslinebytes;
		}

		{
			register int rdlinebytes;

			/* correct for dst offset */
			dx += dprd->mpr.md_offset.x;
			dy += dprd->mpr.md_offset.y;

			/* convert dx, dw to bit numbers */
			if (rddepth < 0) {
				dxbits = dx;
				dwbits = dw;
			}
			else {
				dxbits = dx << rddepth + 3;
				dwbits = dw << rddepth + 3;
			}

			/* point to dst pixrect image */
			dimage = dprd->mpr.md_image;

			if ((rdlinebytes = dprd->mpr.md_linebytes) > 2) {
				/* compute first 32-bit word of dst */
				dfirst = (ROP_T *) ((caddr_t) dimage +
					pr_product(rdlinebytes, dy));

				/* correct for misaligned pixrect data */
				if (((int) dfirst & 2) != 0) {
					dfirst = (ROP_T *) 
						((caddr_t) dfirst - 2);
					dxbits += 16;

					/* update dx for 1->n rops */
					if (rsdepth != 0) {
						switch (rddepth) {
						case -1: dx += 16; break;
						case  0: dx +=  2; break;
						case  1: dx +=  1; break;
						/* can't re-align 1->32 */
						case  2:
#if SHORT_ALIGN
							dfirst = (ROP_T *) 
								((caddr_t) 
								dfirst + 2);
							dxbits -= 16;
							break;
#else SHORT_ALIGN
							return PIX_ERR;
#endif SHORT_ALIGN
						}

#if !SHORT_ALIGN
						/* 
						 * mung dimage so split rops
						 * will come out right even
						 * though dx was adjusted
						 */
						dimage--;
#endif !SHORT_ALIGN
					}
				}

				dfirst = (ROP_T *) ((caddr_t) dfirst +
					(dxbits >> 5 << 2));

				dxbits &= 31;
			}
			else {
				/* compute first 16-bit word of dst */
				dfirst = (ROP_T *) ((caddr_t) dimage +
					pr_product(rdlinebytes, dy) +
					(dxbits >> 4 << 1));

				dxbits &= 15;
			}

			/* save register variable */
			dlinebytes = rdlinebytes;
		}

#if !SHORT_ALIGN
		/* split rop if we have to */
		if (dh > 1 && dlinebytes > 2 &&
			(rspr == 0 || rsdepth != 0 ?
				dlinebytes & 2 :
				slinebytes > 2 &&
				(dlinebytes & 2 || slinebytes & 2))) {

			Pixrect	dpr1, spr1;
			struct mprp_data dprd1;
			struct mpr_data sprd1;

			/*
			 * pathological overlapped scrolls
			 * are done line-by-line
			 */
			if (rspr != 0 && simage == dimage &&
				dx + dw > sx && dx < sx + dw &&
				dy + dh > sy && dy < sy + dh &&
				(dy & 1) != (sy & 1)) {
				register int yinc = 1;

				dy = origdy;
				sy = origsy;
				dx = origdx;
				sx = origsx;
				op |= PIX_DONTCLIP;

				/* southward rops are bottom-to-top */
				if (dy > sy) {
					dy += dh - 1;
					sy += dh - 1;
					yinc = -yinc;
				}

				while (--dh >= 0) {
					(void) mem_rop(rdpr, dx, dy, dw, 1, 
						op, rspr, sx, sy);

					dy += yinc;
					sy += yinc;
				}
				return 0;
			}

			dpr1 = *rdpr;
			dpr1.pr_data = (caddr_t) &dprd1;
			dpr1.pr_size.x += (dx - origdx);
			dpr1.pr_size.y += (dy - origdy);

			if (rspr != 0) {
				spr1 = *rspr;
				spr1.pr_data = (caddr_t) &sprd1;
			}

			dprd1.mpr.md_image = (MPR_T *) 
				((caddr_t) dimage + dlinebytes);
			dprd1.mpr.md_linebytes = (dlinebytes <<= 1);
			dprd1.mpr.md_offset.x = 0;
			dprd1.mpr.md_offset.y = 0;
			dprd1.mpr.md_primary = 0;
			if ((dprd1.mpr.md_flags = dprd->mpr.md_flags) &
				MP_PLANEMASK)
				dprd1.planes = dprd->planes;

			if (dy & 1) 
				dprd1.mpr.md_image = (MPR_T *) 
					((caddr_t) dimage + dlinebytes);

			if (rspr != 0) {
				sprd1.md_image = (MPR_T *) 
					((caddr_t) simage + slinebytes);
				sprd1.md_linebytes = (slinebytes <<= 1);
				sprd1.md_offset.x = 0;
				sprd1.md_offset.y = 0;
				sprd1.md_primary = 0;
				sprd1.md_flags = sprd->md_flags;

				if (sy & 1) 
					sprd1.md_image = (MPR_T *) 
						((caddr_t) simage + 
						slinebytes);
			}

			/* do odd lines */
			(void) mem_rop(&dpr1, dx, dy >> 1, dw, dh >> 1,
				op | PIX_DONTCLIP , 
				rspr ? &spr1 : 0, sx, sy >> 1);

			/* do even lines */
			dh = (dh + 1) >> 1;
		}
#endif !SHORT_ALIGN

		/* save register variable */
		op = rop;

		/*
		 * If op is fill or 1 bit -> n bits, replicate color
		 * bits for dst depth.
		 */
		if (rspr == 0 || rsdepth != 0) {
			/* if 1 -> n w/color 0, use ~0 */
			magicolor = magic_hack_bgcolor;
#ifdef undef
			if (rsdepth != 0 && rcolor == 0)
				rcolor = ~0;
			else 
#endif
				switch (rddepth) {
				case -1: /* depth 1 */
					if (rcolor != 0)
						rcolor = ~0;
					break;

				case 0:	/* depth 8 */
					magicolor = magicolor & 0xff |
							magicolor << 8;
					rcolor = rcolor & 0xff | 
						rcolor << 8;
					/* fall through */
				case 1: /* depth 16 */
					magicolor = magicolor & 0xffff |
						magicolor << 16;
					rcolor = rcolor & 0xffff | 
						rcolor << 16;
					break;

				case 2: /* depth 32 */
				/* if dst. is misaligned, swap halves */
					if (dxbits != 0) {
						rcolor = rcolor << 16 |
							rcolor >> 16;
						magicolor = magicolor << 16 |
							magicolor >> 16;
					}
				}

			/* save register variable */
			color = rcolor;

			/* flag non-rop */
			notrop = ~0;
		}
		else
			notrop = 0;

		/* see if dst pixrect has a plane mask */
		if (rddepth >= 0 &&
			dprd->mpr.md_flags & MP_PLANEMASK) {

			register int rplanes;
			static int allplanes[] = {
				0x000000ff, 0x0000ffff, 0xffffffff
			};

			/* bit mask for modifying all planes */
			rplanes = allplanes[rddepth];

			/* if all dst planes are being modified, flag it */
			if ((rplanes & dprd->planes) == rplanes)
				rplanes = ~0;
			/* if no planes are being modified, return */
			else if ((rplanes &= dprd->planes) == 0)
				return 0;

			/* replicate plane mask for dst depth */
			switch (rddepth) {

			case 0:	/* depth 8 */
				rplanes |= rplanes << 8;
				/* fall through */
			case 1: /* depth 16 */
				rplanes |= rplanes << 16;
				break;

			case 2: /* depth 32 */
				/* if dst. is misaligned, swap halves */
				if (dxbits != 0)
					rplanes = rplanes << 16 |
						rplanes >> 16;
			}

			/* save register variable */
			planes = rplanes;
		}
		else
			/* no plane mask, modify all planes */
			planes = ~0;


		/* save register variables */
		ddepth = rddepth;
		spr = rspr;
	}

	/* calculate masks, skews, etc. */
	{
		register int lbit, rbit;
		register ROP_T rlmask, rrmask;
		register int rrskew;

		lbit = dxbits;
		rbit = lbit + dwbits;

		words = ((rbit >> 5) - ((lbit + 31) >> 5));
		if (words < 0)
			words = 0;

		rlmask = 0;
		if (lbit != 0)
			rlmask = (ROP_T) ~0 >> lbit;

		rrmask = (ROP_T) ~0;

		/* if dst is one word, merge left mask into right mask */
		if (rbit < 32 && rlmask != 0) {
			rrmask = rlmask;
			rlmask = 0;
		}

		if ((rbit &= 31) == 0)
			rbit = 32;
		else
			rrmask &= (ROP_T) ~0 << (32 - rbit);

		if (rrmask == (ROP_T) ~0)
			rrmask = 0;

		/* fills and 1 -> n ops */
		if (notrop) {
			
			/* classify as narrow or wide */
			if (dlinebytes == 2) {
				narrow = ~0;
				rrmask >>= 16;
			}
			else {
				narrow = 0;
				doffset = dlinebytes - (words << 2);
				if (rlmask != 0)
					doffset -= 4;
			}

			/* 1 -> n specific stuff */
			if (spr != 0) {

				rrskew = 0;
				sprime = 0;

				/* narrow 1 -> n ops */
				if (narrow) {
					soffset = slinebytes - 2;

					/* src straddles two shorts */
					if (ddepth == 0 && sxbits == 15) {
						/* need left half ? */
						if (lbit > 15) {
							/* no */
							sxbits = 0;
							sfirst = (ROP_T *)
								((XOPS_T *) 
								sfirst + 1);
						}
						else {
							sxbits = -1;

							/* need right half ? */
							if (rbit <= 16)  {
								/* no */
								sprime--;
							}
							else {
								sprime++;
								soffset -= 2;
							}
						}
					}
					rrskew = 16 - sxbits - (2 >> ddepth);
				}
				/* wide 1 -> n ops */
				else {
					/* compute # bits per src field */
					register int bits = (4 >> ddepth);

					/* adjust dw for src offset */
					dw += sxbits;

					/* compute src words/line */
					soffset = slinebytes -
						((dw + 15) >> 4 << 1);

					/*
				 	 * For single dst word 1 -> n ops
				 	 * we want to use the left mask only.
				 	 * This is inconsistent with rops and
				 	 * should be fixed.
				 	 */
					if (rlmask == 0 && words == 0) {
						rlmask = rrmask;
						rrmask = 0;
						doffset -= 4;
					}

					/*
					 * adjust sx for partial
					 * write of first dst word on left
					 */
					if (rlmask != 0) 
						sxbits -= dx & bits - 1;

					/* 
					 * compute how far to shift to get 
					 * the first src field in the low
					 * part of the source word
					 */
					rrskew = 16 - sxbits;

					/* 
					 * if a src field can be split 
					 * across words, we may have to
					 * read an extra word at the
					 * beginning or end of line
					 */
					sflush = 0;

					/* at start of line */
					if ((sxbits & (bits - 1)) != 0 &&
						rrskew < bits)
						sprime++;

					/* at end of line */
					if ((dw &= 15) != 0 &&
						dw < bits)
						sflush++;

					/*
					 * adjust shift to valid range
					 */
					rrskew = rrskew - bits & 15;

					if (rlmask == 0) {
						/*
					 	 * adjust for initial subtract
					 	 * in center/right code
					 	 */
						rrskew += bits;

						/*
					 	 * force read of src since 
					 	 * it won't be done by the
						 * left partial code
					 	 */
						if (rrskew >= bits)
							rrskew -= 16;

						/* this may be redundant */
						if (sprime == 0 && words == 0)
							sflush++;
					}
				}
			}
		}

		/* rop specific stuff */
		else {
			sprime = 0;
			sflush = 0;
			rrskew = lbit - sxbits;
			if (dlinebytes == 2 || slinebytes == 2) {
				narrow = ~0;

				/* 
				 * if narrow dst may have to skip
				 * first short of src
				 */
				if (sxbits >= 16) {
					rrskew += 16;
					sfirst = (ROP_T *)
						((MPR_T *) sfirst + 1);
				}

				if (rrskew < 0)
					rrskew += 16;

				doffset = dlinebytes;
				soffset = slinebytes;

				/* right long only? */
				if (rlmask == 0) {
					/* move high part of rmask to lmask */
					rlmask = rrmask >> 16;
					rrmask &= 0xffff;
				}
				else {
					rlmask &= 0xffff;
					rrmask >>= 16;
					dfirst = (ROP_T *)
						((MPR_T *) dfirst + 1);
					rrskew -= 16;
				}

				/* right short only -- bump dst addr */
				if (rlmask == 0) {
					dfirst = (ROP_T *)
						((MPR_T *) dfirst + 1);
					lbit &= 15;
					rrskew &= 15;
				}

				/* left short only -- use right code */
				if (rrmask == 0) {
					rrmask = rlmask;
					rlmask = 0;
				}

				if (lbit < rrskew) {
					soffset -= 2;
					sprime++;
				}

				if (rlmask != 0)  {
					doffset -= 2;
					soffset -= 2;
				}

				if (rbit > rrskew)
					sflush++;
			}
			else {
				narrow = 0;

				if (rrskew < 0)
					rrskew += 32;

				doffset = dlinebytes - (words << 2);
				soffset = slinebytes - (words << 2);

				if (rlmask != 0) {
					doffset -= 4;
					soffset -= 4;
				}
				if (lbit < rrskew) {
					sprime++;
					soffset -= 4;
				}
				if (rrmask != 0) {
					if (rbit > rrskew)
						sflush++;
				}
			}

			/* assume left to right rop */
			rtolrop = 0;

			/*
			 * Rops are normally done top-to-bottom 
			 * left-to-right, but if src and dst are same 
			 * pixrect and overlap horizontally, it may be
			 * necessary to change this.
			 */
			if (dimage == simage &&
				dx + dw > sx && dx < sx + dw) {
				register int offset;

				/*
				 * If src and dst overlap vertically,
				 * do southward rops from bottom to top.
				 */
				if (dy > sy) {
					if (dy < sy + dh) {
						/*
					 	 * line to line adjustment
					 	 */
						offset = dlinebytes << 1;

						doffset -= offset;
						soffset -= offset;

						/* 
					 	 * adjust dst and src to 
					 	 * beginning of last line
					 	 */
						offset >>= 1;
						offset = pr_product(offset, 
							dh - 1);

						dfirst = (ROP_T *) ((caddr_t) 
							dfirst + offset);
						sfirst = (ROP_T *) ((caddr_t) 
							sfirst + offset);
					}
				}
				/* 
				 * If src and dst overlap horizontally,
				 * do wide due east rops from right to left.
				 */
				else if (dy == sy && 
					dx > sx && 
					narrow == 0 && 
					(rlmask != 0 || words > 0)) {

					/* flag right to left rop */
					rtolrop++;

					/*
					 * adjust dst and src to end of
					 * first line
					 */
					offset = doffset - dlinebytes;

					dfirst = (ROP_T *) 
						((caddr_t) dfirst - offset);

					doffset -= offset << 1;

					offset = soffset - slinebytes;

					sfirst = (ROP_T *) 
						((caddr_t) sfirst - offset);

					soffset -= offset << 1;
				}
			}
		}

		/* save register variables */
		skew  = rrskew;
		edges = rlmask | rrmask;
#ifdef lint
		/* 
		 * Depending on CASE_OP options, edges may never be used.
		 * Frob it here to keep lint happy.
		 */
		if (edges != 0)
			edges++;
#endif
		lmask = rlmask;
		rmask = rrmask;
	}

	if (notrop) {
		register ROP_T m = planes;
		register ROP_T k = color;

		/* fills */
		if (spr == 0) {
			register short h = dh - 1;

			if (narrow) {
				register UMPR_T *d = (UMPR_T *) dfirst;

				m &= rmask;
				CASE_OP(op, FILLN, L_TO_R, NEVER, NEVER, NEVER, NEVER);
			}
			else {
				register ROP_T *d = dfirst;
				register short x;
				register ROP_T lm = lmask & m;
				register ROP_T rm = rmask & m;
				register INT_T w = (INT_T) (words - 1);
				register INT_T rdoffset = (INT_T) doffset;
				register ROP_T lk, rk;

#ifndef	KERNEL
				CASE_OP(op, FILLW, L_TO_R, 
					NEVER, NEVER, OPTION, OPTION);
#else
				CASE_OP(op, FILLW, L_TO_R, 
					NEVER, NEVER, OPTION, NEVER);
#endif
			}
		}
		/* 1 bit -> n bit ops */
		else {
			register XOPS_T *s = (XOPS_T *) sfirst;

			static UMPR_T 
			mtabs8[4] = {
				0x0000, 0x00ff, 0xff00, 0xffff
			},
			mtabs16[4] = {
				0x0000, 0xffff, 0x0000, 0xffff
			},
			*mtabs[2] = {
				mtabs8, mtabs16
			};

			static ROP_T
			mtabl8[16] = {
				0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
				0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
				0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,
				0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff
			},
			mtabl16[16] = {
				0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff,
				0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff,
				0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff,
				0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff
			},
			mtabl32[16] = {
				0x00000000, 0xffffffff, 0x00000000, 0xffffffff,
				0x00000000, 0xffffffff, 0x00000000, 0xffffffff,
				0x00000000, 0xffffffff, 0x00000000, 0xffffffff,
				0x00000000, 0xffffffff, 0x00000000, 0xffffffff
			},
			*mtabl[3] = {
				mtabl8, mtabl16, mtabl32
			};

			if (narrow) {
				register UMPR_T *d = (UMPR_T *) dfirst;
				register ROP_T scurr;
				register UMPR_T *smask = mtabs[ddepth];
				register unsigned sshift = skew;
				register short h = dh - 1;
#ifdef mc68020
/* Sun-3 compiler bug? -Don */
/* "mem_rop.c", line 1599: compiler error: no table entry for op REG */
/*				register int rsprime = sprime; */
				int rsprime = sprime;
#else
				register int rsprime = sprime;
#endif
				register INT_T rsoffset = (INT_T) soffset;

				m &= rmask;

#ifndef KERNEL
				CASE_OP(op, XOPN, L_TO_R, 
					NEVER, OPTION, OPTION, NEVER);
#else
				CASE_OP(op, XOPN, L_TO_R, 
					NEVER, NEVER, NEVER, NEVER);
#endif
			}
			else {
				register ROP_T *d = dfirst;
				register ROP_T scurr;
				register ROP_T *smask = mtabl[ddepth];
				register sshift;
				register sbits = 4 >> ddepth;
				register short x;
				register ROP_T lm = lmask & m;
				register ROP_T rm = rmask & m;
				register short h = dh - 1;
				register short w = words - 1;

#ifndef KERNEL
				CASE_OP(op, XOPW, L_TO_R, 
					NEVER, OPTION, OPTION, OPTION);
#else
				CASE_OP(op, XOPW, L_TO_R, 
					NEVER, OPTION, NEVER, NEVER);
#endif
			}
		}
	}

	/* n bit to n bit ops */
	else {
		register ROP_T m = planes;
		register unsigned rskew = skew;
		register unsigned lskew;
		register INT_T rdoffset = (INT_T) doffset;
		register INT_T rsoffset = (INT_T) soffset;

		if (narrow) {
			register UMPR_T *d = (UMPR_T *) dfirst;
			register UMPR_T *s = (UMPR_T *) sfirst;
			register UMPR_T sprev, scurr;
			register short h = dh - 1;
			register UMPR_T lm = lmask & m;

			/* use register variable m for right mask */
			m &= rmask;

			lskew = 16 - rskew;

			CASE_OP(op, ROPN, L_TO_R, 
				NEVER, NEVER, OPTION, NEVER);
		}
		else {
			register ROP_T *d = dfirst;
			register ROP_T *s = sfirst;
			register ROP_T sprev, scurr;
			register short x;
			register ROP_T lm = lmask & m;
			register ROP_T rm = rmask & m;
			register short h = dh - 1;
			register short w = words - 1;

			lskew = 32 - rskew;

			if (rtolrop == 0) 
#ifndef KERNEL
				CASE_OP(op, ROPW, L_TO_R, 
					OPTION, OPTION, ALWAYS, NEVER);
#else
				CASE_OP(op, ROPW, L_TO_R, 
					NEVER, NEVER, NEVER, NEVER);
#endif
			else
#ifndef KERNEL
				CASE_OP(op, ROPW, R_TO_L, 
					NEVER, NEVER, OPTION, NEVER);
#else
				CASE_OP(op, ROPW, R_TO_L, 
					NEVER, NEVER, NEVER, NEVER);
#endif
		}
	}

	/* successfully did something */
	return 0;
}