/* Optimized pixel-doubling graphics blit routine for PPC systems */
/* written by Jon Watte <pixelblit@mindcontrol.org> */
/* This loop does exactly 0 error checking. */
/* Do that somewhere else, before calling this routine. */
/* Make sure the destination has space for the doubled */
/* source image, for instance! */
/* While this code may work with different compilers and CPUs, */
/* it is optimized for getting the most out of PPC systems with */
/* the Metrowerks CodeWarrior C/C++ compiler. YMMV. If you don't */
/* have a 64-bit path between CPU and graphics memory, or your */
/* CPU doesn't do native 64-bit data transfers when doing I/O on */
/* doubles, this code will probably not be optimal. */
/* The code is tested to work on the Intel version of BeOS, too, though. */
/* Copyright © 1997 Jon Watte - permission granted to */
/* use at your own risk provided you give me credit in */
/* on-screen display and printed documentation. */
/* Date: 1998-04-28 */

#include <Debug.h>

/* put these prototypes in a header, like pixel_doubling_blit.h */
extern	void	pixel_doubling_blit_8(
					const void * source_bitmap,
					int          source_rowbytes,
					int          source_width,
					int          source_height,
					void *       dest_bitmap,
					int          dest_rowbytes);
extern	void	pixel_doubling_blit_32(
					const void * source_bitmap,
					int          source_rowbytes,
					int          source_width,
					int          source_height,
					void *       dest_bitmap,
					int          dest_rowbytes);

#pragma global_optimizer on
#pragma peephole on

void
pixel_doubling_blit_8(
	const void * source_bitmap,
	int          source_rowbytes,
	int          source_width,
	int          source_height,
	void *       dest_bitmap,
	int          dest_rowbytes)
{
	/*	ensure alignment that we depend on	*/
	ASSERT(!(((long)source_bitmap)&3) && !(((long)dest_bitmap)&7));
	double temp[1];
	register double temp2;
	while (--source_height >= 0)
	{
		unsigned long * src = ((unsigned long *)source_bitmap)-1;
		double * dst1 = ((double *)dest_bitmap)-1;
		double * dst2 = dst1+dest_rowbytes/8;
		int w = source_width;
		while ((w-=4) >= 0)
		{

		/* This loop might be optimizable by assembling the */
		/* "temp" double in another way; check profiling and */
		/* disassembly to make sure. I think this is close to */
		/* optimal, though. */

		/* If we're in little-endian mode, the bytes go the other way */
#if __LITTLE_ENDIAN__
			unsigned long pixx = *(++src);
			unsigned char * mid = (unsigned char *)&temp[0];
			*(mid++) = pixx;
			*(mid++) = pixx;
			pixx >>= 8;
			*(mid++) = pixx;
			*(mid++) = pixx;
			pixx >>= 8;
			*(mid++) = pixx;
			*(mid++) = pixx;
			pixx >>= 8;
			*(mid++) = pixx;
			*(mid++) = pixx;
#else
			unsigned long pixx = *(++src);
			unsigned char * mid = (unsigned char *)&temp[1];
			*(--mid) = pixx;
			*(--mid) = pixx;
			pixx >>= 8;
			*(--mid) = pixx;
			*(--mid) = pixx;
			pixx >>= 8;
			*(--mid) = pixx;
			*(--mid) = pixx;
			pixx >>= 8;
			*(--mid) = pixx;
			*(--mid) = pixx;
#endif

		/* This is the kick-ass magic part! Don't touch! */

			temp2 = temp[0];
			*(++dst1) = temp2;
			*(++dst2) = temp2;
		} 
		source_bitmap = ((char *)source_bitmap)+source_rowbytes;
		dest_bitmap = ((char *)dest_bitmap)+2*dest_rowbytes;
	}
}

void
pixel_doubling_blit_32(
	const void * source_bitmap,
	int          source_rowbytes,
	int          source_width,
	int          source_height,
	void *       dest_bitmap,
	int          dest_rowbytes)
{
	/*	ensure alignment that we depend on	*/
	ASSERT(!(((long)source_bitmap)&3) && !(((long)dest_bitmap)&7));
	double temp[1];
	register double temp2;
	while (--source_height >= 0)
	{
		unsigned long * src = ((unsigned long *)source_bitmap)-1;
		double * dst1 = ((double *)dest_bitmap)-1;
		double * dst2 = dst1+dest_rowbytes/8;
		int w = source_width;
		while (--w >= 0)
		{

		/* This loop might be optimizable by assembling the */
		/* "temp" double in another way; check profiling and */
		/* disassembly to make sure. I think this is close to */
		/* optimal, though. */

			unsigned long pixx = *(++src);
			unsigned long * mid = (unsigned long *)&temp[1];
			*(--mid) = pixx;
			*(--mid) = pixx;

		/* This is the kick-ass magic part! Don't touch! */

			temp2 = temp[0];
			*(++dst1) = temp2;
			*(++dst2) = temp2;
		} 
		source_bitmap = ((char *)source_bitmap)+source_rowbytes;
		dest_bitmap = ((char *)dest_bitmap)+2*dest_rowbytes;
	}
}
