Add mesa headers from git commit fa7829c36b78b8ecc42238cbc0a02d1059320c77
This commit is contained in:
parent
810c434324
commit
535b1cb0ab
|
@ -0,0 +1,145 @@
|
|||
/**************************************************************************
|
||||
*
|
||||
* Copyright 2007-2013 VMware, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sub license, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the
|
||||
* next paragraph) shall be included in all copies or substantial portions
|
||||
* of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
|
||||
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
|
||||
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#ifndef _C99_COMPAT_H_
|
||||
#define _C99_COMPAT_H_
|
||||
|
||||
|
||||
/*
|
||||
* MSVC hacks.
|
||||
*/
|
||||
#if defined(_MSC_VER)
|
||||
/*
|
||||
* Visual Studio 2012 will complain if we define the `inline` keyword, but
|
||||
* actually it only supports the keyword on C++.
|
||||
*
|
||||
* To avoid this the _ALLOW_KEYWORD_MACROS must be set.
|
||||
*/
|
||||
# if (_MSC_VER >= 1700) && !defined(_ALLOW_KEYWORD_MACROS)
|
||||
# define _ALLOW_KEYWORD_MACROS
|
||||
# endif
|
||||
|
||||
/*
|
||||
* XXX: MSVC has a `__restrict` keyword, but it also has a
|
||||
* `__declspec(restrict)` modifier, so it is impossible to define a
|
||||
* `restrict` macro without interfering with the latter. Furthermore the
|
||||
* MSVC standard library uses __declspec(restrict) under the _CRTRESTRICT
|
||||
* macro. For now resolve this issue by redefining _CRTRESTRICT, but going
|
||||
* forward we should probably stop using restrict, especially
|
||||
* considering that our code does not obey strict aliasing rules anyway.
|
||||
*/
|
||||
# include <crtdefs.h>
|
||||
# undef _CRTRESTRICT
|
||||
# define _CRTRESTRICT
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* C99 inline keyword
|
||||
*/
|
||||
#ifndef inline
|
||||
# ifdef __cplusplus
|
||||
/* C++ supports inline keyword */
|
||||
# elif defined(__GNUC__)
|
||||
# define inline __inline__
|
||||
# elif defined(_MSC_VER)
|
||||
# define inline __inline
|
||||
# elif defined(__ICL)
|
||||
# define inline __inline
|
||||
# elif defined(__INTEL_COMPILER)
|
||||
/* Intel compiler supports inline keyword */
|
||||
# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
|
||||
# define inline __inline
|
||||
# elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
|
||||
/* C99 supports inline keyword */
|
||||
# elif (__STDC_VERSION__ >= 199901L)
|
||||
/* C99 supports inline keyword */
|
||||
# else
|
||||
# define inline
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* C99 restrict keyword
|
||||
*
|
||||
* See also:
|
||||
* - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html
|
||||
*/
|
||||
#ifndef restrict
|
||||
# if (__STDC_VERSION__ >= 199901L)
|
||||
/* C99 */
|
||||
# elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
|
||||
/* C99 */
|
||||
# elif defined(__GNUC__)
|
||||
# define restrict __restrict__
|
||||
# elif defined(_MSC_VER)
|
||||
# define restrict __restrict
|
||||
# else
|
||||
# define restrict /* */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* C99 __func__ macro
|
||||
*/
|
||||
#ifndef __func__
|
||||
# if (__STDC_VERSION__ >= 199901L)
|
||||
/* C99 */
|
||||
# elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
|
||||
/* C99 */
|
||||
# elif defined(__GNUC__)
|
||||
# if __GNUC__ >= 2
|
||||
# define __func__ __FUNCTION__
|
||||
# else
|
||||
# define __func__ "<unknown>"
|
||||
# endif
|
||||
# elif defined(_MSC_VER)
|
||||
# if _MSC_VER >= 1300
|
||||
# define __func__ __FUNCTION__
|
||||
# else
|
||||
# define __func__ "<unknown>"
|
||||
# endif
|
||||
# else
|
||||
# define __func__ "<unknown>"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/* Simple test case for debugging */
|
||||
#if 0
|
||||
static inline const char *
|
||||
test_c99_compat_h(const void * restrict a,
|
||||
const void * restrict b)
|
||||
{
|
||||
return __func__;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _C99_COMPAT_H_ */
|
|
@ -0,0 +1,445 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
|
||||
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* \file compiler.h
|
||||
* Compiler-related stuff.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef COMPILER_H
|
||||
#define COMPILER_H
|
||||
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#if defined(__alpha__) && defined(CCPML)
|
||||
#include <cpml.h> /* use Compaq's Fast Math Library on Alpha */
|
||||
#else
|
||||
#include <math.h>
|
||||
#endif
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <float.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "c99_compat.h" /* inline, __func__, etc. */
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Get standard integer types
|
||||
*/
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
/**
|
||||
* Sun compilers define __i386 instead of the gcc-style __i386__
|
||||
*/
|
||||
#ifdef __SUNPRO_C
|
||||
# if !defined(__i386__) && defined(__i386)
|
||||
# define __i386__
|
||||
# elif !defined(__amd64__) && defined(__amd64)
|
||||
# define __amd64__
|
||||
# elif !defined(__sparc__) && defined(__sparc)
|
||||
# define __sparc__
|
||||
# endif
|
||||
# if !defined(__volatile)
|
||||
# define __volatile volatile
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* finite macro.
|
||||
*/
|
||||
#if defined(_MSC_VER)
|
||||
# define finite _finite
|
||||
#elif defined(__WATCOMC__)
|
||||
# define finite _finite
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Disable assorted warnings
|
||||
*/
|
||||
#if !defined(OPENSTEP) && (defined(_WIN32) && !defined(__CYGWIN__)) && !defined(BUILD_FOR_SNAP)
|
||||
# if !defined(__GNUC__) /* mingw environment */
|
||||
# pragma warning( disable : 4068 ) /* unknown pragma */
|
||||
# pragma warning( disable : 4710 ) /* function 'foo' not inlined */
|
||||
# pragma warning( disable : 4711 ) /* function 'foo' selected for automatic inline expansion */
|
||||
# pragma warning( disable : 4127 ) /* conditional expression is constant */
|
||||
# if defined(MESA_MINWARN)
|
||||
# pragma warning( disable : 4244 ) /* '=' : conversion from 'const double ' to 'float ', possible loss of data */
|
||||
# pragma warning( disable : 4018 ) /* '<' : signed/unsigned mismatch */
|
||||
# pragma warning( disable : 4305 ) /* '=' : truncation from 'const double ' to 'float ' */
|
||||
# pragma warning( disable : 4550 ) /* 'function' undefined; assuming extern returning int */
|
||||
# pragma warning( disable : 4761 ) /* integral size mismatch in argument; conversion supplied */
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
#if defined(__WATCOMC__)
|
||||
# pragma disable_message(201) /* Disable unreachable code warnings */
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* XXX: Use standard `inline` keyword instead */
|
||||
#ifndef INLINE
|
||||
# define INLINE inline
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* PUBLIC/USED macros
|
||||
*
|
||||
* If we build the library with gcc's -fvisibility=hidden flag, we'll
|
||||
* use the PUBLIC macro to mark functions that are to be exported.
|
||||
*
|
||||
* We also need to define a USED attribute, so the optimizer doesn't
|
||||
* inline a static function that we later use in an alias. - ajax
|
||||
*/
|
||||
/*
 * PUBLIC and USED are guarded independently so that a build system which
 * pre-defines one of them (e.g. -DPUBLIC=...) still gets a definition of
 * the other.
 */
#ifndef PUBLIC
#  if (defined(__GNUC__) && __GNUC__ >= 4) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
#    define PUBLIC __attribute__((visibility("default")))
#  else
#    define PUBLIC
#  endif
#endif

#ifndef USED
#  if (defined(__GNUC__) && __GNUC__ >= 4) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
#    define USED __attribute__((used))
#  else
#    define USED
#  endif
#endif
|
||||
|
||||
|
||||
/**
|
||||
* __builtin_expect macros
|
||||
*/
|
||||
#if !defined(__GNUC__)
|
||||
# define __builtin_expect(x, y) (x)
|
||||
#endif
|
||||
|
||||
#ifndef likely
|
||||
# ifdef __GNUC__
|
||||
# define likely(x) __builtin_expect(!!(x), 1)
|
||||
# define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
# else
|
||||
# define likely(x) (x)
|
||||
# define unlikely(x) (x)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* XXX: Use standard `__func__` instead */
|
||||
#ifndef __FUNCTION__
|
||||
# define __FUNCTION__ __func__
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Either define MESA_BIG_ENDIAN or MESA_LITTLE_ENDIAN, and CPU_TO_LE32.
|
||||
* Do not use these unless absolutely necessary!
|
||||
* Try to use a runtime test instead.
|
||||
* For now, only used by some DRI hardware drivers for color/texel packing.
|
||||
*/
|
||||
#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
|
||||
#if defined(__linux__)
|
||||
#include <byteswap.h>
|
||||
#define CPU_TO_LE32( x ) bswap_32( x )
|
||||
#elif defined(__APPLE__)
|
||||
#include <CoreFoundation/CFByteOrder.h>
|
||||
#define CPU_TO_LE32( x ) CFSwapInt32HostToLittle( x )
|
||||
#elif (defined(_AIX) || defined(__blrts))
|
||||
/**
 * Reverse the byte order of a 32-bit value (big-endian host to
 * little-endian), for platforms in this branch that have no system
 * byte-swap header.
 *
 * Declared with uint32_t rather than GLuint: this header includes
 * <stdint.h> itself but no GL header, so GLuint is not guaranteed to be
 * declared here.
 *
 * \param x  value in host byte order
 * \return   x with its four bytes in reversed order
 */
static inline uint32_t CPU_TO_LE32(uint32_t x)
{
   return (((x & 0x000000ffu) << 24) |
           ((x & 0x0000ff00u) <<  8) |
           ((x & 0x00ff0000u) >>  8) |
           ((x & 0xff000000u) >> 24));
}
|
||||
#elif defined(__OpenBSD__)
|
||||
#include <sys/types.h>
|
||||
#define CPU_TO_LE32( x ) htole32( x )
|
||||
#else /*__linux__ */
|
||||
#include <sys/endian.h>
|
||||
#define CPU_TO_LE32( x ) bswap32( x )
|
||||
#endif /*__linux__*/
|
||||
#define MESA_BIG_ENDIAN 1
|
||||
#else
|
||||
#define CPU_TO_LE32( x ) ( x )
|
||||
#define MESA_LITTLE_ENDIAN 1
|
||||
#endif
|
||||
#define LE32_TO_CPU( x ) CPU_TO_LE32( x )
|
||||
|
||||
|
||||
|
||||
#if !defined(CAPI) && defined(_WIN32) && !defined(BUILD_FOR_SNAP)
|
||||
#define CAPI _cdecl
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Create a macro so that asm functions can be linked into compilers other
|
||||
* than GNU C
|
||||
*/
|
||||
#ifndef _ASMAPI
|
||||
#if defined(_WIN32) && !defined(BUILD_FOR_SNAP)/* was: !defined( __GNUC__ ) && !defined( VMS ) && !defined( __INTEL_COMPILER )*/
|
||||
#define _ASMAPI __cdecl
|
||||
#else
|
||||
#define _ASMAPI
|
||||
#endif
|
||||
#ifdef PTR_DECL_IN_FRONT
|
||||
#define _ASMAPIP * _ASMAPI
|
||||
#else
|
||||
#define _ASMAPIP _ASMAPI *
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_X86_ASM
|
||||
#define _NORMAPI _ASMAPI
|
||||
#define _NORMAPIP _ASMAPIP
|
||||
#else
|
||||
#define _NORMAPI
|
||||
#define _NORMAPIP *
|
||||
#endif
|
||||
|
||||
|
||||
/* Turn off macro checking systems used by other libraries */
|
||||
#ifdef CHECK
|
||||
#undef CHECK
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* ASSERT macro
|
||||
*/
|
||||
#if !defined(_WIN32_WCE)
|
||||
#if defined(BUILD_FOR_SNAP) && defined(CHECKED)
|
||||
# define ASSERT(X) _CHECK(X)
|
||||
#elif defined(DEBUG)
|
||||
# define ASSERT(X) assert(X)
|
||||
#else
|
||||
# define ASSERT(X)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Static (compile-time) assertion.
|
||||
* Basically, use COND to dimension an array. If COND is false/zero the
|
||||
* array size will be -1 and we'll get a compilation error.
|
||||
*/
|
||||
#define STATIC_ASSERT(COND) \
|
||||
do { \
|
||||
(void) sizeof(char [1 - 2*!(COND)]); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/**
 * PRINTFLIKE(f, a): annotate a function whose argument number \p f is a
 * printf-style format string and whose variadic arguments start at
 * argument number \p a, so GCC can type-check the call sites.
 * Expands to nothing on other compilers.
 *
 * The defined() test keeps -Wundef quiet on non-GNU compilers and matches
 * the way __GNUC__ is tested for the PUBLIC/USED macros.
 */
#if defined(__GNUC__) && (__GNUC__ >= 3)
#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a)))
#else
#define PRINTFLIKE(f, a)
#endif
|
||||
|
||||
#ifndef NULL
|
||||
#define NULL 0
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* LONGSTRING macro
|
||||
* gcc -pedantic warns about long string literals, LONGSTRING silences that.
|
||||
*/
|
||||
#if !defined(__GNUC__)
|
||||
# define LONGSTRING
|
||||
#else
|
||||
# define LONGSTRING __extension__
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI (3.14159265358979323846)
|
||||
#endif
|
||||
|
||||
#ifndef M_E
|
||||
#define M_E (2.7182818284590452354)
|
||||
#endif
|
||||
|
||||
#ifndef M_LOG2E
|
||||
#define M_LOG2E (1.4426950408889634074)
|
||||
#endif
|
||||
|
||||
#ifndef ONE_DIV_SQRT_LN2
|
||||
#define ONE_DIV_SQRT_LN2 (1.201122408786449815)
|
||||
#endif
|
||||
|
||||
#ifndef FLT_MAX_EXP
|
||||
#define FLT_MAX_EXP 128
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* USE_IEEE: Determine if we're using IEEE floating point
|
||||
*/
|
||||
#if defined(__i386__) || defined(__386__) || defined(__sparc__) || \
|
||||
defined(__s390__) || defined(__s390x__) || defined(__powerpc__) || \
|
||||
defined(__x86_64__) || \
|
||||
defined(__m68k__) || \
|
||||
defined(ia64) || defined(__ia64__) || \
|
||||
defined(__hppa__) || defined(hpux) || \
|
||||
defined(__mips) || defined(_MIPS_ARCH) || \
|
||||
defined(__arm__) || \
|
||||
defined(__sh__) || defined(__m32r__) || \
|
||||
(defined(__sun) && defined(_IEEE_754)) || \
|
||||
defined(__alpha__)
|
||||
#define USE_IEEE
|
||||
#define IEEE_ONE 0x3f800000
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* START/END_FAST_MATH macros:
|
||||
*
|
||||
* START_FAST_MATH: Set x86 FPU to faster, 32-bit precision mode (and save
|
||||
* original mode to a temporary).
|
||||
* END_FAST_MATH: Restore x86 FPU to original mode.
|
||||
*/
|
||||
#if defined(__GNUC__) && defined(__i386__)
|
||||
/*
|
||||
* Set the x86 FPU control word to guarantee only 32 bits of precision
|
||||
* are stored in registers. Allowing the FPU to store more introduces
|
||||
* differences between situations where numbers are pulled out of memory
|
||||
* vs. situations where the compiler is able to optimize register usage.
|
||||
*
|
||||
* In the worst case, we force the compiler to use a memory access to
|
||||
* truncate the float, by specifying the 'volatile' keyword.
|
||||
*/
|
||||
/* Hardware default: All exceptions masked, extended double precision,
|
||||
* round to nearest (IEEE compliant):
|
||||
*/
|
||||
#define DEFAULT_X86_FPU 0x037f
|
||||
/* All exceptions masked, single precision, round to nearest:
|
||||
*/
|
||||
#define FAST_X86_FPU 0x003f
|
||||
/* The fldcw instruction will cause any pending FP exceptions to be
|
||||
* raised prior to entering the block, and we clear any pending
|
||||
* exceptions before exiting the block. Hence, asm code has free
|
||||
* rein over the FPU while in the fast math block.
|
||||
*/
|
||||
#if defined(NO_FAST_MATH)
|
||||
#define START_FAST_MATH(x) \
|
||||
do { \
|
||||
static GLuint mask = DEFAULT_X86_FPU; \
|
||||
__asm__ ( "fnstcw %0" : "=m" (*&(x)) ); \
|
||||
__asm__ ( "fldcw %0" : : "m" (mask) ); \
|
||||
} while (0)
|
||||
#else
|
||||
#define START_FAST_MATH(x) \
|
||||
do { \
|
||||
static GLuint mask = FAST_X86_FPU; \
|
||||
__asm__ ( "fnstcw %0" : "=m" (*&(x)) ); \
|
||||
__asm__ ( "fldcw %0" : : "m" (mask) ); \
|
||||
} while (0)
|
||||
#endif
|
||||
/* Restore original FPU mode, and clear any exceptions that may have
|
||||
* occurred in the FAST_MATH block.
|
||||
*/
|
||||
#define END_FAST_MATH(x) \
|
||||
do { \
|
||||
__asm__ ( "fnclex ; fldcw %0" : : "m" (*&(x)) ); \
|
||||
} while (0)
|
||||
|
||||
#elif defined(__WATCOMC__) && defined(__386__)
|
||||
#define DEFAULT_X86_FPU 0x037f /* See GCC comments above */
|
||||
#define FAST_X86_FPU 0x003f /* See GCC comments above */
|
||||
void _watcom_start_fast_math(unsigned short *x,unsigned short *mask);
|
||||
#pragma aux _watcom_start_fast_math = \
|
||||
"fnstcw word ptr [eax]" \
|
||||
"fldcw word ptr [ecx]" \
|
||||
parm [eax] [ecx] \
|
||||
modify exact [];
|
||||
void _watcom_end_fast_math(unsigned short *x);
|
||||
#pragma aux _watcom_end_fast_math = \
|
||||
"fnclex" \
|
||||
"fldcw word ptr [eax]" \
|
||||
parm [eax] \
|
||||
modify exact [];
|
||||
#if defined(NO_FAST_MATH)
|
||||
#define START_FAST_MATH(x) \
|
||||
do { \
|
||||
static GLushort mask = DEFAULT_X86_FPU; \
|
||||
_watcom_start_fast_math(&x,&mask); \
|
||||
} while (0)
|
||||
#else
|
||||
#define START_FAST_MATH(x) \
|
||||
do { \
|
||||
static GLushort mask = FAST_X86_FPU; \
|
||||
_watcom_start_fast_math(&x,&mask); \
|
||||
} while (0)
|
||||
#endif
|
||||
#define END_FAST_MATH(x) _watcom_end_fast_math(&x)
|
||||
|
||||
#elif defined(_MSC_VER) && defined(_M_IX86)
|
||||
#define DEFAULT_X86_FPU 0x037f /* See GCC comments above */
|
||||
#define FAST_X86_FPU 0x003f /* See GCC comments above */
|
||||
#if defined(NO_FAST_MATH)
|
||||
#define START_FAST_MATH(x) do {\
|
||||
static GLuint mask = DEFAULT_X86_FPU;\
|
||||
__asm fnstcw word ptr [x]\
|
||||
__asm fldcw word ptr [mask]\
|
||||
} while(0)
|
||||
#else
|
||||
#define START_FAST_MATH(x) do {\
|
||||
static GLuint mask = FAST_X86_FPU;\
|
||||
__asm fnstcw word ptr [x]\
|
||||
__asm fldcw word ptr [mask]\
|
||||
} while(0)
|
||||
#endif
|
||||
#define END_FAST_MATH(x) do {\
|
||||
__asm fnclex\
|
||||
__asm fldcw word ptr [x]\
|
||||
} while(0)
|
||||
|
||||
#else
|
||||
/*
 * Fallback when none of the compiler/CPU combinations above matched:
 * there is no FPU control word to save, so START just gives x a defined
 * value and END only evaluates it.  Argument and expansion are
 * parenthesized so the macros behave as single expressions.
 */
#define START_FAST_MATH(x)  ((x) = 0)
#define END_FAST_MATH(x)  ((void)(x))
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef Elements
|
||||
#define Elements(x) (sizeof(x)/sizeof(*(x)))
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* COMPILER_H */
|
|
@ -0,0 +1,196 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 7.5
|
||||
*
|
||||
* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* \file glheader.h
|
||||
* Wrapper for GL/gl.h and GL/glext.h
|
||||
*/
|
||||
|
||||
|
||||
#ifndef GLHEADER_H
|
||||
#define GLHEADER_H
|
||||
|
||||
|
||||
#ifdef WGLAPI
|
||||
#undef WGLAPI
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(OPENSTEP) && (defined(__WIN32__) && !defined(__CYGWIN__)) && !defined(BUILD_FOR_SNAP)
|
||||
# if (defined(_MSC_VER) || defined(__MINGW32__)) && defined(BUILD_GL32) /* tag specify we're building mesa as a DLL */
|
||||
# define WGLAPI __declspec(dllexport)
|
||||
# elif (defined(_MSC_VER) || defined(__MINGW32__)) && defined(_DLL) /* tag specifying we're building for DLL runtime support */
|
||||
# define WGLAPI __declspec(dllimport)
|
||||
# else /* for use with static link lib build of Win32 edition only */
|
||||
# define WGLAPI __declspec(dllimport)
|
||||
# endif /* _STATIC_MESA support */
|
||||
#endif /* WIN32 / CYGWIN bracket */
|
||||
|
||||
|
||||
#define GL_GLEXT_PROTOTYPES
|
||||
#include "GL/gl.h"
|
||||
#include "GL/glext.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* GL_FIXED is defined in glext.h version 64 but these typedefs aren't (yet).
|
||||
*/
|
||||
typedef int GLfixed;
|
||||
typedef int GLclampx;
|
||||
|
||||
|
||||
#ifndef GL_OES_EGL_image
|
||||
typedef void *GLeglImageOES;
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef GL_OES_EGL_image_external
|
||||
#define GL_TEXTURE_EXTERNAL_OES 0x8D65
|
||||
#define GL_SAMPLER_EXTERNAL_OES 0x8D66
|
||||
#define GL_TEXTURE_BINDING_EXTERNAL_OES 0x8D67
|
||||
#define GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES 0x8D68
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef GL_OES_point_size_array
|
||||
#define GL_POINT_SIZE_ARRAY_OES 0x8B9C
|
||||
#define GL_POINT_SIZE_ARRAY_TYPE_OES 0x898A
|
||||
#define GL_POINT_SIZE_ARRAY_STRIDE_OES 0x898B
|
||||
#define GL_POINT_SIZE_ARRAY_POINTER_OES 0x898C
|
||||
#define GL_POINT_SIZE_ARRAY_BUFFER_BINDING_OES 0x8B9F
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef GL_OES_draw_texture
|
||||
#define GL_TEXTURE_CROP_RECT_OES 0x8B9D
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef GL_PROGRAM_BINARY_LENGTH_OES
|
||||
#define GL_PROGRAM_BINARY_LENGTH_OES 0x8741
|
||||
#endif
|
||||
|
||||
/* GLES 2.0 tokens */
|
||||
#ifndef GL_RGB565
|
||||
#define GL_RGB565 0x8D62
|
||||
#endif
|
||||
|
||||
#ifndef GL_TEXTURE_GEN_STR_OES
|
||||
#define GL_TEXTURE_GEN_STR_OES 0x8D60
|
||||
#endif
|
||||
|
||||
#ifndef GL_OES_compressed_paletted_texture
|
||||
#define GL_PALETTE4_RGB8_OES 0x8B90
|
||||
#define GL_PALETTE4_RGBA8_OES 0x8B91
|
||||
#define GL_PALETTE4_R5_G6_B5_OES 0x8B92
|
||||
#define GL_PALETTE4_RGBA4_OES 0x8B93
|
||||
#define GL_PALETTE4_RGB5_A1_OES 0x8B94
|
||||
#define GL_PALETTE8_RGB8_OES 0x8B95
|
||||
#define GL_PALETTE8_RGBA8_OES 0x8B96
|
||||
#define GL_PALETTE8_R5_G6_B5_OES 0x8B97
|
||||
#define GL_PALETTE8_RGBA4_OES 0x8B98
|
||||
#define GL_PALETTE8_RGB5_A1_OES 0x8B99
|
||||
#endif
|
||||
|
||||
#ifndef GL_OES_matrix_get
|
||||
#define GL_MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES 0x898D
|
||||
#define GL_PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES 0x898E
|
||||
#define GL_TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES 0x898F
|
||||
#endif
|
||||
|
||||
#ifndef GL_ES_VERSION_2_0
|
||||
#define GL_SHADER_BINARY_FORMATS 0x8DF8
|
||||
#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9
|
||||
#define GL_SHADER_COMPILER 0x8DFA
|
||||
#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB
|
||||
#define GL_MAX_VARYING_VECTORS 0x8DFC
|
||||
#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD
|
||||
#endif
|
||||
|
||||
#ifndef GL_ATI_texture_compression_3dc
|
||||
#define GL_ATI_texture_compression_3dc 1
|
||||
#define GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI 0x8837
|
||||
#endif
|
||||
|
||||
#ifndef GL_OES_compressed_ETC1_RGB8_texture
|
||||
#define GL_ETC1_RGB8_OES 0x8D64
|
||||
#endif
|
||||
|
||||
|
||||
/* Inexplicably, GL_HALF_FLOAT_OES has a different value than GL_HALF_FLOAT.
|
||||
*/
|
||||
#ifndef GL_HALF_FLOAT_OES
|
||||
#define GL_HALF_FLOAT_OES 0x8D61
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Internal token to represent a GLSL shader program (a collection of
|
||||
* one or more shaders that get linked together). Note that GLSL
|
||||
* shaders and shader programs share one name space (one hash table)
|
||||
* so we need a value that's different from any of the
|
||||
* GL_VERTEX/FRAGMENT/GEOMETRY_PROGRAM tokens.
|
||||
*/
|
||||
#define GL_SHADER_PROGRAM_MESA 0x9999
|
||||
|
||||
|
||||
/**
|
||||
* Internal token for geometry programs.
|
||||
* Use the value for GL_GEOMETRY_PROGRAM_NV for now.
|
||||
*/
|
||||
#define MESA_GEOMETRY_PROGRAM 0x8c26
|
||||
|
||||
/* Several fields of struct gl_config can take these as values. Since
|
||||
* GLX header files may not be available everywhere they need to be used,
|
||||
* redefine them here.
|
||||
*/
|
||||
#define GLX_NONE 0x8000
|
||||
#define GLX_SLOW_CONFIG 0x8001
|
||||
#define GLX_TRUE_COLOR 0x8002
|
||||
#define GLX_DIRECT_COLOR 0x8003
|
||||
#define GLX_PSEUDO_COLOR 0x8004
|
||||
#define GLX_STATIC_COLOR 0x8005
|
||||
#define GLX_GRAY_SCALE 0x8006
|
||||
#define GLX_STATIC_GRAY 0x8007
|
||||
#define GLX_TRANSPARENT_RGB 0x8008
|
||||
#define GLX_TRANSPARENT_INDEX 0x8009
|
||||
#define GLX_NON_CONFORMANT_CONFIG 0x800D
|
||||
#define GLX_SWAP_EXCHANGE_OML 0x8061
|
||||
#define GLX_SWAP_COPY_OML 0x8062
|
||||
#define GLX_SWAP_UNDEFINED_OML 0x8063
|
||||
|
||||
#define GLX_DONT_CARE 0xFFFFFFFF
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* GLHEADER_H */
|
|
@ -0,0 +1 @@
|
|||
../glheader.h
|
|
@ -0,0 +1,605 @@
|
|||
|
||||
#include "sparc_matrix.h"
|
||||
|
||||
.register %g2, #scratch
|
||||
.register %g3, #scratch
|
||||
|
||||
.text
|
||||
|
||||
#ifdef __arch64__
|
||||
#define STACK_VAR_OFF (2047 + (8 * 16))
|
||||
#else
|
||||
#define STACK_VAR_OFF (4 * 16)
|
||||
#endif
|
||||
|
||||
/* Newton-Raphson approximation turns out to be slower
|
||||
* (and less accurate) than direct fsqrts/fdivs.
|
||||
*/
|
||||
#define ONE_DOT_ZERO 0x3f800000
|
||||
|
||||
.globl	_mesa_sparc_transform_normalize_normals
/*
 * Transform every input normal by matrix elements 0-2, 4-6 and 8-10 of
 * mat->inv and bring the result to unit length.
 *
 * In:  %o0 = mat, %o1 = scale (raw float bits), %o2 = in,
 *      %o3 = lengths (may be NULL), %o4 = dest
 *
 * lengths == NULL: each transformed normal t is multiplied by
 *                  1.0 / sqrt(tx*tx + ty*ty + tz*tz).
 * lengths != NULL: the nine matrix elements are multiplied by scale once,
 *                  then each transformed normal is multiplied by lengths[i].
 *
 * dest->count is set to in->count before anything else; the routine
 * returns immediately when in->count < 1.  Three floats are stored per
 * normal, the output cursor moves 16 bytes per element and the input
 * cursor moves by in->stride.  Leaf routine: no register window is opened.
 *
 * NOTE(review): the lengths == NULL path has no special case for a
 * zero-length transformed normal (it divides 1.0 by sqrt(0)); confirm
 * that this is acceptable to callers.
 */
_mesa_sparc_transform_normalize_normals:
	/* Integer registers cannot be copied to FP registers directly, so
	 * 1.0f and scale are bounced through a temporary 16-byte stack slot. */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF]
	st	%o1, [%sp + STACK_VAR_OFF + 4]
	ld	[%sp + STACK_VAR_OFF], %f12		! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF + 4], %f15		! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start, source cursor
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start, output cursor

	/* Fetch the nine matrix elements used below into their FP registers. */
	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
	bne	4f
	 clr	%o4				! (delay slot) i = 0

1:	/* ---- lengths == NULL: normalize explicitly ---- */
	ld	[%o5 + 0], %f0			! ux
	ld	[%o5 + 4], %f1			! uy
	ld	[%o5 + 8], %f2			! uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	/* tx (f3) = ux*m0 + uy*m1 + uz*m2
	 * ty (f5) = ux*m4 + uy*m5 + uz*m6
	 * tz (f7) = ux*m8 + uy*m9 + uz*m10
	 * Multiplies and adds are interleaved; f0 and f4 are reused as
	 * temporaries once ux and the first partial product are consumed. */
	fmuls	%f0, M0, %f3
	fmuls	%f1, M1, %f4
	fmuls	%f0, M4, %f5
	fmuls	%f1, M5, %f6
	fmuls	%f0, M8, %f7
	fmuls	%f1, M9, %f8
	fadds	%f3, %f4, %f3
	fmuls	%f2, M2, %f10
	fmuls	%f2, M6, %f0
	fadds	%f5, %f6, %f5
	fmuls	%f2, M10, %f4
	fadds	%f7, %f8, %f7
	fadds	%f3, %f10, %f3
	fadds	%f5, %f0, %f5
	fadds	%f7, %f4, %f7

	/* f6 = tx*tx + ty*ty + tz*tz */
	fmuls	%f3, %f3, %f6
	fmuls	%f5, %f5, %f8
	fmuls	%f7, %f7, %f10
	fadds	%f6, %f8, %f6
	fadds	%f6, %f10, %f6

	/* f6 = 1.0 / sqrt(f6) */
	fsqrts	%f6, %f6
	fdivs	%f12, %f6, %f6

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0]			! out[i][0]
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 4]			! out[i][1]
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 8]			! out[i][2]

	cmp	%o4, %g1
	bl	1b				! loop while i < count
	 add	%g3, 16, %g3			! (delay slot) next output vector

	ba	7f
	 nop

4:	/* ---- lengths != NULL: fold scale into the matrix once ---- */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0], %f0			! ux
	ld	[%o5 + 4], %f1			! uy
	ld	[%o5 + 8], %f2			! uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	/* Same 3x3 product as above; the load of lengths[i] and the
	 * advance of the lengths pointer are slotted between the adds. */
	fmuls	%f0, M0, %f3
	fmuls	%f1, M1, %f4
	fmuls	%f0, M4, %f5
	fmuls	%f1, M5, %f6
	fmuls	%f0, M8, %f7
	fmuls	%f1, M9, %f8
	fadds	%f3, %f4, %f3
	fmuls	%f2, M2, %f10
	fmuls	%f2, M6, %f0
	fadds	%f5, %f6, %f5
	fmuls	%f2, M10, %f4
	fadds	%f7, %f8, %f7
	fadds	%f3, %f10, %f3
	ld	[%o3], %f13			! f13 = lengths[i]
	fadds	%f5, %f0, %f5
	add	%o3, 4, %o3			! lengths++
	fadds	%f7, %f4, %f7

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0]			! out[i][0] = tx * lengths[i]
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 4]			! out[i][1] = ty * lengths[i]
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 8]			! out[i][2] = tz * lengths[i]

	cmp	%o4, %g1
	bl	5b				! loop while i < count
	 add	%g3, 16, %g3			! (delay slot) next output vector

7:	retl
	 nop
|
||||
|
||||
.globl	_mesa_sparc_transform_normalize_normals_no_rot
/*
 * Variant of the transform-and-normalize routine that uses only matrix
 * elements 0, 5 and 10 of mat->inv (one factor per component).
 *
 * In:  %o0 = mat, %o1 = scale (raw float bits), %o2 = in,
 *      %o3 = lengths (may be NULL), %o4 = dest
 *
 * lengths == NULL: t = (ux*m0, uy*m5, uz*m10), then t is multiplied by
 *                  1.0 / sqrt(tx*tx + ty*ty + tz*tz).
 * lengths != NULL: m0, m5, m10 are multiplied by scale once, then each t
 *                  is multiplied by lengths[i].
 *
 * dest->count is set to in->count; returns immediately when
 * in->count < 1.  Leaf routine: no register window is opened.
 *
 * NOTE(review): as in the full-matrix variant, a zero-length t is not
 * special-cased on the lengths == NULL path.
 */
_mesa_sparc_transform_normalize_normals_no_rot:
	/* Bounce 1.0f and scale through a temporary stack slot to get them
	 * into FP registers. */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF]
	st	%o1, [%sp + STACK_VAR_OFF + 4]
	ld	[%sp + STACK_VAR_OFF], %f12		! f12 = 1.0f
	ld	[%sp + STACK_VAR_OFF + 4], %f15		! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start, source cursor
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start, output cursor

	/* Fetch the three matrix elements used below. */
	LDMATRIX_0_5_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
	bne	4f
	 clr	%o4				! (delay slot) i = 0

1:	/* ---- lengths == NULL: normalize explicitly ---- */
	ld	[%o5 + 0], %f0			! ux
	ld	[%o5 + 4], %f1			! uy
	ld	[%o5 + 8], %f2			! uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	fmuls	%f0, M0, %f3			! tx = ux * m0
	fmuls	%f1, M5, %f5			! ty = uy * m5
	fmuls	%f2, M10, %f7			! tz = uz * m10

	/* f6 = tx*tx + ty*ty + tz*tz */
	fmuls	%f3, %f3, %f6
	fmuls	%f5, %f5, %f8
	fmuls	%f7, %f7, %f10
	fadds	%f6, %f8, %f6
	fadds	%f6, %f10, %f6

	/* f6 = 1.0 / sqrt(f6) */
	fsqrts	%f6, %f6
	fdivs	%f12, %f6, %f6

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0]			! out[i][0]
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 4]			! out[i][1]
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 8]			! out[i][2]

	cmp	%o4, %g1
	bl	1b				! loop while i < count
	 add	%g3, 16, %g3			! (delay slot) next output vector

	ba	7f
	 nop

4:	/* ---- lengths != NULL: fold scale into the matrix once ---- */
	fmuls	M0, %f15, M0
	fmuls	M5, %f15, M5
	fmuls	M10, %f15, M10

5:
	ld	[%o5 + 0], %f0			! ux
	ld	[%o5 + 4], %f1			! uy
	ld	[%o5 + 8], %f2			! uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	/* The load of lengths[i] and the pointer bump are slotted between
	 * the three multiplies. */
	fmuls	%f0, M0, %f3			! tx = ux * m0
	ld	[%o3], %f13			! f13 = lengths[i]
	fmuls	%f1, M5, %f5			! ty = uy * m5
	add	%o3, 4, %o3			! lengths++
	fmuls	%f2, M10, %f7			! tz = uz * m10

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0]			! out[i][0] = tx * lengths[i]
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 4]			! out[i][1] = ty * lengths[i]
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 8]			! out[i][2] = tz * lengths[i]

	cmp	%o4, %g1
	bl	5b				! loop while i < count
	 add	%g3, 16, %g3			! (delay slot) next output vector

7:	retl
	 nop
|
||||
|
||||
.globl	_mesa_sparc_transform_rescale_normals_no_rot
/*
 * Multiply each input normal component-wise by scale times matrix elements
 * 0, 5 and 10 of mat->inv.
 *
 * In:  %o0 = mat, %o1 = scale (raw float bits), %o2 = in,
 *      %o3 = lengths (not read), %o4 = dest
 *
 * dest->count is set to in->count; returns immediately when
 * in->count < 1.  scale is folded into the three matrix elements once,
 * before the loop.  Leaf routine: no register window is opened.
 */
_mesa_sparc_transform_rescale_normals_no_rot:
	/* Bounce scale through a temporary stack slot into an FP register. */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF]
	ld	[%sp + STACK_VAR_OFF], %f15		! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start, source cursor
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start, output cursor

	/* Fetch the three matrix elements used below. */
	LDMATRIX_0_5_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 clr	%o4				! (delay slot) i = 0

	fmuls	M0, %f15, M0			! m0  *= scale
	fmuls	M5, %f15, M5			! m5  *= scale
	fmuls	M10, %f15, M10			! m10 *= scale

1:	ld	[%o5 + 0], %f0			! ux
	ld	[%o5 + 4], %f1			! uy
	ld	[%o5 + 8], %f2			! uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	fmuls	%f0, M0, %f3			! tx = ux * m0
	st	%f3, [%g3 + 0]			! out[i][0] = tx
	fmuls	%f1, M5, %f5			! ty = uy * m5
	st	%f5, [%g3 + 4]			! out[i][1] = ty
	fmuls	%f2, M10, %f7			! tz = uz * m10
	st	%f7, [%g3 + 8]			! out[i][2] = tz

	cmp	%o4, %g1
	bl	1b				! loop while i < count
	 add	%g3, 16, %g3			! (delay slot) next output vector

7:	retl
	 nop
|
||||
|
||||
.globl	_mesa_sparc_transform_rescale_normals
/*
 * Transform each input normal by scale times matrix elements 0-2, 4-6 and
 * 8-10 of mat->inv.
 *
 * In:  %o0 = mat, %o1 = scale (raw float bits), %o2 = in,
 *      %o3 = lengths (not read), %o4 = dest
 *
 * dest->count is set to in->count; returns immediately when
 * in->count < 1.  scale is folded into the nine matrix elements once,
 * before the loop.  Leaf routine: no register window is opened.
 */
_mesa_sparc_transform_rescale_normals:
	/* Bounce scale through a temporary stack slot into an FP register. */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF]
	ld	[%sp + STACK_VAR_OFF], %f15		! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start, source cursor
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start, output cursor

	/* Fetch the nine matrix elements used below. */
	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 clr	%o4				! (delay slot) i = 0

	/* Fold scale into the matrix once. */
	fmuls	M0, %f15, M0
	fmuls	M1, %f15, M1
	fmuls	M2, %f15, M2
	fmuls	M4, %f15, M4
	fmuls	M5, %f15, M5
	fmuls	M6, %f15, M6
	fmuls	M8, %f15, M8
	fmuls	M9, %f15, M9
	fmuls	M10, %f15, M10

1:	ld	[%o5 + 0], %f0			! ux
	ld	[%o5 + 4], %f1			! uy
	ld	[%o5 + 8], %f2			! uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	/* tx (f3) = ux*m0 + uy*m1 + uz*m2
	 * ty (f5) = ux*m4 + uy*m5 + uz*m6
	 * tz (f7) = ux*m8 + uy*m9 + uz*m10
	 * f0 and f4 are reused as temporaries; each result is stored as
	 * soon as its last add has been issued. */
	fmuls	%f0, M0, %f3
	fmuls	%f1, M1, %f4
	fmuls	%f0, M4, %f5
	fmuls	%f1, M5, %f6
	fmuls	%f0, M8, %f7
	fmuls	%f1, M9, %f8
	fadds	%f3, %f4, %f3
	fmuls	%f2, M2, %f10
	fmuls	%f2, M6, %f0
	fadds	%f5, %f6, %f5
	fmuls	%f2, M10, %f4
	fadds	%f7, %f8, %f7
	fadds	%f3, %f10, %f3
	st	%f3, [%g3 + 0]			! out[i][0] = tx
	fadds	%f5, %f0, %f5
	st	%f5, [%g3 + 4]			! out[i][1] = ty
	fadds	%f7, %f4, %f7
	st	%f7, [%g3 + 8]			! out[i][2] = tz

	cmp	%o4, %g1
	bl	1b				! loop while i < count
	 add	%g3, 16, %g3			! (delay slot) next output vector

7:	retl
	 nop
|
||||
|
||||
.globl	_mesa_sparc_transform_normals_no_rot
/*
 * Multiply each input normal component-wise by matrix elements 0, 5 and 10
 * of mat->inv.  No scaling and no normalization is applied.
 *
 * In:  %o0 = mat, %o1 = scale (not read), %o2 = in,
 *      %o3 = lengths (not read), %o4 = dest
 *
 * dest->count is set to in->count; returns immediately when
 * in->count < 1.  Leaf routine: no register window is opened.
 */
_mesa_sparc_transform_normals_no_rot:
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start, source cursor
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start, output cursor

	/* Fetch the three matrix elements used below. */
	LDMATRIX_0_5_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 clr	%o4				! (delay slot) i = 0

1:	ld	[%o5 + 0], %f0			! ux
	ld	[%o5 + 4], %f1			! uy
	ld	[%o5 + 8], %f2			! uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	fmuls	%f0, M0, %f3			! tx = ux * m0
	st	%f3, [%g3 + 0]			! out[i][0] = tx
	fmuls	%f1, M5, %f5			! ty = uy * m5
	st	%f5, [%g3 + 4]			! out[i][1] = ty
	fmuls	%f2, M10, %f7			! tz = uz * m10
	st	%f7, [%g3 + 8]			! out[i][2] = tz

	cmp	%o4, %g1
	bl	1b				! loop while i < count
	 add	%g3, 16, %g3			! (delay slot) next output vector

7:	retl
	 nop
|
||||
|
||||
.globl	_mesa_sparc_transform_normals
/*
 * Transform each input normal by matrix elements 0-2, 4-6 and 8-10 of
 * mat->inv.  No scaling and no normalization is applied.
 *
 * In:  %o0 = mat, %o1 = scale (not read), %o2 = in,
 *      %o3 = lengths (not read), %o4 = dest
 *
 * dest->count is set to in->count; returns immediately when
 * in->count < 1.  Leaf routine: no register window is opened.
 */
_mesa_sparc_transform_normals:
	LDPTR	[%o0 + MAT_INV], %o0		! o0 = mat->inv
	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start, source cursor
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start, output cursor

	/* Fetch the nine matrix elements used below. */
	LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 clr	%o4				! (delay slot) i = 0

1:	ld	[%o5 + 0], %f0			! ux
	ld	[%o5 + 4], %f1			! uy
	ld	[%o5 + 8], %f2			! uz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	/* tx (f3) = ux*m0 + uy*m1 + uz*m2
	 * ty (f5) = ux*m4 + uy*m5 + uz*m6
	 * tz (f7) = ux*m8 + uy*m9 + uz*m10
	 * f0 and f4 are reused as temporaries; each result is stored as
	 * soon as its last add has been issued. */
	fmuls	%f0, M0, %f3
	fmuls	%f1, M1, %f4
	fmuls	%f0, M4, %f5
	fmuls	%f1, M5, %f6
	fmuls	%f0, M8, %f7
	fmuls	%f1, M9, %f8
	fadds	%f3, %f4, %f3
	fmuls	%f2, M2, %f10
	fmuls	%f2, M6, %f0
	fadds	%f5, %f6, %f5
	fmuls	%f2, M10, %f4
	fadds	%f7, %f8, %f7
	fadds	%f3, %f10, %f3
	st	%f3, [%g3 + 0]			! out[i][0] = tx
	fadds	%f5, %f0, %f5
	st	%f5, [%g3 + 4]			! out[i][1] = ty
	fadds	%f7, %f4, %f7
	st	%f7, [%g3 + 8]			! out[i][2] = tz

	cmp	%o4, %g1
	bl	1b				! loop while i < count
	 add	%g3, 16, %g3			! (delay slot) next output vector

7:	retl
	 nop
|
||||
|
||||
.globl	_mesa_sparc_normalize_normals
/*
 * Bring every input normal to unit length; no matrix is involved.
 *
 * In:  %o0 = mat (not read), %o1 = scale (not read), %o2 = in,
 *      %o3 = lengths (may be NULL), %o4 = dest
 *
 * lengths == NULL: each normal is multiplied by
 *                  1.0 / sqrt(x*x + y*y + z*z).
 * lengths != NULL: each normal is multiplied by lengths[i].
 *
 * dest->count is set to in->count; returns immediately when
 * in->count < 1.  Leaf routine: no register window is opened.
 *
 * NOTE(review): a zero-length input is not special-cased on the
 * lengths == NULL path (1.0 is divided by sqrt(0)).
 */
_mesa_sparc_normalize_normals:
	/* Bounce the 1.0f bit pattern through a temporary stack slot into
	 * an FP register. */
	sethi	%hi(ONE_DOT_ZERO), %g2
	sub	%sp, 16, %sp
	st	%g2, [%sp + STACK_VAR_OFF]
	ld	[%sp + STACK_VAR_OFF], %f12		! f12 = 1.0f
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start, source cursor
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start, output cursor

	st	%g1, [%o4 + V4F_COUNT]		! dest->count = in->count

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 cmp	%o3, 0				! (delay slot) lengths == NULL ?
	bne	4f
	 clr	%o4				! (delay slot) i = 0

1:	/* ---- lengths == NULL: normalize explicitly ---- */
	ld	[%o5 + 0], %f3			! tx
	ld	[%o5 + 4], %f5			! ty
	ld	[%o5 + 8], %f7			! tz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	/* f6 = tx*tx + ty*ty + tz*tz */
	fmuls	%f3, %f3, %f6
	fmuls	%f5, %f5, %f8
	fmuls	%f7, %f7, %f10
	fadds	%f6, %f8, %f6
	fadds	%f6, %f10, %f6

	/* f6 = 1.0 / sqrt(f6) */
	fsqrts	%f6, %f6
	fdivs	%f12, %f6, %f6

	fmuls	%f3, %f6, %f3
	st	%f3, [%g3 + 0]			! out[i][0]
	fmuls	%f5, %f6, %f5
	st	%f5, [%g3 + 4]			! out[i][1]
	fmuls	%f7, %f6, %f7
	st	%f7, [%g3 + 8]			! out[i][2]

	cmp	%o4, %g1
	bl	1b				! loop while i < count
	 add	%g3, 16, %g3			! (delay slot) next output vector

	ba	7f
	 nop

4:	/* ---- lengths != NULL: use the supplied factors ---- */

5:
	ld	[%o5 + 0], %f3			! tx
	ld	[%o5 + 4], %f5			! ty
	ld	[%o5 + 8], %f7			! tz
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	ld	[%o3], %f13			! f13 = lengths[i]
	add	%o3, 4, %o3			! lengths++

	fmuls	%f3, %f13, %f3
	st	%f3, [%g3 + 0]			! out[i][0] = tx * lengths[i]
	fmuls	%f5, %f13, %f5
	st	%f5, [%g3 + 4]			! out[i][1] = ty * lengths[i]
	fmuls	%f7, %f13, %f7
	st	%f7, [%g3 + 8]			! out[i][2] = tz * lengths[i]

	cmp	%o4, %g1
	bl	5b				! loop while i < count
	 add	%g3, 16, %g3			! (delay slot) next output vector

7:	retl
	 nop
|
||||
|
||||
.globl	_mesa_sparc_rescale_normals
/*
 * Multiply every input normal by scale; no matrix is involved.
 *
 * In:  %o0 = mat (not read), %o1 = scale (raw float bits), %o2 = in,
 *      %o3 = lengths (not read), %o4 = dest
 *
 * dest->count is set to in->count; returns immediately when
 * in->count < 1.  Three floats are stored per normal, the output cursor
 * moves 16 bytes per element and the input cursor moves by in->stride.
 * Leaf routine: no register window is opened.
 */
_mesa_sparc_rescale_normals:
	/* Only scale is needed in an FP register; bounce it through a
	 * temporary 16-byte stack slot. */
	sub	%sp, 16, %sp
	st	%o1, [%sp + STACK_VAR_OFF+0x0]
	ld	[%sp + STACK_VAR_OFF+0x0], %f15	! f15 = scale
	add	%sp, 16, %sp

	LDPTR	[%o2 + V4F_START], %o5		! o5 = in->start, source cursor
	ld	[%o2 + V4F_COUNT], %g1		! g1 = in->count
	ld	[%o2 + V4F_STRIDE], %g2		! g2 = in->stride
	LDPTR	[%o4 + V4F_START], %g3		! g3 = dest->start, output cursor

	/* dest->count = in->count */
	st	%g1, [%o4 + V4F_COUNT]

	cmp	%g1, 1
	bl	7f				! count < 1: nothing to do
	 clr	%o4				! (delay slot) i = 0

1:
	ld	[%o5 + 0x00], %f3		! tx = from[0]
	ld	[%o5 + 0x04], %f5		! ty = from[1]
	ld	[%o5 + 0x08], %f7		! tz = from[2]
	add	%o5, %g2, %o5			! source cursor += stride
	add	%o4, 1, %o4			! i++

	fmuls	%f3, %f15, %f3
	st	%f3, [%g3 + 0x00]		! out[i][0] = tx * scale
	fmuls	%f5, %f15, %f5
	st	%f5, [%g3 + 0x04]		! out[i][1] = ty * scale
	fmuls	%f7, %f15, %f7
	st	%f7, [%g3 + 0x08]		! out[i][2] = tz * scale

	cmp	%o4, %g1			! continue if (i < count)
	bl	1b
	 add	%g3, 0x10, %g3			! (delay slot) next output vector

7:	retl
	 nop
|
|
@ -0,0 +1,142 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 6.3
|
||||
*
|
||||
* Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Sparc assembly code by David S. Miller
|
||||
*/
|
||||
|
||||
|
||||
#include "sparc.h"
|
||||
|
||||
#ifdef USE_SPARC_ASM
|
||||
|
||||
#include "main/context.h"
|
||||
#include "math/m_xform.h"
|
||||
#include "tnl/t_context.h"
|
||||
|
||||
#ifdef DEBUG
|
||||
#include "math/m_debug.h"
|
||||
#endif
|
||||
|
||||
#define XFORM_ARGS GLvector4f *to_vec, \
|
||||
const GLfloat m[16], \
|
||||
const GLvector4f *from_vec
|
||||
|
||||
#define DECLARE_XFORM_GROUP(pfx, sz) \
|
||||
extern void _mesa_##pfx##_transform_points##sz##_general(XFORM_ARGS); \
|
||||
extern void _mesa_##pfx##_transform_points##sz##_identity(XFORM_ARGS); \
|
||||
extern void _mesa_##pfx##_transform_points##sz##_3d_no_rot(XFORM_ARGS); \
|
||||
extern void _mesa_##pfx##_transform_points##sz##_perspective(XFORM_ARGS); \
|
||||
extern void _mesa_##pfx##_transform_points##sz##_2d(XFORM_ARGS); \
|
||||
extern void _mesa_##pfx##_transform_points##sz##_2d_no_rot(XFORM_ARGS); \
|
||||
extern void _mesa_##pfx##_transform_points##sz##_3d(XFORM_ARGS);
|
||||
|
||||
#define ASSIGN_XFORM_GROUP(pfx, sz) \
|
||||
_mesa_transform_tab[sz][MATRIX_GENERAL] = \
|
||||
_mesa_##pfx##_transform_points##sz##_general; \
|
||||
_mesa_transform_tab[sz][MATRIX_IDENTITY] = \
|
||||
_mesa_##pfx##_transform_points##sz##_identity; \
|
||||
_mesa_transform_tab[sz][MATRIX_3D_NO_ROT] = \
|
||||
_mesa_##pfx##_transform_points##sz##_3d_no_rot; \
|
||||
_mesa_transform_tab[sz][MATRIX_PERSPECTIVE] = \
|
||||
_mesa_##pfx##_transform_points##sz##_perspective; \
|
||||
_mesa_transform_tab[sz][MATRIX_2D] = \
|
||||
_mesa_##pfx##_transform_points##sz##_2d; \
|
||||
_mesa_transform_tab[sz][MATRIX_2D_NO_ROT] = \
|
||||
_mesa_##pfx##_transform_points##sz##_2d_no_rot; \
|
||||
_mesa_transform_tab[sz][MATRIX_3D] = \
|
||||
_mesa_##pfx##_transform_points##sz##_3d;
|
||||
|
||||
|
||||
DECLARE_XFORM_GROUP(sparc, 1)
|
||||
DECLARE_XFORM_GROUP(sparc, 2)
|
||||
DECLARE_XFORM_GROUP(sparc, 3)
|
||||
DECLARE_XFORM_GROUP(sparc, 4)
|
||||
|
||||
extern GLvector4f *_mesa_sparc_cliptest_points4(GLvector4f *clip_vec,
|
||||
GLvector4f *proj_vec,
|
||||
GLubyte clipMask[],
|
||||
GLubyte *orMask,
|
||||
GLubyte *andMask,
|
||||
GLboolean viewport_z_clip);
|
||||
|
||||
extern GLvector4f *_mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec,
|
||||
GLvector4f *proj_vec,
|
||||
GLubyte clipMask[],
|
||||
GLubyte *orMask,
|
||||
GLubyte *andMask,
|
||||
GLboolean viewport_z_clip);
|
||||
|
||||
#define NORM_ARGS const GLmatrix *mat, \
|
||||
GLfloat scale, \
|
||||
const GLvector4f *in, \
|
||||
const GLfloat *lengths, \
|
||||
GLvector4f *dest
|
||||
|
||||
extern void _mesa_sparc_transform_normalize_normals(NORM_ARGS);
|
||||
extern void _mesa_sparc_transform_normalize_normals_no_rot(NORM_ARGS);
|
||||
extern void _mesa_sparc_transform_rescale_normals_no_rot(NORM_ARGS);
|
||||
extern void _mesa_sparc_transform_rescale_normals(NORM_ARGS);
|
||||
extern void _mesa_sparc_transform_normals_no_rot(NORM_ARGS);
|
||||
extern void _mesa_sparc_transform_normals(NORM_ARGS);
|
||||
extern void _mesa_sparc_normalize_normals(NORM_ARGS);
|
||||
extern void _mesa_sparc_rescale_normals(NORM_ARGS);
|
||||
|
||||
|
||||
|
||||
void _mesa_init_all_sparc_transform_asm(void)
|
||||
{
|
||||
ASSIGN_XFORM_GROUP(sparc, 1)
|
||||
ASSIGN_XFORM_GROUP(sparc, 2)
|
||||
ASSIGN_XFORM_GROUP(sparc, 3)
|
||||
ASSIGN_XFORM_GROUP(sparc, 4)
|
||||
|
||||
_mesa_clip_tab[4] = _mesa_sparc_cliptest_points4;
|
||||
_mesa_clip_np_tab[4] = _mesa_sparc_cliptest_points4_np;
|
||||
|
||||
_mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE] =
|
||||
_mesa_sparc_transform_normalize_normals;
|
||||
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE] =
|
||||
_mesa_sparc_transform_normalize_normals_no_rot;
|
||||
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] =
|
||||
_mesa_sparc_transform_rescale_normals_no_rot;
|
||||
_mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] =
|
||||
_mesa_sparc_transform_rescale_normals;
|
||||
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =
|
||||
_mesa_sparc_transform_normals_no_rot;
|
||||
_mesa_normal_tab[NORM_TRANSFORM] =
|
||||
_mesa_sparc_transform_normals;
|
||||
_mesa_normal_tab[NORM_NORMALIZE] =
|
||||
_mesa_sparc_normalize_normals;
|
||||
_mesa_normal_tab[NORM_RESCALE] =
|
||||
_mesa_sparc_rescale_normals;
|
||||
|
||||
#ifdef DEBUG_MATH
|
||||
_math_test_all_transform_functions("sparc");
|
||||
_math_test_all_cliptest_functions("sparc");
|
||||
_math_test_all_normal_transform_functions("sparc");
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* USE_SPARC_ASM */
|
|
@ -0,0 +1,36 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.1
|
||||
*
|
||||
* Copyright (C) 1999 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Sparc assembly code by David S. Miller
|
||||
*/
|
||||
|
||||
|
||||
#ifndef SPARC_H
|
||||
#define SPARC_H
|
||||
|
||||
extern void _mesa_init_all_sparc_transform_asm(void);
|
||||
|
||||
#endif /* !(SPARC_H) */
|
|
@ -0,0 +1,233 @@
|
|||
/*
|
||||
* Clip testing in SPARC assembly
|
||||
*/
|
||||
|
||||
/*
 * Pointer-load mnemonic and byte offsets of the vector fields this file
 * accesses, for 64-bit and 32-bit pointer sizes.
 * NOTE(review): the offsets presumably mirror the GLvector4f layout
 * (data, start, count, stride, size, flags); confirm against the C struct.
 *
 * Tested with #ifdef (not #if) so an undefined macro does not trip -Wundef
 * and so the test matches the other SPARC assembly sources.
 */
#ifdef __arch64__
#define LDPTR		ldx
#define V4F_DATA	0x00
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
#else
#define LDPTR		ld
#define V4F_DATA	0x00
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
#endif
|
||||
|
||||
#define VEC_SIZE_1 1
|
||||
#define VEC_SIZE_2 3
|
||||
#define VEC_SIZE_3 7
|
||||
#define VEC_SIZE_4 15
|
||||
|
||||
.register %g2, #scratch
|
||||
.register %g3, #scratch
|
||||
|
||||
.text
|
||||
.align 64
|
||||
|
||||
one_dot_zero:
|
||||
.word 0x3f800000 /* 1.0f */
|
||||
|
||||
/* This trick is shamelessly stolen from the x86
|
||||
* Mesa asm. Very clever, and we can do it too
|
||||
* since we have the necessary add with carry
|
||||
* instructions on Sparc.
|
||||
*/
|
||||
clip_table:
|
||||
.byte 0, 1, 0, 2, 4, 5, 4, 6
|
||||
.byte 0, 1, 0, 2, 8, 9, 8, 10
|
||||
.byte 32, 33, 32, 34, 36, 37, 36, 38
|
||||
.byte 32, 33, 32, 34, 40, 41, 40, 42
|
||||
.byte 0, 1, 0, 2, 4, 5, 4, 6
|
||||
.byte 0, 1, 0, 2, 8, 9, 8, 10
|
||||
.byte 16, 17, 16, 18, 20, 21, 20, 22
|
||||
.byte 16, 17, 16, 18, 24, 25, 24, 26
|
||||
.byte 63, 61, 63, 62, 55, 53, 55, 54
|
||||
.byte 63, 61, 63, 62, 59, 57, 59, 58
|
||||
.byte 47, 45, 47, 46, 39, 37, 39, 38
|
||||
.byte 47, 45, 47, 46, 43, 41, 43, 42
|
||||
.byte 63, 61, 63, 62, 55, 53, 55, 54
|
||||
.byte 63, 61, 63, 62, 59, 57, 59, 58
|
||||
.byte 31, 29, 31, 30, 23, 21, 23, 22
|
||||
.byte 31, 29, 31, 30, 27, 25, 27, 26
|
||||
|
||||
/* GLvector4f *clip_vec, GLvector4f *proj_vec,
|
||||
GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
|
||||
GLboolean viewport_z_clip */
|
||||
|
||||
.align 64
|
||||
__pc_tramp:
|
||||
retl
|
||||
nop
|
||||
|
||||
.globl	_mesa_sparc_cliptest_points4
/*
 * Clip-test 4-component points and write the perspective-divided result
 * for the points that are not clipped.
 *
 * In (after save):  %i0 = clip_vec, %i1 = proj_vec, %i2 = clipMask,
 *                   %i3 = orMask, %i4 = andMask.  The sixth argument is
 *                   not read; %i5 is reused as the output cursor.
 * Returns:          %o0 = proj_vec.
 *
 * Side effects on proj_vec: flags |= VEC_SIZE_4, size = 3,
 * count = clip_vec->count.
 *
 * Per point, a 7-bit index is built from the sign bits of w, z, y, x and
 * from the unsigned comparisons of the doubled bit pattern of w against
 * those of z, y and x; clip_table maps that index to the mask byte stored
 * in clipMask[i].
 *   mask != 0: proj = (0, 0, 0, 1.0), c is incremented, mask is OR-ed into
 *              the running orMask and AND-ed into the running andMask.
 *   mask == 0: proj = (x/w, y/w, z/w, 1/w).
 * On exit *orMask is the running OR; *andMask is the running AND, or 0
 * when fewer than count points were clipped.
 *
 * A count of zero skips the loop entirely, leaving the masks as passed in.
 */
_mesa_sparc_cliptest_points4:
	/* The frame must cover the register-window save area: sixteen
	 * registers of 4 bytes each, or of 8 bytes each on 64-bit (176 is
	 * the 64-bit ABI minimum frame and is a multiple of 16). */
#ifdef __arch64__
	save	%sp, -176, %sp
#else
	save	%sp, -64, %sp
#endif
	/* Position-independent address of one_dot_zero: the call leaves
	 * its own address in %o7 and the delay slot subtracts the
	 * assembly-time distance back to the constant. */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	ld	[%g1 + 0x0], %f4		! f4 = 1.0f
	add	%g1, 0x4, %g1			! g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1		! l1 = clip_vec->stride
	ld	[%i0 + V4F_COUNT], %l3		! l3 = clip_vec->count
	LDPTR	[%i0 + V4F_START], %i0		! i0 = clip_vec->start
	LDPTR	[%i1 + V4F_START], %i5		! i5 = proj_vec->start
	ldub	[%i3], %g2
	ldub	[%i4], %g3
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! g2 = (andMask << 8) | orMask

	ld	[%i1 + V4F_FLAGS], %g3
	or	%g3, VEC_SIZE_4, %g3
	st	%g3, [%i1 + V4F_FLAGS]		! proj_vec->flags |= VEC_SIZE_4
	mov	3, %g3
	st	%g3, [%i1 + V4F_SIZE]		! proj_vec->size = 3
	st	%l3, [%i1 + V4F_COUNT]		! proj_vec->count = count
	clr	%l2				! c = 0
	cmp	%l3, 0
	be	4f				! count == 0: skip the loop
	 clr	%l0				! (delay slot) i = 0

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 * i5: vProj[4][i]
	 */

1:	ld	[%i0 + 0x0c], %f3		! f3 = w (as float)
	ld	[%i0 + 0x0c], %g5		! g5 = w (bit pattern)
	ld	[%i0 + 0x08], %g4		! g4 = z (bit pattern)
	fdivs	%f4, %f3, %f8			! f8 = 1.0 / w
	addcc	%g5, %g5, %g5			! carry = sign(w), g5 = bits << 1
	addx	%g0, 0x0, %g3			! index = carry
	addcc	%g4, %g4, %g4			! carry = sign(z)
	addx	%g3, %g3, %g3			! index = index * 2 + carry
	subcc	%g5, %g4, %g0			! carry = doubled w < doubled z
	ld	[%i0 + 0x04], %g4		! g4 = y (bit pattern)
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(y)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = doubled w < doubled y
	ld	[%i0 + 0x00], %g4		! g4 = x (bit pattern)
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(x)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = doubled w < doubled x
	addx	%g3, %g3, %g3
	ldub	[%g1 + %g3], %g3		! g3 = mask from table
	cmp	%g3, 0
	be	2f				! mask == 0: point is inside
	 stb	%g3, [%i2]			! (delay slot) clipMask[i] = mask
	sll	%g3, 8, %g4
	add	%l2, 1, %l2			! c++
	st	%g0, [%i5 + 0x00]		! proj x = 0
	or	%g4, 0xff, %g4			! g4 = (mask << 8) | 0xff
	or	%g2, %g3, %g2			! running OR |= mask
	st	%g0, [%i5 + 0x04]		! proj y = 0
	and	%g2, %g4, %g2			! running AND &= mask
	st	%g0, [%i5 + 0x08]		! proj z = 0
	b	3f
	 st	%f4, [%i5 + 0x0c]		! (delay slot) proj w = 1.0
2:	ld	[%i0 + 0x00], %f0
	ld	[%i0 + 0x04], %f1
	ld	[%i0 + 0x08], %f2
	fmuls	%f0, %f8, %f0
	st	%f0, [%i5 + 0x00]		! proj x = x / w
	fmuls	%f1, %f8, %f1
	st	%f1, [%i5 + 0x04]		! proj y = y / w
	fmuls	%f2, %f8, %f2
	st	%f2, [%i5 + 0x08]		! proj z = z / w
	st	%f8, [%i5 + 0x0c]		! proj w = 1 / w
3:	add	%i5, 0x10, %i5			! next output vector
	add	%l0, 1, %l0			! i++
	add	%i2, 1, %i2			! clipMask++
	cmp	%l0, %l3
	bne	1b				! loop until i == count
	 add	%i0, %l1, %i0			! (delay slot) from += stride
4:	stb	%g2, [%i3]			! *orMask = running OR
	srl	%g2, 8, %g3			! g3 = running AND
	cmp	%l2, %l3
	bl,a	1f				! c < count: not every point clipped
	 clr	%g3				! (annulled unless taken) AND = 0
1:	stb	%g3, [%i4]			! *andMask
	ret
	 restore %i1, 0x0, %o0			! return proj_vec
|
||||
|
||||
.globl	_mesa_sparc_cliptest_points4_np
/*
 * Clip-test 4-component points without producing projected output.
 *
 * In (after save):  %i0 = clip_vec, %i1 = proj_vec (only returned, never
 *                   dereferenced), %i2 = clipMask, %i3 = orMask,
 *                   %i4 = andMask.  The sixth argument is not read.
 * Returns:          %o0 = the value passed in as proj_vec.
 *
 * Per point, a 7-bit index is built from the sign bits of w, z, y, x and
 * from the unsigned comparisons of the doubled bit pattern of w against
 * those of z, y and x; clip_table maps that index to the mask byte stored
 * in clipMask[i].  A non-zero mask increments c, is OR-ed into the running
 * orMask and AND-ed into the running andMask.
 * On exit *orMask is the running OR; *andMask is the running AND, or 0
 * when fewer than count points were clipped.
 *
 * A count of zero skips the loop entirely, leaving the masks as passed in.
 */
_mesa_sparc_cliptest_points4_np:
	/* The frame must cover the register-window save area: sixteen
	 * registers of 4 bytes each, or of 8 bytes each on 64-bit (176 is
	 * the 64-bit ABI minimum frame and is a multiple of 16). */
#ifdef __arch64__
	save	%sp, -176, %sp
#else
	save	%sp, -64, %sp
#endif

	/* Position-independent address of one_dot_zero; clip_table starts
	 * four bytes after it. */
	call	__pc_tramp
	 sub	%o7, (. - one_dot_zero - 4), %g1
	add	%g1, 0x4, %g1			! g1 = clip_table

	ld	[%i0 + V4F_STRIDE], %l1		! l1 = clip_vec->stride
	ld	[%i0 + V4F_COUNT], %l3		! l3 = clip_vec->count
	LDPTR	[%i0 + V4F_START], %i0		! i0 = clip_vec->start
	ldub	[%i3], %g2
	ldub	[%i4], %g3
	sll	%g3, 8, %g3
	or	%g2, %g3, %g2			! g2 = (andMask << 8) | orMask

	clr	%l2				! c = 0
	cmp	%l3, 0
	be	4f				! count == 0: skip the loop
	 clr	%l0				! (delay slot) i = 0

	/* l0: i
	 * l3: count
	 * l1: stride
	 * l2: c
	 * g2: (tmpAndMask << 8) | tmpOrMask
	 * g1: clip_table
	 * i0: from[stride][i]
	 * i2: clipMask
	 */

1:	ld	[%i0 + 0x0c], %g5		! g5 = w (bit pattern)
	ld	[%i0 + 0x08], %g4		! g4 = z (bit pattern)
	addcc	%g5, %g5, %g5			! carry = sign(w), g5 = bits << 1
	addx	%g0, 0x0, %g3			! index = carry
	addcc	%g4, %g4, %g4			! carry = sign(z)
	addx	%g3, %g3, %g3			! index = index * 2 + carry
	subcc	%g5, %g4, %g0			! carry = doubled w < doubled z
	ld	[%i0 + 0x04], %g4		! g4 = y (bit pattern)
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(y)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = doubled w < doubled y
	ld	[%i0 + 0x00], %g4		! g4 = x (bit pattern)
	addx	%g3, %g3, %g3
	addcc	%g4, %g4, %g4			! carry = sign(x)
	addx	%g3, %g3, %g3
	subcc	%g5, %g4, %g0			! carry = doubled w < doubled x
	addx	%g3, %g3, %g3
	ldub	[%g1 + %g3], %g3		! g3 = mask from table
	cmp	%g3, 0
	be	2f				! mask == 0: point is inside
	 stb	%g3, [%i2]			! (delay slot) clipMask[i] = mask
	sll	%g3, 8, %g4
	add	%l2, 1, %l2			! c++
	or	%g4, 0xff, %g4			! g4 = (mask << 8) | 0xff
	or	%g2, %g3, %g2			! running OR |= mask
	and	%g2, %g4, %g2			! running AND &= mask
2:	add	%l0, 1, %l0			! i++
	add	%i2, 1, %i2			! clipMask++
	cmp	%l0, %l3
	bne	1b				! loop until i == count
	 add	%i0, %l1, %i0			! (delay slot) from += stride
4:	stb	%g2, [%i3]			! *orMask = running OR
	srl	%g2, 8, %g3			! g3 = running AND
	cmp	%l2, %l3
	bl,a	1f				! c < count: not every point clipped
	 clr	%g3				! (annulled unless taken) AND = 0
1:	stb	%g3, [%i4]			! *andMask
	ret
	 restore %i1, 0x0, %o0			! return the proj_vec argument
|
|
@ -0,0 +1,170 @@
|
|||
/*
|
||||
* SPARC assembly matrix code.
|
||||
*/
|
||||
|
||||
#ifndef _SPARC_MATRIX_H
|
||||
#define _SPARC_MATRIX_H
|
||||
|
||||
#ifdef __arch64__
|
||||
#define LDPTR ldx
|
||||
#define MAT_M 0x00
|
||||
#define MAT_INV 0x08
|
||||
#define V4F_DATA 0x00
|
||||
#define V4F_START 0x08
|
||||
#define V4F_COUNT 0x10
|
||||
#define V4F_STRIDE 0x14
|
||||
#define V4F_SIZE 0x18
|
||||
#define V4F_FLAGS 0x1c
|
||||
#else
|
||||
#define LDPTR ld
|
||||
#define MAT_M 0x00
|
||||
#define MAT_INV 0x04
|
||||
#define V4F_DATA 0x00
|
||||
#define V4F_START 0x04
|
||||
#define V4F_COUNT 0x08
|
||||
#define V4F_STRIDE 0x0c
|
||||
#define V4F_SIZE 0x10
|
||||
#define V4F_FLAGS 0x14
|
||||
#endif
|
||||
|
||||
#define VEC_SIZE_1 1
|
||||
#define VEC_SIZE_2 3
|
||||
#define VEC_SIZE_3 7
|
||||
#define VEC_SIZE_4 15
|
||||
|
||||
#define M0 %f16
|
||||
#define M1 %f17
|
||||
#define M2 %f18
|
||||
#define M3 %f19
|
||||
#define M4 %f20
|
||||
#define M5 %f21
|
||||
#define M6 %f22
|
||||
#define M7 %f23
|
||||
#define M8 %f24
|
||||
#define M9 %f25
|
||||
#define M10 %f26
|
||||
#define M11 %f27
|
||||
#define M12 %f28
|
||||
#define M13 %f29
|
||||
#define M14 %f30
|
||||
#define M15 %f31
|
||||
|
||||
#define LDMATRIX_0_1_2_3_12_13_14_15(BASE) \
|
||||
ldd [BASE + ( 0 * 0x4)], M0; \
|
||||
ldd [BASE + ( 2 * 0x4)], M2; \
|
||||
ldd [BASE + (12 * 0x4)], M12; \
|
||||
ldd [BASE + (14 * 0x4)], M14
|
||||
|
||||
#define LDMATRIX_0_1_12_13(BASE) \
|
||||
ldd [BASE + ( 0 * 0x4)], M0; \
|
||||
ldd [BASE + (12 * 0x4)], M12
|
||||
|
||||
#define LDMATRIX_0_12_13(BASE) \
|
||||
ld [BASE + ( 0 * 0x4)], M0; \
|
||||
ldd [BASE + (12 * 0x4)], M12
|
||||
|
||||
#define LDMATRIX_0_1_2_12_13_14(BASE) \
|
||||
ldd [BASE + ( 0 * 0x4)], M0; \
|
||||
ld [BASE + ( 2 * 0x4)], M2; \
|
||||
ldd [BASE + (12 * 0x4)], M12; \
|
||||
ld [BASE + (14 * 0x4)], M14
|
||||
|
||||
#define LDMATRIX_0_12_13_14(BASE) \
|
||||
ld [BASE + ( 0 * 0x4)], M0; \
|
||||
ldd [BASE + (12 * 0x4)], M12; \
|
||||
ld [BASE + (14 * 0x4)], M14
|
||||
|
||||
#define LDMATRIX_0_14(BASE) \
|
||||
ld [BASE + ( 0 * 0x4)], M0; \
|
||||
ld [BASE + (14 * 0x4)], M14
|
||||
|
||||
#define LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(BASE) \
|
||||
ldd [BASE + ( 0 * 0x4)], M0; \
|
||||
ldd [BASE + ( 2 * 0x4)], M2; \
|
||||
ldd [BASE + ( 4 * 0x4)], M4; \
|
||||
ldd [BASE + ( 6 * 0x4)], M6; \
|
||||
ldd [BASE + (12 * 0x4)], M12; \
|
||||
ldd [BASE + (14 * 0x4)], M14
|
||||
|
||||
#define LDMATRIX_0_5_12_13(BASE) \
|
||||
ld [BASE + ( 0 * 0x4)], M0; \
|
||||
ld [BASE + ( 5 * 0x4)], M5; \
|
||||
ldd [BASE + (12 * 0x4)], M12
|
||||
|
||||
#define LDMATRIX_0_1_2_3_4_5_6_12_13_14(BASE) \
|
||||
ldd [BASE + ( 0 * 0x4)], M0; \
|
||||
ldd [BASE + ( 2 * 0x4)], M2; \
|
||||
ldd [BASE + ( 4 * 0x4)], M4; \
|
||||
ld [BASE + ( 6 * 0x4)], M6; \
|
||||
ldd [BASE + (12 * 0x4)], M12; \
|
||||
ld [BASE + (14 * 0x4)], M14
|
||||
|
||||
#define LDMATRIX_0_5_12_13_14(BASE) \
|
||||
ld [BASE + ( 0 * 0x4)], M0; \
|
||||
ld [BASE + ( 5 * 0x4)], M5; \
|
||||
ldd [BASE + (12 * 0x4)], M12; \
|
||||
ld [BASE + (14 * 0x4)], M14
|
||||
|
||||
#define LDMATRIX_0_5_14(BASE) \
|
||||
ld [BASE + ( 0 * 0x4)], M0; \
|
||||
ld [BASE + ( 5 * 0x4)], M5; \
|
||||
ld [BASE + (14 * 0x4)], M14
|
||||
|
||||
#define LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(BASE) \
|
||||
ldd [BASE + ( 0 * 0x4)], M0; \
|
||||
ldd [BASE + ( 2 * 0x4)], M2; \
|
||||
ldd [BASE + ( 4 * 0x4)], M4; \
|
||||
ldd [BASE + ( 6 * 0x4)], M6; \
|
||||
ldd [BASE + ( 8 * 0x4)], M8; \
|
||||
ldd [BASE + (10 * 0x4)], M10; \
|
||||
ldd [BASE + (12 * 0x4)], M12; \
|
||||
ldd [BASE + (14 * 0x4)], M14
|
||||
|
||||
#define LDMATRIX_0_1_4_5_12_13(BASE) \
|
||||
ldd [BASE + ( 0 * 0x4)], M0; \
|
||||
ldd [BASE + ( 4 * 0x4)], M4; \
|
||||
ldd [BASE + (12 * 0x4)], M12
|
||||
|
||||
#define LDMATRIX_0_5_12_13(BASE) \
|
||||
ld [BASE + ( 0 * 0x4)], M0; \
|
||||
ld [BASE + ( 5 * 0x4)], M5; \
|
||||
ldd [BASE + (12 * 0x4)], M12
|
||||
|
||||
#define LDMATRIX_0_1_2_4_5_6_8_9_10(BASE) \
|
||||
ldd [BASE + ( 0 * 0x4)], M0; \
|
||||
ld [BASE + ( 2 * 0x4)], M2; \
|
||||
ldd [BASE + ( 4 * 0x4)], M4; \
|
||||
ld [BASE + ( 6 * 0x4)], M6; \
|
||||
ldd [BASE + ( 8 * 0x4)], M8; \
|
||||
ld [BASE + (10 * 0x4)], M10
|
||||
|
||||
#define LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(BASE) \
|
||||
ldd [BASE + ( 0 * 0x4)], M0; \
|
||||
ld [BASE + ( 2 * 0x4)], M2; \
|
||||
ldd [BASE + ( 4 * 0x4)], M4; \
|
||||
ld [BASE + ( 6 * 0x4)], M6; \
|
||||
ldd [BASE + ( 8 * 0x4)], M8; \
|
||||
ld [BASE + (10 * 0x4)], M10; \
|
||||
ldd [BASE + (12 * 0x4)], M12; \
|
||||
ld [BASE + (14 * 0x4)], M14
|
||||
|
||||
/*
 * Load the 32-bit words at offsets 0, 5 and 10 (in units of 4 bytes,
 * relative to BASE) into M0, M5 and M10.
 *
 * The final line carries no trailing "; \" so that the definition ends
 * here and cannot absorb whatever source line happens to follow it.
 */
#define LDMATRIX_0_5_10(BASE) \
	ld	[BASE + ( 0 * 0x4)], M0; \
	ld	[BASE + ( 5 * 0x4)], M5; \
	ld	[BASE + (10 * 0x4)], M10
|
||||
|
||||
#define LDMATRIX_0_5_10_12_13_14(BASE) \
|
||||
ld [BASE + ( 0 * 0x4)], M0; \
|
||||
ld [BASE + ( 5 * 0x4)], M5; \
|
||||
ld [BASE + (10 * 0x4)], M10; \
|
||||
ldd [BASE + (12 * 0x4)], M12; \
|
||||
ld [BASE + (14 * 0x4)], M14
|
||||
|
||||
#define LDMATRIX_0_5_8_9_10_14(BASE) \
|
||||
ld [BASE + ( 0 * 0x4)], M0; \
|
||||
ld [BASE + ( 5 * 0x4)], M5; \
|
||||
ldd [BASE + ( 8 * 0x4)], M8; \
|
||||
ld [BASE + (10 * 0x4)], M10; \
|
||||
ld [BASE + (14 * 0x4)], M14
|
||||
|
||||
#endif /* !(_SPARC_MATRIX_H) */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,40 @@
|
|||
# Copyright © 2012 Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
if HAVE_X86_64_ASM
|
||||
|
||||
AM_CPPFLAGS = \
|
||||
-I$(top_srcdir)/include \
|
||||
-I$(top_srcdir)/src/mesa \
|
||||
-I$(top_srcdir)/src/GLdispatch/mapi \
|
||||
$(API_DEFINES) \
|
||||
$(DEFINES)
|
||||
|
||||
noinst_PROGRAMS = gen_matypes
|
||||
|
||||
gen_matypes_SOURCES = ../x86/gen_matypes.c
|
||||
BUILT_SOURCES = matypes.h
|
||||
CLEANFILES = matypes.h
|
||||
|
||||
matypes.h: gen_matypes
|
||||
$(AM_V_GEN)./gen_matypes > $@
|
||||
|
||||
endif
|
|
@ -0,0 +1,50 @@
|
|||
Register Usage
|
||||
rax temporary register; with variable arguments passes information
|
||||
about the number of SSE registers used; 1st return register
|
||||
|
||||
rbx* callee-saved register; optionally used as base pointer
|
||||
|
||||
rcx used to pass 4th integer argument to functions
|
||||
|
||||
rdx used to pass 3rd argument to functions; 2nd return register
|
||||
|
||||
rsp* stack pointer
|
||||
|
||||
rbp* callee-saved register; optionally used as frame pointer
|
||||
|
||||
rsi used to pass 2nd argument to functions
|
||||
|
||||
rdi used to pass 1st argument to functions
|
||||
|
||||
r8 used to pass 5th argument to functions
|
||||
|
||||
r9 used to pass 6th argument to functions
|
||||
|
||||
r10 temporary register, used for passing a function's static chain pointer
|
||||
|
||||
r11 temporary register
|
||||
|
||||
r12-15* callee-saved registers
|
||||
|
||||
xmm0-1 used to pass and return floating point arguments
|
||||
|
||||
xmm2-7 used to pass floating point arguments
|
||||
|
||||
xmm8-15 temporary registers
|
||||
|
||||
mmx0-7 temporary registers
|
||||
|
||||
st0 temporary register; used to return long double arguments
|
||||
|
||||
st1 temporary registers; used to return long double arguments
|
||||
|
||||
st2-7 temporary registers
|
||||
|
||||
fs Reserved for system use (as thread specific data register)
|
||||
|
||||
|
||||
|
||||
*) must be preserved across function calls
|
||||
|
||||
Integer arguments from list: rdi,rsi,rdx,rcx,r8,r9,stack
|
||||
Floating point arguments from list: xmm0-xmm7
|
|
@ -0,0 +1,119 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 6.3
|
||||
*
|
||||
* Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* x86-64 optimizations shamelessly converted from x86/sse/3dnow assembly by
|
||||
* Mikko Tiihonen
|
||||
*/
|
||||
|
||||
#ifdef USE_X86_64_ASM
|
||||
|
||||
#include "main/glheader.h"
|
||||
#include "main/context.h"
|
||||
#include "math/m_xform.h"
|
||||
#include "tnl/t_context.h"
|
||||
#include "x86-64.h"
|
||||
#include "../x86/x86_xform.h"
|
||||
|
||||
/*
 * The _math_test_all_*() self-tests called from this file are compiled
 * under DEBUG_MATH, so the header must be pulled in for that configuration
 * as well as for DEBUG (presumably m_debug.h is where they are declared --
 * 3dnow.c includes it under DEBUG_MATH for the same calls).
 */
#if defined(DEBUG) || defined(DEBUG_MATH)
#include "math/m_debug.h"
#endif
|
||||
|
||||
extern void _mesa_x86_64_cpuid(unsigned int *regs);
|
||||
|
||||
DECLARE_XFORM_GROUP( x86_64, 4 )
|
||||
DECLARE_XFORM_GROUP( 3dnow, 4 )
|
||||
|
||||
#else
|
||||
/* just to silence warning below */
|
||||
#include "x86-64.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
extern void _mesa_x86_64_transform_points4_general( XFORM_ARGS );
|
||||
extern void _mesa_x86_64_transform_points4_identity( XFORM_ARGS );
|
||||
extern void _mesa_x86_64_transform_points4_perspective( XFORM_ARGS );
|
||||
extern void _mesa_x86_64_transform_points4_3d( XFORM_ARGS );
|
||||
extern void _mesa_x86_64_transform_points4_3d_no_rot( XFORM_ARGS );
|
||||
extern void _mesa_x86_64_transform_points4_2d_no_rot( XFORM_ARGS );
|
||||
extern void _mesa_x86_64_transform_points4_2d( XFORM_ARGS );
|
||||
*/
|
||||
|
||||
#ifdef USE_X86_64_ASM
|
||||
/**
 * Forward \p msg to _mesa_debug(), but only when the MESA_DEBUG
 * environment variable is set; otherwise do nothing.
 */
static void message( const char *msg )
{
   if (!_mesa_getenv("MESA_DEBUG")) {
      return;
   }

   _mesa_debug( NULL, "%s", msg );
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
 * Plug the x86-64 assembly vertex transforms into _mesa_transform_tab[4][].
 *
 * Does nothing unless built with USE_X86_64_ASM, and returns early when the
 * MESA_NO_ASM environment variable is set.  The general, identity and 3D
 * entries are always installed; the 3DNow! variants are installed only when
 * CPUID leaf 0x80000001 reports bit 31 in the fourth output word.
 */
void _mesa_init_all_x86_64_transform_asm(void)
{
#ifdef USE_X86_64_ASM
   /* In/out block for _mesa_x86_64_cpuid(); word 0 selects the leaf. */
   unsigned int cpuid_regs[4] = { 0x80000001, 0x00000000, 0x00000000, 0x00000000 };
   const unsigned int amd_3dnow_bit = 1U << 31;

   if ( _mesa_getenv( "MESA_NO_ASM" ) ) {
      return;
   }

   message("Initializing x86-64 optimizations\n");

   /* Routines that need no optional CPU feature. */
   _mesa_transform_tab[4][MATRIX_GENERAL]  = _mesa_x86_64_transform_points4_general;
   _mesa_transform_tab[4][MATRIX_IDENTITY] = _mesa_x86_64_transform_points4_identity;
   _mesa_transform_tab[4][MATRIX_3D]       = _mesa_x86_64_transform_points4_3d;

   _mesa_x86_64_cpuid(cpuid_regs);

   if ((cpuid_regs[3] & amd_3dnow_bit) != 0) {
      message("3Dnow! detected\n");

      _mesa_transform_tab[4][MATRIX_3D_NO_ROT]   = _mesa_3dnow_transform_points4_3d_no_rot;
      _mesa_transform_tab[4][MATRIX_PERSPECTIVE] = _mesa_3dnow_transform_points4_perspective;
      _mesa_transform_tab[4][MATRIX_2D_NO_ROT]   = _mesa_3dnow_transform_points4_2d_no_rot;
      _mesa_transform_tab[4][MATRIX_2D]          = _mesa_3dnow_transform_points4_2d;
   }

#ifdef DEBUG_MATH
   /* Optional self-tests of the routines installed above. */
   _math_test_all_transform_functions("x86_64");
   _math_test_all_cliptest_functions("x86_64");
   _math_test_all_normal_transform_functions("x86_64");
#endif

#endif
}
|
|
@ -0,0 +1,31 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __X86_64_ASM_H__
|
||||
#define __X86_64_ASM_H__
|
||||
|
||||
extern void _mesa_init_all_x86_64_transform_asm( void );
|
||||
|
||||
#endif
|
|
@ -0,0 +1,483 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 7.1
|
||||
*
|
||||
* Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifdef USE_X86_64_ASM
|
||||
|
||||
#include "matypes.h"
|
||||
|
||||
.text
|
||||
|
||||
.align 16
|
||||
.globl _mesa_x86_64_cpuid
|
||||
.hidden _mesa_x86_64_cpuid
|
||||
_mesa_x86_64_cpuid:
|
||||
pushq %rbx
|
||||
movl (%rdi), %eax
|
||||
movl 8(%rdi), %ecx
|
||||
|
||||
cpuid
|
||||
|
||||
movl %ebx, 4(%rdi)
|
||||
movl %eax, (%rdi)
|
||||
movl %ecx, 8(%rdi)
|
||||
movl %edx, 12(%rdi)
|
||||
popq %rbx
|
||||
ret
|
||||
|
||||
.align 16
|
||||
.globl _mesa_x86_64_transform_points4_general
|
||||
.hidden _mesa_x86_64_transform_points4_general
|
||||
_mesa_x86_64_transform_points4_general:
|
||||
/*
|
||||
* rdi = dest
|
||||
* rsi = matrix
|
||||
* rdx = source
|
||||
*/
|
||||
movl V4F_COUNT(%rdx), %ecx /* count */
|
||||
movzbl V4F_STRIDE(%rdx), %eax /* stride */
|
||||
|
||||
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
|
||||
movl $4, V4F_SIZE(%rdi) /* set dest size */
|
||||
.byte 0x66, 0x66, 0x66, 0x90 /* manual align += 4 */
|
||||
orl $VEC_SIZE_4, V4F_FLAGS(%rdi)/* set dest flags */
|
||||
|
||||
testl %ecx, %ecx /* verify non-zero count */
|
||||
prefetchnta 64(%rsi)
|
||||
jz p4_general_done
|
||||
|
||||
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
|
||||
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
|
||||
|
||||
prefetch 16(%rdx)
|
||||
|
||||
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
|
||||
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
|
||||
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
|
||||
movaps 32(%rsi), %xmm6 /* m11 | m10 | m9 | m8 */
|
||||
movaps 48(%rsi), %xmm7 /* m15 | m14 | m13 | m12 */
|
||||
|
||||
p4_general_loop:
|
||||
|
||||
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
|
||||
prefetchw 16(%rdi)
|
||||
|
||||
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
|
||||
addq %rax, %rdx
|
||||
pshufd $0x55, %xmm8, %xmm1 /* oy | oy | oy | oy */
|
||||
mulps %xmm4, %xmm0 /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
|
||||
pshufd $0xAA, %xmm8, %xmm2 /* oz | oz | oz | oz */
|
||||
mulps %xmm5, %xmm1 /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
|
||||
pshufd $0xFF, %xmm8, %xmm3 /* ow | ow | ow | ow */
|
||||
mulps %xmm6, %xmm2 /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
|
||||
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
|
||||
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
|
||||
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
|
||||
prefetch 16(%rdx)
|
||||
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
|
||||
|
||||
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
|
||||
addq $16, %rdi
|
||||
|
||||
decl %ecx
|
||||
jnz p4_general_loop
|
||||
|
||||
p4_general_done:
|
||||
.byte 0xf3
|
||||
ret
|
||||
|
||||
.section .rodata
|
||||
|
||||
.align 16
|
||||
p4_constants:
|
||||
.byte 0xff, 0xff, 0xff, 0xff
|
||||
.byte 0xff, 0xff, 0xff, 0xff
|
||||
.byte 0xff, 0xff, 0xff, 0xff
|
||||
.byte 0x00, 0x00, 0x00, 0x00
|
||||
|
||||
.byte 0x00, 0x00, 0x00, 0x00
|
||||
.byte 0x00, 0x00, 0x00, 0x00
|
||||
.byte 0x00, 0x00, 0x00, 0x00
|
||||
.float 1.0
|
||||
|
||||
.text
|
||||
.align 16
|
||||
.globl _mesa_x86_64_transform_points4_3d
|
||||
.hidden _mesa_x86_64_transform_points4_3d
|
||||
/*
|
||||
* this is slower than _mesa_x86_64_transform_points4_general
|
||||
* because it ensures that the last matrix row (or is it column?) is 0,0,0,1
|
||||
*/
|
||||
_mesa_x86_64_transform_points4_3d:
|
||||
|
||||
leaq p4_constants(%rip), %rax
|
||||
|
||||
prefetchnta 64(%rsi)
|
||||
|
||||
movaps (%rax), %xmm9
|
||||
movaps 16(%rax), %xmm10
|
||||
|
||||
movl V4F_COUNT(%rdx), %ecx /* count */
|
||||
movzbl V4F_STRIDE(%rdx), %eax /* stride */
|
||||
|
||||
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
|
||||
movl $4, V4F_SIZE(%rdi) /* set dest size */
|
||||
orl $VEC_SIZE_4, V4F_FLAGS(%rdi)/* set dest flags */
|
||||
|
||||
testl %ecx, %ecx /* verify non-zero count */
|
||||
jz p4_3d_done
|
||||
|
||||
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
|
||||
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
|
||||
|
||||
prefetch 16(%rdx)
|
||||
|
||||
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
|
||||
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
|
||||
andps %xmm9, %xmm4 /* 0.0 | m2 | m1 | m0 */
|
||||
movaps 32(%rsi), %xmm6 /* m11 | m10 | m9 | m8 */
|
||||
andps %xmm9, %xmm5 /* 0.0 | m6 | m5 | m4 */
|
||||
movaps 48(%rsi), %xmm7 /* m15 | m14 | m13 | m12 */
|
||||
andps %xmm9, %xmm6 /* 0.0 | m10 | m9 | m8 */
|
||||
andps %xmm9, %xmm7 /* 0.0 | m14 | m13 | m12 */
|
||||
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
|
||||
orps %xmm10, %xmm7 /* 1.0 | m14 | m13 | m12 */
|
||||
|
||||
p4_3d_loop:
|
||||
|
||||
movups (%rdx), %xmm8 /* ox | oy | oz | ow */
|
||||
prefetchw 16(%rdi)
|
||||
|
||||
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
|
||||
addq %rax, %rdx
|
||||
pshufd $0x55, %xmm8, %xmm1 /* oy | oy | oy | oy */
|
||||
mulps %xmm4, %xmm0 /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
|
||||
pshufd $0xAA, %xmm8, %xmm2 /* oz | oz | oz | oz */
|
||||
mulps %xmm5, %xmm1 /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
|
||||
pshufd $0xFF, %xmm8, %xmm3 /* ow | ow | ow | ow */
|
||||
mulps %xmm6, %xmm2 /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
|
||||
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
|
||||
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
|
||||
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
|
||||
prefetch 16(%rdx)
|
||||
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
|
||||
|
||||
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
|
||||
addq $16, %rdi
|
||||
|
||||
dec %ecx
|
||||
jnz p4_3d_loop
|
||||
|
||||
p4_3d_done:
|
||||
.byte 0xf3
|
||||
ret
|
||||
|
||||
|
||||
.align 16
|
||||
.globl _mesa_x86_64_transform_points4_identity
|
||||
.hidden _mesa_x86_64_transform_points4_identity
|
||||
_mesa_x86_64_transform_points4_identity:
|
||||
|
||||
movl V4F_COUNT(%rdx), %ecx /* count */
|
||||
movzbl V4F_STRIDE(%rdx), %eax /* stride */
|
||||
|
||||
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
|
||||
movl $4, V4F_SIZE(%rdi) /* set dest size */
|
||||
orl $VEC_SIZE_4, V4F_FLAGS(%rdi)/* set dest flags */
|
||||
|
||||
test %ecx, %ecx
|
||||
jz p4_identity_done
|
||||
|
||||
movq V4F_START(%rdx), %rsi /* ptr to first src vertex */
|
||||
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
|
||||
prefetch 64(%rsi)
|
||||
prefetchw 64(%rdi)
|
||||
|
||||
add %ecx, %ecx
|
||||
|
||||
rep movsq
|
||||
|
||||
p4_identity_done:
|
||||
.byte 0xf3
|
||||
ret
|
||||
|
||||
|
||||
.align 16
|
||||
.globl _mesa_3dnow_transform_points4_3d_no_rot
|
||||
.hidden _mesa_3dnow_transform_points4_3d_no_rot
|
||||
_mesa_3dnow_transform_points4_3d_no_rot:
|
||||
|
||||
movl V4F_COUNT(%rdx), %ecx /* count */
|
||||
movzbl V4F_STRIDE(%rdx), %eax /* stride */
|
||||
|
||||
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
|
||||
movl $4, V4F_SIZE(%rdi) /* set dest size */
|
||||
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
|
||||
orl $VEC_SIZE_4, V4F_FLAGS(%rdi)/* set dest flags */
|
||||
|
||||
test %ecx, %ecx
|
||||
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
|
||||
jz p4_3d_no_rot_done
|
||||
|
||||
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
|
||||
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
|
||||
|
||||
prefetch (%rdx)
|
||||
|
||||
movd (%rsi), %mm0 /* | m00 */
|
||||
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
|
||||
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
|
||||
|
||||
movd 40(%rsi), %mm2 /* | m22 */
|
||||
movq 48(%rsi), %mm1 /* m31 | m30 */
|
||||
|
||||
punpckldq 56(%rsi), %mm2 /* m32 | m22 */
|
||||
|
||||
p4_3d_no_rot_loop:
|
||||
|
||||
prefetchw 32(%rdi)
|
||||
|
||||
movq (%rdx), %mm4 /* x1 | x0 */
|
||||
movq 8(%rdx), %mm5 /* x3 | x2 */
|
||||
movd 12(%rdx), %mm7 /* | x3 */
|
||||
|
||||
movq %mm5, %mm6 /* x3 | x2 */
|
||||
pfmul %mm0, %mm4 /* x1*m11 | x0*m00 */
|
||||
|
||||
punpckhdq %mm6, %mm6 /* x3 | x3 */
|
||||
pfmul %mm2, %mm5 /* x3*m32 | x2*m22 */
|
||||
|
||||
pfmul %mm1, %mm6 /* x3*m31 | x3*m30 */
|
||||
pfacc %mm7, %mm5 /* x3 | x2*m22+x3*m32 */
|
||||
|
||||
pfadd %mm6, %mm4 /* x1*m11+x3*m31 | x0*m00+x3*m30 */
|
||||
|
||||
addq %rax, %rdx
|
||||
movq %mm4, (%rdi) /* write r0, r1 */
|
||||
movq %mm5, 8(%rdi) /* write r2, r3 */
|
||||
|
||||
addq $16, %rdi
|
||||
|
||||
decl %ecx
|
||||
prefetch 32(%rdx)
|
||||
jnz p4_3d_no_rot_loop
|
||||
|
||||
p4_3d_no_rot_done:
|
||||
femms
|
||||
ret
|
||||
|
||||
|
||||
.align 16
|
||||
.globl _mesa_3dnow_transform_points4_perspective
|
||||
.hidden _mesa_3dnow_transform_points4_perspective
|
||||
_mesa_3dnow_transform_points4_perspective:
|
||||
|
||||
movl V4F_COUNT(%rdx), %ecx /* count */
|
||||
movzbl V4F_STRIDE(%rdx), %eax /* stride */
|
||||
|
||||
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
|
||||
movl $4, V4F_SIZE(%rdi) /* set dest size */
|
||||
orl $VEC_SIZE_4, V4F_FLAGS(%rdi)/* set dest flags */
|
||||
|
||||
test %ecx, %ecx
|
||||
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
|
||||
jz p4_perspective_done
|
||||
|
||||
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
|
||||
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
|
||||
|
||||
movd (%rsi), %mm0 /* | m00 */
|
||||
pxor %mm7, %mm7 /* 0 | 0 */
|
||||
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
|
||||
|
||||
movq 32(%rsi), %mm2 /* m21 | m20 */
|
||||
prefetch (%rdx)
|
||||
|
||||
movd 40(%rsi), %mm1 /* | m22 */
|
||||
|
||||
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
|
||||
punpckldq 56(%rsi), %mm1 /* m32 | m22 */
|
||||
|
||||
|
||||
p4_perspective_loop:
|
||||
|
||||
prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
|
||||
|
||||
movq (%rdx), %mm4 /* x1 | x0 */
|
||||
movq 8(%rdx), %mm5 /* x3 | x2 */
|
||||
movd 8(%rdx), %mm3 /* | x2 */
|
||||
|
||||
movq %mm5, %mm6 /* x3 | x2 */
|
||||
pfmul %mm0, %mm4 /* x1*m11 | x0*m00 */
|
||||
|
||||
punpckldq %mm5, %mm5 /* x2 | x2 */
|
||||
|
||||
pfmul %mm2, %mm5 /* x2*m21 | x2*m20 */
|
||||
pfsubr %mm7, %mm3 /* | -x2 */
|
||||
|
||||
pfmul %mm1, %mm6 /* x3*m32 | x2*m22 */
|
||||
pfadd %mm4, %mm5 /* x1*m11+x2*m21 | x0*m00+x2*m20 */
|
||||
|
||||
pfacc %mm3, %mm6 /* -x2 | x2*m22+x3*m32 */
|
||||
|
||||
movq %mm5, (%rdi) /* write r0, r1 */
|
||||
addq %rax, %rdx
|
||||
movq %mm6, 8(%rdi) /* write r2, r3 */
|
||||
|
||||
addq $16, %rdi
|
||||
|
||||
decl %ecx
|
||||
prefetch 32(%rdx) /* hopefully stride is zero */
|
||||
jnz p4_perspective_loop
|
||||
|
||||
p4_perspective_done:
|
||||
femms
|
||||
ret
|
||||
|
||||
.align 16
|
||||
.globl _mesa_3dnow_transform_points4_2d_no_rot
|
||||
.hidden _mesa_3dnow_transform_points4_2d_no_rot
|
||||
_mesa_3dnow_transform_points4_2d_no_rot:
|
||||
|
||||
movl V4F_COUNT(%rdx), %ecx /* count */
|
||||
movzbl V4F_STRIDE(%rdx), %eax /* stride */
|
||||
|
||||
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
|
||||
movl $4, V4F_SIZE(%rdi) /* set dest size */
|
||||
orl $VEC_SIZE_4, V4F_FLAGS(%rdi)/* set dest flags */
|
||||
|
||||
test %ecx, %ecx
|
||||
.byte 0x90 /* manual align += 1 */
|
||||
jz p4_2d_no_rot_done
|
||||
|
||||
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
|
||||
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
|
||||
|
||||
movd (%rsi), %mm0 /* | m00 */
|
||||
prefetch (%rdx)
|
||||
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
|
||||
|
||||
movq 48(%rsi), %mm1 /* m31 | m30 */
|
||||
|
||||
p4_2d_no_rot_loop:
|
||||
|
||||
prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
|
||||
|
||||
movq (%rdx), %mm4 /* x1 | x0 */
|
||||
movq 8(%rdx), %mm5 /* x3 | x2 */
|
||||
|
||||
pfmul %mm0, %mm4 /* x1*m11 | x0*m00 */
|
||||
movq %mm5, %mm6 /* x3 | x2 */
|
||||
|
||||
punpckhdq %mm6, %mm6 /* x3 | x3 */
|
||||
|
||||
addq %rax, %rdx
|
||||
pfmul %mm1, %mm6 /* x3*m31 | x3*m30 */
|
||||
|
||||
prefetch 32(%rdx) /* hopefully stride is zero */
|
||||
pfadd %mm4, %mm6 /* x1*m11+x3*m31 | x0*m00+x3*m30 */
|
||||
|
||||
movq %mm6, (%rdi) /* write r0, r1 */
|
||||
movq %mm5, 8(%rdi) /* write r2, r3 */
|
||||
|
||||
addq $16, %rdi
|
||||
|
||||
decl %ecx
|
||||
jnz p4_2d_no_rot_loop
|
||||
|
||||
p4_2d_no_rot_done:
|
||||
femms
|
||||
ret
|
||||
|
||||
|
||||
.align 16
|
||||
.globl _mesa_3dnow_transform_points4_2d
|
||||
.hidden _mesa_3dnow_transform_points4_2d
|
||||
_mesa_3dnow_transform_points4_2d:
|
||||
|
||||
movl V4F_COUNT(%rdx), %ecx /* count */
|
||||
movzbl V4F_STRIDE(%rdx), %eax /* stride */
|
||||
|
||||
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
|
||||
movl $4, V4F_SIZE(%rdi) /* set dest size */
|
||||
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
|
||||
orl $VEC_SIZE_4, V4F_FLAGS(%rdi)/* set dest flags */
|
||||
|
||||
test %ecx, %ecx
|
||||
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
|
||||
jz p4_2d_done
|
||||
|
||||
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
|
||||
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
|
||||
|
||||
movd (%rsi), %mm0 /* | m00 */
|
||||
movd 4(%rsi), %mm1 /* | m01 */
|
||||
|
||||
prefetch (%rdx)
|
||||
|
||||
punpckldq 16(%rsi), %mm0 /* m10 | m00 */
|
||||
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
|
||||
punpckldq 20(%rsi), %mm1 /* m11 | m01 */
|
||||
|
||||
movq 48(%rsi), %mm2 /* m31 | m30 */
|
||||
|
||||
p4_2d_loop:
|
||||
|
||||
prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
|
||||
|
||||
movq (%rdx), %mm3 /* x1 | x0 */
|
||||
movq 8(%rdx), %mm5 /* x3 | x2 */
|
||||
|
||||
movq %mm3, %mm4 /* x1 | x0 */
|
||||
movq %mm5, %mm6 /* x3 | x2 */
|
||||
|
||||
pfmul %mm1, %mm4 /* x1*m11 | x0*m01 */
|
||||
punpckhdq %mm6, %mm6 /* x3 | x3 */
|
||||
|
||||
pfmul %mm0, %mm3 /* x1*m10 | x0*m00 */
|
||||
|
||||
addq %rax, %rdx
|
||||
pfacc %mm4, %mm3 /* x0*m01+x1*m11 | x0*m00+x1*m10 */
|
||||
|
||||
pfmul %mm2, %mm6 /* x3*m31 | x3*m30 */
|
||||
prefetch 32(%rdx) /* hopefully stride is zero */
|
||||
|
||||
pfadd %mm6, %mm3 /* r1 | r0 */
|
||||
|
||||
movq %mm3, (%rdi) /* write r0, r1 */
|
||||
movq %mm5, 8(%rdi) /* write r2, r3 */
|
||||
|
||||
addq $16, %rdi
|
||||
|
||||
decl %ecx
|
||||
jnz p4_2d_loop
|
||||
|
||||
p4_2d_done:
|
||||
femms
|
||||
ret
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,91 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 5.0.1
|
||||
*
|
||||
* Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* 3DNow! optimizations contributed by
|
||||
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
|
||||
*/
|
||||
|
||||
#include "main/glheader.h"
|
||||
#include "main/context.h"
|
||||
#include "math/m_xform.h"
|
||||
#include "tnl/t_context.h"
|
||||
|
||||
#include "3dnow.h"
|
||||
#include "x86_xform.h"
|
||||
|
||||
#ifdef DEBUG_MATH
|
||||
#include "math/m_debug.h"
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Declarations for the hand-written 3DNow! assembly routines.  Everything
 * in this section is only visible when the build defines USE_3DNOW_ASM.
 */
#ifdef USE_3DNOW_ASM
|
||||
/* One declaration group per second macro argument (2, 3, 4).  The macro
 * is defined outside this file; presumably the number is the input vertex
 * size -- TODO confirm against x86_xform.h. */
DECLARE_XFORM_GROUP( 3dnow, 2 )
|
||||
DECLARE_XFORM_GROUP( 3dnow, 3 )
|
||||
DECLARE_XFORM_GROUP( 3dnow, 4 )
|
||||
|
||||
/* Normal-transform routines.  They are declared here but the matching
 * ASSIGN_NORM_GROUP() call in _mesa_init_3dnow_transform_asm() is
 * commented out, so nothing in this file installs them. */
DECLARE_NORM_GROUP( 3dnow )
|
||||
|
||||
|
||||
/* The three prototypes below are not referenced by the code in this
 * file. */
extern void _ASMAPI
|
||||
_mesa_v16_3dnow_general_xform( GLfloat *first_vert,
|
||||
const GLfloat *m,
|
||||
const GLfloat *src,
|
||||
GLuint src_stride,
|
||||
GLuint count );
|
||||
|
||||
extern void _ASMAPI
|
||||
_mesa_3dnow_project_vertices( GLfloat *first,
|
||||
GLfloat *last,
|
||||
const GLfloat *m,
|
||||
GLuint stride );
|
||||
|
||||
extern void _ASMAPI
|
||||
_mesa_3dnow_project_clipped_vertices( GLfloat *first,
|
||||
GLfloat *last,
|
||||
const GLfloat *m,
|
||||
GLuint stride,
|
||||
const GLubyte *clipmask );
|
||||
#endif
|
||||
|
||||
|
||||
/**
 * Install the 3DNow! transform routines.
 *
 * With USE_3DNOW_ASM defined this invokes ASSIGN_XFORM_GROUP for the
 * 2, 3 and 4 groups; without it the function body is empty.  Takes no
 * arguments and returns nothing.
 */
void _mesa_init_3dnow_transform_asm( void )
|
||||
{
|
||||
#ifdef USE_3DNOW_ASM
|
||||
ASSIGN_XFORM_GROUP( 3dnow, 2 );
|
||||
ASSIGN_XFORM_GROUP( 3dnow, 3 );
|
||||
ASSIGN_XFORM_GROUP( 3dnow, 4 );
|
||||
|
||||
/* The normal-transform group is deliberately NOT installed: there is a
 * bug somewhere in 3dnow_normal.S that causes bad shading.  The disabled
 * call, kept for reference, is:
 *
 *    ASSIGN_NORM_GROUP( 3dnow );
 */
|
||||
|
||||
#ifdef DEBUG_MATH
|
||||
/* Both self-tests are run under the "3DNow!" label.  Note that the normal
 * test runs even though the 3DNow! normal group is not installed above. */
_math_test_all_transform_functions( "3DNow!" );
|
||||
_math_test_all_normal_transform_functions( "3DNow!" );
|
||||
#endif
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* 3DNow! optimizations contributed by
|
||||
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
|
||||
*/
|
||||
|
||||
/* Include guard for the 3DNow! initialisation interface. */
#ifndef __3DNOW_H__
|
||||
#define __3DNOW_H__
|
||||
|
||||
/*
 * Installs the 3DNow! assembly transform routines.  The definition only
 * does work when the build defines USE_3DNOW_ASM; otherwise it is a no-op.
 */
void _mesa_init_3dnow_transform_asm( void );
|
||||
|
||||
#endif /* __3DNOW_H__ */
|
|
@ -0,0 +1,852 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 5.1
|
||||
*
|
||||
* Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* 3Dnow assembly code by Holger Waechtler
|
||||
*/
|
||||
|
||||
#ifdef USE_3DNOW_ASM
|
||||
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "norm_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define M(i) REGOFF(i * 4, ECX)
|
||||
#define STRIDE REGOFF(12, ESI)
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Multiplies every normal of the input vector by the upper-left 3x3 of
 * mat->inv and normalizes the result into the destination vector.
 *
 *   ARG_MAT      matrix object; only its MATRIX_INV float array is read
 *   ARG_SCALE    uniform factor, folded into the matrix when ARG_LENGTHS
 *                is non-NULL
 *   ARG_IN       source vector (start, count and stride fields are read)
 *   ARG_LENGTHS  optional float array, one entry per normal; each
 *                transformed normal is multiplied by its entry.  When
 *                NULL, 1/sqrt(n.n) is computed for every normal instead.
 *   ARG_DEST     destination vector; its count is set to in->count and
 *                three floats are written per 16-byte output slot
 *
 * Two passes over the data: pass 1 transforms in -> dest, pass 2
 * normalizes dest in place.  A zero-length transformed normal is not
 * special-cased on the computed-length path.
 *
 * Operand order is (source, destination) throughout.  FRAME_OFFSET is
 * redefined after every group of pushes; presumably the ARG_* macros in
 * norm_args.h are ESP-relative and use it -- confirm there.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
HIDDEN(_mesa_3dnow_transform_normalize_normals)
GLNAME(_mesa_3dnow_transform_normalize_normals):

#define FRAME_OFFSET 12

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )
    PUSH_L    ( EBP )

    MOV_L     ( ARG_LENGTHS, EDI )
    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( REGOFF(V4F_COUNT, ESI), EBP )     /* dest->count = in->count */
    MOV_L     ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L     ( REGOFF(V4F_START, ESI), EDX )     /* in->start */
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )     /* dest->start */
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */

    CMP_L     ( CONST(0), EBP )                   /* count == 0 ? nothing to do */
    JE        ( LLBL (G3TN_end) )

    FEMMS

    PUSH_L    ( EBP )
    PUSH_L    ( EAX )
    PUSH_L    ( EDX )                             /* save counter & pointers for */
                                                  /* the normalize pass          */
#undef FRAME_OFFSET
#define FRAME_OFFSET 24

    MOVQ      ( M(0), MM3 )                       /* m1  | m0  */
    MOVQ      ( M(4), MM4 )                       /* m5  | m4  */

    MOVD      ( M(2), MM5 )                       /*     | m2  */
    PUNPCKLDQ ( M(6), MM5 )                       /* m6  | m2  */

    MOVQ      ( M(8), MM6 )                       /* m9  | m8  */
    MOVQ      ( M(10), MM7 )                      /* m11 | m10 (upper lane never stored) */

    /*
     * ARG_SCALE only matters when precomputed lengths are used: on the
     * other path every result is renormalized explicitly, which cancels
     * any uniform factor.  So fold the scale into the matrix exactly when
     * lengths != NULL.
     */
    CMP_L     ( CONST(0), EDI )                   /* lengths == NULL ? */
    JE        ( LLBL (G3TN_scale_end) )           /* yes: skip the scaling */

    MOVD      ( ARG_SCALE, MM0 )                  /*       | scale */
    PUNPCKLDQ ( MM0, MM0 )                        /* scale | scale */

    PFMUL     ( MM0, MM3 )                        /* scale * m1  | scale * m0  */
    PFMUL     ( MM0, MM4 )                        /* scale * m5  | scale * m4  */
    PFMUL     ( MM0, MM5 )                        /* scale * m6  | scale * m2  */
    PFMUL     ( MM0, MM6 )                        /* scale * m9  | scale * m8  */
    PFMUL     ( MM0, MM7 )                        /* (unused)    | scale * m10 */

ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):                            /* pass 1: dest[i] = M * in[i] */
    MOVQ      ( REGIND (EDX), MM0 )               /* x1 | x0 */
    MOVD      ( REGOFF (8, EDX), MM2 )            /*    | x2 */

    MOVQ      ( MM0, MM1 )                        /* x1 | x0 */
    PUNPCKLDQ ( MM2, MM2 )                        /* x2 | x2 */

    PFMUL     ( MM3, MM0 )                        /* x1*m1 | x0*m0 */
    ADD_L     ( CONST(16), EAX )                  /* next r */

    PREFETCHW ( REGIND(EAX) )

    PFMUL     ( MM4, MM1 )                        /* x1*m5 | x0*m4 */
    PFACC     ( MM1, MM0 )                        /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    PFMUL     ( MM5, MM2 )                        /* x2*m6 | x2*m2 */
    PFADD     ( MM2, MM0 )                        /* x0*m4+x1*m5+x2*m6 | x0*m0+x1*m1+x2*m2 */

    MOVQ      ( REGIND (EDX), MM1 )               /* x1 | x0 */
    MOVQ      ( MM0, REGOFF(-16, EAX) )           /* write r0, r1 */

    PFMUL     ( MM6, MM1 )                        /* x1*m9 | x0*m8 */
    MOVD      ( REGOFF (8, EDX), MM2 )            /*       | x2 */

    PFMUL     ( MM7, MM2 )                        /*       | x2*m10 */
    PFACC     ( MM1, MM1 )                        /* *not used* | x0*m8+x1*m9 */

    PFADD     ( MM2, MM1 )                        /* *not used* | x0*m8+x1*m9+x2*m10 */
    ADD_L     ( STRIDE, EDX )                     /* next normal */

    PREFETCH  ( REGIND(EDX) )

    MOVD      ( MM1, REGOFF(-8, EAX) )            /* write r2 */
    SUB_L     ( CONST(1), EBP )                   /* decrement normal counter */
    JNZ       ( LLBL (G3TN_transform) )


    POP_L     ( EDX )                             /* end of transform ---  */
    POP_L     ( EAX )                             /* now normalizing ...   */
    POP_L     ( EBP )

    CMP_L     ( CONST(0), EDI )                   /* lengths == NULL ? */
    JE        ( LLBL (G3TN_norm ) )               /* yes: calculate lengths */


ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):                       /* pass 2a: dest[i] *= lengths[i] */

    PREFETCHW ( REGOFF(12,EAX) )

    MOVQ      ( REGIND(EAX), MM0 )                /* x1 | x0 */
    MOVD      ( REGOFF(8, EAX), MM1 )             /*    | x2 */

    MOVD      ( REGIND (EDI), MM3 )               /*    | length (x) */
    PFMUL     ( MM3, MM1 )                        /*    | x2 (normalized) */

    PUNPCKLDQ ( MM3, MM3 )                        /* length (x) | length (x) */
    PFMUL     ( MM3, MM0 )                        /* x1 (normalized) | x0 (normalized) */

    ADD_L     ( CONST(4), EDI )                   /* next length */

    PREFETCH  ( REGIND(EDI) )

    MOVQ      ( MM0, REGIND(EAX) )                /* write new x0, x1 */
    MOVD      ( MM1, REGOFF(8, EAX) )             /* write new x2 */

    ADD_L     ( CONST(16), EAX )                  /* next r */
    SUB_L     ( CONST(1), EBP )                   /* decrement normal counter */

    JNZ       ( LLBL (G3TN_norm_w_lengths) )
    JMP       ( LLBL (G3TN_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TN_norm):                                 /* pass 2b: dest[i] /= |dest[i]| */

    PREFETCHW ( REGIND(EAX) )

    MOVQ      ( REGIND (EAX), MM0 )               /* x1 | x0 */
    MOVD      ( REGOFF(8, EAX), MM1 )             /*    | x2 */

    MOVQ      ( MM0, MM3 )                        /* x1 | x0 */
    MOVQ      ( MM1, MM4 )                        /*    | x2 */

    PFMUL     ( MM0, MM3 )                        /* x1*x1 | x0*x0 */
    ADD_L     ( CONST(16), EAX )                  /* next r */

    PFMUL     ( MM1, MM4 )                        /*       | x2*x2 */
    PFADD     ( MM4, MM3 )                        /* x1*x1 | x0*x0+x2*x2 */

    PFACC     ( MM3, MM3 )                        /* **not used** | x0*x0+x1*x1+x2*x2 */
    PFRSQRT   ( MM3, MM5 )                        /* estimate of 1/sqrt (x0*x0+x1*x1+x2*x2) */

    MOVQ      ( MM5, MM4 )
    PUNPCKLDQ ( MM3, MM3 )

    SUB_L     ( CONST(1), EBP )                   /* decrement normal counter; the MMX
                                                   * ops below leave the flags alone */
    PFMUL     ( MM5, MM5 )

    PFRSQIT1  ( MM3, MM5 )                        /* refine the reciprocal-sqrt estimate */
    PFRCPIT2  ( MM4, MM5 )

    PFMUL     ( MM5, MM0 )                        /* x1 (normalized) | x0 (normalized) */

    MOVQ      ( MM0, REGOFF(-16, EAX) )           /* write new x0, x1 */
    PFMUL     ( MM5, MM1 )                        /*    | x2 (normalized) */

    MOVD      ( MM1, REGOFF(-8, EAX) )            /* write new x2 */
    JNZ       ( LLBL (G3TN_norm) )

LLBL (G3TN_exit_3dnow):
    FEMMS

LLBL (G3TN_end):
    POP_L     ( EBP )
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Variant of the transform+normalize routine that uses only the diagonal
 * of mat->inv (m0, m5, m10), then normalizes each result into the
 * destination vector.
 *
 *   ARG_MAT      matrix object; only its MATRIX_INV float array is read
 *   ARG_SCALE    uniform factor, folded into the diagonal when
 *                ARG_LENGTHS is non-NULL
 *   ARG_IN       source vector (start, count and stride fields are read)
 *   ARG_LENGTHS  optional float array, one entry per normal; each
 *                transformed normal is multiplied by its entry.  When
 *                NULL, 1/sqrt(n.n) is computed for every normal instead.
 *   ARG_DEST     destination vector; its count is set to in->count and
 *                three floats are written per 16-byte output slot
 *
 * Exactly `count` entries of ARG_LENGTHS are read.  A zero-length
 * transformed normal is not special-cased on the computed-length path.
 * Operand order is (source, destination) throughout.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )
    PUSH_L    ( EBP )

    MOV_L     ( ARG_LENGTHS, EDI )
    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( REGOFF(V4F_COUNT, ESI), EBP )     /* dest->count = in->count */
    MOV_L     ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )     /* dest->start */
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */
    MOV_L     ( REGOFF(V4F_START, ESI), EDX )     /* in->start */

    CMP_L     ( CONST(0), EBP )                   /* count == 0 ? nothing to do */
    JE        ( LLBL (G3TNNR_end) )

    FEMMS

    MOVD      ( M(0), MM0 )                       /*     | m0  */
    PUNPCKLDQ ( M(5), MM0 )                       /* m5  | m0  */

    MOVD      ( M(10), MM2 )                      /*     | m10 */
    PUNPCKLDQ ( MM2, MM2 )                        /* m10 | m10 */

    /*
     * Without precomputed lengths every result is renormalized explicitly,
     * which cancels any uniform factor, so ARG_SCALE is only folded into
     * the matrix on the lengths != NULL path.
     */
    CMP_L     ( CONST(0), EDI )                   /* lengths == NULL ? */
    JE        ( LLBL (G3TNNR_norm) )              /* yes: need to calculate lengths */

    MOVD      ( ARG_SCALE, MM7 )                  /*       | scale */
    PUNPCKLDQ ( MM7, MM7 )                        /* scale | scale */

    PFMUL     ( MM7, MM0 )                        /* scale * m5  | scale * m0  */
    PFMUL     ( MM7, MM2 )                        /* scale * m10 | scale * m10 */

ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths):                     /* use precalculated lengths */

    PREFETCHW ( REGIND(EAX) )

    /* Load this normal's length at the top of the iteration so the loop
     * never reads past lengths[count-1]. */
    MOVD      ( REGIND(EDI), MM3 )                /*    | length (x) */

    MOVQ      ( REGIND(EDX), MM6 )                /* x1 | x0 */
    MOVD      ( REGOFF(8, EDX), MM7 )             /*    | x2 */

    PFMUL     ( MM0, MM6 )                        /* x1*m5 | x0*m0 */
    ADD_L     ( STRIDE, EDX )                     /* next normal */

    PREFETCH  ( REGIND(EDX) )

    PFMUL     ( MM2, MM7 )                        /*    | x2*m10 */
    ADD_L     ( CONST(16), EAX )                  /* next r */

    PFMUL     ( MM3, MM7 )                        /*    | x2 (normalized) */
    PUNPCKLDQ ( MM3, MM3 )                        /* length (x) | length (x) */

    ADD_L     ( CONST(4), EDI )                   /* next length */
    PFMUL     ( MM3, MM6 )                        /* x1 (normalized) | x0 (normalized) */

    SUB_L     ( CONST(1), EBP )                   /* decrement normal counter; the
                                                   * stores below leave the flags alone */
    MOVQ      ( MM6, REGOFF(-16, EAX) )           /* write r0, r1 */

    MOVD      ( MM7, REGOFF(-8, EAX) )            /* write r2 */

    JNZ       ( LLBL (G3TNNR_norm_w_lengths) )
    JMP       ( LLBL (G3TNNR_exit_3dnow) )

ALIGNTEXT32
LLBL (G3TNNR_norm):                               /* need to calculate lengths */

    PREFETCHW ( REGIND(EAX) )

    MOVQ      ( REGIND(EDX), MM6 )                /* x1 | x0 */
    MOVD      ( REGOFF(8, EDX), MM7 )             /*    | x2 */

    PFMUL     ( MM0, MM6 )                        /* x1*m5 | x0*m0 */
    ADD_L     ( CONST(16), EAX )                  /* next r */

    PFMUL     ( MM2, MM7 )                        /*    | x2*m10 */
    MOVQ      ( MM6, MM3 )                        /* x1 (transformed) | x0 (transformed) */

    MOVQ      ( MM7, MM4 )                        /*    | x2 (transformed) */
    PFMUL     ( MM6, MM3 )                        /* x1*x1 | x0*x0 */


    PFMUL     ( MM7, MM4 )                        /*    | x2*x2 */
    PFACC     ( MM3, MM3 )                        /* **not used** | x0*x0+x1*x1 */

    PFADD     ( MM4, MM3 )                        /* **not used** | x0*x0+x1*x1+x2*x2 */
    ADD_L     ( STRIDE, EDX )                     /* next normal */

    PREFETCH  ( REGIND(EDX) )

    PFRSQRT   ( MM3, MM5 )                        /* estimate of 1/sqrt (x0*x0+x1*x1+x2*x2) */
    MOVQ      ( MM5, MM4 )

    PUNPCKLDQ ( MM3, MM3 )
    PFMUL     ( MM5, MM5 )

    PFRSQIT1  ( MM3, MM5 )                        /* refine the reciprocal-sqrt estimate */
    SUB_L     ( CONST(1), EBP )                   /* decrement normal counter */

    PFRCPIT2  ( MM4, MM5 )
    PFMUL     ( MM5, MM6 )                        /* x1 (normalized) | x0 (normalized) */

    MOVQ      ( MM6, REGOFF(-16, EAX) )           /* write r0, r1 */
    PFMUL     ( MM5, MM7 )                        /*    | x2 (normalized) */

    MOVD      ( MM7, REGOFF(-8, EAX) )            /* write r2 */
    JNZ       ( LLBL (G3TNNR_norm) )


LLBL (G3TNNR_exit_3dnow):
    FEMMS

LLBL (G3TNNR_end):
    POP_L     ( EBP )
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * Multiplies every input normal component-wise by scale * (m0, m5, m10),
 * the diagonal of mat->inv, and stores the result in the destination
 * vector.  No normalization is done.
 *
 *   ARG_MAT    matrix object; only its MATRIX_INV float array is read
 *   ARG_SCALE  uniform factor folded into the diagonal
 *   ARG_IN     source vector (start, count and stride fields are read)
 *   ARG_DEST   destination vector; its count is set to in->count and
 *              three floats are written per 16-byte output slot
 *
 * Operand order is (source, destination) throughout.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )
    PUSH_L    ( EBP )

    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(V4F_COUNT, ESI), EBP )     /* dest->count = in->count */
    MOV_L     ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )     /* dest->start */
    MOV_L     ( REGOFF(V4F_START, ESI), EDX )     /* in->start */
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */

    CMP_L     ( CONST(0), EBP )                   /* count == 0 ? nothing to do */
    JE        ( LLBL (G3TRNR_end) )

    FEMMS

    MOVD      ( ARG_SCALE, MM6 )                  /*       | scale */
    PUNPCKLDQ ( MM6, MM6 )                        /* scale | scale */

    MOVD      ( M(0), MM0 )                       /*       | m0 */
    PUNPCKLDQ ( M(5), MM0 )                       /* m5    | m0 */

    PFMUL     ( MM6, MM0 )                        /* scale*m5 | scale*m0 */
    MOVD      ( M(10), MM2 )                      /*       | m10 */

    PFMUL     ( MM6, MM2 )                        /*       | scale*m10 */

ALIGNTEXT32
LLBL (G3TRNR_rescale):

    PREFETCHW ( REGIND(EAX) )

    MOVQ      ( REGIND(EDX), MM4 )                /* x1 | x0 */
    MOVD      ( REGOFF(8, EDX), MM5 )             /*    | x2 */

    PFMUL     ( MM0, MM4 )                        /* x1*m5 | x0*m0 */
    ADD_L     ( STRIDE, EDX )                     /* next normal */

    PREFETCH  ( REGIND(EDX) )

    PFMUL     ( MM2, MM5 )                        /*    | x2*m10 */
    ADD_L     ( CONST(16), EAX )                  /* next r */

    SUB_L     ( CONST(1), EBP )                   /* decrement normal counter; the
                                                   * stores below leave the flags alone */
    MOVQ      ( MM4, REGOFF(-16, EAX) )           /* write r0, r1 */

    MOVD      ( MM5, REGOFF(-8, EAX) )            /* write r2 */
    JNZ       ( LLBL (G3TRNR_rescale) )           /* cnt > 0 ? -> process next normal */

    FEMMS

LLBL (G3TRNR_end):
    POP_L     ( EBP )
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * Multiplies every input normal by scale * (upper-left 3x3 of mat->inv)
 * and stores the result in the destination vector.  No normalization is
 * done.
 *
 *   ARG_MAT    matrix object; only its MATRIX_INV float array is read
 *   ARG_SCALE  uniform factor folded into the nine matrix entries
 *   ARG_IN     source vector (start, count and stride fields are read)
 *   ARG_DEST   destination vector; its count is set to in->count and
 *              three floats are written per 16-byte output slot
 *
 * Operand order is (source, destination) throughout.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
HIDDEN(_mesa_3dnow_transform_rescale_normals)
GLNAME(_mesa_3dnow_transform_rescale_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )

    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(V4F_COUNT, ESI), EDI )     /* dest->count = in->count */
    MOV_L     ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )     /* dest->start */
    MOV_L     ( REGOFF(V4F_START, ESI), EDX )     /* in->start */
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */

    CMP_L     ( CONST(0), EDI )                   /* count == 0 ? nothing to do */
    JE        ( LLBL (G3TR_end) )

    FEMMS

    MOVQ      ( M(0), MM3 )                       /* m1 | m0 */

    MOVQ      ( M(4), MM4 )                       /* m5 | m4 */
    MOVD      ( ARG_SCALE, MM0 )                  /*    | scale */

    MOVD      ( M(2), MM5 )                       /*    | m2 */
    PUNPCKLDQ ( MM0, MM0 )                        /* scale | scale */

    PUNPCKLDQ ( M(6), MM5 )                       /* m6 | m2 */
    PFMUL     ( MM0, MM3 )                        /* scale*m1 | scale*m0 */

    MOVQ      ( M(8), MM6 )                       /* m9 | m8 */
    PFMUL     ( MM0, MM4 )                        /* scale*m5 | scale*m4 */

    MOVD      ( M(10), MM7 )                      /*    | m10 */
    PFMUL     ( MM0, MM5 )                        /* scale*m6 | scale*m2 */

    PFMUL     ( MM0, MM6 )                        /* scale*m9 | scale*m8 */

    PFMUL     ( MM0, MM7 )                        /*    | scale*m10 */

ALIGNTEXT32
LLBL (G3TR_rescale):

    PREFETCHW ( REGIND(EAX) )

    MOVQ      ( REGIND(EDX), MM0 )                /* x1 | x0 (MM0 no longer holds scale) */
    MOVD      ( REGOFF(8, EDX), MM2 )             /*    | x2 */

    MOVQ      ( MM0, MM1 )                        /* x1 | x0 */
    PUNPCKLDQ ( MM2, MM2 )                        /* x2 | x2 */

    PFMUL     ( MM3, MM0 )                        /* x1*m1 | x0*m0 */
    ADD_L     ( CONST(16), EAX )                  /* next r */

    PFMUL     ( MM4, MM1 )                        /* x1*m5 | x0*m4 */
    PFACC     ( MM1, MM0 )                        /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    MOVQ      ( REGIND(EDX), MM1 )                /* x1 | x0 */

    PFMUL     ( MM5, MM2 )                        /* x2*m6 | x2*m2 */
    PFADD     ( MM2, MM0 )                        /* x0*m4...+x2*m6 | x0*m0+x1*m1+x2*m2 */

    MOVD      ( REGOFF(8, EDX), MM2 )             /*    | x2 */
    ADD_L     ( STRIDE, EDX )                     /* next normal */

    PREFETCH  ( REGIND(EDX) )

    MOVQ      ( MM0, REGOFF(-16, EAX) )           /* write r0, r1 */
    PFMUL     ( MM6, MM1 )                        /* x1*m9 | x0*m8 */

    PFMUL     ( MM7, MM2 )                        /*    | x2*m10 */
    PFACC     ( MM1, MM1 )                        /* *not used* | x0*m8+x1*m9 */

    PFADD     ( MM2, MM1 )                        /* *not used* | x0*m8+x1*m9+x2*m10 */
    MOVD      ( MM1, REGOFF(-8, EAX) )            /* write r2 */

    SUB_L     ( CONST(1), EDI )                   /* decrement normal counter */
    JNZ       ( LLBL (G3TR_rescale) )

    FEMMS

LLBL (G3TR_end):
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * Multiplies every input normal component-wise by (m0, m5, m10), the
 * diagonal of mat->inv, and stores the result in the destination vector.
 * No scaling or normalization is done.
 *
 *   ARG_MAT   matrix object; only its MATRIX_INV float array is read
 *   ARG_IN    source vector (start, count and stride fields are read)
 *   ARG_DEST  destination vector; its count is set to in->count and
 *             three floats are written per 16-byte output slot
 *
 * Operand order is (source, destination) throughout.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normals_no_rot)
GLNAME(_mesa_3dnow_transform_normals_no_rot):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )

    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(V4F_COUNT, ESI), EDI )     /* dest->count = in->count */
    MOV_L     ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )     /* dest->start */
    MOV_L     ( REGOFF(V4F_START, ESI), EDX )     /* in->start */
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */

    CMP_L     ( CONST(0), EDI )                   /* count == 0 ? nothing to do */
    JE        ( LLBL (G3TNR_end) )

    FEMMS

    MOVD      ( M(0), MM0 )                       /*     | m0  */
    PUNPCKLDQ ( M(5), MM0 )                       /* m5  | m0  */

    MOVD      ( M(10), MM2 )                      /*     | m10 */
    PUNPCKLDQ ( MM2, MM2 )                        /* m10 | m10 */

ALIGNTEXT32
LLBL (G3TNR_transform):

    PREFETCHW ( REGIND(EAX) )

    MOVQ      ( REGIND(EDX), MM4 )                /* x1 | x0 */
    MOVD      ( REGOFF(8, EDX), MM5 )             /*    | x2 */

    PFMUL     ( MM0, MM4 )                        /* x1*m5 | x0*m0 */
    ADD_L     ( STRIDE, EDX )                     /* next normal */

    PREFETCH  ( REGIND(EDX) )

    PFMUL     ( MM2, MM5 )                        /*    | x2*m10 */
    ADD_L     ( CONST(16), EAX )                  /* next r */

    SUB_L     ( CONST(1), EDI )                   /* decrement normal counter; the
                                                   * stores below leave the flags alone */
    MOVQ      ( MM4, REGOFF(-16, EAX) )           /* write r0, r1 */

    MOVD      ( MM5, REGOFF(-8, EAX) )            /* write r2 */
    JNZ       ( LLBL (G3TNR_transform) )

    FEMMS

LLBL (G3TNR_end):
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_transform_normals
 *
 * Multiplies every input normal by the upper-left 3x3 of mat->inv and
 * stores the result in the destination vector.  No scaling or
 * normalization is done.
 *
 *   r0 = x0*m0 + x1*m1 + x2*m2
 *   r1 = x0*m4 + x1*m5 + x2*m6
 *   r2 = x0*m8 + x1*m9 + x2*m10
 *
 *   ARG_MAT   matrix object; only its MATRIX_INV float array is read
 *   ARG_IN    source vector (start, count and stride fields are read)
 *   ARG_DEST  destination vector; its count is set to in->count and
 *             three floats are written per 16-byte output slot
 *
 * Operand order is (source, destination) throughout.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
HIDDEN(_mesa_3dnow_transform_normals)
GLNAME(_mesa_3dnow_transform_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 8

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )

    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( ARG_MAT, ECX )
    MOV_L     ( REGOFF(V4F_COUNT, ESI), EDI )     /* dest->count = in->count */
    MOV_L     ( EDI, REGOFF(V4F_COUNT, EAX) )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )     /* dest->start */
    MOV_L     ( REGOFF(V4F_START, ESI), EDX )     /* in->start */
    MOV_L     ( REGOFF(MATRIX_INV, ECX), ECX )    /* mat->inv */

    CMP_L     ( CONST(0), EDI )                   /* count == 0 ? nothing to do */
    JE        ( LLBL (G3T_end) )

    FEMMS

    MOVQ      ( M(0), MM3 )                       /* m1 | m0 */
    MOVQ      ( M(4), MM4 )                       /* m5 | m4 */

    MOVD      ( M(2), MM5 )                       /*    | m2 */
    PUNPCKLDQ ( M(6), MM5 )                       /* m6 | m2 */

    MOVQ      ( M(8), MM6 )                       /* m9 | m8 */
    MOVD      ( M(10), MM7 )                      /*    | m10 */

ALIGNTEXT32
LLBL (G3T_transform):

    PREFETCHW ( REGIND(EAX) )

    MOVQ      ( REGIND(EDX), MM0 )                /* x1 | x0 */
    MOVD      ( REGOFF(8, EDX), MM2 )             /*    | x2 */

    MOVQ      ( MM0, MM1 )                        /* x1 | x0 */
    PUNPCKLDQ ( MM2, MM2 )                        /* x2 | x2 */

    PFMUL     ( MM3, MM0 )                        /* x1*m1 | x0*m0 */
    ADD_L     ( CONST(16), EAX )                  /* next r */

    PFMUL     ( MM4, MM1 )                        /* x1*m5 | x0*m4 */
    PFACC     ( MM1, MM0 )                        /* x0*m4+x1*m5 | x0*m0+x1*m1 */

    PFMUL     ( MM5, MM2 )                        /* x2*m6 | x2*m2 */
    PFADD     ( MM2, MM0 )                        /* x0*m4...+x2*m6 | x0*m0+x1*m1+x2*m2 */

    MOVQ      ( REGIND(EDX), MM1 )                /* x1 | x0 */
    MOVQ      ( MM0, REGOFF(-16, EAX) )           /* write r0, r1 */

    PFMUL     ( MM6, MM1 )                        /* x1*m9 | x0*m8 */
    MOVD      ( REGOFF(8, EDX), MM2 )             /*    | x2 */

    PFMUL     ( MM7, MM2 )                        /*    | x2*m10 */
    ADD_L     ( STRIDE, EDX )                     /* next normal */

    PREFETCH  ( REGIND(EDX) )

    PFACC     ( MM1, MM1 )                        /* *not used* | x0*m8+x1*m9 */
    PFADD     ( MM2, MM1 )                        /* *not used* | x0*m8+x1*m9+x2*m10 */

    MOVD      ( MM1, REGOFF(-8, EAX) )            /* write r2 */
    SUB_L     ( CONST(1), EDI )                   /* decrement normal counter */

    JNZ       ( LLBL (G3T_transform) )

    FEMMS

LLBL (G3T_end):
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_normalize_normals
 *
 * Normalizes every input normal into the destination vector without
 * applying any matrix.
 *
 *   ARG_IN       source vector (start, count and stride fields are read)
 *   ARG_LENGTHS  optional float array, one entry per normal; each normal
 *                is multiplied by its entry.  When NULL, 1/sqrt(n.n) is
 *                computed for every normal instead.
 *   ARG_DEST     destination vector; its count is set to in->count and
 *                three floats are written per 16-byte output slot
 *
 * A zero-length normal is not special-cased on the computed-length path.
 * Operand order is (source, destination) throughout.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
HIDDEN(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):

#undef FRAME_OFFSET
#define FRAME_OFFSET 12

    PUSH_L    ( EDI )
    PUSH_L    ( ESI )
    PUSH_L    ( EBP )

    MOV_L     ( ARG_IN, ESI )
    MOV_L     ( ARG_DEST, EAX )
    MOV_L     ( REGOFF(V4F_COUNT, ESI), EBP )     /* dest->count = in->count */
    MOV_L     ( EBP, REGOFF(V4F_COUNT, EAX) )
    MOV_L     ( REGOFF(V4F_START, EAX), EAX )     /* dest->start */
    MOV_L     ( REGOFF(V4F_START, ESI), ECX )     /* in->start */
    MOV_L     ( ARG_LENGTHS, EDX )

    CMP_L     ( CONST(0), EBP )                   /* count == 0 ? nothing to do */
    JE        ( LLBL (G3N_end) )

    FEMMS

    CMP_L     ( CONST(0), EDX )                   /* lengths == NULL ? */
    JE        ( LLBL (G3N_norm2) )                /* yes: calculate lengths */

ALIGNTEXT32
LLBL (G3N_norm1):                                 /* use precalculated lengths */

    PREFETCHW ( REGIND(EAX) )                     /* destination slot is about to be written */

    MOVQ      ( REGIND(ECX), MM0 )                /* x1 | x0 */
    MOVD      ( REGOFF(8, ECX), MM1 )             /*    | x2 */

    MOVD      ( REGIND(EDX), MM3 )                /*    | length (x) */
    PFMUL     ( MM3, MM1 )                        /*    | x2 (normalized) */

    PUNPCKLDQ ( MM3, MM3 )                        /* length (x) | length (x) */
    ADD_L     ( STRIDE, ECX )                     /* next normal */

    PREFETCH  ( REGIND(ECX) )

    PFMUL     ( MM3, MM0 )                        /* x1 (normalized) | x0 (normalized) */
    MOVQ      ( MM0, REGIND(EAX) )                /* write new x0, x1 */

    MOVD      ( MM1, REGOFF(8, EAX) )             /* write new x2 */
    ADD_L     ( CONST(16), EAX )                  /* next r */

    ADD_L     ( CONST(4), EDX )                   /* next length */
    SUB_L     ( CONST(1), EBP )                   /* decrement normal counter */

    JNZ       ( LLBL (G3N_norm1) )

    JMP       ( LLBL (G3N_end1) )

ALIGNTEXT32
LLBL (G3N_norm2):                                 /* need to calculate lengths */

    PREFETCHW ( REGIND(EAX) )

    PREFETCH  ( REGIND(ECX) )

    MOVQ      ( REGIND(ECX), MM0 )                /* x1 | x0 */
    MOVD      ( REGOFF(8, ECX), MM1 )             /*    | x2 */

    MOVQ      ( MM0, MM3 )                        /* x1 | x0 */
    ADD_L     ( STRIDE, ECX )                     /* next normal */

    PFMUL     ( MM0, MM3 )                        /* x1*x1 | x0*x0 */
    MOVQ      ( MM1, MM4 )                        /*    | x2 */

    ADD_L     ( CONST(16), EAX )                  /* next r */
    PFMUL     ( MM1, MM4 )                        /*    | x2*x2 */

    PFADD     ( MM4, MM3 )                        /* x1*x1 | x0*x0+x2*x2 */
    PFACC     ( MM3, MM3 )                        /* x0*x0+...+x2*x2 | x0*x0+x1*x1+x2*x2 */

    PFRSQRT   ( MM3, MM5 )                        /* estimate of 1/sqrt (x0*x0+x1*x1+x2*x2) */
    MOVQ      ( MM5, MM4 )

    PUNPCKLDQ ( MM3, MM3 )
    PFMUL     ( MM5, MM5 )

    PFRSQIT1  ( MM3, MM5 )                        /* refine the reciprocal-sqrt estimate */
    SUB_L     ( CONST(1), EBP )                   /* decrement normal counter; the MMX
                                                   * ops below leave the flags alone */

    PFRCPIT2  ( MM4, MM5 )

    PFMUL     ( MM5, MM0 )                        /* x1 (normalized) | x0 (normalized) */
    MOVQ      ( MM0, REGOFF(-16, EAX) )           /* write new x0, x1 */

    PFMUL     ( MM5, MM1 )                        /*    | x2 (normalized) */
    MOVD      ( MM1, REGOFF(-8, EAX) )            /* write new x2 */

    JNZ       ( LLBL (G3N_norm2) )

LLBL (G3N_end1):
    FEMMS

LLBL (G3N_end):
    POP_L     ( EBP )
    POP_L     ( ESI )
    POP_L     ( EDI )
    RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_3dnow_rescale_normals
 *
 * Multiplies the three components of every input normal by ARG_SCALE and
 * stores the result in the destination vector.  No matrix is read and no
 * normalization is done.
 *
 *   ARG_SCALE  uniform factor
 *   ARG_IN     source vector (start, count and stride fields are read)
 *   ARG_DEST   destination vector; its count is set to in->count and
 *              three floats are written per 16-byte output slot
 *
 * Operand order is (source, destination) throughout.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
|
||||
HIDDEN(_mesa_3dnow_rescale_normals)
|
||||
GLNAME(_mesa_3dnow_rescale_normals):
|
||||
|
||||
/* Two registers are pushed below, hence 8 bytes. */
#undef FRAME_OFFSET
|
||||
#define FRAME_OFFSET 8
|
||||
/* EDI is saved and restored although this routine never modifies it. */
PUSH_L ( EDI )
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_IN, ESI )
|
||||
MOV_L ( ARG_DEST, EAX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, ESI), EDX ) /* dest->count = in->count */
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, EAX) )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
|
||||
MOV_L ( REGOFF(V4F_START, ESI), ECX ) /* in->start */
|
||||
|
||||
CMP_L ( CONST(0), EDX ) /* count == 0 ? nothing to do */
|
||||
JE ( LLBL (G3R_end) )
|
||||
|
||||
FEMMS
|
||||
|
||||
MOVD ( ARG_SCALE, MM0 ) /* scale */
|
||||
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL (G3R_rescale):
|
||||
|
||||
PREFETCHW ( REGIND(EAX) )
|
||||
|
||||
MOVQ ( REGIND(ECX), MM1 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, ECX), MM2 ) /* | x2 */
|
||||
|
||||
PFMUL ( MM0, MM1 ) /* x1*scale | x0*scale */
|
||||
ADD_L ( STRIDE, ECX ) /* next normal */
|
||||
|
||||
PREFETCH ( REGIND(ECX) )
|
||||
|
||||
PFMUL ( MM0, MM2 ) /* | x2*scale */
|
||||
ADD_L ( CONST(16), EAX ) /* next r */
|
||||
|
||||
/* EAX already points at the next slot, hence the negative offsets. */
MOVQ ( MM1, REGOFF(-16, EAX) ) /* write r0, r1 */
|
||||
MOVD ( MM2, REGOFF(-8, EAX) ) /* write r2 */
|
||||
|
||||
SUB_L ( CONST(1), EDX ) /* decrement normal counter */
|
||||
JNZ ( LLBL (G3R_rescale) )
|
||||
|
||||
FEMMS
|
||||
|
||||
LLBL (G3R_end):
|
||||
POP_L ( ESI )
|
||||
POP_L ( EDI )
|
||||
RET
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * On ELF/Linux builds, emit an empty .note.GNU-stack section.  This is the
 * GNU toolchain's marker that the object does not need an executable
 * stack.  It sits outside the USE_3DNOW_ASM guard, so it is emitted even
 * when the routines above are compiled out.
 */
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,437 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifdef USE_3DNOW_ASM
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define FRAME_OFFSET 4
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points1_general
 *
 * Transforms vertices of which only the x component (x0) is read, using
 * matrix rows 0 and 3.  Per vertex:
 *   r0 = x0*m00 + m30   r1 = x0*m01 + m31
 *   r2 = x0*m02 + m32   r3 = x0*m03 + m33
 * The destination gets size 4, VEC_SIZE_4 OR-ed into its flags and the
 * source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low".
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_general )
|
||||
HIDDEN(_mesa_3dnow_transform_points1_general)
|
||||
GLNAME( _mesa_3dnow_transform_points1_general ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(4, ECX), EDX ) /* EDX = result pointer; offset 4 is presumably V4F_START (the points2 routines use that name here) - TODO confirm against matypes.h */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPGR_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
|
||||
MOVQ ( REGOFF(8, ECX), MM1 ) /* m03 | m02 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
MOVQ ( REGOFF(56, ECX), MM3 ) /* m33 | m32 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPGR_2 ):
|
||||
|
||||
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
|
||||
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
|
||||
|
||||
MOVQ ( MM4, MM5 ) /* x0 | x0 */
|
||||
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
|
||||
|
||||
PFMUL ( MM1, MM5 ) /* x0*m03 | x0*m02 */
|
||||
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
|
||||
|
||||
PFADD ( MM3, MM5 ) /* x0*m03+m33 | x0*m02+m32 */
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
|
||||
|
||||
MOVQ ( MM5, REGOFF(8, EDX) ) /* write r3, r2 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPGR_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points1_identity
 *
 * Identity case: copies the first 32-bit component (x0) of every source
 * vertex to r0 of the matching result slot.  The matrix argument is loaded
 * into ECX but never read.  The destination gets size 1, VEC_SIZE_1 OR-ed
 * into its flags and the source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ESI = remaining
 * vertex count.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_identity )
|
||||
HIDDEN(_mesa_3dnow_transform_points1_identity)
|
||||
GLNAME( _mesa_3dnow_transform_points1_identity ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (unused by this routine) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(1), REGOFF(V4F_SIZE, ECX) ) /* dest size = 1 */
|
||||
OR_B ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(4, ECX), EDX ) /* EDX = result pointer; offset 4 is presumably V4F_START (the points2 routines use that name here) - TODO confirm against matypes.h */
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPIR_4) ) /* count == 0 -> skip the loop */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPIR_3 ):
|
||||
|
||||
MOVD ( REGIND(EAX), MM0 ) /* | x0 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
MOVD ( MM0, REGIND(EDX) ) /* | r0 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPIR_4 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points1_3d_no_rot
 *
 * Transforms vertices of which only x0 is read, using m00 and the
 * translation row.  Per vertex:
 *   r0 = x0*m00 + m30   r1 = m31   r2 = m32
 * The destination gets size 3, VEC_SIZE_3 OR-ed into its flags and the
 * source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low";
 * MOVD loads leave the high half zero.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d_no_rot )
|
||||
HIDDEN(_mesa_3dnow_transform_points1_3d_no_rot)
|
||||
GLNAME( _mesa_3dnow_transform_points1_3d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(4, ECX), EDX ) /* EDX = result pointer; offset 4 is presumably V4F_START (the points2 routines use that name here) - TODO confirm against matypes.h */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3NRR_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
|
||||
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3NRR_2 ):
|
||||
|
||||
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
|
||||
PFMUL ( MM0, MM4 ) /* | x0*m00 */
|
||||
|
||||
PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
|
||||
|
||||
MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 (= m32, same for every vertex) */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3NRR_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points1_perspective
 *
 * Transforms vertices of which only x0 is read, using only m00 and m32.
 * Per vertex:
 *   r0 = x0*m00   r1 = 0   r2 = m32   r3 = 0
 * The destination gets size 4, VEC_SIZE_4 OR-ed into its flags and the
 * source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low";
 * MOVD loads leave the high half zero, which is where the zeros written
 * to r1 and r3 come from.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_perspective )
|
||||
HIDDEN(_mesa_3dnow_transform_points1_perspective)
|
||||
GLNAME( _mesa_3dnow_transform_points1_perspective ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(4, ECX), EDX ) /* EDX = result pointer; offset 4 is presumably V4F_START (the points2 routines use that name here) - TODO confirm against matypes.h */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPPR_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPPR_2 ):
|
||||
|
||||
MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */
|
||||
PFMUL ( MM0, MM4 ) /* 0 | x0*m00 */
|
||||
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
|
||||
MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPPR_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points1_2d
 *
 * Transforms vertices of which only x0 is read, using m00, m01 and the
 * x/y translation.  Per vertex:
 *   r0 = x0*m00 + m30   r1 = x0*m01 + m31
 * The destination gets size 2, VEC_SIZE_2 OR-ed into its flags and the
 * source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low".
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d )
|
||||
HIDDEN(_mesa_3dnow_transform_points1_2d)
|
||||
GLNAME( _mesa_3dnow_transform_points1_2d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */
|
||||
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(4, ECX), EDX ) /* EDX = result pointer; offset 4 is presumably V4F_START (the points2 routines use that name here) - TODO confirm against matypes.h */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2R_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2R_2 ):
|
||||
|
||||
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
|
||||
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
|
||||
|
||||
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
|
||||
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
|
||||
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2R_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points1_2d_no_rot
 *
 * Transforms vertices of which only x0 is read, using m00 and the x/y
 * translation.  Per vertex:
 *   r0 = x0*m00 + m30   r1 = m31
 * The destination gets size 2, VEC_SIZE_2 OR-ed into its flags and the
 * source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low";
 * MOVD loads leave the high half zero.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d_no_rot )
|
||||
HIDDEN(_mesa_3dnow_transform_points1_2d_no_rot)
|
||||
GLNAME( _mesa_3dnow_transform_points1_2d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */
|
||||
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(4, ECX), EDX ) /* EDX = result pointer; offset 4 is presumably V4F_START (the points2 routines use that name here) - TODO confirm against matypes.h */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2NRR_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2NRR_2 ):
|
||||
|
||||
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
PFMUL ( MM0, MM4 ) /* | x0*m00 */
|
||||
PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */
|
||||
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2NRR_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points1_3d
 *
 * Transforms vertices of which only x0 is read, using m00..m02 and the
 * x/y/z translation.  Per vertex:
 *   r0 = x0*m00 + m30   r1 = x0*m01 + m31   r2 = x0*m02 + m32
 * The destination gets size 3, VEC_SIZE_3 OR-ed into its flags and the
 * source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low";
 * MOVD loads leave the high half zero.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d )
|
||||
HIDDEN(_mesa_3dnow_transform_points1_3d)
|
||||
GLNAME( _mesa_3dnow_transform_points1_3d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(4, ECX), EDX ) /* EDX = result pointer; offset 4 is presumably V4F_START (the points2 routines use that name here) - TODO confirm against matypes.h */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3R_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
|
||||
MOVD ( REGOFF(8, ECX), MM1 ) /* | m02 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3R_2 ):
|
||||
|
||||
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
|
||||
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
|
||||
|
||||
MOVQ ( MM4, MM5 ) /* x0 | x0 */
|
||||
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
|
||||
|
||||
PFMUL ( MM1, MM5 ) /* | x0*m02 */
|
||||
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
|
||||
|
||||
PFADD ( MM3, MM5 ) /* | x0*m02+m32 */
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
|
||||
|
||||
MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3R_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,477 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifdef USE_3DNOW_ASM
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define FRAME_OFFSET 4
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points2_general
 *
 * Transforms vertices of which x0 and x1 are read, using matrix rows
 * 0, 1 and 3.  Per vertex, for column c = 0..3:
 *   r[c] = x0*m0c + x1*m1c + m3c
 * The destination gets size 4, VEC_SIZE_4 OR-ed into its flags and the
 * source's count.
 *
 * The matrix is repacked before the loop so each MMX register holds one
 * column pair (m1c | m0c); PFACC then sums the two products horizontally.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low".
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_general )
|
||||
HIDDEN(_mesa_3dnow_transform_points2_general)
|
||||
GLNAME( _mesa_3dnow_transform_points2_general ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first result slot */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPGR_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
|
||||
|
||||
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
|
||||
|
||||
MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */
|
||||
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */
|
||||
|
||||
MOVD ( REGOFF(12, ECX), MM3 ) /* | m03 */
|
||||
PUNPCKLDQ ( REGOFF(28, ECX), MM3 ) /* m13 | m03 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */
|
||||
MOVQ ( REGOFF(56, ECX), MM5 ) /* m33 | m32 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPGR_2 ):
|
||||
|
||||
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */
|
||||
MOVQ ( MM6, MM7 ) /* x1 | x0 */
|
||||
|
||||
PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */
|
||||
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */
|
||||
|
||||
PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
|
||||
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */
|
||||
|
||||
MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */
|
||||
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 (reloaded for the r2/r3 half) */
|
||||
|
||||
MOVQ ( MM6, MM7 ) /* x1 | x0 */
|
||||
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */
|
||||
|
||||
PFMUL ( MM3, MM7 ) /* x1*m13 | x0*m03 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
PFACC ( MM7, MM6 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */
|
||||
PFADD ( MM5, MM6 ) /* x0*...*m13+m33 | x0*...*m12+m32 */
|
||||
|
||||
MOVQ ( MM6, REGOFF(8, EDX) ) /* write r3, r2 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPGR_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points2_perspective
 *
 * Transforms vertices of which x0 and x1 are read, using only m00, m11
 * and m32.  Per vertex:
 *   r0 = x0*m00   r1 = x1*m11   r2 = m32   r3 = 0
 * The destination gets size 4, VEC_SIZE_4 OR-ed into its flags and the
 * source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low";
 * the MOVD load of m32 leaves the high half zero, which is the r3 value.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_perspective )
|
||||
HIDDEN(_mesa_3dnow_transform_points2_perspective)
|
||||
GLNAME( _mesa_3dnow_transform_points2_perspective ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size = 4 */
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first result slot */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPPR_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPPR_2 ):
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
|
||||
MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPPR_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points2_3d
 *
 * Transforms vertices of which x0 and x1 are read, using columns 0..2 of
 * matrix rows 0, 1 and 3.  Per vertex, for column c = 0..2:
 *   r[c] = x0*m0c + x1*m1c + m3c
 * The destination gets size 3, VEC_SIZE_3 OR-ed into its flags and the
 * source's count.
 *
 * The matrix is repacked before the loop so each MMX register holds one
 * column pair (m1c | m0c); PFACC then sums the two products horizontally.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low".
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d )
|
||||
HIDDEN(_mesa_3dnow_transform_points2_3d)
|
||||
GLNAME( _mesa_3dnow_transform_points2_3d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
|
||||
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first result slot */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3R_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
|
||||
|
||||
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
|
||||
|
||||
MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */
|
||||
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */
|
||||
MOVD ( REGOFF(56, ECX), MM5 ) /* | m32 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3R_2 ):
|
||||
|
||||
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */
|
||||
MOVQ ( MM6, MM7 ) /* x1 | x0 */
|
||||
|
||||
PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */
|
||||
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */
|
||||
|
||||
PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
|
||||
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */
|
||||
|
||||
MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */
|
||||
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 (reloaded for r2) */
|
||||
|
||||
MOVQ ( MM6, MM7 ) /* x1 | x0 (only feeds the unused high half below) */
|
||||
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */
|
||||
|
||||
PFACC ( MM7, MM6 ) /* ***trash*** | x0*m02+x1*m12 */
|
||||
PFADD ( MM5, MM6 ) /* ***trash*** | x0*...*m12+m32 */
|
||||
|
||||
MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 (low half only; the trash is not stored) */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3R_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points2_3d_no_rot
 *
 * Transforms vertices of which x0 and x1 are read, using the m00/m11
 * scale and the x/y/z translation.  Per vertex:
 *   r0 = x0*m00 + m30   r1 = x1*m11 + m31   r2 = m32
 * The destination gets size 3, VEC_SIZE_3 OR-ed into its flags and the
 * source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low".
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot )
|
||||
HIDDEN(_mesa_3dnow_transform_points2_3d_no_rot)
|
||||
GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size = 3 */
|
||||
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first result slot */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3NRR_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3NRR_2 ):
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
|
||||
PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
|
||||
|
||||
MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 (= m32, same for every vertex) */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3NRR_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points2_2d
 *
 * Transforms vertices of which x0 and x1 are read, using columns 0..1 of
 * matrix rows 0, 1 and 3.  Per vertex:
 *   r0 = x0*m00 + x1*m10 + m30   r1 = x0*m01 + x1*m11 + m31
 * The destination gets size 2, VEC_SIZE_2 OR-ed into its flags and the
 * source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low".
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d )
|
||||
HIDDEN(_mesa_3dnow_transform_points2_2d)
|
||||
GLNAME( _mesa_3dnow_transform_points2_2d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */
|
||||
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first result slot */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2R_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
|
||||
MOVQ ( REGOFF(16, ECX), MM1 ) /* m11 | m10 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2R_2 ):
|
||||
|
||||
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
|
||||
MOVD ( REGOFF(4, EAX), MM5 ) /* | x1 */
|
||||
|
||||
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
|
||||
PUNPCKLDQ ( MM5, MM5 ) /* x1 | x1 */
|
||||
|
||||
PFMUL ( MM1, MM5 ) /* x1*m11 | x1*m10 */
|
||||
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
|
||||
|
||||
PFADD ( MM5, MM4 ) /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2R_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points2_2d_no_rot
 *
 * Transforms vertices of which x0 and x1 are read, using the m00/m11
 * scale and the x/y translation.  Per vertex:
 *   r0 = x0*m00 + m30   r1 = x1*m11 + m31
 * The destination gets size 2, VEC_SIZE_2 OR-ed into its flags and the
 * source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ECX = matrix,
 * ESI = remaining vertex count.  Register comments read "high | low".
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot )
|
||||
HIDDEN(_mesa_3dnow_transform_points2_2d_no_rot)
|
||||
GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */
|
||||
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first result slot */
|
||||
MOV_L ( ESI, ECX ) /* ECX = matrix */
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2NRR_3 ) ) /* count == 0 -> skip the loop */
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2NRR_2 ):
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
|
||||
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2NRR_3 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
/*
 * _mesa_3dnow_transform_points2_identity
 *
 * Identity case: copies the first two 32-bit components (x1 | x0) of every
 * source vertex to r1 | r0 of the matching result slot.  The matrix
 * argument is loaded into ECX but never read.  The destination gets
 * size 2, VEC_SIZE_2 OR-ed into its flags and the source's count.
 *
 * Loop registers: EAX = current source vertex, EDI = source stride,
 * EDX = current result (advanced 16 bytes per vertex), ESI = remaining
 * vertex count.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_identity )
|
||||
HIDDEN(_mesa_3dnow_transform_points2_identity)
|
||||
GLNAME( _mesa_3dnow_transform_points2_identity ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX ) /* ECX = dest */
|
||||
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (unused by this routine) */
|
||||
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source */
|
||||
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest size = 2 */
|
||||
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = first result slot */
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = first source vertex */
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPIR_4 ) ) /* count == 0 -> go straight to the exit label; G3TPIR_3 is the loop head, and entering it with ESI = 0 would wrap the counter in DEC_L */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPIR_3 ):
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
|
||||
MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPIR_4 ):
|
||||
|
||||
FEMMS /* leave MMX/3DNow! state before returning */
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#endif
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,561 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifdef USE_3DNOW_ASM
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define FRAME_OFFSET 4
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
|
||||
HIDDEN(_mesa_3dnow_transform_points3_general)
|
||||
GLNAME( _mesa_3dnow_transform_points3_general ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPGR_2 ) )
|
||||
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPGR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
MOVQ ( MM0, MM1 ) /* x1 | x0 */
|
||||
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
|
||||
|
||||
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
|
||||
MOVQ ( MM2, MM5 ) /* x2 | x2 */
|
||||
|
||||
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
|
||||
PFMUL ( REGOFF(32, ECX), MM2 ) /* x2*m9 | x2*m8 */
|
||||
|
||||
MOVQ ( MM0, MM3 ) /* x0 | x0 */
|
||||
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
|
||||
|
||||
MOVQ ( MM1, MM4 ) /* x1 | x1 */
|
||||
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
|
||||
|
||||
PFADD ( REGOFF(48, ECX), MM2 ) /* x2*m9+m13 | x2*m8+m12 */
|
||||
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
|
||||
|
||||
PFADD ( REGOFF(56, ECX), MM5 ) /* x2*m11+m15 | x2*m10+m14 */
|
||||
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
|
||||
|
||||
PFMUL ( REGOFF(8, ECX), MM3 ) /* x0*m3 | x0*m2 */
|
||||
PFADD ( MM1, MM2 ) /* r1 | r0 */
|
||||
|
||||
PFMUL ( REGOFF(24, ECX), MM4 ) /* x1*m7 | x1*m6 */
|
||||
ADD_L ( CONST(16), EDX ) /* next output vertex */
|
||||
|
||||
PFADD ( MM3, MM4 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */
|
||||
MOVQ ( MM2, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
|
||||
PFADD ( MM4, MM5 ) /* r3 | r2 */
|
||||
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPGR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
|
||||
HIDDEN(_mesa_3dnow_transform_points3_perspective)
|
||||
GLNAME( _mesa_3dnow_transform_points3_perspective ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPPR_2 ) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */
|
||||
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
|
||||
|
||||
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPPR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
PXOR ( MM7, MM7 ) /* 0 | 0 */
|
||||
MOVQ ( MM5, MM6 ) /* | x2 */
|
||||
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
PFSUB ( MM5, MM7 ) /* | -x2 */
|
||||
|
||||
PFMUL ( MM2, MM6 ) /* | x2*m22 */
|
||||
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */
|
||||
|
||||
PFADD ( MM3, MM6 ) /* | x2*m22+m32 */
|
||||
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */
|
||||
|
||||
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
MOVD ( MM6, REGOFF(-8, EDX) ) /* write r2 */
|
||||
|
||||
MOVD ( MM7, REGOFF(-4, EDX) ) /* write r3 */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPPR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
|
||||
HIDDEN(_mesa_3dnow_transform_points3_3d)
|
||||
GLNAME( _mesa_3dnow_transform_points3_3d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3R_2 ) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) ) /* EDX is the output array, which is only stored to: prefetch for write like the sibling routines */
|
||||
|
||||
MOVD ( REGOFF(8, ECX), MM7 ) /* | m2 */
|
||||
PUNPCKLDQ ( REGOFF(24, ECX), MM7 ) /* m6 | m2 */
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3R_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
MOVQ ( MM0, MM2 ) /* x1 | x0 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PUNPCKLDQ ( MM2, MM2 ) /* x0 | x0 */
|
||||
MOVQ ( MM0, MM3 ) /* x1 | x0 */
|
||||
|
||||
PFMUL ( REGIND(ECX), MM2 ) /* x0*m1 | x0*m0 */
|
||||
PUNPCKHDQ ( MM3, MM3 ) /* x1 | x1 */
|
||||
|
||||
MOVQ ( MM1, MM4 ) /* | x2 */
|
||||
PFMUL ( REGOFF(16, ECX), MM3 ) /* x1*m5 | x1*m4 */
|
||||
|
||||
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
|
||||
PFADD ( MM2, MM3 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
|
||||
|
||||
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
|
||||
PFADD ( REGOFF(48, ECX), MM3 ) /* x0*m1+x1*m5+m13 | x0*m0+x1*m4+m12 */
|
||||
|
||||
PFMUL ( MM7, MM0 ) /* x1*m6 | x0*m2 */
|
||||
PFADD ( MM4, MM3 ) /* r1 | r0 */
|
||||
|
||||
PFMUL ( REGOFF(40, ECX), MM1 ) /* | x2*m10 */
|
||||
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m14 | x2*m10 */
|
||||
|
||||
PFACC ( MM0, MM1 ) /* x0*m2+x1*m6 | x2*m10+m14 */
|
||||
|
||||
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
PFACC ( MM1, MM1 ) /* | r2 */
|
||||
|
||||
MOVD ( MM1, REGOFF(-8, EDX) ) /* write r2 */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3R_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
|
||||
HIDDEN(_mesa_3dnow_transform_points3_3d_no_rot)
|
||||
GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3NRR_2 ) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
|
||||
PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
|
||||
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
|
||||
|
||||
PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3NRR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGIND(EAX) ) /* EAX is the source array, which is only loaded from: plain (read) prefetch like the sibling routines */
|
||||
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
|
||||
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
|
||||
PFMUL ( MM2, MM5 ) /* | x2*m22 */
|
||||
|
||||
PFADD ( MM3, MM5 ) /* | x2*m22+m32 */
|
||||
MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 */
|
||||
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3NRR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
|
||||
HIDDEN(_mesa_3dnow_transform_points3_2d)
|
||||
GLNAME( _mesa_3dnow_transform_points3_2d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2R_3) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
|
||||
|
||||
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2R_2 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
MOVQ ( MM3, MM4 ) /* x1 | x0 */
|
||||
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
|
||||
|
||||
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
|
||||
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */
|
||||
|
||||
PFADD ( MM2, MM3 ) /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
|
||||
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2R_3 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
|
||||
HIDDEN(_mesa_3dnow_transform_points3_2d_no_rot)
|
||||
GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2NRR_2 ) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2NRR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
|
||||
|
||||
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP2NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2NRR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
|
||||
HIDDEN(_mesa_3dnow_transform_points3_identity)
|
||||
GLNAME( _mesa_3dnow_transform_points3_identity ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPIR_2 ) )
|
||||
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPIR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) )
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */
|
||||
|
||||
MOVD ( MM1, REGOFF(-8, EDX) ) /* | r2 */
|
||||
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPIR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#endif
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,570 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifdef USE_3DNOW_ASM
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
#define FRAME_OFFSET 4
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
|
||||
HIDDEN(_mesa_3dnow_transform_points4_general)
|
||||
GLNAME( _mesa_3dnow_transform_points4_general ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPGR_2 ) )
|
||||
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPGR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
MOVQ ( MM0, MM2 ) /* x1 | x0 */
|
||||
MOVQ ( MM4, MM6 ) /* x3 | x2 */
|
||||
|
||||
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
|
||||
PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */
|
||||
|
||||
MOVQ ( MM0, MM1 ) /* x0 | x0 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
|
||||
MOVQ ( MM2, MM3 ) /* x1 | x1 */
|
||||
|
||||
PFMUL ( REGOFF(8, ECX), MM1 ) /* x0*m3 | x0*m2 */
|
||||
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
|
||||
|
||||
PFMUL ( REGOFF(16, ECX), MM2 ) /* x1*m5 | x1*m4 */
|
||||
MOVQ ( MM4, MM5 ) /* x2 | x2 */
|
||||
|
||||
PFMUL ( REGOFF(24, ECX), MM3 ) /* x1*m7 | x1*m6 */
|
||||
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
|
||||
|
||||
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
|
||||
MOVQ ( MM6, MM7 ) /* x3 | x3 */
|
||||
|
||||
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
|
||||
PFADD ( MM0, MM2 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
|
||||
|
||||
PFMUL ( REGOFF(48, ECX), MM6 ) /* x3*m13 | x3*m12 */
|
||||
PFADD ( MM1, MM3 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */
|
||||
|
||||
PFMUL ( REGOFF(56, ECX), MM7 ) /* x3*m15 | x3*m14 */
|
||||
PFADD ( MM4, MM6 ) /* x2*m9+x3*m13 | x2*m8+x3*m12 */
|
||||
|
||||
PFADD ( MM5, MM7 ) /* x2*m11+x3*m15 | x2*m10+x3*m14 */
|
||||
PFADD ( MM2, MM6 ) /* r1 | r0 */
|
||||
|
||||
PFADD ( MM3, MM7 ) /* r3 | r2 */
|
||||
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
|
||||
MOVQ ( MM7, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPGR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
|
||||
HIDDEN(_mesa_3dnow_transform_points4_perspective)
|
||||
GLNAME( _mesa_3dnow_transform_points4_perspective ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPPR_2 ) )
|
||||
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
PREFETCHW ( REGIND(EDX) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */
|
||||
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m32 | m22 */
|
||||
|
||||
MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */
|
||||
PXOR ( MM7, MM7 ) /* 0 | 0 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPPR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGOFF(32, EAX) ) /* prefetch source 32 bytes past the next vertex; presumably only useful if the array is tightly packed (original note: "hopefully stride is zero") - TODO confirm */
|
||||
|
||||
MOVQ ( MM5, MM6 ) /* x3 | x2 */
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
|
||||
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */
|
||||
PFSUBR ( MM7, MM3 ) /* | -x2 */
|
||||
|
||||
PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */
|
||||
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */
|
||||
|
||||
PFACC ( MM3, MM6 ) /* -x2 | x2*m22+x3*m32 */
|
||||
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
|
||||
MOVQ ( MM6, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPPR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
|
||||
HIDDEN(_mesa_3dnow_transform_points4_3d)
|
||||
GLNAME( _mesa_3dnow_transform_points4_3d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3R_2 ) )
|
||||
|
||||
MOVD ( REGOFF(8, ECX), MM6 ) /* | m2 */
|
||||
PUNPCKLDQ ( REGOFF(24, ECX), MM6 ) /* m6 | m2 */
|
||||
|
||||
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
|
||||
PUNPCKLDQ ( REGOFF(56, ECX), MM7 ) /* m14 | m10 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3R_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
PREFETCH ( REGOFF(32, EAX) ) /* hopefully array is tightly packed */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */
|
||||
|
||||
MOVQ ( MM2, MM0 ) /* x1 | x0 */
|
||||
MOVQ ( MM3, MM4 ) /* x3 | x2 */
|
||||
|
||||
MOVQ ( MM0, MM1 ) /* x1 | x0 */
|
||||
MOVQ ( MM4, MM5 ) /* x3 | x2 */
|
||||
|
||||
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
|
||||
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
|
||||
|
||||
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
|
||||
PUNPCKLDQ ( MM3, MM3 ) /* x2 | x2 */
|
||||
|
||||
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
|
||||
PUNPCKHDQ ( MM4, MM4 ) /* x3 | x3 */
|
||||
|
||||
PFMUL ( MM6, MM2 ) /* x1*m6 | x0*m2 */
|
||||
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
|
||||
|
||||
PFMUL ( REGOFF(32, ECX), MM3 ) /* x2*m9 | x2*m8 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PFMUL ( REGOFF(48, ECX), MM4 ) /* x3*m13 | x3*m12 */
|
||||
PFADD ( MM1, MM3 ) /* x0*m1+..+x2*m9 | x0*m0+...+x2*m8 */
|
||||
|
||||
PFMUL ( MM7, MM5 ) /* x3*m14 | x2*m10 */
|
||||
PFADD ( MM3, MM4 ) /* r1 | r0 */
|
||||
|
||||
PFACC ( MM2, MM5 ) /* x0*m2+x1*m6 | x2*m10+x3*m14 */
|
||||
MOVD ( REGOFF(12, EAX), MM0 ) /* | x3 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PFACC ( MM0, MM5 ) /* r3 | r2 */
|
||||
|
||||
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3R_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
|
||||
HIDDEN(_mesa_3dnow_transform_points4_3d_no_rot)
|
||||
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP3NRR_2 ) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
|
||||
PUNPCKLDQ ( REGOFF(56, ECX), MM2 ) /* m32 | m22 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP3NRR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGOFF(32, EAX) ) /* prefetch source 32 bytes past the next vertex; presumably only useful if the array is tightly packed (original note: "hopefully stride is zero") - TODO confirm */
|
||||
|
||||
MOVQ ( MM5, MM6 ) /* x3 | x2 */
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
|
||||
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
|
||||
PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */
|
||||
|
||||
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
|
||||
PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */
|
||||
|
||||
PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP3NRR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
|
||||
HIDDEN(_mesa_3dnow_transform_points4_2d)
|
||||
GLNAME( _mesa_3dnow_transform_points4_2d ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2R_2 ) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
|
||||
|
||||
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2R_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
MOVQ ( MM3, MM4 ) /* x1 | x0 */
|
||||
MOVQ ( MM5, MM6 ) /* x3 | x2 */
|
||||
|
||||
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
|
||||
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
|
||||
|
||||
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
|
||||
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
|
||||
PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */
|
||||
|
||||
PFADD ( MM6, MM3 ) /* r1 | r0 */
|
||||
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
|
||||
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TP2R_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2R_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
|
||||
HIDDEN(_mesa_3dnow_transform_points4_2d_no_rot)
|
||||
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TP2NRR_3 ) )
|
||||
|
||||
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
|
||||
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
|
||||
|
||||
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TP2NRR_2 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
|
||||
MOVQ ( MM5, MM6 ) /* x3 | x2 */
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
|
||||
|
||||
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
|
||||
PFADD ( MM4, MM6 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
|
||||
|
||||
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */
|
||||
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
|
||||
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TP2NRR_3 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
|
||||
HIDDEN(_mesa_3dnow_transform_points4_identity)
|
||||
GLNAME( _mesa_3dnow_transform_points4_identity ):
|
||||
|
||||
PUSH_L ( ESI )
|
||||
|
||||
MOV_L ( ARG_DEST, ECX )
|
||||
MOV_L ( ARG_MATRIX, ESI )
|
||||
MOV_L ( ARG_SOURCE, EAX )
|
||||
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) )
|
||||
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
|
||||
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
|
||||
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( REGOFF(V4F_START, ECX), EDX )
|
||||
MOV_L ( ESI, ECX )
|
||||
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI )
|
||||
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI )
|
||||
MOV_L ( REGOFF(V4F_START, EAX), EAX )
|
||||
|
||||
TEST_L ( ESI, ESI )
|
||||
JZ ( LLBL( G3TPIR_2 ) )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( G3TPIR_1 ):
|
||||
|
||||
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
|
||||
|
||||
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
|
||||
MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */
|
||||
|
||||
ADD_L ( EDI, EAX ) /* next vertex */
|
||||
PREFETCH ( REGIND(EAX) )
|
||||
|
||||
ADD_L ( CONST(16), EDX ) /* next r */
|
||||
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */
|
||||
|
||||
MOVQ ( MM1, REGOFF(-8, EDX) ) /* r3 | r2 */
|
||||
|
||||
DEC_L ( ESI ) /* decrement vertex counter */
|
||||
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */
|
||||
|
||||
LLBL( G3TPIR_2 ):
|
||||
|
||||
FEMMS
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#endif
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,40 @@
|
|||
# Copyright © 2012 Intel Corporation
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice (including the next
|
||||
# paragraph) shall be included in all copies or substantial portions of the
|
||||
# Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
# Everything below is only active when the HAVE_X86_ASM conditional is set.
if HAVE_X86_ASM
|
||||
|
||||
AM_CPPFLAGS = \
|
||||
-I$(top_srcdir)/include \
|
||||
-I$(top_srcdir)/src/mesa \
|
||||
-I$(top_srcdir)/src/GLdispatch/mapi \
|
||||
$(API_DEFINES) \
|
||||
$(DEFINES)
|
||||
|
||||
# gen_matypes is a build-time helper only (noinst: never installed).
noinst_PROGRAMS = gen_matypes
|
||||
|
||||
gen_matypes_SOURCES = gen_matypes.c
|
||||
# matypes.h is generated, so it is listed as a built source and removed by "make clean".
BUILT_SOURCES = matypes.h
|
||||
CLEANFILES = matypes.h
|
||||
|
||||
# Run the helper and capture its standard output as the header.
matypes.h: gen_matypes
|
||||
$(AM_V_GEN)./gen_matypes > $@
|
||||
|
||||
endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,59 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Clip test function interface for assembly code. Simply define
|
||||
* FRAME_OFFSET to the number of bytes pushed onto the stack before
|
||||
* using the ARG_* argument macros.
|
||||
*
|
||||
* Gareth Hughes
|
||||
*/
|
||||
|
||||
#ifndef __CLIP_ARGS_H__
|
||||
#define __CLIP_ARGS_H__
|
||||
|
||||
/*
|
||||
* Offsets for clip_func arguments
|
||||
*
|
||||
* typedef GLvector4f *(*clip_func)( GLvector4f *clip_vec,
|
||||
* GLvector4f *proj_vec,
|
||||
* GLubyte clipMask[],
|
||||
* GLubyte *orMask,
|
||||
* GLubyte *andMask );
|
||||
*/
|
||||
|
||||
/* Byte offsets of the five clip_func arguments, in declaration order and
 * 4 bytes apart (SOURCE = clip_vec, DEST = proj_vec, CLIP = clipMask,
 * OR = orMask, AND = andMask).
 * NOTE(review): offset 0 is presumably the return address (x86 cdecl) - confirm.
 */
#define OFFSET_SOURCE 4
|
||||
#define OFFSET_DEST 8
|
||||
#define OFFSET_CLIP 12
|
||||
#define OFFSET_OR 16
|
||||
#define OFFSET_AND 20
|
||||
|
||||
/* ESP-relative operands for each argument.  The including file must define
 * FRAME_OFFSET as the number of bytes it has pushed since function entry.
 */
#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)
|
||||
#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
|
||||
#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP)
|
||||
#define ARG_OR REGOFF(FRAME_OFFSET+OFFSET_OR, ESP)
|
||||
#define ARG_AND REGOFF(FRAME_OFFSET+OFFSET_AND, ESP)
|
||||
|
||||
#endif
|
|
@ -0,0 +1,336 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 6.5.1
|
||||
*
|
||||
* Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file common_x86.c
|
||||
*
|
||||
* Check CPU capabilities & initialize optimized functions for this particular
|
||||
* processor.
|
||||
*
|
||||
* Changed by Andre Werthmann for using the new SSE functions.
|
||||
*
|
||||
* \author Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
|
||||
* \author Andre Werthmann <wertmann@cs.uni-potsdam.de>
|
||||
*/
|
||||
|
||||
/* XXX these includes should probably go into imports.h or glheader.h */
|
||||
#if defined(USE_SSE_ASM) && defined(__linux__)
|
||||
#include <linux/version.h>
|
||||
#endif
|
||||
#if defined(USE_SSE_ASM) && defined(__FreeBSD__)
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#endif
|
||||
#if defined(USE_SSE_ASM) && defined(__OpenBSD__)
|
||||
#include <sys/param.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <machine/cpu.h>
|
||||
#endif
|
||||
|
||||
#include "main/imports.h"
|
||||
#include "common_x86_asm.h"
|
||||
|
||||
|
||||
/** Bitmask of X86_FEATURE_x bits */
|
||||
int _mesa_x86_cpu_features = 0x0;
|
||||
|
||||
/* When non-zero, the detection code in this file prints extra messages
 * (vendor, CPU name, detected extensions) through _mesa_debug().
 */
static int detection_debug = GL_FALSE;
|
||||
|
||||
/* No reason for this to be public.
|
||||
*/
|
||||
/* Assembly helpers.  _mesa_x86_cpuid() executes CPUID for `op` and stores the
 * four result registers through the pointers; the _eax/_ebx/_ecx/_edx variants
 * execute CPUID for `op` and return just that one register.
 */
extern GLuint _ASMAPI _mesa_x86_has_cpuid(void);
|
||||
extern void _ASMAPI _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx);
|
||||
extern GLuint _ASMAPI _mesa_x86_cpuid_eax(GLuint op);
|
||||
extern GLuint _ASMAPI _mesa_x86_cpuid_ebx(GLuint op);
|
||||
extern GLuint _ASMAPI _mesa_x86_cpuid_ecx(GLuint op);
|
||||
extern GLuint _ASMAPI _mesa_x86_cpuid_edx(GLuint op);
|
||||
|
||||
|
||||
#if defined(USE_SSE_ASM)
|
||||
/*
|
||||
* We must verify that the Streaming SIMD Extensions are truly supported
|
||||
* on this processor before we go ahead and hook out the optimized code.
|
||||
*
|
||||
* However, I have been told by Alan Cox that all 2.4 (and later) Linux
|
||||
* kernels provide full SSE support on all processors that expose SSE via
|
||||
* the CPUID mechanism.
|
||||
*/
|
||||
|
||||
/* These are assembly functions: */
|
||||
extern void _mesa_test_os_sse_support( void );
|
||||
extern void _mesa_test_os_sse_exception_support( void );
|
||||
|
||||
|
||||
#if defined(_WIN32)
#ifndef STATUS_FLOAT_MULTIPLE_TRAPS
# define STATUS_FLOAT_MULTIPLE_TRAPS (0xC00002B5L)
#endif

/**
 * Unhandled-exception filter used while probing OS support for SSE.
 *
 * - EXCEPTION_ILLEGAL_INSTRUCTION: the SSE probe instruction is not usable,
 *   so X86_FEATURE_XMM is cleared from _mesa_x86_cpu_features.
 * - STATUS_FLOAT_MULTIPLE_TRAPS: the expected outcome of the unmasked
 *   divide-by-zero probe; nothing to record.
 * - Anything else: not ours to handle, let the default handler run.
 *
 * For the two expected codes the faulting instruction is skipped and
 * execution continues.
 *
 * \param exp  exception record and thread context supplied by the OS.
 * \return EXCEPTION_CONTINUE_EXECUTION when the probe instruction was
 *         skipped, EXCEPTION_EXECUTE_HANDLER otherwise.
 */
static LONG WINAPI ExceptionFilter(LPEXCEPTION_POINTERS exp)
{
   PEXCEPTION_RECORD rec = exp->ExceptionRecord;
   PCONTEXT ctx = exp->ContextRecord;

   if ( rec->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION ) {
      _mesa_debug(NULL, "EXCEPTION_ILLEGAL_INSTRUCTION\n" );
      _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
   } else if ( rec->ExceptionCode == STATUS_FLOAT_MULTIPLE_TRAPS ) {
      _mesa_debug(NULL, "STATUS_FLOAT_MULTIPLE_TRAPS\n");
      /* Windows seems to clear the exception flag itself, we just have to increment Eip */
   } else {
      /* The format consumes one argument: pass the exception code. */
      _mesa_debug(NULL, "UNEXPECTED EXCEPTION (0x%08x), terminating!\n",
                  (unsigned int) rec->ExceptionCode);
      return EXCEPTION_EXECUTE_HANDLER;
   }

   if ( (ctx->ContextFlags & CONTEXT_CONTROL) != CONTEXT_CONTROL ) {
      _mesa_debug(NULL, "Context does not contain control registers, terminating!\n");
      return EXCEPTION_EXECUTE_HANDLER;
   }

   /* Step over the faulting probe instruction.
    * NOTE(review): assumes it is 3 bytes long (register-to-register
    * XORPS/DIVPS as used by the probe routines) - confirm.
    */
   ctx->Eip += 3;

   return EXCEPTION_CONTINUE_EXECUTION;
}
#endif /* _WIN32 */
|
||||
|
||||
|
||||
/**
|
||||
* Check if SSE is supported.
|
||||
* If not, turn off the X86_FEATURE_XMM flag in _mesa_x86_cpu_features.
|
||||
*/
|
||||
void _mesa_check_os_sse_support( void )
|
||||
{
|
||||
#if defined(__FreeBSD__)
|
||||
{
|
||||
int ret, enabled;
|
||||
unsigned int len;
|
||||
len = sizeof(enabled);
|
||||
ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0);
|
||||
if (ret || !enabled)
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
|
||||
}
|
||||
#elif defined (__NetBSD__)
|
||||
{
|
||||
int ret, enabled;
|
||||
size_t len = sizeof(enabled);
|
||||
ret = sysctlbyname("machdep.sse", &enabled, &len, (void *)NULL, 0);
|
||||
if (ret || !enabled)
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
|
||||
}
|
||||
#elif defined(__OpenBSD__)
|
||||
{
|
||||
int mib[2];
|
||||
int ret, enabled;
|
||||
size_t len = sizeof(enabled);
|
||||
|
||||
mib[0] = CTL_MACHDEP;
|
||||
mib[1] = CPU_SSE;
|
||||
|
||||
ret = sysctl(mib, 2, &enabled, &len, NULL, 0);
|
||||
if (ret || !enabled)
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
|
||||
}
|
||||
#elif defined(_WIN32)
|
||||
LPTOP_LEVEL_EXCEPTION_FILTER oldFilter;
|
||||
|
||||
/* Install our ExceptionFilter */
|
||||
oldFilter = SetUnhandledExceptionFilter( ExceptionFilter );
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
_mesa_debug(NULL, "Testing OS support for SSE...\n");
|
||||
|
||||
_mesa_test_os_sse_support();
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
_mesa_debug(NULL, "Yes.\n");
|
||||
} else {
|
||||
_mesa_debug(NULL, "No!\n");
|
||||
}
|
||||
}
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
_mesa_debug(NULL, "Testing OS support for SSE unmasked exceptions...\n");
|
||||
|
||||
_mesa_test_os_sse_exception_support();
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
_mesa_debug(NULL, "Yes.\n");
|
||||
} else {
|
||||
_mesa_debug(NULL, "No!\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Restore previous exception filter */
|
||||
SetUnhandledExceptionFilter( oldFilter );
|
||||
|
||||
if ( cpu_has_xmm ) {
|
||||
_mesa_debug(NULL, "Tests of OS support for SSE passed.\n");
|
||||
} else {
|
||||
_mesa_debug(NULL, "Tests of OS support for SSE failed!\n");
|
||||
}
|
||||
#else
|
||||
/* Do nothing on other platforms for now.
|
||||
*/
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "Not testing OS support for SSE, leaving enabled.\n");
|
||||
#endif /* __FreeBSD__ */
|
||||
}
|
||||
|
||||
#endif /* USE_SSE_ASM */
|
||||
|
||||
|
||||
/**
|
||||
* Initialize the _mesa_x86_cpu_features bitfield.
|
||||
* This is a no-op if called more than once.
|
||||
*/
|
||||
void
|
||||
_mesa_get_x86_features(void)
|
||||
{
|
||||
static int called = 0;
|
||||
|
||||
if (called)
|
||||
return;
|
||||
|
||||
called = 1;
|
||||
|
||||
#ifdef USE_X86_ASM
|
||||
_mesa_x86_cpu_features = 0x0;
|
||||
|
||||
if (_mesa_getenv( "MESA_NO_ASM")) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!_mesa_x86_has_cpuid()) {
|
||||
_mesa_debug(NULL, "CPUID not detected\n");
|
||||
}
|
||||
else {
|
||||
GLuint cpu_features;
|
||||
GLuint cpu_ext_features;
|
||||
GLuint cpu_ext_info;
|
||||
char cpu_vendor[13];
|
||||
GLuint result;
|
||||
|
||||
/* get vendor name */
|
||||
_mesa_x86_cpuid(0, &result, (GLuint *)(cpu_vendor + 0), (GLuint *)(cpu_vendor + 8), (GLuint *)(cpu_vendor + 4));
|
||||
cpu_vendor[12] = '\0';
|
||||
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "CPU vendor: %s\n", cpu_vendor);
|
||||
|
||||
/* get cpu features */
|
||||
cpu_features = _mesa_x86_cpuid_edx(1);
|
||||
|
||||
if (cpu_features & X86_CPU_FPU)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_FPU;
|
||||
if (cpu_features & X86_CPU_CMOV)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_CMOV;
|
||||
|
||||
#ifdef USE_MMX_ASM
|
||||
if (cpu_features & X86_CPU_MMX)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_MMX;
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE_ASM
|
||||
if (cpu_features & X86_CPU_XMM)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_XMM;
|
||||
if (cpu_features & X86_CPU_XMM2)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_XMM2;
|
||||
#endif
|
||||
|
||||
/* query extended cpu features */
|
||||
if ((cpu_ext_info = _mesa_x86_cpuid_eax(0x80000000)) > 0x80000000) {
|
||||
if (cpu_ext_info >= 0x80000001) {
|
||||
|
||||
cpu_ext_features = _mesa_x86_cpuid_edx(0x80000001);
|
||||
|
||||
if (cpu_features & X86_CPU_MMX) {
|
||||
|
||||
#ifdef USE_3DNOW_ASM
|
||||
if (cpu_ext_features & X86_CPUEXT_3DNOW)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_3DNOW;
|
||||
if (cpu_ext_features & X86_CPUEXT_3DNOW_EXT)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_3DNOWEXT;
|
||||
#endif
|
||||
|
||||
#ifdef USE_MMX_ASM
|
||||
if (cpu_ext_features & X86_CPUEXT_MMX_EXT)
|
||||
_mesa_x86_cpu_features |= X86_FEATURE_MMXEXT;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/* query cpu name */
|
||||
if (cpu_ext_info >= 0x80000002) {
|
||||
GLuint ofs;
|
||||
char cpu_name[49];
|
||||
for (ofs = 0; ofs < 3; ofs++)
|
||||
_mesa_x86_cpuid(0x80000002+ofs, (GLuint *)(cpu_name + (16*ofs)+0), (GLuint *)(cpu_name + (16*ofs)+4), (GLuint *)(cpu_name + (16*ofs)+8), (GLuint *)(cpu_name + (16*ofs)+12));
|
||||
cpu_name[48] = '\0'; /* the name should be NULL terminated, but just to be sure */
|
||||
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "CPU name: %s\n", cpu_name);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#ifdef USE_MMX_ASM
|
||||
if ( cpu_has_mmx ) {
|
||||
if ( _mesa_getenv( "MESA_NO_MMX" ) == 0 ) {
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "MMX cpu detected.\n");
|
||||
} else {
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_MMX);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_3DNOW_ASM
|
||||
if ( cpu_has_3dnow ) {
|
||||
if ( _mesa_getenv( "MESA_NO_3DNOW" ) == 0 ) {
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "3DNow! cpu detected.\n");
|
||||
} else {
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_3DNOW);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE_ASM
|
||||
if ( cpu_has_xmm ) {
|
||||
if ( _mesa_getenv( "MESA_NO_SSE" ) == 0 ) {
|
||||
if (detection_debug)
|
||||
_mesa_debug(NULL, "SSE cpu detected.\n");
|
||||
if ( _mesa_getenv( "MESA_FORCE_SSE" ) == 0 ) {
|
||||
_mesa_check_os_sse_support();
|
||||
}
|
||||
} else {
|
||||
_mesa_debug(NULL, "SSE cpu detected, but switched off by user.\n");
|
||||
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* USE_X86_ASM */
|
||||
|
||||
(void) detection_debug;
|
||||
}
|
|
@ -0,0 +1,220 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 6.3
|
||||
*
|
||||
* Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Check extended CPU capabilities. Now just returns the raw CPUID
|
||||
* feature information, allowing the higher level code to interpret the
|
||||
* results.
|
||||
*
|
||||
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
|
||||
*
|
||||
* Cleaned up and simplified by Gareth Hughes <gareth@valinux.com>
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
|
||||
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
|
||||
* in there will break the build on some platforms.
|
||||
*/
|
||||
|
||||
#include "matypes.h"
|
||||
#include "assyntax.h"
|
||||
#include "common_x86_features.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/*
 * GLuint _mesa_x86_has_cpuid(void)
 *
 * Returns 1 in EAX when the CPUID instruction is available, 0 otherwise.
 * Clobbers ECX.  The toggled ID flag is left in EFLAGS on return.
 */
	ALIGNTEXT4
GLOBL GLNAME(_mesa_x86_has_cpuid)
HIDDEN(_mesa_x86_has_cpuid)
GLNAME(_mesa_x86_has_cpuid):

	/* Test for the CPUID command.  If the ID Flag bit in EFLAGS
	 * (bit 21) is writable, the CPUID command is present */
	PUSHF_L
	POP_L	(EAX)
	MOV_L	(EAX, ECX)			/* ECX = original EFLAGS */
	XOR_L	(CONST(0x00200000), EAX)	/* flip the ID bit */
	PUSH_L	(EAX)
	POPF_L
	PUSHF_L
	POP_L	(EAX)				/* EAX = EFLAGS as actually written */

	/* Verify the ID Flag bit has been written. */
	CMP_L	(ECX, EAX)
	SETNE	(AL)				/* AL = 1 if the bit changed */
	/* Keep only AL: the upper bytes of EAX still hold EFLAGS bits, which
	 * would otherwise make the result non-zero in every case. */
	AND_L	(CONST(0xff), EAX)

	RET
|
||||
|
||||
|
||||
/*
 * _mesa_x86_cpuid: execute CPUID for the leaf passed as the first stack
 * argument and store EAX, EBX, ECX and EDX through the four pointer
 * arguments that follow it.  EDI and EBX are saved and restored.
 * NOTE(review): ECX is not initialised before CPUID, so leaves that take a
 * sub-leaf index in ECX would see an arbitrary value - confirm none are used.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_x86_cpuid)
|
||||
HIDDEN(_mesa_x86_cpuid)
|
||||
GLNAME(_mesa_x86_cpuid):
|
||||
|
||||
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
|
||||
PUSH_L (EDI)
|
||||
PUSH_L (EBX)
|
||||
|
||||
CPUID
|
||||
|
||||
/* The two pushes moved ESP down by 8, so the pointer arguments that were at
 * 8..20(ESP) on entry are now at 16..28(ESP). */
MOV_L (REGOFF(16, ESP), EDI) /* *eax */
|
||||
MOV_L (EAX, REGIND(EDI))
|
||||
MOV_L (REGOFF(20, ESP), EDI) /* *ebx */
|
||||
MOV_L (EBX, REGIND(EDI))
|
||||
MOV_L (REGOFF(24, ESP), EDI) /* *ecx */
|
||||
MOV_L (ECX, REGIND(EDI))
|
||||
MOV_L (REGOFF(28, ESP), EDI) /* *edx */
|
||||
MOV_L (EDX, REGIND(EDI))
|
||||
|
||||
POP_L (EBX)
|
||||
POP_L (EDI)
|
||||
RET
|
||||
|
||||
/* _mesa_x86_cpuid_eax: execute CPUID for the leaf passed on the stack and
 * return the resulting EAX.  EBX is saved and restored around CPUID. */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_x86_cpuid_eax)
|
||||
HIDDEN(_mesa_x86_cpuid_eax)
|
||||
GLNAME(_mesa_x86_cpuid_eax):
|
||||
|
||||
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
|
||||
PUSH_L (EBX)
|
||||
|
||||
CPUID
|
||||
|
||||
POP_L (EBX)
|
||||
RET
|
||||
|
||||
/* _mesa_x86_cpuid_ebx: execute CPUID for the leaf passed on the stack and
 * return the resulting EBX in EAX.  The caller's EBX is saved and restored. */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_x86_cpuid_ebx)
|
||||
HIDDEN(_mesa_x86_cpuid_ebx)
|
||||
GLNAME(_mesa_x86_cpuid_ebx):
|
||||
|
||||
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
|
||||
PUSH_L (EBX)
|
||||
|
||||
CPUID
|
||||
MOV_L (EBX, EAX) /* return EBX */
|
||||
|
||||
POP_L (EBX)
|
||||
RET
|
||||
|
||||
/* _mesa_x86_cpuid_ecx: execute CPUID for the leaf passed on the stack and
 * return the resulting ECX in EAX.  EBX is saved and restored around CPUID. */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_x86_cpuid_ecx)
|
||||
HIDDEN(_mesa_x86_cpuid_ecx)
|
||||
GLNAME(_mesa_x86_cpuid_ecx):
|
||||
|
||||
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
|
||||
PUSH_L (EBX)
|
||||
|
||||
CPUID
|
||||
MOV_L (ECX, EAX) /* return ECX */
|
||||
|
||||
POP_L (EBX)
|
||||
RET
|
||||
|
||||
/* _mesa_x86_cpuid_edx: execute CPUID for the leaf passed on the stack and
 * return the resulting EDX in EAX.  EBX is saved and restored around CPUID. */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_x86_cpuid_edx)
|
||||
HIDDEN(_mesa_x86_cpuid_edx)
|
||||
GLNAME(_mesa_x86_cpuid_edx):
|
||||
|
||||
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
|
||||
PUSH_L (EBX)
|
||||
|
||||
CPUID
|
||||
MOV_L (EDX, EAX) /* return EDX */
|
||||
|
||||
POP_L (EBX)
|
||||
RET
|
||||
|
||||
#ifdef USE_SSE_ASM
|
||||
/* Execute an SSE instruction to see if the operating system correctly
|
||||
* supports SSE. A signal handler for SIGILL should have been set
|
||||
* before calling this function, otherwise this could kill the client
|
||||
* application.
|
||||
*
|
||||
* -----> !!!! ATTENTION DEVELOPERS !!!! <-----
|
||||
*
|
||||
* If you're debugging with gdb and you get stopped in this function,
|
||||
* just type 'continue'! Execution will proceed normally.
|
||||
* See freedesktop.org bug #1709 for more info.
|
||||
*/
|
||||
/* _mesa_test_os_sse_support: the whole probe is one SSE instruction that
 * only touches XMM0; whether it faults is what the caller is testing. */
ALIGNTEXT4
|
||||
GLOBL GLNAME( _mesa_test_os_sse_support )
|
||||
HIDDEN(_mesa_test_os_sse_support)
|
||||
GLNAME( _mesa_test_os_sse_support ):
|
||||
|
||||
XORPS ( XMM0, XMM0 )
|
||||
|
||||
RET
|
||||
|
||||
|
||||
/* Perform an SSE divide-by-zero to see if the operating system
|
||||
* correctly supports unmasked SIMD FPU exceptions. Signal handlers for
|
||||
* SIGILL and SIGFPE should have been set before calling this function,
|
||||
* otherwise this could kill the client application.
|
||||
*/
|
||||
/*
 * _mesa_test_os_sse_exception_support
 *
 * Frame layout: -4(EBP) holds the saved MXCSR, -8(EBP) a modified copy.
 * Four 1.0f constants are pushed and loaded into XMM1, which is then divided
 * by an all-zero XMM0.  LEAVE resets ESP from EBP, discarding both the
 * locals and the pushed constants.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME( _mesa_test_os_sse_exception_support )
|
||||
HIDDEN(_mesa_test_os_sse_exception_support)
|
||||
GLNAME( _mesa_test_os_sse_exception_support ):
|
||||
|
||||
PUSH_L ( EBP )
|
||||
MOV_L ( ESP, EBP )
|
||||
SUB_L ( CONST( 8 ), ESP ) /* two 4-byte MXCSR slots */
|
||||
|
||||
/* Save the original MXCSR register value.
|
||||
*/
|
||||
STMXCSR ( REGOFF( -4, EBP ) )
|
||||
|
||||
/* Unmask the divide-by-zero exception and perform one.
|
||||
*/
|
||||
STMXCSR ( REGOFF( -8, EBP ) )
|
||||
AND_L ( CONST( 0xfffffdff ), REGOFF( -8, EBP ) ) /* clear bit 9 of the copy */
|
||||
LDMXCSR ( REGOFF( -8, EBP ) )
|
||||
|
||||
XORPS ( XMM0, XMM0 ) /* XMM0 = 0 (divisor) */
|
||||
|
||||
PUSH_L ( CONST( 0x3f800000 ) ) /* 0x3f800000 is 1.0f */
|
||||
PUSH_L ( CONST( 0x3f800000 ) )
|
||||
PUSH_L ( CONST( 0x3f800000 ) )
|
||||
PUSH_L ( CONST( 0x3f800000 ) )
|
||||
|
||||
MOVUPS ( REGIND( ESP ), XMM1 ) /* XMM1 = four 1.0f values */
|
||||
|
||||
DIVPS ( XMM0, XMM1 ) /* XMM1 /= XMM0, i.e. 1.0 / 0.0 */
|
||||
|
||||
/* Restore the original MXCSR register value.
|
||||
*/
|
||||
LDMXCSR ( REGOFF( -4, EBP ) )
|
||||
|
||||
LEAVE
|
||||
RET
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,53 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Check CPU capabilities & initialize optimized functions for this particular
|
||||
* processor.
|
||||
*
|
||||
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
|
||||
* Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
|
||||
* new SSE functions
|
||||
*
|
||||
* Reimplemented by Gareth Hughes in a more
|
||||
* future-proof manner, based on code in the Linux kernel.
|
||||
*/
|
||||
|
||||
#ifndef __COMMON_X86_ASM_H__
|
||||
#define __COMMON_X86_ASM_H__
|
||||
|
||||
/* Do not reference mtypes.h from this file.
|
||||
*/
|
||||
#include "common_x86_features.h"
|
||||
|
||||
/* Bitmask of X86_FEATURE_* bits; tested through the cpu_has_* macros. */
extern int _mesa_x86_cpu_features;
|
||||
|
||||
/* Fills in _mesa_x86_cpu_features. */
extern void _mesa_get_x86_features(void);
|
||||
|
||||
/* Clears X86_FEATURE_XMM when the operating system does not support SSE. */
extern void _mesa_check_os_sse_support(void);
|
||||
|
||||
extern void _mesa_init_all_x86_transform_asm( void );
|
||||
|
||||
#endif
|
|
@ -0,0 +1,67 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 5.1
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* x86 CPUID feature information. The feature bits are stored in
|
||||
* _mesa_x86_cpu_features by _mesa_get_x86_features() and interpreted with the cpu_has_*
|
||||
* helper macros.
|
||||
*
|
||||
* Gareth Hughes
|
||||
*/
|
||||
|
||||
#ifndef __COMMON_X86_FEATURES_H__
|
||||
#define __COMMON_X86_FEATURES_H__
|
||||
|
||||
/* Feature bits as stored in _mesa_x86_cpu_features. */
#define X86_FEATURE_FPU		(1<<0)
#define X86_FEATURE_CMOV	(1<<1)
#define X86_FEATURE_MMXEXT	(1<<2)
#define X86_FEATURE_MMX		(1<<3)
#define X86_FEATURE_FXSR	(1<<4)
#define X86_FEATURE_XMM		(1<<5)
#define X86_FEATURE_XMM2	(1<<6)
#define X86_FEATURE_3DNOWEXT	(1<<7)
#define X86_FEATURE_3DNOW	(1<<8)

/* standard X86 CPU features */
/* (raw bit positions in the EDX value returned by CPUID leaf 1) */
#define X86_CPU_FPU		(1<<0)
#define X86_CPU_CMOV		(1<<15)
#define X86_CPU_MMX		(1<<23)
#define X86_CPU_XMM		(1<<25)
#define X86_CPU_XMM2		(1<<26)

/* extended X86 CPU features */
/* (raw bit positions in the EDX value returned by CPUID leaf 0x80000001) */
#define X86_CPUEXT_MMX_EXT	(1<<22)
#define X86_CPUEXT_3DNOW_EXT	(1<<30)
/* Bit 31.  Written as a hex constant because (1<<31) shifts into the sign
 * bit of int; the hex form is unsigned in C and is also accepted by the
 * assembler, which includes this header as well. */
#define X86_CPUEXT_3DNOW	0x80000000

/* Non-zero when the corresponding bit is set in _mesa_x86_cpu_features. */
#define cpu_has_mmx		(_mesa_x86_cpu_features & X86_FEATURE_MMX)
#define cpu_has_mmxext		(_mesa_x86_cpu_features & X86_FEATURE_MMXEXT)
#define cpu_has_xmm		(_mesa_x86_cpu_features & X86_FEATURE_XMM)
#define cpu_has_xmm2		(_mesa_x86_cpu_features & X86_FEATURE_XMM2)
#define cpu_has_3dnow		(_mesa_x86_cpu_features & X86_FEATURE_3DNOW)
#define cpu_has_3dnowext	(_mesa_x86_cpu_features & X86_FEATURE_3DNOWEXT)
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,240 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 6.5.1
|
||||
*
|
||||
* Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Gareth Hughes
|
||||
*/
|
||||
|
||||
/*
|
||||
* This generates an asm version of mtypes.h (called matypes.h), so that
|
||||
* Mesa's x86 assembly code can access the internal structures easily.
|
||||
* This will be particularly useful when developing new x86 asm code for
|
||||
* Mesa, including lighting, clipping, texture image conversion etc.
|
||||
*/
|
||||
|
||||
#ifndef __STDC_FORMAT_MACROS
|
||||
#define __STDC_FORMAT_MACROS
|
||||
#endif
|
||||
#include <inttypes.h>
|
||||
|
||||
#include "main/glheader.h"
|
||||
#include "main/mtypes.h"
|
||||
#include "tnl/t_context.h"
|
||||
|
||||
|
||||
/* Local offsetof: byte offset of `member` within `type`, as a size_t. */
#undef offsetof
#define offsetof( type, member ) ((size_t) &((type *)0)->member)


/* Print the banner comment that introduces the offsets of structure `x`. */
#define OFFSET_HEADER( x )						\
do {									\
   printf( "\n" );							\
   printf( "\n" );							\
   printf( "/* ====================================================="	\
           "========\n" );						\
   printf( " * Offsets for %s\n", x );					\
   printf( " */\n" );							\
   printf( "\n" );							\
} while (0)

/* Print the comment that introduces the flag definitions for `x`. */
#define DEFINE_HEADER( x )						\
do {									\
   printf( "\n" );							\
   printf( "/*\n" );							\
   printf( " * Flags for %s\n", x );					\
   printf( " */\n" );							\
   printf( "\n" );							\
} while (0)

/* Emit "#define <s> <offset of member m in type t>". */
#define OFFSET( s, t, m )						\
   printf( "#define %s\t%lu\n", s, (unsigned long) offsetof( t, m ) );

/* Emit "#define <s> <sizeof t>". */
#define SIZEOF( s, t )							\
   printf( "#define %s\t%lu\n", s, (unsigned long) sizeof(t) );

/* Emit "#define <s> 0x<d in hex>".  `d` is parenthesized so the cast applies
 * to the whole expression and the argument always matches PRIx64. */
#define DEFINE( s, d )							\
   printf( "#define %s\t0x%" PRIx64 "\n", s, (uint64_t) (d) );
|
||||
|
||||
|
||||
|
||||
int main( int argc, char **argv )
|
||||
{
|
||||
printf( "/*\n" );
|
||||
printf( " * This file is automatically generated from the Mesa internal type\n" );
|
||||
printf( " * definitions. Do not edit directly.\n" );
|
||||
printf( " */\n" );
|
||||
printf( "\n" );
|
||||
printf( "#ifndef __ASM_TYPES_H__\n" );
|
||||
printf( "#define __ASM_TYPES_H__\n" );
|
||||
printf( "\n" );
|
||||
|
||||
|
||||
/* struct gl_context offsets:
|
||||
*/
|
||||
OFFSET_HEADER( "struct gl_context" );
|
||||
|
||||
printf( "\n" );
|
||||
OFFSET( "CTX_LIGHT_ENABLED ", struct gl_context, Light.Enabled );
|
||||
OFFSET( "CTX_LIGHT_SHADE_MODEL ", struct gl_context, Light.ShadeModel );
|
||||
OFFSET( "CTX_LIGHT_COLOR_MAT_FACE ", struct gl_context, Light.ColorMaterialFace );
|
||||
OFFSET( "CTX_LIGHT_COLOR_MAT_MODE ", struct gl_context, Light.ColorMaterialMode );
|
||||
OFFSET( "CTX_LIGHT_COLOR_MAT_MASK ", struct gl_context, Light._ColorMaterialBitmask );
|
||||
OFFSET( "CTX_LIGHT_COLOR_MAT_ENABLED ", struct gl_context, Light.ColorMaterialEnabled );
|
||||
OFFSET( "CTX_LIGHT_ENABLED_LIST ", struct gl_context, Light.EnabledList );
|
||||
OFFSET( "CTX_LIGHT_NEED_VERTS ", struct gl_context, Light._NeedVertices );
|
||||
OFFSET( "CTX_LIGHT_BASE_COLOR ", struct gl_context, Light._BaseColor );
|
||||
|
||||
|
||||
/* struct vertex_buffer offsets:
|
||||
*/
|
||||
OFFSET_HEADER( "struct vertex_buffer" );
|
||||
|
||||
OFFSET( "VB_SIZE ", struct vertex_buffer, Size );
|
||||
OFFSET( "VB_COUNT ", struct vertex_buffer, Count );
|
||||
printf( "\n" );
|
||||
OFFSET( "VB_ELTS ", struct vertex_buffer, Elts );
|
||||
OFFSET( "VB_OBJ_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_POS] );
|
||||
OFFSET( "VB_EYE_PTR ", struct vertex_buffer, EyePtr );
|
||||
OFFSET( "VB_CLIP_PTR ", struct vertex_buffer, ClipPtr );
|
||||
OFFSET( "VB_PROJ_CLIP_PTR ", struct vertex_buffer, NdcPtr );
|
||||
OFFSET( "VB_CLIP_OR_MASK ", struct vertex_buffer, ClipOrMask );
|
||||
OFFSET( "VB_CLIP_MASK ", struct vertex_buffer, ClipMask );
|
||||
OFFSET( "VB_NORMAL_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_NORMAL] );
|
||||
OFFSET( "VB_EDGE_FLAG ", struct vertex_buffer, EdgeFlag );
|
||||
OFFSET( "VB_TEX0_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX0] );
|
||||
OFFSET( "VB_TEX1_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX1] );
|
||||
OFFSET( "VB_TEX2_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX2] );
|
||||
OFFSET( "VB_TEX3_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX3] );
|
||||
OFFSET( "VB_INDEX_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR_INDEX] );
|
||||
OFFSET( "VB_COLOR_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR0] );
|
||||
OFFSET( "VB_SECONDARY_COLOR_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR1] );
|
||||
OFFSET( "VB_FOG_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_FOG] );
|
||||
OFFSET( "VB_PRIMITIVE ", struct vertex_buffer, Primitive );
|
||||
printf( "\n" );
|
||||
|
||||
DEFINE_HEADER( "struct vertex_buffer" );
|
||||
|
||||
/* XXX use new labels here someday after vertex program is done */
|
||||
DEFINE( "VERT_BIT_OBJ ", VERT_BIT_POS );
|
||||
DEFINE( "VERT_BIT_NORM ", VERT_BIT_NORMAL );
|
||||
DEFINE( "VERT_BIT_RGBA ", VERT_BIT_COLOR0 );
|
||||
DEFINE( "VERT_BIT_SPEC_RGB ", VERT_BIT_COLOR1 );
|
||||
DEFINE( "VERT_BIT_FOG_COORD ", VERT_BIT_FOG );
|
||||
DEFINE( "VERT_BIT_TEX0 ", VERT_BIT_TEX0 );
|
||||
DEFINE( "VERT_BIT_TEX1 ", VERT_BIT_TEX1 );
|
||||
DEFINE( "VERT_BIT_TEX2 ", VERT_BIT_TEX2 );
|
||||
DEFINE( "VERT_BIT_TEX3 ", VERT_BIT_TEX3 );
|
||||
|
||||
|
||||
/* GLvector4f offsets:
|
||||
*/
|
||||
OFFSET_HEADER( "GLvector4f" );
|
||||
|
||||
OFFSET( "V4F_DATA ", GLvector4f, data );
|
||||
OFFSET( "V4F_START ", GLvector4f, start );
|
||||
OFFSET( "V4F_COUNT ", GLvector4f, count );
|
||||
OFFSET( "V4F_STRIDE ", GLvector4f, stride );
|
||||
OFFSET( "V4F_SIZE ", GLvector4f, size );
|
||||
OFFSET( "V4F_FLAGS ", GLvector4f, flags );
|
||||
|
||||
DEFINE_HEADER( "GLvector4f" );
|
||||
|
||||
DEFINE( "VEC_MALLOC ", VEC_MALLOC );
|
||||
DEFINE( "VEC_NOT_WRITEABLE ", VEC_NOT_WRITEABLE );
|
||||
DEFINE( "VEC_BAD_STRIDE ", VEC_BAD_STRIDE );
|
||||
printf( "\n" );
|
||||
DEFINE( "VEC_SIZE_1 ", VEC_SIZE_1 );
|
||||
DEFINE( "VEC_SIZE_2 ", VEC_SIZE_2 );
|
||||
DEFINE( "VEC_SIZE_3 ", VEC_SIZE_3 );
|
||||
DEFINE( "VEC_SIZE_4 ", VEC_SIZE_4 );
|
||||
|
||||
|
||||
/* GLmatrix offsets:
|
||||
*/
|
||||
OFFSET_HEADER( "GLmatrix" );
|
||||
|
||||
OFFSET( "MATRIX_DATA ", GLmatrix, m );
|
||||
OFFSET( "MATRIX_INV ", GLmatrix, inv );
|
||||
OFFSET( "MATRIX_FLAGS ", GLmatrix, flags );
|
||||
OFFSET( "MATRIX_TYPE ", GLmatrix, type );
|
||||
|
||||
|
||||
/* struct gl_light offsets:
|
||||
*/
|
||||
OFFSET_HEADER( "struct gl_light" );
|
||||
|
||||
OFFSET( "LIGHT_NEXT ", struct gl_light, next );
|
||||
OFFSET( "LIGHT_PREV ", struct gl_light, prev );
|
||||
printf( "\n" );
|
||||
OFFSET( "LIGHT_AMBIENT ", struct gl_light, Ambient );
|
||||
OFFSET( "LIGHT_DIFFUSE ", struct gl_light, Diffuse );
|
||||
OFFSET( "LIGHT_SPECULAR ", struct gl_light, Specular );
|
||||
OFFSET( "LIGHT_EYE_POSITION ", struct gl_light, EyePosition );
|
||||
OFFSET( "LIGHT_SPOT_DIRECTION ", struct gl_light, SpotDirection );
|
||||
OFFSET( "LIGHT_SPOT_EXPONENT ", struct gl_light, SpotExponent );
|
||||
OFFSET( "LIGHT_SPOT_CUTOFF ", struct gl_light, SpotCutoff );
|
||||
OFFSET( "LIGHT_COS_CUTOFF ", struct gl_light, _CosCutoff );
|
||||
OFFSET( "LIGHT_CONST_ATTEN ", struct gl_light, ConstantAttenuation );
|
||||
OFFSET( "LIGHT_LINEAR_ATTEN ", struct gl_light, LinearAttenuation );
|
||||
OFFSET( "LIGHT_QUADRATIC_ATTEN ", struct gl_light, QuadraticAttenuation );
|
||||
OFFSET( "LIGHT_ENABLED ", struct gl_light, Enabled );
|
||||
printf( "\n" );
|
||||
OFFSET( "LIGHT_FLAGS ", struct gl_light, _Flags );
|
||||
printf( "\n" );
|
||||
OFFSET( "LIGHT_POSITION ", struct gl_light, _Position );
|
||||
OFFSET( "LIGHT_VP_INF_NORM ", struct gl_light, _VP_inf_norm );
|
||||
OFFSET( "LIGHT_H_INF_NORM ", struct gl_light, _h_inf_norm );
|
||||
OFFSET( "LIGHT_NORM_DIRECTION ", struct gl_light, _NormSpotDirection );
|
||||
OFFSET( "LIGHT_VP_INF_SPOT_ATTEN ", struct gl_light, _VP_inf_spot_attenuation );
|
||||
printf( "\n" );
|
||||
OFFSET( "LIGHT_MAT_AMBIENT ", struct gl_light, _MatAmbient );
|
||||
OFFSET( "LIGHT_MAT_DIFFUSE ", struct gl_light, _MatDiffuse );
|
||||
OFFSET( "LIGHT_MAT_SPECULAR ", struct gl_light, _MatSpecular );
|
||||
printf( "\n" );
|
||||
SIZEOF( "SIZEOF_GL_LIGHT ", struct gl_light );
|
||||
|
||||
DEFINE_HEADER( "struct gl_light" );
|
||||
|
||||
DEFINE( "LIGHT_SPOT ", LIGHT_SPOT );
|
||||
DEFINE( "LIGHT_LOCAL_VIEWER ", LIGHT_LOCAL_VIEWER );
|
||||
DEFINE( "LIGHT_POSITIONAL ", LIGHT_POSITIONAL );
|
||||
printf( "\n" );
|
||||
DEFINE( "LIGHT_NEED_VERTICES ", LIGHT_NEED_VERTICES );
|
||||
|
||||
|
||||
/* struct gl_lightmodel offsets:
|
||||
*/
|
||||
OFFSET_HEADER( "struct gl_lightmodel" );
|
||||
|
||||
OFFSET( "LIGHT_MODEL_AMBIENT ", struct gl_lightmodel, Ambient );
|
||||
OFFSET( "LIGHT_MODEL_LOCAL_VIEWER ", struct gl_lightmodel, LocalViewer );
|
||||
OFFSET( "LIGHT_MODEL_TWO_SIDE ", struct gl_lightmodel, TwoSide );
|
||||
OFFSET( "LIGHT_MODEL_COLOR_CONTROL ", struct gl_lightmodel, ColorControl );
|
||||
|
||||
|
||||
printf( "\n" );
|
||||
printf( "\n" );
|
||||
printf( "#endif /* __ASM_TYPES_H__ */\n" );
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 6.5.2
|
||||
*
|
||||
* Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef ASM_MMX_H
|
||||
#define ASM_MMX_H
|
||||
|
||||
#include "main/compiler.h"
|
||||
#include "main/glheader.h"
|
||||
|
||||
struct gl_context;
|
||||
|
||||
extern void _ASMAPI
|
||||
_mesa_mmx_blend_transparency( struct gl_context *ctx, GLuint n, const GLubyte mask[],
|
||||
GLvoid *rgba, const GLvoid *dest,
|
||||
GLenum chanType );
|
||||
|
||||
extern void _ASMAPI
|
||||
_mesa_mmx_blend_add( struct gl_context *ctx, GLuint n, const GLubyte mask[],
|
||||
GLvoid *rgba, const GLvoid *dest,
|
||||
GLenum chanType );
|
||||
|
||||
extern void _ASMAPI
|
||||
_mesa_mmx_blend_min( struct gl_context *ctx, GLuint n, const GLubyte mask[],
|
||||
GLvoid *rgba, const GLvoid *dest,
|
||||
GLenum chanType );
|
||||
|
||||
extern void _ASMAPI
|
||||
_mesa_mmx_blend_max( struct gl_context *ctx, GLuint n, const GLubyte mask[],
|
||||
GLvoid *rgba, const GLvoid *dest,
|
||||
GLenum chanType );
|
||||
|
||||
extern void _ASMAPI
|
||||
_mesa_mmx_blend_modulate( struct gl_context *ctx, GLuint n, const GLubyte mask[],
|
||||
GLvoid *rgba, const GLvoid *dest,
|
||||
GLenum chanType );
|
||||
|
||||
#endif
|
|
@ -0,0 +1,402 @@
|
|||
;
|
||||
/*
|
||||
* Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
|
||||
*/
|
||||
|
||||
|
||||
#ifdef USE_MMX_ASM
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
|
||||
/* integer multiplication - alpha plus one
|
||||
*
|
||||
* makes the following approximation to the division (Sree)
|
||||
*
|
||||
* rgb*a/255 ~= (rgb*(a+1)) >> 8
|
||||
*
|
||||
* which is the fastest method that satisfies the following OpenGL criteria
|
||||
*
|
||||
* 0*0 = 0 and 255*255 = 255
|
||||
*
|
||||
* note that MX1 is a register with 0xffffffffffffffff constant which can be easily obtained making
|
||||
*
|
||||
* PCMPEQW ( MX1, MX1 )
|
||||
*/
|
||||
#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \
|
||||
PSUBW ( MX1, MA1 ) /* a1 + 1 | a1 + 1 | a1 + 1 | a1 + 1 */ ;\
|
||||
PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\
|
||||
;\
|
||||
TWO(PSUBW ( MX1, MA2 )) /* a2 + 1 | a2 + 1 | a2 + 1 | a2 + 1 */ ;\
|
||||
TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\
|
||||
;\
|
||||
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 ~= t1/255 */ ;\
|
||||
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 ~= t2/255 */
|
||||
|
||||
|
||||
/* integer multiplication - geometric series
|
||||
*
|
||||
* takes the geometric series approximation to the division
|
||||
*
|
||||
* t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
|
||||
*
|
||||
* in this case just the first two terms to fit in 16bit arithmetic
|
||||
*
|
||||
* t/255 ~= (t + (t >> 8)) >> 8
|
||||
*
|
||||
* note that just by itself it doesn't satisfy the OpenGL criteria, as 255*255 = 254,
|
||||
* so the special case a = 255 must be accounted for or roundoff must be used
|
||||
*/
|
||||
#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \
|
||||
PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\
|
||||
TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\
|
||||
;\
|
||||
MOVQ ( MA1, MP1 ) ;\
|
||||
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
|
||||
;\
|
||||
TWO(MOVQ ( MA2, MP2 )) ;\
|
||||
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
|
||||
;\
|
||||
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
|
||||
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
|
||||
;\
|
||||
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
|
||||
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
|
||||
|
||||
|
||||
/* integer multiplication - geometric series plus rounding
|
||||
*
|
||||
* when using a geometric series division instead of truncating the result
|
||||
* use roundoff in the approximation (Jim Blinn)
|
||||
*
|
||||
* t = rgb*a + 0x80
|
||||
*
|
||||
* achieving the exact results
|
||||
*
|
||||
* note that M80 is a register with the 0x0080008000800080 constant
|
||||
*/
|
||||
#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \
|
||||
PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\
|
||||
PADDW ( M80, MA1 ) /* t1 += 0x80 */ ;\
|
||||
;\
|
||||
TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\
|
||||
TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\
|
||||
;\
|
||||
MOVQ ( MA1, MP1 ) ;\
|
||||
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
|
||||
;\
|
||||
TWO(MOVQ ( MA2, MP2 )) ;\
|
||||
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
|
||||
;\
|
||||
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
|
||||
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
|
||||
;\
|
||||
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
|
||||
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
|
||||
|
||||
|
||||
/* linear interpolation - geometric series
|
||||
*/
|
||||
/* Per 16-bit lane, leaves approximately q + (p - q)*a/255 in MA1 (and in
 * MA2 for the statements wrapped in TWO). MP1/MQ1 (and MP2/MQ2) are
 * overwritten with intermediates along the way.
 */
#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \
|
||||
PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
|
||||
PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\
|
||||
PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\
|
||||
;\
|
||||
TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
|
||||
TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\
|
||||
TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\
|
||||
;\
|
||||
MOVQ ( MA1, MP1 ) ;\
|
||||
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
|
||||
;\
|
||||
TWO(MOVQ ( MA2, MP2 )) ;\
|
||||
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
|
||||
;\
|
||||
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
|
||||
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
|
||||
;\
|
||||
PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\
|
||||
TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\
|
||||
;\
|
||||
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
|
||||
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
|
||||
|
||||
|
||||
/* linear interpolation - geometric series with roundoff
|
||||
*
|
||||
* this is a generalization of Blinn's formula to signed arithmetic
|
||||
*
|
||||
* note that M80 is a register with the 0x0080008000800080 constant
|
||||
*/
|
||||
/* Rounded variant of the lerp: per 16-bit lane, leaves q + (p - q)*a/255
 * in MA1 (and in MA2 for the statements wrapped in TWO). Lanes where
 * p - q is negative get 0x100 subtracted from t before M80 (0x80 per
 * lane) is added. MP1/MQ1 (and MP2/MQ2) are overwritten with
 * intermediates.
 */
#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \
|
||||
PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
|
||||
PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\
|
||||
PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\
|
||||
;\
|
||||
TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
|
||||
TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\
|
||||
TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\
|
||||
;\
|
||||
PSRLW ( CONST(15), MP1 ) /* q1 > p1 ? 1 : 0 */ ;\
|
||||
TWO(PSRLW ( CONST(15), MP2 )) /* q2 > p2 ? 1 : 0 */ ;\
|
||||
;\
|
||||
PSLLW ( CONST(8), MP1 ) /* q1 > p1 ? 0x100 : 0 */ ;\
|
||||
TWO(PSLLW ( CONST(8), MP2 )) /* q2 > p2 ? 0x100 : 0 */ ;\
|
||||
;\
|
||||
PSUBW ( MP1, MA1 ) /* t1 -=? 0x100 */ ;\
|
||||
TWO(PSUBW ( MP2, MA2 )) /* t2 -=? 0x100 */ ;\
|
||||
;\
|
||||
PADDW ( M80, MA1 ) /* t1 += 0x80 */ ;\
|
||||
TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\
|
||||
;\
|
||||
MOVQ ( MA1, MP1 ) ;\
|
||||
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
|
||||
;\
|
||||
TWO(MOVQ ( MA2, MP2 )) ;\
|
||||
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
|
||||
;\
|
||||
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
|
||||
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
|
||||
;\
|
||||
PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\
|
||||
TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\
|
||||
;\
|
||||
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
|
||||
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
|
||||
|
||||
|
||||
/* linear interpolation - geometric series with correction
|
||||
*
|
||||
* instead of the roundoff this adds a small correction to satisfy the OpenGL criteria
|
||||
*
|
||||
* t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8
|
||||
*
|
||||
* note that although this is faster than rounding off, it doesn't always give the exact results
|
||||
*/
|
||||
/* Lerp with the (t >> 15) correction term instead of rounding: per
 * 16-bit lane, leaves approximately q + (p - q)*a/255 in MA1 (and in MA2
 * for the statements wrapped in TWO). MP1/MQ1 (and MP2/MQ2) are
 * overwritten with intermediates.
 */
#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \
|
||||
PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
|
||||
PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\
|
||||
PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\
|
||||
;\
|
||||
TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
|
||||
TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\
|
||||
TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\
|
||||
;\
|
||||
MOVQ ( MA1, MP1 ) ;\
|
||||
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
|
||||
;\
|
||||
TWO(MOVQ ( MA2, MP2 )) ;\
|
||||
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
|
||||
;\
|
||||
PADDW ( MA1, MP1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
|
||||
PSRLW ( CONST(7), MA1 ) /* t1 >> 15 */ ;\
|
||||
;\
|
||||
TWO(PADDW ( MA2, MP2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
|
||||
TWO(PSRLW ( CONST(7), MA2 )) /* t2 >> 15 */ ;\
|
||||
;\
|
||||
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8 */ ;\
|
||||
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8 */ ;\
|
||||
;\
|
||||
PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\
|
||||
TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\
|
||||
;\
|
||||
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
|
||||
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
|
||||
|
||||
|
||||
/* common blending setup code
|
||||
*
|
||||
* note that M00 is a register with 0x0000000000000000 constant which can be easily obtained making
|
||||
*
|
||||
* PXOR ( M00, M00 )
|
||||
*/
|
||||
/* Load one pixel (ONE: 32-bit MOVD) or two pixels (TWO: 64-bit MOVQ)
 * from rgba into MPP and from dest into MQQ. The lane labels follow the
 * register names: p comes from rgba, q comes from dest.
 */
#define GMB_LOAD(rgba, dest, MPP, MQQ) \
|
||||
ONE(MOVD ( REGIND(rgba), MPP )) /* | | | | pa1 | pb1 | pg1 | pr1 */ ;\
|
||||
ONE(MOVD ( REGIND(dest), MQQ )) /* | | | | qa1 | qb1 | qg1 | qr1 */ ;\
|
||||
;\
|
||||
TWO(MOVQ ( REGIND(rgba), MPP )) /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\
|
||||
TWO(MOVQ ( REGIND(dest), MQQ )) /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */
|
||||
|
||||
#define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \
|
||||
TWO(MOVQ ( MP1, MP2 )) ;\
|
||||
TWO(MOVQ ( MQ1, MQ2 )) ;\
|
||||
;\
|
||||
PUNPCKLBW ( M00, MQ1 ) /* qa1 | qb1 | qg1 | qr1 */ ;\
|
||||
TWO(PUNPCKHBW ( M00, MQ2 )) /* qa2 | qb2 | qg2 | qr2 */ ;\
|
||||
PUNPCKLBW ( M00, MP1 ) /* pa1 | pb1 | pg1 | pr1 */ ;\
|
||||
TWO(PUNPCKHBW ( M00, MP2 )) /* pa2 | pb2 | pg2 | pr2 */
|
||||
|
||||
#define GMB_ALPHA(MP1, MA1, MP2, MA2) \
|
||||
MOVQ ( MP1, MA1 ) ;\
|
||||
TWO(MOVQ ( MP2, MA2 )) ;\
|
||||
;\
|
||||
PUNPCKHWD ( MA1, MA1 ) /* pa1 | pa1 | | */ ;\
|
||||
TWO(PUNPCKHWD ( MA2, MA2 )) /* pa2 | pa2 | | */ ;\
|
||||
PUNPCKHDQ ( MA1, MA1 ) /* pa1 | pa1 | pa1 | pa1 */ ;\
|
||||
TWO(PUNPCKHDQ ( MA2, MA2 )) /* pa2 | pa2 | pa2 | pa2 */
|
||||
|
||||
#define GMB_PACK( MS1, MS2 ) \
|
||||
PACKUSWB ( MS2, MS1 ) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\
|
||||
|
||||
#define GMB_STORE(rgba, MSS ) \
|
||||
ONE(MOVD ( MSS, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\
|
||||
TWO(MOVQ ( MSS, REGIND(rgba) )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */
|
||||
|
||||
/* Kevin F. Quinn <kevquinn@gentoo.org> 2 July 2006
|
||||
* Replace data segment constants with text-segment
|
||||
* constants (via pushl/movq)
|
||||
SEG_DATA
|
||||
|
||||
ALIGNDATA8
|
||||
const_0080:
|
||||
D_LONG 0x00800080, 0x00800080
|
||||
|
||||
const_80:
|
||||
D_LONG 0x80808080, 0x80808080
|
||||
*/
|
||||
#define const_0080_l 0x00800080
|
||||
#define const_0080_h 0x00800080
|
||||
#define const_80_l 0x80808080
|
||||
#define const_80_h 0x80808080
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
|
||||
/* Blend transparency function
|
||||
*/
|
||||
|
||||
#define TAG(x) CONCAT(x,_transparency)
|
||||
#define LLTAG(x) LLBL2(x,_transparency)
|
||||
|
||||
#define INIT \
|
||||
PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */
|
||||
|
||||
#define MAIN( rgba, dest ) \
|
||||
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\
|
||||
GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) ;\
|
||||
GMB_ALPHA( MM1, MM3, MM4, MM6 ) ;\
|
||||
GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 ) ;\
|
||||
GMB_PACK( MM3, MM6 ) ;\
|
||||
GMB_STORE( rgba, MM3 )
|
||||
|
||||
#include "mmx_blendtmp.h"
|
||||
|
||||
|
||||
/* Blend add function
|
||||
*
|
||||
* FIXME: Add some loop unrolling here...
|
||||
*/
|
||||
|
||||
#define TAG(x) CONCAT(x,_add)
|
||||
#define LLTAG(x) LLBL2(x,_add)
|
||||
|
||||
#define INIT
|
||||
|
||||
#define MAIN( rgba, dest ) \
|
||||
ONE(MOVD ( REGIND(rgba), MM1 )) /* | | | | qa1 | qb1 | qg1 | qr1 */ ;\
|
||||
ONE(MOVD ( REGIND(dest), MM2 )) /* | | | | pa1 | pb1 | pg1 | pr1 */ ;\
|
||||
ONE(PADDUSB ( MM2, MM1 )) ;\
|
||||
ONE(MOVD ( MM1, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\
|
||||
;\
|
||||
TWO(MOVQ ( REGIND(rgba), MM1 )) /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */ ;\
|
||||
TWO(PADDUSB ( REGIND(dest), MM1 )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\
|
||||
TWO(MOVQ ( MM1, REGIND(rgba) ))
|
||||
|
||||
#include "mmx_blendtmp.h"
|
||||
|
||||
|
||||
/* Blend min function
|
||||
*/
|
||||
|
||||
#define TAG(x) CONCAT(x,_min)
|
||||
#define LLTAG(x) LLBL2(x,_min)
|
||||
|
||||
/* Kevin F. Quinn 2nd July 2006
|
||||
* Replace data segment constants with text-segment instructions
|
||||
#define INIT \
|
||||
MOVQ ( CONTENT(const_80), MM7 )
|
||||
*/
|
||||
#define INIT \
|
||||
PUSH_L ( CONST(const_80_h) ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\
|
||||
PUSH_L ( CONST(const_80_l) ) ;\
|
||||
MOVQ ( REGIND(ESP), MM7 ) ;\
|
||||
ADD_L ( CONST(8), ESP)
|
||||
|
||||
#define MAIN( rgba, dest ) \
|
||||
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\
|
||||
MOVQ ( MM1, MM3 ) ;\
|
||||
MOVQ ( MM2, MM4 ) ;\
|
||||
PXOR ( MM7, MM3 ) /* unsigned -> signed */ ;\
|
||||
PXOR ( MM7, MM4 ) /* unsigned -> signed */ ;\
|
||||
PCMPGTB ( MM3, MM4 ) /* q > p ? 0xff : 0x00 */ ;\
|
||||
PAND ( MM4, MM1 ) /* q > p ? p : 0 */ ;\
|
||||
PANDN ( MM2, MM4 ) /* q > p ? 0 : q */ ;\
|
||||
POR ( MM1, MM4 ) /* q > p ? p : q */ ;\
|
||||
GMB_STORE( rgba, MM4 )
|
||||
|
||||
#include "mmx_blendtmp.h"
|
||||
|
||||
|
||||
/* Blend max function
|
||||
*/
|
||||
|
||||
#define TAG(x) CONCAT(x,_max)
|
||||
#define LLTAG(x) LLBL2(x,_max)
|
||||
|
||||
/* Kevin F. Quinn 2nd July 2006
|
||||
* Replace data segment constants with text-segment instructions
|
||||
#define INIT \
|
||||
MOVQ ( CONTENT(const_80), MM7 )
|
||||
*/
|
||||
#define INIT \
|
||||
PUSH_L ( CONST(const_80_l) ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\
|
||||
PUSH_L ( CONST(const_80_h) ) ;\
|
||||
MOVQ ( REGIND(ESP), MM7 ) ;\
|
||||
ADD_L ( CONST(8), ESP)
|
||||
|
||||
/* Max blend body: p is loaded from rgba (MM1), q from dest (MM2). Both are
 * XORed with MM7 so the signed byte compare orders them as unsigned
 * values, then the per-byte larger of the two is selected and stored
 * back to rgba.
 */
#define MAIN( rgba, dest ) \
|
||||
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\
|
||||
MOVQ ( MM1, MM3 ) ;\
|
||||
MOVQ ( MM2, MM4 ) ;\
|
||||
PXOR ( MM7, MM3 ) /* unsigned -> signed */ ;\
|
||||
PXOR ( MM7, MM4 ) /* unsigned -> signed */ ;\
|
||||
PCMPGTB ( MM3, MM4 ) /* q > p ? 0xff : 0x00 */ ;\
|
||||
PAND ( MM4, MM2 ) /* q > p ? q : 0 */ ;\
|
||||
PANDN ( MM1, MM4 ) /* q > p ? 0 : p */ ;\
|
||||
POR ( MM2, MM4 ) /* q > p ? q : p */ ;\
|
||||
GMB_STORE( rgba, MM4 )
|
||||
|
||||
#include "mmx_blendtmp.h"
|
||||
|
||||
|
||||
/* Blend modulate function
|
||||
*/
|
||||
|
||||
#define TAG(x) CONCAT(x,_modulate)
|
||||
#define LLTAG(x) LLBL2(x,_modulate)
|
||||
|
||||
/* Kevin F. Quinn 2nd July 2006
|
||||
* Replace data segment constants with text-segment instructions
|
||||
#define INIT \
|
||||
MOVQ ( CONTENT(const_0080), MM7 )
|
||||
*/
|
||||
#define INIT \
|
||||
PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */ ;\
|
||||
PUSH_L ( CONST(const_0080_l) ) /* 0x0080 | 0x0080 | 0x0080 | 0x0080 */ ;\
|
||||
PUSH_L ( CONST(const_0080_h) ) ;\
|
||||
MOVQ ( REGIND(ESP), MM7 ) ;\
|
||||
ADD_L ( CONST(8), ESP)
|
||||
|
||||
#define MAIN( rgba, dest ) \
|
||||
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\
|
||||
GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) ;\
|
||||
GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 ) ;\
|
||||
GMB_PACK( MM2, MM5 ) ;\
|
||||
GMB_STORE( rgba, MM2 )
|
||||
|
||||
#include "mmx_blendtmp.h"
|
||||
|
||||
#endif
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
* Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* void _mesa_mmx_blend( struct gl_context *ctx,
|
||||
* GLuint n,
|
||||
* const GLubyte mask[],
|
||||
* GLchan rgba[][4],
|
||||
* CONST GLchan dest[][4] )
|
||||
*
|
||||
*/
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( TAG(_mesa_mmx_blend) )
|
||||
HIDDEN( TAG(_mesa_mmx_blend) )
|
||||
GLNAME( TAG(_mesa_mmx_blend) ):
|
||||
|
||||
PUSH_L ( EBP )
|
||||
MOV_L ( ESP, EBP )
|
||||
PUSH_L ( ESI )
|
||||
PUSH_L ( EDI )
|
||||
PUSH_L ( EBX )
|
||||
|
||||
MOV_L ( REGOFF(12, EBP), ECX ) /* n */
|
||||
CMP_L ( CONST(0), ECX)
|
||||
JE ( LLTAG(GMB_return) )
|
||||
|
||||
MOV_L ( REGOFF(16, EBP), EBX ) /* mask */
|
||||
MOV_L ( REGOFF(20, EBP), EDI ) /* rgba */
|
||||
MOV_L ( REGOFF(24, EBP), ESI ) /* dest */
|
||||
|
||||
INIT
|
||||
|
||||
TEST_L ( CONST(4), EDI ) /* align rgba on an 8-byte boundary */
|
||||
JZ ( LLTAG(GMB_align_end) )
|
||||
|
||||
CMP_B ( CONST(0), REGIND(EBX) ) /* *mask == 0 */
|
||||
JE ( LLTAG(GMB_align_continue) )
|
||||
|
||||
/* runin */
|
||||
#define ONE(x) x
|
||||
#define TWO(x)
|
||||
MAIN ( EDI, ESI )
|
||||
#undef ONE
|
||||
#undef TWO
|
||||
|
||||
LLTAG(GMB_align_continue):
|
||||
|
||||
DEC_L ( ECX ) /* n -= 1 */
|
||||
INC_L ( EBX ) /* mask += 1 */
|
||||
ADD_L ( CONST(4), EDI ) /* rgba += 1 */
|
||||
ADD_L ( CONST(4), ESI ) /* dest += 1 */
|
||||
|
||||
LLTAG(GMB_align_end):
|
||||
|
||||
CMP_L ( CONST(2), ECX)
|
||||
JB ( LLTAG(GMB_loop_end) )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLTAG(GMB_loop_begin):
|
||||
|
||||
CMP_W ( CONST(0), REGIND(EBX) ) /* *mask == 0 && *(mask + 1) == 0 */
|
||||
JE ( LLTAG(GMB_loop_continue) )
|
||||
|
||||
/* main loop */
|
||||
#define ONE(x)
|
||||
#define TWO(x) x
|
||||
MAIN ( EDI, ESI )
|
||||
#undef ONE
|
||||
#undef TWO
|
||||
|
||||
LLTAG(GMB_loop_continue):
|
||||
|
||||
DEC_L ( ECX )
|
||||
DEC_L ( ECX ) /* n -= 2 */
|
||||
ADD_L ( CONST(2), EBX ) /* mask += 2 */
|
||||
ADD_L ( CONST(8), EDI ) /* rgba += 2 */
|
||||
ADD_L ( CONST(8), ESI ) /* dest += 2 */
|
||||
CMP_L ( CONST(2), ECX )
|
||||
JAE ( LLTAG(GMB_loop_begin) )
|
||||
|
||||
LLTAG(GMB_loop_end):
|
||||
|
||||
CMP_L ( CONST(1), ECX )
|
||||
JB ( LLTAG(GMB_done) )
|
||||
|
||||
CMP_B ( CONST(0), REGIND(EBX) ) /* *mask == 0 */
|
||||
JE ( LLTAG(GMB_done) )
|
||||
|
||||
/* runout */
|
||||
#define ONE(x) x
|
||||
#define TWO(x)
|
||||
MAIN ( EDI, ESI )
|
||||
#undef ONE
|
||||
#undef TWO
|
||||
|
||||
LLTAG(GMB_done):
|
||||
|
||||
EMMS
|
||||
|
||||
LLTAG(GMB_return):
|
||||
|
||||
POP_L ( EBX )
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
MOV_L ( EBP, ESP )
|
||||
POP_L ( EBP )
|
||||
RET
|
||||
|
||||
#undef TAG
|
||||
#undef LLTAG
|
||||
#undef INIT
|
||||
#undef MAIN
|
|
@ -0,0 +1,57 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Normal transform function interface for assembly code. Simply define
|
||||
* FRAME_OFFSET to the number of bytes pushed onto the stack before
|
||||
* using the ARG_* argument macros.
|
||||
*
|
||||
* Gareth Hughes
|
||||
*/
|
||||
|
||||
#ifndef __NORM_ARGS_H__
|
||||
#define __NORM_ARGS_H__
|
||||
|
||||
/* Offsets for normal_func arguments
|
||||
*
|
||||
* typedef void (*normal_func)( const GLmatrix *mat,
|
||||
* GLfloat scale,
|
||||
* const GLvector4f *in,
|
||||
* const GLfloat lengths[],
|
||||
* GLvector4f *dest );
|
||||
*/
|
||||
#define OFFSET_MAT 4
|
||||
#define OFFSET_SCALE 8
|
||||
#define OFFSET_IN 12
|
||||
#define OFFSET_LENGTHS 16
|
||||
#define OFFSET_DEST 20
|
||||
|
||||
#define ARG_MAT REGOFF(FRAME_OFFSET+OFFSET_MAT, ESP)
|
||||
#define ARG_SCALE REGOFF(FRAME_OFFSET+OFFSET_SCALE, ESP)
|
||||
#define ARG_IN REGOFF(FRAME_OFFSET+OFFSET_IN, ESP)
|
||||
#define ARG_LENGTHS REGOFF(FRAME_OFFSET+OFFSET_LENGTHS, ESP)
|
||||
#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
|
||||
|
||||
#endif
|
|
@ -0,0 +1,686 @@
|
|||
/*
|
||||
* (C) Copyright IBM Corporation 2004
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file read_rgba_span_x86.S
|
||||
* Optimized routines to transfer pixel data from the framebuffer to a
|
||||
* buffer in main memory.
|
||||
*
|
||||
* \author Ian Romanick <idr@us.ibm.com>
|
||||
*/
|
||||
|
||||
.file "read_rgba_span_x86.S"
|
||||
#if !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */
|
||||
/* Kevin F. Quinn 2nd July 2006
|
||||
* Replaced data segment constants with text-segment instructions.
|
||||
*/
|
||||
/* Build two constant masks on the stack and load them with the move
 * instruction given as mvins: four dwords of 0xff00ff00 go into m1, four
 * dwords of 0x00ff0000 go into m2. Sixteen bytes are pushed per mask, so
 * mvins may read 8 or 16 bytes from (%esp). The 32 bytes pushed are
 * released again before the macro ends.
 */
#define LOAD_MASK(mvins,m1,m2) \
|
||||
pushl $0xff00ff00 ;\
|
||||
pushl $0xff00ff00 ;\
|
||||
pushl $0xff00ff00 ;\
|
||||
pushl $0xff00ff00 ;\
|
||||
mvins (%esp), m1 ;\
|
||||
pushl $0x00ff0000 ;\
|
||||
pushl $0x00ff0000 ;\
|
||||
pushl $0x00ff0000 ;\
|
||||
pushl $0x00ff0000 ;\
|
||||
mvins (%esp), m2 ;\
|
||||
addl $32, %esp
|
||||
|
||||
/* I implemented these as macros because they appear in several places,
|
||||
* and I've tweaked them a number of times. I got tired of changing every
|
||||
* place they appear. :)
|
||||
*/
|
||||
|
||||
#define DO_ONE_PIXEL() \
|
||||
movl (%ebx), %eax ; \
|
||||
addl $4, %ebx ; \
|
||||
bswap %eax /* ARGB -> BGRA */ ; \
|
||||
rorl $8, %eax /* BGRA -> ABGR */ ; \
|
||||
movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ; \
|
||||
addl $4, %ecx
|
||||
|
||||
#define DO_ONE_LAST_PIXEL() \
|
||||
movl (%ebx), %eax ; \
|
||||
bswap %eax /* ARGB -> BGRA */ ; \
|
||||
rorl $8, %eax /* BGRA -> ABGR */ ; \
|
||||
movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ; \
|
||||
|
||||
|
||||
/**
|
||||
* MMX optimized version of the BGRA8888_REV to RGBA copy routine.
|
||||
*
|
||||
* \warning
|
||||
* This function assumes that the caller will issue the EMMS instruction
|
||||
* at the correct places.
|
||||
*/
|
||||
|
||||
.globl _generic_read_RGBA_span_BGRA8888_REV_MMX
|
||||
#ifndef USE_DRICORE
|
||||
.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX
|
||||
#endif
|
||||
.type _generic_read_RGBA_span_BGRA8888_REV_MMX, @function
|
||||
_generic_read_RGBA_span_BGRA8888_REV_MMX:
|
||||
pushl %ebx
|
||||
|
||||
#ifdef USE_INNER_EMMS
|
||||
emms
|
||||
#endif
|
||||
LOAD_MASK(movq,%mm1,%mm2)
|
||||
|
||||
movl 8(%esp), %ebx /* source pointer */
|
||||
movl 16(%esp), %edx /* number of pixels to copy */
|
||||
movl 12(%esp), %ecx /* destination pointer */
|
||||
|
||||
testl %edx, %edx
|
||||
jle .L20 /* Bail if there's nothing to do. */
|
||||
|
||||
movl %ebx, %eax
|
||||
|
||||
negl %eax
|
||||
sarl $2, %eax
|
||||
andl $1, %eax
|
||||
je .L17
|
||||
|
||||
subl %eax, %edx
|
||||
DO_ONE_PIXEL()
|
||||
.L17:
|
||||
|
||||
/* Would it be faster to unroll this loop once and process 4 pixels
|
||||
* per pass, instead of just two?
|
||||
*/
|
||||
|
||||
movl %edx, %eax
|
||||
shrl %eax
|
||||
jmp .L18
|
||||
.L19:
|
||||
movq (%ebx), %mm0
|
||||
addl $8, %ebx
|
||||
|
||||
/* These 9 instructions do what PSHUFB (if there were such an
|
||||
* instruction) could do in 1. :(
|
||||
*/
|
||||
|
||||
movq %mm0, %mm3
|
||||
movq %mm0, %mm4
|
||||
|
||||
pand %mm2, %mm3
|
||||
psllq $16, %mm4
|
||||
psrlq $16, %mm3
|
||||
pand %mm2, %mm4
|
||||
|
||||
pand %mm1, %mm0
|
||||
por %mm4, %mm3
|
||||
por %mm3, %mm0
|
||||
|
||||
movq %mm0, (%ecx)
|
||||
addl $8, %ecx
|
||||
subl $1, %eax
|
||||
.L18:
|
||||
jne .L19
|
||||
|
||||
#ifdef USE_INNER_EMMS
|
||||
emms
|
||||
#endif
|
||||
|
||||
/* At this point there are either 1 or 0 pixels remaining to be
|
||||
* converted. Convert the last pixel, if needed.
|
||||
*/
|
||||
|
||||
testl $1, %edx
|
||||
je .L20
|
||||
|
||||
DO_ONE_LAST_PIXEL()
|
||||
|
||||
.L20:
|
||||
popl %ebx
|
||||
ret
|
||||
.size _generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX
|
||||
|
||||
|
||||
/**
|
||||
* SSE optimized version of the BGRA8888_REV to RGBA copy routine. SSE
|
||||
* instructions are only actually used to read data from the framebuffer.
|
||||
* In practice, the speed-up is pretty small.
|
||||
*
|
||||
* \todo
|
||||
* Do some more testing and determine if there's any reason to have this
|
||||
* function in addition to the MMX version.
|
||||
*
|
||||
* \warning
|
||||
* This function assumes that the caller will issue the EMMS instruction
|
||||
* at the correct places.
|
||||
*/
|
||||
|
||||
.globl _generic_read_RGBA_span_BGRA8888_REV_SSE
|
||||
#ifndef USE_DRICORE
|
||||
.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE
|
||||
#endif
|
||||
.type _generic_read_RGBA_span_BGRA8888_REV_SSE, @function
|
||||
_generic_read_RGBA_span_BGRA8888_REV_SSE:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
pushl %ebp
|
||||
|
||||
#ifdef USE_INNER_EMMS
|
||||
emms
|
||||
#endif
|
||||
|
||||
LOAD_MASK(movq,%mm1,%mm2)
|
||||
|
||||
movl 16(%esp), %ebx /* source pointer */
|
||||
movl 24(%esp), %edx /* number of pixels to copy */
|
||||
movl 20(%esp), %ecx /* destination pointer */
|
||||
|
||||
testl %edx, %edx
|
||||
jle .L35 /* Bail if there's nothing to do. */
|
||||
|
||||
movl %esp, %ebp
|
||||
subl $16, %esp
|
||||
andl $0xfffffff0, %esp
|
||||
|
||||
movl %ebx, %eax
|
||||
movl %edx, %esi
|
||||
|
||||
negl %eax
|
||||
andl $15, %eax
|
||||
sarl $2, %eax
|
||||
cmpl %edx, %eax
|
||||
cmovle %eax, %esi
|
||||
|
||||
subl %esi, %edx
|
||||
|
||||
testl $1, %esi
|
||||
je .L32
|
||||
|
||||
DO_ONE_PIXEL()
|
||||
.L32:
|
||||
|
||||
testl $2, %esi
|
||||
je .L31
|
||||
|
||||
movq (%ebx), %mm0
|
||||
addl $8, %ebx
|
||||
|
||||
movq %mm0, %mm3
|
||||
movq %mm0, %mm4
|
||||
|
||||
pand %mm2, %mm3
|
||||
psllq $16, %mm4
|
||||
psrlq $16, %mm3
|
||||
pand %mm2, %mm4
|
||||
|
||||
pand %mm1, %mm0
|
||||
por %mm4, %mm3
|
||||
por %mm3, %mm0
|
||||
|
||||
movq %mm0, (%ecx)
|
||||
addl $8, %ecx
|
||||
.L31:
|
||||
|
||||
movl %edx, %eax
|
||||
shrl $2, %eax
|
||||
jmp .L33
|
||||
.L34:
|
||||
movaps (%ebx), %xmm0
|
||||
addl $16, %ebx
|
||||
|
||||
/* This would be so much better if we could just move directly from
|
||||
* an SSE register to an MMX register. Unfortunately, that
|
||||
* functionality wasn't introduced until SSE2 with the MOVDQ2Q
|
||||
* instruction.
|
||||
*/
|
||||
|
||||
movaps %xmm0, (%esp)
|
||||
movq (%esp), %mm0
|
||||
movq 8(%esp), %mm5
|
||||
|
||||
movq %mm0, %mm3
|
||||
movq %mm0, %mm4
|
||||
movq %mm5, %mm6
|
||||
movq %mm5, %mm7
|
||||
|
||||
pand %mm2, %mm3
|
||||
pand %mm2, %mm6
|
||||
|
||||
psllq $16, %mm4
|
||||
psllq $16, %mm7
|
||||
|
||||
psrlq $16, %mm3
|
||||
psrlq $16, %mm6
|
||||
|
||||
pand %mm2, %mm4
|
||||
pand %mm2, %mm7
|
||||
|
||||
pand %mm1, %mm0
|
||||
pand %mm1, %mm5
|
||||
|
||||
por %mm4, %mm3
|
||||
por %mm7, %mm6
|
||||
|
||||
por %mm3, %mm0
|
||||
por %mm6, %mm5
|
||||
|
||||
movq %mm0, (%ecx)
|
||||
movq %mm5, 8(%ecx)
|
||||
addl $16, %ecx
|
||||
|
||||
subl $1, %eax
|
||||
.L33:
|
||||
jne .L34
|
||||
|
||||
#ifdef USE_INNER_EMMS
|
||||
emms
|
||||
#endif
|
||||
movl %ebp, %esp
|
||||
|
||||
/* At this point there are between 0 and 3 pixels remaining to be
|
||||
* converted.
|
||||
*/
|
||||
|
||||
testl $2, %edx
|
||||
je .L36
|
||||
|
||||
movq (%ebx), %mm0
|
||||
addl $8, %ebx
|
||||
|
||||
movq %mm0, %mm3
|
||||
movq %mm0, %mm4
|
||||
|
||||
pand %mm2, %mm3
|
||||
psllq $16, %mm4
|
||||
psrlq $16, %mm3
|
||||
pand %mm2, %mm4
|
||||
|
||||
pand %mm1, %mm0
|
||||
por %mm4, %mm3
|
||||
por %mm3, %mm0
|
||||
|
||||
movq %mm0, (%ecx)
|
||||
addl $8, %ecx
|
||||
.L36:
|
||||
|
||||
testl $1, %edx
|
||||
je .L35
|
||||
|
||||
DO_ONE_LAST_PIXEL()
|
||||
.L35:
|
||||
popl %ebp
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size _generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE
|
||||
|
||||
|
||||
/**
|
||||
* SSE2 optimized version of the BGRA8888_REV to RGBA copy routine.
|
||||
*/
|
||||
|
||||
.text
|
||||
.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2
|
||||
#ifndef USE_DRICORE
|
||||
.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2
|
||||
#endif
|
||||
.type _generic_read_RGBA_span_BGRA8888_REV_SSE2, @function
|
||||
_generic_read_RGBA_span_BGRA8888_REV_SSE2:
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
|
||||
LOAD_MASK(movdqu,%xmm1,%xmm2)
|
||||
|
||||
movl 12(%esp), %ebx /* source pointer */
|
||||
movl 20(%esp), %edx /* number of pixels to copy */
|
||||
movl 16(%esp), %ecx /* destination pointer */
|
||||
|
||||
movl %ebx, %eax
|
||||
movl %edx, %esi
|
||||
|
||||
testl %edx, %edx
|
||||
jle .L46 /* Bail if there's nothing to do. */
|
||||
|
||||
/* If the source pointer isn't a multiple of 16 we have to process
|
||||
* a few pixels the "slow" way to get the address aligned for
|
||||
* the SSE fetch instructions.
|
||||
*/
|
||||
|
||||
negl %eax
|
||||
andl $15, %eax
|
||||
sarl $2, %eax
|
||||
|
||||
cmpl %edx, %eax
|
||||
cmovbe %eax, %esi
|
||||
subl %esi, %edx
|
||||
|
||||
testl $1, %esi
|
||||
je .L41
|
||||
|
||||
DO_ONE_PIXEL()
|
||||
.L41:
|
||||
testl $2, %esi
|
||||
je .L40
|
||||
|
||||
movq (%ebx), %xmm0
|
||||
addl $8, %ebx
|
||||
|
||||
movdqa %xmm0, %xmm3
|
||||
movdqa %xmm0, %xmm4
|
||||
andps %xmm1, %xmm0
|
||||
|
||||
andps %xmm2, %xmm3
|
||||
pslldq $2, %xmm4
|
||||
psrldq $2, %xmm3
|
||||
andps %xmm2, %xmm4
|
||||
|
||||
orps %xmm4, %xmm3
|
||||
orps %xmm3, %xmm0
|
||||
|
||||
movq %xmm0, (%ecx)
|
||||
addl $8, %ecx
|
||||
.L40:
|
||||
|
||||
/* Would it be worth having a specialized version of this loop for
|
||||
* the case where the destination is 16-byte aligned? That version
|
||||
* would be identical except that it could use movdqa instead of
|
||||
* movdqu.
|
||||
*/
|
||||
|
||||
movl %edx, %eax
|
||||
shrl $2, %eax
|
||||
jmp .L42
|
||||
.L43:
|
||||
movdqa (%ebx), %xmm0
|
||||
addl $16, %ebx
|
||||
|
||||
movdqa %xmm0, %xmm3
|
||||
movdqa %xmm0, %xmm4
|
||||
andps %xmm1, %xmm0
|
||||
|
||||
andps %xmm2, %xmm3
|
||||
pslldq $2, %xmm4
|
||||
psrldq $2, %xmm3
|
||||
andps %xmm2, %xmm4
|
||||
|
||||
orps %xmm4, %xmm3
|
||||
orps %xmm3, %xmm0
|
||||
|
||||
movdqu %xmm0, (%ecx)
|
||||
addl $16, %ecx
|
||||
subl $1, %eax
|
||||
.L42:
|
||||
jne .L43
|
||||
|
||||
|
||||
/* There may be up to 3 pixels remaining to be copied. Take care
|
||||
* of them now. We do the 2 pixel case first because the data
|
||||
* will be aligned.
|
||||
*/
|
||||
|
||||
testl $2, %edx
|
||||
je .L47
|
||||
|
||||
movq (%ebx), %xmm0
|
||||
addl $8, %ebx
|
||||
|
||||
movdqa %xmm0, %xmm3
|
||||
movdqa %xmm0, %xmm4
|
||||
andps %xmm1, %xmm0
|
||||
|
||||
andps %xmm2, %xmm3
|
||||
pslldq $2, %xmm4
|
||||
psrldq $2, %xmm3
|
||||
andps %xmm2, %xmm4
|
||||
|
||||
orps %xmm4, %xmm3
|
||||
orps %xmm3, %xmm0
|
||||
|
||||
movq %xmm0, (%ecx)
|
||||
addl $8, %ecx
|
||||
.L47:
|
||||
|
||||
testl $1, %edx
|
||||
je .L46
|
||||
|
||||
DO_ONE_LAST_PIXEL()
|
||||
.L46:
|
||||
|
||||
popl %ebx
|
||||
popl %esi
|
||||
ret
|
||||
.size _generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2
|
||||
|
||||
|
||||
|
||||
#define MASK_565_L 0x07e0f800
|
||||
#define MASK_565_H 0x0000001f
|
||||
/* Setting SCALE_ADJUST to 5 gives a perfect match with the
|
||||
* classic C implementation in Mesa. Setting SCALE_ADJUST
|
||||
* to 0 is slightly faster but at a small cost to accuracy.
|
||||
*/
|
||||
#define SCALE_ADJUST 5
|
||||
#if SCALE_ADJUST == 5
|
||||
#define PRESCALE_L 0x00100001
|
||||
#define PRESCALE_H 0x00000200
|
||||
#define SCALE_L 0x40C620E8
|
||||
#define SCALE_H 0x0000839d
|
||||
#elif SCALE_ADJUST == 0
|
||||
#define PRESCALE_L 0x00200001
|
||||
#define PRESCALE_H 0x00000800
|
||||
#define SCALE_L 0x01040108
|
||||
#define SCALE_H 0x00000108
|
||||
#else
|
||||
#error SCALE_ADJUST must either be 5 or 0.
|
||||
#endif
|
||||
#define ALPHA_L 0x00000000
|
||||
#define ALPHA_H 0x00ff0000
|
||||
|
||||
/**
|
||||
* MMX optimized version of the RGB565 to RGBA copy routine.
|
||||
*/
|
||||
|
||||
.text
|
||||
.globl _generic_read_RGBA_span_RGB565_MMX
|
||||
#ifndef USE_DRICORE
|
||||
.hidden _generic_read_RGBA_span_RGB565_MMX
|
||||
#endif
|
||||
.type _generic_read_RGBA_span_RGB565_MMX, @function
|
||||
|
||||
_generic_read_RGBA_span_RGB565_MMX:
|
||||
|
||||
#ifdef USE_INNER_EMMS
|
||||
emms
|
||||
#endif
|
||||
|
||||
movl 4(%esp), %eax /* source pointer */
|
||||
movl 8(%esp), %edx /* destination pointer */
|
||||
movl 12(%esp), %ecx /* number of pixels to copy */
|
||||
|
||||
pushl $MASK_565_H
|
||||
pushl $MASK_565_L
|
||||
movq (%esp), %mm5
|
||||
pushl $PRESCALE_H
|
||||
pushl $PRESCALE_L
|
||||
movq (%esp), %mm6
|
||||
pushl $SCALE_H
|
||||
pushl $SCALE_L
|
||||
movq (%esp), %mm7
|
||||
pushl $ALPHA_H
|
||||
pushl $ALPHA_L
|
||||
movq (%esp), %mm3
|
||||
addl $32,%esp
|
||||
|
||||
sarl $2, %ecx
|
||||
js .L01 /* Bail early if the count is negative (test SF only: OF is undefined after a multi-bit sarl). */
|
||||
jmp .L02
|
||||
|
||||
.L03:
|
||||
/* Fetch 4 RGB565 pixels into %mm4. Distribute the first and
|
||||
* second pixels into the four words of %mm0 and %mm2.
|
||||
*/
|
||||
|
||||
movq (%eax), %mm4
|
||||
addl $8, %eax
|
||||
|
||||
pshufw $0x00, %mm4, %mm0
|
||||
pshufw $0x55, %mm4, %mm2
|
||||
|
||||
|
||||
/* Mask the pixels so that each word of each register contains only
|
||||
* one color component.
|
||||
*/
|
||||
|
||||
pand %mm5, %mm0
|
||||
pand %mm5, %mm2
|
||||
|
||||
|
||||
/* Adjust the component values so that they are as small as possible,
|
||||
* but large enough so that we can multiply them by an unsigned 16-bit
|
||||
* number and get a value as large as 0x00ff0000.
|
||||
*/
|
||||
|
||||
pmullw %mm6, %mm0
|
||||
pmullw %mm6, %mm2
|
||||
#if SCALE_ADJUST > 0
|
||||
psrlw $SCALE_ADJUST, %mm0
|
||||
psrlw $SCALE_ADJUST, %mm2
|
||||
#endif
|
||||
|
||||
/* Scale the input component values to be on the range
|
||||
* [0, 0x00ff0000]. This is the real magic of the whole routine.
|
||||
*/
|
||||
|
||||
pmulhuw %mm7, %mm0
|
||||
pmulhuw %mm7, %mm2
|
||||
|
||||
|
||||
/* Always set the alpha value to 0xff.
|
||||
*/
|
||||
|
||||
por %mm3, %mm0
|
||||
por %mm3, %mm2
|
||||
|
||||
|
||||
/* Pack the 16-bit values to 8-bit values and store the converted
|
||||
* pixel data.
|
||||
*/
|
||||
|
||||
packuswb %mm2, %mm0
|
||||
movq %mm0, (%edx)
|
||||
addl $8, %edx
|
||||
|
||||
pshufw $0xaa, %mm4, %mm0
|
||||
pshufw $0xff, %mm4, %mm2
|
||||
|
||||
pand %mm5, %mm0
|
||||
pand %mm5, %mm2
|
||||
pmullw %mm6, %mm0
|
||||
pmullw %mm6, %mm2
|
||||
#if SCALE_ADJUST > 0
|
||||
psrlw $SCALE_ADJUST, %mm0
|
||||
psrlw $SCALE_ADJUST, %mm2
|
||||
#endif
|
||||
pmulhuw %mm7, %mm0
|
||||
pmulhuw %mm7, %mm2
|
||||
|
||||
por %mm3, %mm0
|
||||
por %mm3, %mm2
|
||||
|
||||
packuswb %mm2, %mm0
|
||||
|
||||
movq %mm0, (%edx)
|
||||
addl $8, %edx
|
||||
|
||||
subl $1, %ecx
|
||||
.L02:
|
||||
jne .L03
|
||||
|
||||
|
||||
/* At this point there can be at most 3 pixels left to process. If
|
||||
* there are either 2 or 3 left, process 2.
|
||||
*/
|
||||
|
||||
movl 12(%esp), %ecx
|
||||
testl $0x02, %ecx
|
||||
je .L04
|
||||
|
||||
movd (%eax), %mm4
|
||||
addl $4, %eax
|
||||
|
||||
pshufw $0x00, %mm4, %mm0
|
||||
pshufw $0x55, %mm4, %mm2
|
||||
|
||||
pand %mm5, %mm0
|
||||
pand %mm5, %mm2
|
||||
pmullw %mm6, %mm0
|
||||
pmullw %mm6, %mm2
|
||||
#if SCALE_ADJUST > 0
|
||||
psrlw $SCALE_ADJUST, %mm0
|
||||
psrlw $SCALE_ADJUST, %mm2
|
||||
#endif
|
||||
pmulhuw %mm7, %mm0
|
||||
pmulhuw %mm7, %mm2
|
||||
|
||||
por %mm3, %mm0
|
||||
por %mm3, %mm2
|
||||
|
||||
packuswb %mm2, %mm0
|
||||
|
||||
movq %mm0, (%edx)
|
||||
addl $8, %edx
|
||||
|
||||
.L04:
|
||||
/* At this point there can be at most 1 pixel left to process.
|
||||
* Process it if needed.
|
||||
*/
|
||||
|
||||
testl $0x01, %ecx
|
||||
je .L01
|
||||
|
||||
movzwl (%eax), %ecx
|
||||
movd %ecx, %mm4
|
||||
|
||||
pshufw $0x00, %mm4, %mm0
|
||||
|
||||
pand %mm5, %mm0
|
||||
pmullw %mm6, %mm0
|
||||
#if SCALE_ADJUST > 0
|
||||
psrlw $SCALE_ADJUST, %mm0
|
||||
#endif
|
||||
pmulhuw %mm7, %mm0
|
||||
|
||||
por %mm3, %mm0
|
||||
|
||||
packuswb %mm0, %mm0
|
||||
|
||||
movd %mm0, (%edx)
|
||||
|
||||
.L01:
|
||||
#ifdef USE_INNER_EMMS
|
||||
emms
|
||||
#endif
|
||||
ret
|
||||
#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) */
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* (C) Copyright IBM Corporation 2004
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file read_rgba_span_x86.h
|
||||
*
|
||||
* \author Ian Romanick <idr@us.ibm.com>
|
||||
*/
|
||||
|
||||
#ifndef READ_RGBA_SPAN_X86_H
|
||||
#define READ_RGBA_SPAN_X86_H
|
||||
|
||||
#if defined(USE_SSE_ASM) || defined(USE_MMX_ASM)
|
||||
#include "x86/common_x86_asm.h"
|
||||
#endif
|
||||
|
||||
#if defined(USE_SSE_ASM)
|
||||
extern void _generic_read_RGBA_span_BGRA8888_REV_SSE2( const unsigned char *,
|
||||
unsigned char *, unsigned );
|
||||
#endif
|
||||
|
||||
#if defined(USE_SSE_ASM)
|
||||
extern void _generic_read_RGBA_span_BGRA8888_REV_SSE( const unsigned char *,
|
||||
unsigned char *, unsigned );
|
||||
#endif
|
||||
|
||||
#if defined(USE_MMX_ASM)
|
||||
extern void _generic_read_RGBA_span_BGRA8888_REV_MMX( const unsigned char *,
|
||||
unsigned char *, unsigned );
|
||||
|
||||
extern void _generic_read_RGBA_span_RGB565_MMX( const unsigned char *,
|
||||
unsigned char *, unsigned );
|
||||
#endif
|
||||
|
||||
#endif /* READ_RGBA_SPAN_X86_H */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,256 @@
|
|||
|
||||
#ifndef _X86SSE_H_
|
||||
#define _X86SSE_H_
|
||||
|
||||
#if defined(__i386__) || defined(__386__)
|
||||
|
||||
/* It is up to the caller to ensure that instructions issued are
|
||||
* suitable for the host cpu. There are no checks made in this module
|
||||
* for mmx/sse/sse2 support on the cpu.
|
||||
*/
|
||||
struct x86_reg {
|
||||
unsigned file:3;
|
||||
unsigned idx:3;
|
||||
unsigned mod:2; /* mod_REG if this is just a register */
|
||||
int disp:24; /* only +/- 23bits of offset - should be enough... */
|
||||
};
|
||||
|
||||
struct x86_function {
|
||||
unsigned size;
|
||||
unsigned char *store;
|
||||
unsigned char *csr;
|
||||
unsigned stack_offset;
|
||||
int need_emms;
|
||||
const char *fn;
|
||||
};
|
||||
|
||||
enum x86_reg_file {
|
||||
file_REG32,
|
||||
file_MMX,
|
||||
file_XMM,
|
||||
file_x87
|
||||
};
|
||||
|
||||
/* Values for mod field of modr/m byte
|
||||
*/
|
||||
enum x86_reg_mod {
|
||||
mod_INDIRECT,
|
||||
mod_DISP8,
|
||||
mod_DISP32,
|
||||
mod_REG
|
||||
};
|
||||
|
||||
enum x86_reg_name {
|
||||
reg_AX,
|
||||
reg_CX,
|
||||
reg_DX,
|
||||
reg_BX,
|
||||
reg_SP,
|
||||
reg_BP,
|
||||
reg_SI,
|
||||
reg_DI
|
||||
};
|
||||
|
||||
|
||||
enum x86_cc {
|
||||
cc_O, /* overflow */
|
||||
cc_NO, /* not overflow */
|
||||
cc_NAE, /* not above or equal / carry */
|
||||
cc_AE, /* above or equal / not carry */
|
||||
cc_E, /* equal / zero */
|
||||
cc_NE /* not equal / not zero */
|
||||
};
|
||||
|
||||
enum sse_cc {
|
||||
cc_Equal,
|
||||
cc_LessThan,
|
||||
cc_LessThanEqual,
|
||||
cc_Unordered,
|
||||
cc_NotEqual,
|
||||
cc_NotLessThan,
|
||||
cc_NotLessThanEqual,
|
||||
cc_Ordered
|
||||
};
|
||||
|
||||
#define cc_Z cc_E
|
||||
#define cc_NZ cc_NE
|
||||
|
||||
/* Begin/end/retrieve function creation:
|
||||
*/
|
||||
|
||||
|
||||
void x86_init_func( struct x86_function *p );
|
||||
int x86_init_func_size( struct x86_function *p, unsigned code_size );
|
||||
void x86_release_func( struct x86_function *p );
|
||||
void (*x86_get_func( struct x86_function *p ))( void );
|
||||
|
||||
|
||||
|
||||
/* Create and manipulate registers and regmem values:
|
||||
*/
|
||||
struct x86_reg x86_make_reg( enum x86_reg_file file,
|
||||
enum x86_reg_name idx );
|
||||
|
||||
struct x86_reg x86_make_disp( struct x86_reg reg,
|
||||
int disp );
|
||||
|
||||
struct x86_reg x86_deref( struct x86_reg reg );
|
||||
|
||||
struct x86_reg x86_get_base_reg( struct x86_reg reg );
|
||||
|
||||
|
||||
/* Labels, jumps and fixup:
|
||||
*/
|
||||
unsigned char *x86_get_label( struct x86_function *p );
|
||||
|
||||
void x86_jcc( struct x86_function *p,
|
||||
enum x86_cc cc,
|
||||
unsigned char *label );
|
||||
|
||||
unsigned char *x86_jcc_forward( struct x86_function *p,
|
||||
enum x86_cc cc );
|
||||
|
||||
unsigned char *x86_jmp_forward( struct x86_function *p);
|
||||
|
||||
unsigned char *x86_call_forward( struct x86_function *p);
|
||||
|
||||
void x86_fixup_fwd_jump( struct x86_function *p,
|
||||
unsigned char *fixup );
|
||||
|
||||
void x86_jmp( struct x86_function *p, unsigned char *label );
|
||||
|
||||
/* void x86_call( struct x86_function *p, void (*label)() ); */
|
||||
void x86_call( struct x86_function *p, struct x86_reg reg);
|
||||
|
||||
/* michal:
|
||||
* Temporary. As I need immediate operands, and don't want to mess with the codegen,
|
||||
* I load the immediate into general purpose register and use it.
|
||||
*/
|
||||
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
|
||||
|
||||
|
||||
/* Macro for sse_shufps() and sse2_pshufd():
|
||||
*/
|
||||
#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
|
||||
#define SHUF_NOOP SHUF(0,1,2,3) /* identity swizzle: GET_SHUF(SHUF_NOOP, i) == i */
|
||||
#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
|
||||
|
||||
void mmx_emms( struct x86_function *p );
|
||||
void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
|
||||
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
|
||||
unsigned char shuf );
|
||||
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
|
||||
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src,
|
||||
unsigned char cc );
|
||||
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
|
||||
unsigned char shuf );
|
||||
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
|
||||
|
||||
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_dec( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_inc( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_mul( struct x86_function *p, struct x86_reg src );
|
||||
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_pop( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_push( struct x86_function *p, struct x86_reg reg );
|
||||
void x86_ret( struct x86_function *p );
|
||||
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
|
||||
void x86_sahf( struct x86_function *p );
|
||||
|
||||
void x87_f2xm1( struct x86_function *p );
|
||||
void x87_fabs( struct x86_function *p );
|
||||
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
|
||||
void x87_faddp( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fchs( struct x86_function *p );
|
||||
void x87_fclex( struct x86_function *p );
|
||||
void x87_fcom( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fcomp( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fcos( struct x86_function *p );
|
||||
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
|
||||
void x87_fdivp( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
|
||||
void x87_fdivrp( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fild( struct x86_function *p, struct x86_reg arg );
|
||||
void x87_fist( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fistp( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fld( struct x86_function *p, struct x86_reg arg );
|
||||
void x87_fld1( struct x86_function *p );
|
||||
void x87_fldcw( struct x86_function *p, struct x86_reg arg );
|
||||
void x87_fldl2e( struct x86_function *p );
|
||||
void x87_fldln2( struct x86_function *p );
|
||||
void x87_fldz( struct x86_function *p );
|
||||
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
|
||||
void x87_fmulp( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fnclex( struct x86_function *p );
|
||||
void x87_fprndint( struct x86_function *p );
|
||||
void x87_fscale( struct x86_function *p );
|
||||
void x87_fsin( struct x86_function *p );
|
||||
void x87_fsincos( struct x86_function *p );
|
||||
void x87_fsqrt( struct x86_function *p );
|
||||
void x87_fst( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fstp( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
|
||||
void x87_fsubp( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
|
||||
void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fxch( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fxtract( struct x86_function *p );
|
||||
void x87_fyl2x( struct x86_function *p );
|
||||
void x87_fyl2xp1( struct x86_function *p );
|
||||
void x87_fwait( struct x86_function *p );
|
||||
void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
|
||||
void x87_fucompp( struct x86_function *p );
|
||||
void x87_fucomp( struct x86_function *p, struct x86_reg arg );
|
||||
void x87_fucom( struct x86_function *p, struct x86_reg arg );
|
||||
|
||||
|
||||
|
||||
/* Retrieve a reference to one of the function arguments, taking into
|
||||
* account any push/pop activity. Note - doesn't track explicit
|
||||
* manipulation of ESP by other instructions.
|
||||
*/
|
||||
struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg );
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,123 @@
|
|||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 6.0
|
||||
*
|
||||
* Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* PentiumIII-SIMD (SSE) optimizations contributed by
|
||||
* Andre Werthmann <wertmann@cs.uni-potsdam.de>
|
||||
*/
|
||||
|
||||
#include "main/glheader.h"
|
||||
#include "main/context.h"
|
||||
#include "math/m_xform.h"
|
||||
#include "tnl/t_context.h"
|
||||
|
||||
#include "sse.h"
|
||||
#include "x86_xform.h"
|
||||
|
||||
#ifdef DEBUG_MATH
|
||||
#include "math/m_debug.h"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef USE_SSE_ASM

/*
 * Prototypes for the routines implemented in the SSE assembly files.
 * Everything in this section is only visible when USE_SSE_ASM is defined.
 *
 * NOTE(review): DECLARE_XFORM_GROUP / DECLARE_NORM_GROUP are defined
 * elsewhere; presumably they expand to one prototype per matrix type --
 * confirm against their definition.
 */
DECLARE_XFORM_GROUP( sse, 2 )
DECLARE_XFORM_GROUP( sse, 3 )

#if 1
/*
 * Hand-picked subset of the normal and 4-component point transforms.
 * Some functions are not written in SSE-assembly, because the fpu ones
 * are faster.
 */
extern void _ASMAPI _mesa_sse_transform_normals_no_rot( NORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_rescale_normals( NORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_rescale_normals_no_rot( NORM_ARGS );

extern void _ASMAPI _mesa_sse_transform_points4_general( XFORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_points4_3d( XFORM_ARGS );

/* XXX the SSE identity routine segfaults (see the table setup in
 * _mesa_init_sse_transform_asm), whereas the x86 one works.
 */
extern void _ASMAPI _mesa_sse_transform_points4_identity( XFORM_ARGS );
extern void _ASMAPI _mesa_x86_transform_points4_identity( XFORM_ARGS );
#else
DECLARE_NORM_GROUP( sse )
#endif


extern void _ASMAPI
_mesa_v16_sse_general_xform( GLfloat *first_vert, const GLfloat *m,
                             const GLfloat *src, GLuint src_stride,
                             GLuint count );

extern void _ASMAPI
_mesa_sse_project_vertices( GLfloat *first, GLfloat *last,
                            const GLfloat *m, GLuint stride );

extern void _ASMAPI
_mesa_sse_project_clipped_vertices( GLfloat *first, GLfloat *last,
                                    const GLfloat *m, GLuint stride,
                                    const GLubyte *clipmask );

#endif /* USE_SSE_ASM */
|
||||
|
||||
|
||||
/**
 * Hook the SSE assembly routines into the transform dispatch tables.
 *
 * The body is compiled only when USE_SSE_ASM is defined; otherwise the
 * function is an empty stub.  It fills the size-2 and size-3 point
 * transform groups, three slots of _mesa_transform_tab[4] and three
 * slots of _mesa_normal_tab.  With DEBUG_MATH defined it then runs the
 * transform and normal-transform self tests, tagged "SSE".
 */
void _mesa_init_sse_transform_asm( void )
{
#ifdef USE_SSE_ASM
   ASSIGN_XFORM_GROUP( sse, 2 );
   ASSIGN_XFORM_GROUP( sse, 3 );

#if 1
   /* TODO: Finish these off. */

   /* Normal transforms: only these three variants have SSE versions. */
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] =
      _mesa_sse_transform_rescale_normals_no_rot;
   _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] =
      _mesa_sse_transform_rescale_normals;
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] =
      _mesa_sse_transform_normals_no_rot;

   /* 4-component point transforms. */
   _mesa_transform_tab[4][MATRIX_GENERAL] = _mesa_sse_transform_points4_general;
   _mesa_transform_tab[4][MATRIX_3D] = _mesa_sse_transform_points4_3d;

   /* XXX NOTE: _mesa_sse_transform_points4_identity segfaults with the
    * conformance tests, so the x86 version is installed in its place.
    */
   _mesa_transform_tab[4][MATRIX_IDENTITY] = _mesa_x86_transform_points4_identity;
#else
   ASSIGN_XFORM_GROUP( sse, 4 );
   ASSIGN_NORM_GROUP( sse );
#endif

#ifdef DEBUG_MATH
   _math_test_all_transform_functions( "SSE" );
   _math_test_all_normal_transform_functions( "SSE" );
#endif
#endif /* USE_SSE_ASM */
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* PentiumIII-SIMD (SSE) optimizations contributed by
|
||||
* Andre Werthmann <wertmann@cs.uni-potsdam.de>
|
||||
*/
|
||||
|
||||
/* Include guard for the SSE transform setup header. */
#ifndef __SSE_H__
|
||||
#define __SSE_H__
|
||||
|
||||
/*
 * Installs the SSE transform and normal-transform routines into the
 * dispatch tables.  Takes no arguments and returns nothing.
 */
void _mesa_init_sse_transform_asm( void );
|
||||
|
||||
#endif
|
|
@ -0,0 +1,261 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** TODO:
|
||||
* - insert PREFETCH instructions to avoid cache-misses !
|
||||
* - some more optimizations are possible...
|
||||
* - for 40-50% more performance in the SSE-functions, the
|
||||
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
|
||||
*/
|
||||
|
||||
#ifdef USE_SSE_ASM
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "norm_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/*
 * Operand helpers shared by the normal-transform routines below.
 *
 *   M(i)   - i-th 4-byte element of the array EDX points to
 *   S(i)   - i-th 4-byte element of the current source item   (ESI)
 *   D(i)   - i-th 4-byte element of the current dest item     (EDI)
 *   STRIDE - stride field of the source vector; only meaningful while
 *            ESI still points at the vector structure, i.e. before ESI
 *            is reloaded with the V4F_START pointer.
 *
 * STRIDE uses the named V4F_STRIDE offset, like the V4F_COUNT and
 * V4F_START accesses made on the same structure in this file, instead of
 * a hard-coded byte offset.
 */
#define M(i) REGOFF(i * 4, EDX)
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
#define STRIDE REGOFF(V4F_STRIDE, ESI)
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_rescale_normals_no_rot
 *
 * For each source item writes three floats to the destination:
 *   D(0) = S(0) * m0  * scale
 *   D(1) = S(1) * m5  * scale
 *   D(2) = S(2) * m10 * scale
 * m0/m5/m10 come from the array found at MATRIX_INV inside the matrix
 * argument; scale is ARG_SCALE.
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end), EDX = inverse matrix.  The source count is copied to the
 * destination; if the count is zero the routine returns without
 * writing anything.
 *
 * NOTE(review): the inline comments say GLvector3f, but the fields are
 * addressed with V4F_* offsets -- confirm the actual argument type.
 * NOTE(review): FRAME_OFFSET is presumably consumed by the ARG_* macros
 * to account for the two pushes -- confirm in norm_args.h.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME(_mesa_sse_transform_rescale_normals_no_rot)
|
||||
HIDDEN(_mesa_sse_transform_rescale_normals_no_rot)
|
||||
GLNAME(_mesa_sse_transform_rescale_normals_no_rot):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI )
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */
|
||||
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */
|
||||
|
||||
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */
|
||||
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */
|
||||
|
||||
MOV_L ( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L ( ECX, ECX )
|
||||
JZ( LLBL(K_G3TRNNRR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L ( STRIDE, EAX ) /* stride */
|
||||
MOV_L ( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest-count */
|
||||
|
||||
IMUL_L( CONST(16), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
/* Loop constants: XMM1 = (m0, m5) * scale, XMM0 (low lane) = m10 * scale. */
ALIGNTEXT32
|
||||
MOVSS ( M(0), XMM1 ) /* m0 */
|
||||
MOVSS ( M(5), XMM2 ) /* m5 */
|
||||
UNPCKLPS( XMM2, XMM1 ) /* m5 | m0 */
|
||||
MOVSS ( ARG_SCALE, XMM0 ) /* scale */
|
||||
SHUFPS ( CONST(0x0), XMM0, XMM0 ) /* scale | scale */
|
||||
MULPS ( XMM0, XMM1 ) /* m5*scale | m0*scale */
|
||||
MULSS ( M(10), XMM0 ) /* m10*scale */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_G3TRNNRR_top):
|
||||
MOVLPS ( S(0), XMM2 ) /* uy | ux */
|
||||
MULPS ( XMM1, XMM2 ) /* uy*m5*scale | ux*m0*scale */
|
||||
MOVLPS ( XMM2, D(0) ) /* ->D(1) | D(0) */
|
||||
|
||||
MOVSS ( S(2), XMM2 ) /* uz */
|
||||
MULSS ( XMM0, XMM2 ) /* uz*m10*scale */
|
||||
MOVSS ( XMM2, D(2) ) /* ->D(2) */
|
||||
|
||||
LLBL(K_G3TRNNRR_skip):
|
||||
ADD_L ( CONST(16), EDI )
|
||||
ADD_L ( EAX, ESI )
|
||||
CMP_L ( ECX, EDI )
|
||||
JNE ( LLBL(K_G3TRNNRR_top) )
|
||||
|
||||
LLBL(K_G3TRNNRR_finish):
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_rescale_normals
 *
 * For each source item (ux, uy, uz) writes three floats:
 *   D(0) = (ux*m0 + uy*m1 + uz*m2) * scale
 *   D(1) = (ux*m4 + uy*m5 + uz*m6) * scale
 *   D(2) =  ux*m8*scale + uy*m9*scale + uz*m10*scale
 * The m values come from the array found at MATRIX_INV inside the matrix
 * argument; scale is ARG_SCALE.
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end), EDX = inverse matrix.  XMM0-XMM2, XMM6 and XMM7 hold
 * pre-scaled matrix terms across the loop; XMM3-XMM5 are temporaries.
 * The source count is copied to the destination; if the count is zero
 * the routine returns without writing anything.
 *
 * NOTE(review): the inline comments say GLvector3f, but the fields are
 * addressed with V4F_* offsets -- confirm the actual argument type.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME(_mesa_sse_transform_rescale_normals)
|
||||
HIDDEN(_mesa_sse_transform_rescale_normals)
|
||||
GLNAME(_mesa_sse_transform_rescale_normals):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI )
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */
|
||||
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */
|
||||
|
||||
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */
|
||||
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */
|
||||
|
||||
MOV_L ( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L ( ECX, ECX )
|
||||
JZ( LLBL(K_G3TRNR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L ( STRIDE, EAX ) /* stride */
|
||||
MOV_L ( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest-count */
|
||||
|
||||
IMUL_L( CONST(16), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
/* Pre-scale the matrix terms once, outside the loop. */
ALIGNTEXT32
|
||||
MOVSS ( M(0), XMM0 ) /* m0 */
|
||||
MOVSS ( M(4), XMM1 ) /* m4 */
|
||||
UNPCKLPS( XMM1, XMM0 ) /* m4 | m0 */
|
||||
|
||||
MOVSS ( ARG_SCALE, XMM4 ) /* scale */
|
||||
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* scale | scale */
|
||||
|
||||
MULPS ( XMM4, XMM0 ) /* m4*scale | m0*scale */
|
||||
MOVSS ( M(1), XMM1 ) /* m1 */
|
||||
MOVSS ( M(5), XMM2 ) /* m5 */
|
||||
UNPCKLPS( XMM2, XMM1 ) /* m5 | m1 */
|
||||
MULPS ( XMM4, XMM1 ) /* m5*scale | m1*scale */
|
||||
MOVSS ( M(2), XMM2 ) /* m2 */
|
||||
MOVSS ( M(6), XMM3 ) /* m6 */
|
||||
UNPCKLPS( XMM3, XMM2 ) /* m6 | m2 */
|
||||
MULPS ( XMM4, XMM2 ) /* m6*scale | m2*scale */
|
||||
|
||||
MOVSS ( M(8), XMM6 ) /* m8 */
|
||||
MULSS ( ARG_SCALE, XMM6 ) /* m8*scale */
|
||||
MOVSS ( M(9), XMM7 ) /* m9 */
|
||||
MULSS ( ARG_SCALE, XMM7 ) /* m9*scale */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_G3TRNR_top):
|
||||
MOVSS ( S(0), XMM3 ) /* ux */
|
||||
SHUFPS ( CONST(0x0), XMM3, XMM3 ) /* ux | ux */
|
||||
MULPS ( XMM0, XMM3 ) /* ux*m4 | ux*m0 */
|
||||
MOVSS ( S(1), XMM4 ) /* uy */
|
||||
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* uy | uy */
|
||||
MULPS ( XMM1, XMM4 ) /* uy*m5 | uy*m1 */
|
||||
MOVSS ( S(2), XMM5 ) /* uz */
|
||||
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* uz | uz */
|
||||
MULPS ( XMM2, XMM5 ) /* uz*m6 | uz*m2 */
|
||||
|
||||
ADDPS ( XMM4, XMM3 )
|
||||
ADDPS ( XMM5, XMM3 )
|
||||
MOVLPS ( XMM3, D(0) )
|
||||
|
||||
/* m10*scale is rebuilt on every iteration: XMM0-XMM2, XMM6 and XMM7
 * already hold loop constants and XMM3-XMM5 are the temporaries. */
MOVSS ( M(10), XMM3 ) /* m10 */
|
||||
MULSS ( ARG_SCALE, XMM3 ) /* m10*scale */
|
||||
MULSS ( S(2), XMM3 ) /* m10*scale*uz */
|
||||
MOVSS ( S(1), XMM4 ) /* uy */
|
||||
MULSS ( XMM7, XMM4 ) /* uy*m9*scale */
|
||||
MOVSS ( S(0), XMM5 ) /* ux */
|
||||
MULSS ( XMM6, XMM5 ) /* ux*m8*scale */
|
||||
|
||||
ADDSS ( XMM4, XMM3 )
|
||||
ADDSS ( XMM5, XMM3 )
|
||||
MOVSS ( XMM3, D(2) )
|
||||
|
||||
LLBL(K_G3TRNR_skip):
|
||||
ADD_L ( CONST(16), EDI )
|
||||
ADD_L ( EAX, ESI )
|
||||
CMP_L ( ECX, EDI )
|
||||
JNE ( LLBL(K_G3TRNR_top) )
|
||||
|
||||
LLBL(K_G3TRNR_finish):
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_normals_no_rot
 *
 * For each source item writes three floats to the destination:
 *   D(0) = S(0) * m0
 *   D(1) = S(1) * m5
 *   D(2) = S(2) * m10
 * m0/m5/m10 come from the array found at MATRIX_INV inside the matrix
 * argument.  No scale factor is applied.
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end), EDX = inverse matrix.  The source count is copied to the
 * destination; if the count is zero the routine returns without
 * writing anything.
 *
 * NOTE(review): the inline comments say GLvector3f, but the fields are
 * addressed with V4F_* offsets -- confirm the actual argument type.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME(_mesa_sse_transform_normals_no_rot)
|
||||
HIDDEN(_mesa_sse_transform_normals_no_rot)
|
||||
GLNAME(_mesa_sse_transform_normals_no_rot):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI )
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */
|
||||
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */
|
||||
|
||||
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */
|
||||
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */
|
||||
|
||||
MOV_L ( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L ( ECX, ECX )
|
||||
JZ( LLBL(K_G3TNNRR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L ( STRIDE, EAX ) /* stride */
|
||||
MOV_L ( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest-count */
|
||||
|
||||
IMUL_L( CONST(16), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
/* Loop constants: XMM0 = (m0, m5), XMM1 (low lane) = m10. */
ALIGNTEXT32
|
||||
MOVSS( M(0), XMM0 ) /* m0 */
|
||||
MOVSS( M(5), XMM1 ) /* m5 */
|
||||
UNPCKLPS( XMM1, XMM0 ) /* m5 | m0 */
|
||||
MOVSS( M(10), XMM1 ) /* m10 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_G3TNNRR_top):
|
||||
MOVLPS( S(0), XMM2 ) /* uy | ux */
|
||||
MULPS( XMM0, XMM2 ) /* uy*m5 | ux*m0 */
|
||||
MOVLPS( XMM2, D(0) )
|
||||
|
||||
MOVSS( S(2), XMM2 ) /* uz */
|
||||
MULSS( XMM1, XMM2 ) /* uz*m10 */
|
||||
MOVSS( XMM2, D(2) )
|
||||
|
||||
LLBL(K_G3TNNRR_skip):
|
||||
ADD_L ( CONST(16), EDI )
|
||||
ADD_L ( EAX, ESI )
|
||||
CMP_L ( ECX, EDI )
|
||||
JNE ( LLBL(K_G3TNNRR_top) )
|
||||
|
||||
LLBL(K_G3TNNRR_finish):
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
#endif
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,446 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** TODO:
|
||||
* - insert PREFETCH instructions to avoid cache-misses !
|
||||
* - some more optimizations are possible...
|
||||
* - for 40-50% more performance in the SSE-functions, the
|
||||
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
|
||||
*/
|
||||
|
||||
#ifdef USE_SSE_ASM
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/*
 * Operand helpers for the routines below: the i-th 4-byte element of
 * the current source item (ESI), of the current dest item (EDI) and of
 * the matrix (EDX).
 */
#define S(i) REGOFF(i * 4, ESI)
|
||||
#define D(i) REGOFF(i * 4, EDI)
|
||||
#define M(i) REGOFF(i * 4, EDX)
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_general
 *
 * For each source item with x = S(0) writes four floats:
 *   D(k) = x * m[k] + m[12 + k]      for k = 0..3
 * The destination vector gets VEC_SIZE_4 OR-ed into its flags, size 4
 * and the source count.  Returns without touching the destination when
 * the source count is zero.
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end), EDX = matrix.
 *
 * NOTE(review): the matrix is loaded with MOVAPS, so it presumably has
 * to be 16-byte aligned; the result is stored with MOVUPS.
 * NOTE(review): the "+8" on OFFSET_SOURCE/OFFSET_DEST presumably skips
 * the two registers pushed above -- confirm in xform_args.h.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points1_general)
|
||||
HIDDEN( _mesa_sse_transform_points1_general )
|
||||
GLNAME( _mesa_sse_transform_points1_general ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI )
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
CMP_L( CONST(0), ECX ) /* count == 0 ? */
|
||||
JE( LLBL(K_GTP1GR_finish) ) /* yes -> nothing to do. */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */
|
||||
MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP1GR_top):
|
||||
MOVSS( S(0), XMM2 ) /* ox */
|
||||
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */
|
||||
MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
|
||||
ADDPS( XMM1, XMM2 ) /* + | + | + | + */
|
||||
MOVUPS( XMM2, D(0) )
|
||||
|
||||
LLBL(K_GTP1GR_skip):
|
||||
ADD_L ( CONST(16), EDI )
|
||||
ADD_L ( EAX, ESI )
|
||||
CMP_L ( ECX, EDI )
|
||||
JNE ( LLBL(K_GTP1GR_top) )
|
||||
|
||||
LLBL(K_GTP1GR_finish):
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_identity
 *
 * Copies the first 4-byte element of every source item to the
 * destination (D(0) = S(0)) through EDX; no matrix is read.  The
 * destination vector gets VEC_SIZE_1 OR-ed into its flags, size 1 and
 * the source count.  Returns without touching the destination when the
 * source count is zero.  The copy loop is skipped when the source and
 * destination start pointers are equal (the dest metadata has already
 * been updated at that point).
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points1_identity)
|
||||
HIDDEN(_mesa_sse_transform_points1_identity)
|
||||
GLNAME( _mesa_sse_transform_points1_identity ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI )
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX)
|
||||
JZ( LLBL(K_GTP1IR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(1), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
/* Same start pointer for source and dest: nothing to copy. */
CMP_L( ESI, EDI )
|
||||
JE( LLBL(K_GTP1IR_finish) )
|
||||
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP1IR_top):
|
||||
MOV_L( S(0), EDX )
|
||||
MOV_L( EDX, D(0) )
|
||||
|
||||
LLBL(K_GTP1IR_skip):
|
||||
ADD_L ( CONST(16), EDI )
|
||||
ADD_L ( EAX, ESI )
|
||||
CMP_L ( ECX, EDI )
|
||||
JNE ( LLBL(K_GTP1IR_top) )
|
||||
|
||||
LLBL(K_GTP1IR_finish):
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_3d_no_rot
 *
 * For each source item with x = S(0) writes three floats:
 *   D(0) = x * m0 + m12
 *   D(1) = m13
 *   D(2) = m14
 * The destination vector gets VEC_SIZE_3 OR-ed into its flags, size 3
 * and the source count.  Returns without touching the destination when
 * the source count is zero.
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end), EDX = matrix.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot)
|
||||
HIDDEN(_mesa_sse_transform_points1_3d_no_rot)
|
||||
GLNAME(_mesa_sse_transform_points1_3d_no_rot):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX)
|
||||
JZ( LLBL(K_GTP13DNRR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVSS( M(0), XMM0 ) /* m0 */
|
||||
MOVSS( M(12), XMM1 ) /* m12 */
|
||||
MOVSS( M(13), XMM2 ) /* m13 */
|
||||
MOVSS( M(14), XMM3 ) /* m14 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP13DNRR_top):
|
||||
MOVSS( S(0), XMM4 ) /* ox */
|
||||
MULSS( XMM0, XMM4 ) /* ox*m0 */
|
||||
ADDSS( XMM1, XMM4 ) /* ox*m0+m12 */
|
||||
MOVSS( XMM4, D(0) )
|
||||
|
||||
/* D(1) and D(2) do not depend on the input: store m13 and m14 as-is. */
MOVSS( XMM2, D(1) )
|
||||
MOVSS( XMM3, D(2) )
|
||||
|
||||
LLBL(K_GTP13DNRR_skip):
|
||||
ADD_L ( CONST(16), EDI )
|
||||
ADD_L ( EAX, ESI )
|
||||
CMP_L ( ECX, EDI )
|
||||
JNE ( LLBL(K_GTP13DNRR_top) )
|
||||
|
||||
LLBL(K_GTP13DNRR_finish):
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_perspective
 *
 * For each source item with x = S(0) writes four floats:
 *   D(0) = x * m0
 *   D(1) = 0
 *   D(2) = m14
 *   D(3) = 0
 * The destination vector gets VEC_SIZE_4 OR-ed into its flags, size 4
 * and the source count.  Returns without touching the destination when
 * the source count is zero.
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end), EDX = matrix, XMM0 = zero.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points1_perspective)
|
||||
HIDDEN(_mesa_sse_transform_points1_perspective)
|
||||
GLNAME(_mesa_sse_transform_points1_perspective):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI )
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX)
|
||||
JZ( LLBL(K_GTP13PR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
|
||||
ALIGNTEXT32
|
||||
XORPS( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */
|
||||
MOVSS( M(0), XMM1 ) /* m0 */
|
||||
MOVSS( M(14), XMM2 ) /* m14 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP13PR_top):
|
||||
MOVSS( S(0), XMM3 ) /* ox */
|
||||
MULSS( XMM1, XMM3 ) /* ox*m0 */
|
||||
MOVSS( XMM3, D(0) ) /* ox*m0->D(0) */
|
||||
MOVSS( XMM2, D(2) ) /* m14->D(2) */
|
||||
|
||||
/* D(1) and D(3) are written as zero. */
MOVSS( XMM0, D(1) )
|
||||
MOVSS( XMM0, D(3) )
|
||||
|
||||
LLBL(K_GTP13PR_skip):
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(K_GTP13PR_top) )
|
||||
|
||||
LLBL(K_GTP13PR_finish):
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_2d
 *
 * For each source item with x = S(0) writes two floats:
 *   D(0) = x * m0 + m12
 *   D(1) = x * m1 + m13
 * The destination vector gets VEC_SIZE_2 OR-ed into its flags, size 2
 * and the source count.  Returns without touching the destination when
 * the source count is zero.
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end), EDX = matrix.  Only the low two lanes of XMM0-XMM2 are
 * loaded and stored (MOVLPS); the upper lanes are never written out.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points1_2d)
|
||||
HIDDEN(_mesa_sse_transform_points1_2d)
|
||||
GLNAME(_mesa_sse_transform_points1_2d):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX)
|
||||
JZ( LLBL(K_GTP13P2DR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVLPS( M(0), XMM0 ) /* m1 | m0 */
|
||||
MOVLPS( M(12), XMM1 ) /* m13 | m12 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP13P2DR_top):
|
||||
MOVSS( S(0), XMM2 ) /* ox */
|
||||
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */
|
||||
MULPS( XMM0, XMM2 ) /* - | - | ox*m1 | ox*m0 */
|
||||
ADDPS( XMM1, XMM2 ) /* - | - | ox*m1+m13 | ox*m0+m12 */
|
||||
MOVLPS( XMM2, D(0) )
|
||||
|
||||
LLBL(K_GTP13P2DR_skip):
|
||||
ADD_L ( CONST(16), EDI )
|
||||
ADD_L ( EAX, ESI )
|
||||
CMP_L ( ECX, EDI )
|
||||
JNE ( LLBL(K_GTP13P2DR_top) )
|
||||
|
||||
LLBL(K_GTP13P2DR_finish):
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_2d_no_rot
 *
 * For each source item with x = S(0) writes two floats:
 *   D(0) = x * m0 + m12
 *   D(1) = m13
 * The destination vector gets VEC_SIZE_2 OR-ed into its flags, size 2
 * and the source count.  Returns without touching the destination when
 * the source count is zero.
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end), EDX = matrix.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot)
|
||||
HIDDEN(_mesa_sse_transform_points1_2d_no_rot)
|
||||
GLNAME(_mesa_sse_transform_points1_2d_no_rot):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX)
|
||||
JZ( LLBL(K_GTP13P2DNRR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVSS( M(0), XMM0 ) /* m0 */
|
||||
MOVSS( M(12), XMM1 ) /* m12 */
|
||||
MOVSS( M(13), XMM2 ) /* m13 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP13P2DNRR_top):
|
||||
MOVSS( S(0), XMM3 ) /* ox */
|
||||
MULSS( XMM0, XMM3 ) /* ox*m0 */
|
||||
ADDSS( XMM1, XMM3 ) /* ox*m0+m12 */
|
||||
MOVSS( XMM3, D(0) )
|
||||
/* D(1) does not depend on the input: store m13 as-is. */
MOVSS( XMM2, D(1) )
|
||||
|
||||
LLBL(K_GTP13P2DNRR_skip):
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(K_GTP13P2DNRR_top) )
|
||||
|
||||
LLBL(K_GTP13P2DNRR_finish):
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points1_3d
 *
 * For each source item with x = S(0) writes three floats:
 *   D(0) = x * m0 + m12
 *   D(1) = x * m1 + m13
 *   D(2) = x * m2 + m14
 * The destination vector gets VEC_SIZE_3 OR-ed into its flags, size 3
 * and the source count.  Returns without touching the destination when
 * the source count is zero.
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end), EDX = matrix.
 *
 * NOTE(review): the matrix is loaded with MOVAPS, so it presumably has
 * to be 16-byte aligned.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points1_3d)
|
||||
HIDDEN(_mesa_sse_transform_points1_3d)
|
||||
GLNAME(_mesa_sse_transform_points1_3d):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX)
|
||||
JZ( LLBL(K_GTP13P3DR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */
|
||||
MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP13P3DR_top):
|
||||
MOVSS( S(0), XMM2 ) /* ox */
|
||||
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */
|
||||
MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
|
||||
ADDPS( XMM1, XMM2 ) /* +m15 | +m14 | +m13 | +m12 */
|
||||
MOVLPS( XMM2, D(0) ) /* - | - | ->D(1)| ->D(0)*/
|
||||
/* Bring the third lane down to lane 0 so MOVSS can store it as D(2). */
UNPCKHPS( XMM2, XMM2 ) /* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */
|
||||
MOVSS( XMM2, D(2) )
|
||||
|
||||
LLBL(K_GTP13P3DR_skip):
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(K_GTP13P3DR_top) )
|
||||
|
||||
LLBL(K_GTP13P3DR_finish):
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
#endif
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,466 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** TODO:
|
||||
* - insert PREFETCH instructions to avoid cache-misses !
|
||||
* - some more optimizations are possible...
|
||||
* - for 40-50% more performance in the SSE-functions, the
|
||||
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
|
||||
*/
|
||||
|
||||
#ifdef USE_SSE_ASM
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/*
 * Operand helpers for the routines below: the i-th 4-byte element of
 * the current source item (ESI), of the current dest item (EDI) and of
 * the matrix (EDX).
 */
#define S(i) REGOFF(i * 4, ESI)
|
||||
#define D(i) REGOFF(i * 4, EDI)
|
||||
#define M(i) REGOFF(i * 4, EDX)
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_general
 *
 * For each source item with x = S(0), y = S(1) writes four floats:
 *   D(k) = x * m[k] + y * m[4 + k] + m[12 + k]      for k = 0..3
 * The destination vector gets VEC_SIZE_4 OR-ed into its flags, size 4
 * and the source count.  Returns without touching the destination when
 * the source count is zero.
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end), EDX = matrix.
 *
 * NOTE(review): both the matrix loads and the destination store use
 * MOVAPS, so the matrix and the destination data presumably have to be
 * 16-byte aligned -- confirm against the allocators.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points2_general)
|
||||
HIDDEN (_mesa_sse_transform_points2_general)
|
||||
GLNAME( _mesa_sse_transform_points2_general ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI )
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(K_GTP2GR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */
|
||||
MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */
|
||||
MOVAPS( M(12), XMM2 ) /* m15 | m14 | m13 | m12 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP2GR_top):
|
||||
MOVSS( S(0), XMM3 ) /* ox */
|
||||
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox | ox */
|
||||
MULPS( XMM0, XMM3 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
|
||||
MOVSS( S(1), XMM4 ) /* oy */
|
||||
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy | oy */
|
||||
MULPS( XMM1, XMM4 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
|
||||
|
||||
/* Sum the x and y terms, then add the m12..m15 row. */
ADDPS( XMM4, XMM3 )
|
||||
ADDPS( XMM2, XMM3 )
|
||||
MOVAPS( XMM3, D(0) )
|
||||
|
||||
LLBL(K_GTP2GR_skip):
|
||||
ADD_L ( CONST(16), EDI )
|
||||
ADD_L ( EAX, ESI )
|
||||
CMP_L ( ECX, EDI )
|
||||
JNE ( LLBL(K_GTP2GR_top) )
|
||||
|
||||
LLBL(K_GTP2GR_finish):
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_identity
 *
 * Copies the first two 4-byte elements of every source item to the
 * destination (D(0) = S(0), D(1) = S(1)) through EDX; no matrix is
 * read.  The destination vector gets VEC_SIZE_2 OR-ed into its flags,
 * size 2 and the source count.  Returns without touching the
 * destination when the source count is zero.  The copy loop is skipped
 * when the source and destination start pointers are equal (the dest
 * metadata has already been updated at that point).
 *
 * Registers: ESI = source cursor (stepped by the stride kept in EAX),
 * EDI = dest cursor (stepped by 16 bytes), ECX = dest start + count*16
 * (loop end).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points2_identity)
|
||||
HIDDEN(_mesa_sse_transform_points2_identity)
|
||||
GLNAME( _mesa_sse_transform_points2_identity ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI )
|
||||
PUSH_L ( EDI )
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX)
|
||||
JZ( LLBL(K_GTP2IR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
/* Same start pointer for source and dest: nothing to copy. */
CMP_L( ESI, EDI )
|
||||
JE( LLBL(K_GTP2IR_finish) )
|
||||
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP2IR_top):
|
||||
MOV_L ( S(0), EDX )
|
||||
MOV_L ( EDX, D(0) )
|
||||
MOV_L ( S(1), EDX )
|
||||
MOV_L ( EDX, D(1) )
|
||||
|
||||
LLBL(K_GTP2IR_skip):
|
||||
ADD_L ( CONST(16), EDI )
|
||||
ADD_L ( EAX, ESI )
|
||||
CMP_L ( ECX, EDI )
|
||||
JNE ( LLBL(K_GTP2IR_top) )
|
||||
|
||||
LLBL(K_GTP2IR_finish):
|
||||
POP_L ( EDI )
|
||||
POP_L ( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_3d_no_rot
 *
 * For each 2-component source vertex (ox, oy) writes:
 *    D(0) = ox*m0 + m12
 *    D(1) = oy*m5 + m13
 *    D(2) = m14
 * D(3) is not written.
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry); the matrix pointer comes from ARG_MATRIX.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_3, count = source count, size = 3.
 * Returns early when count is zero.
 *
 * Loop-invariant registers: XMM1 = m5 | m0, XMM2 = m13 | m12, XMM3 = m14.
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot)
|
||||
HIDDEN(_mesa_sse_transform_points2_3d_no_rot)
|
||||
GLNAME(_mesa_sse_transform_points2_3d_no_rot):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX) /* count == 0 ? */
|
||||
JZ( LLBL(K_GTP23DNRR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
XORPS( XMM0, XMM0 ) /* clean the working register */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
|
||||
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
|
||||
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
|
||||
MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */
|
||||
MOVSS ( M(14), XMM3 ) /* - | - | - | m14 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP23DNRR_top):
|
||||
MOVLPS ( S(0), XMM0 ) /* - | - | oy | ox */
|
||||
MULPS ( XMM1, XMM0 ) /* - | - | oy*m5 | ox*m0 */
|
||||
ADDPS ( XMM2, XMM0 ) /* - | - | +m13 | +m12 */
|
||||
MOVLPS ( XMM0, D(0) ) /* -> D(1) | -> D(0) */
|
||||
|
||||
MOVSS ( XMM3, D(2) ) /* m14 -> D(2) */
|
||||
|
||||
LLBL(K_GTP23DNRR_skip):
|
||||
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L ( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE ( LLBL(K_GTP23DNRR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTP23DNRR_finish):
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_perspective
 *
 * For each 2-component source vertex (ox, oy) writes:
 *    D(0) = ox*m0
 *    D(1) = oy*m5
 *    D(2) = m14
 *    D(3) = 0
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry); the matrix pointer comes from ARG_MATRIX.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_4, count = source count, size = 4.
 * Returns early when count is zero.
 *
 * Loop-invariant registers: XMM1 = m5 | m0, XMM3 = m14, XMM0 = 0.
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points2_perspective)
|
||||
HIDDEN(_mesa_sse_transform_points2_perspective)
|
||||
GLNAME(_mesa_sse_transform_points2_perspective):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L ( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX) /* count == 0 ? */
|
||||
JZ( LLBL(K_GTP23PR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
|
||||
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
|
||||
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
|
||||
MOVSS ( M(14), XMM3 ) /* m14 */
|
||||
XORPS ( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP23PR_top):
|
||||
MOVLPS( S(0), XMM4 ) /* oy | ox */
|
||||
MULPS( XMM1, XMM4 ) /* oy*m5 | ox*m0 */
|
||||
MOVLPS( XMM4, D(0) ) /* ->D(1) | ->D(0) */
|
||||
MOVSS( XMM3, D(2) ) /* m14 ->D(2) */
|
||||
MOVSS( XMM0, D(3) ) /* 0 ->D(3) */
|
||||
|
||||
LLBL(K_GTP23PR_skip):
|
||||
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE( LLBL(K_GTP23PR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTP23PR_finish):
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_2d
 *
 * For each 2-component source vertex (ox, oy) writes:
 *    D(0) = ox*m0 + oy*m4 + m12
 *    D(1) = ox*m1 + oy*m5 + m13
 * D(2) and D(3) are not written.
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry); the matrix pointer comes from ARG_MATRIX.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_2, count = source count, size = 2.
 * Returns early when count is zero.
 *
 * Loop-invariant registers (low two lanes): XMM0 = m1 | m0, XMM1 = m5 | m4,
 * XMM2 = m13 | m12.  The packed ops run on all four lanes, but only the low
 * two lanes are stored.
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points2_2d)
|
||||
HIDDEN(_mesa_sse_transform_points2_2d)
|
||||
GLNAME(_mesa_sse_transform_points2_2d):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX) /* count == 0 ? */
|
||||
JZ( LLBL(K_GTP23P2DR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVLPS( M(0), XMM0 ) /* m1 | m0 */
|
||||
MOVLPS( M(4), XMM1 ) /* m5 | m4 */
|
||||
MOVLPS( M(12), XMM2 ) /* m13 | m12 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP23P2DR_top):
|
||||
MOVSS( S(0), XMM3 ) /* ox */
|
||||
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox */
|
||||
MULPS( XMM0, XMM3 ) /* ox*m1 | ox*m0 */
|
||||
|
||||
MOVSS( S(1), XMM4 ) /* oy */
|
||||
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy */
|
||||
MULPS( XMM1, XMM4 ) /* oy*m5 | oy*m4 */
|
||||
|
||||
ADDPS( XMM4, XMM3 ) /* ox*m1+oy*m5 | ox*m0+oy*m4 */
|
||||
ADDPS( XMM2, XMM3 ) /* +m13 | +m12 */
|
||||
MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */
|
||||
|
||||
LLBL(K_GTP23P2DR_skip):
|
||||
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L ( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE ( LLBL(K_GTP23P2DR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTP23P2DR_finish):
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_2d_no_rot
 *
 * For each 2-component source vertex (ox, oy) writes:
 *    D(0) = ox*m0 + m12
 *    D(1) = oy*m5 + m13
 * D(2) and D(3) are not written.
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry); the matrix pointer comes from ARG_MATRIX.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_2, count = source count, size = 2.
 * Returns early when count is zero.
 *
 * Loop-invariant registers (low two lanes): XMM1 = m5 | m0, XMM2 = m13 | m12.
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot)
|
||||
HIDDEN(_mesa_sse_transform_points2_2d_no_rot)
|
||||
GLNAME(_mesa_sse_transform_points2_2d_no_rot):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX) /* count == 0 ? */
|
||||
JZ( LLBL(K_GTP23P2DNRR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVSS ( M(0), XMM1 ) /* m0 */
|
||||
MOVSS ( M(5), XMM2 ) /* m5 */
|
||||
UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */
|
||||
MOVLPS ( M(12), XMM2 ) /* m13 | m12 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP23P2DNRR_top):
|
||||
MOVLPS( S(0), XMM0 ) /* oy | ox */
|
||||
MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */
|
||||
ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */
|
||||
MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */
|
||||
|
||||
LLBL(K_GTP23P2DNRR_skip):
|
||||
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE( LLBL(K_GTP23P2DNRR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTP23P2DNRR_finish):
|
||||
POP_L( EDI )
|
||||
|
||||
POP_L( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points2_3d
 *
 * For each 2-component source vertex (ox, oy) writes:
 *    D(0) = ox*m0 + oy*m4 + m12
 *    D(1) = ox*m1 + oy*m5 + m13
 *    D(2) = ox*m2 + oy*m6 + m14
 * D(3) is not written.
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry); the matrix pointer comes from ARG_MATRIX.
 * The matrix rows are loaded with MOVAPS, which requires the memory operand
 * to be 16-byte aligned.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_3, count = source count, size = 3.
 * Returns early when count is zero.
 *
 * Loop-invariant registers: XMM0 = M(0..3), XMM1 = M(4..7), XMM2 = M(12..15).
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points2_3d)
|
||||
HIDDEN(_mesa_sse_transform_points2_3d)
|
||||
GLNAME(_mesa_sse_transform_points2_3d):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX) /* count == 0 ? */
|
||||
JZ( LLBL(K_GTP23P3DR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */
|
||||
MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */
|
||||
MOVAPS( M(12), XMM2 ) /* m14 | m13 | m12 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP23P3DR_top):
|
||||
MOVSS( S(0), XMM3 ) /* ox */
|
||||
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox */
|
||||
MULPS( XMM0, XMM3 ) /* ox*m2 | ox*m1 | ox*m0 */
|
||||
|
||||
MOVSS( S(1), XMM4 ) /* oy */
|
||||
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy */
|
||||
MULPS( XMM1, XMM4 ) /* oy*m6 | oy*m5 | oy*m4 */
|
||||
|
||||
ADDPS( XMM4, XMM3 ) /* sum of the ox and oy products */
|
||||
ADDPS( XMM2, XMM3 ) /* +m14 | +m13 | +m12 */
|
||||
|
||||
MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */
|
||||
UNPCKHPS( XMM3, XMM3 ) /* bring lane 2 down to lane 0 */
|
||||
MOVSS( XMM3, D(2) ) /* ->D(2) */
|
||||
|
||||
LLBL(K_GTP23P3DR_skip):
|
||||
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE( LLBL(K_GTP23P3DR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTP23P3DR_finish):
|
||||
POP_L( EDI )
|
||||
|
||||
POP_L( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
#endif
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,512 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** TODO:
|
||||
* - insert PREFETCH instructions to avoid cache-misses !
|
||||
* - some more optimizations are possible...
|
||||
* - for 40-50% more performance in the SSE-functions, the
|
||||
* data (trans-matrix, src_vert, dst_vert) needs to be 16-byte aligned!
|
||||
*/
|
||||
|
||||
#ifdef USE_SSE_ASM
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/*
 * Operand shorthands for the routines in this file: the i-th 32-bit element
 * (byte offset i * 4) relative to ESI (S: current source vertex),
 * EDI (D: current dest vertex) and EDX (M: matrix).
 * The argument is pasted unparenthesized, so pass a plain constant.
 */
#define S(i) REGOFF(i * 4, ESI)
|
||||
#define D(i) REGOFF(i * 4, EDI)
|
||||
#define M(i) REGOFF(i * 4, EDX)
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_general
 *
 * For each 3-component source vertex (ox, oy, oz) writes all four dest
 * components:
 *    D(k) = ox*m[k] + oy*m[4+k] + oz*m[8+k] + m[12+k]      for k = 0..3
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry); the matrix pointer comes from ARG_MATRIX.
 * Both the matrix loads and the dest store use MOVAPS, which requires the
 * memory operand to be 16-byte aligned.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_4, count = source count, size = 4.
 * Returns early when count is zero.
 *
 * Loop-invariant registers: XMM0..XMM3 = the four matrix rows M(0..3),
 * M(4..7), M(8..11), M(12..15).
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points3_general)
|
||||
HIDDEN(_mesa_sse_transform_points3_general)
|
||||
GLNAME( _mesa_sse_transform_points3_general ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L ( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
CMP_L ( CONST(0), ECX ) /* count == 0 ? */
|
||||
JE ( LLBL(K_GTPGR_finish) ) /* yes -> nothing to do. */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVAPS ( REGOFF(0, EDX), XMM0 ) /* m3 | m2 | m1 | m0 */
|
||||
MOVAPS ( REGOFF(16, EDX), XMM1 ) /* m7 | m6 | m5 | m4 */
|
||||
MOVAPS ( REGOFF(32, EDX), XMM2 ) /* m11 | m10 | m9 | m8 */
|
||||
MOVAPS ( REGOFF(48, EDX), XMM3 ) /* m15 | m14 | m13 | m12 */
|
||||
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTPGR_top):
|
||||
MOVSS ( REGOFF(0, ESI), XMM4 ) /* | | | ox */
|
||||
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */
|
||||
MOVSS ( REGOFF(4, ESI), XMM5 ) /* | | | oy */
|
||||
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */
|
||||
MOVSS ( REGOFF(8, ESI), XMM6 ) /* | | | oz */
|
||||
SHUFPS ( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */
|
||||
|
||||
MULPS ( XMM0, XMM4 ) /* m3*ox | m2*ox | m1*ox | m0*ox */
|
||||
MULPS ( XMM1, XMM5 ) /* m7*oy | m6*oy | m5*oy | m4*oy */
|
||||
MULPS ( XMM2, XMM6 ) /* m11*oz | m10*oz | m9*oz | m8*oz */
|
||||
|
||||
ADDPS ( XMM5, XMM4 ) /* ox terms + oy terms */
|
||||
ADDPS ( XMM6, XMM4 ) /* + oz terms */
|
||||
ADDPS ( XMM3, XMM4 ) /* +m15 | +m14 | +m13 | +m12 */
|
||||
|
||||
MOVAPS ( XMM4, REGOFF(0, EDI) ) /* -> D(3) | D(2) | D(1) | D(0) */
|
||||
|
||||
LLBL(K_GTPGR_skip):
|
||||
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L ( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE ( LLBL(K_GTPGR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTPGR_finish):
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_identity
 *
 * Identity transform for 3-component points: dwords 0..2 (x, y, z) of each
 * source vertex are copied unchanged to the matching dest vertex
 * (x and y as one 64-bit MOVLPS, z as a MOVSS).  D(3) is not written.
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry).  No matrix pointer is loaded.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_3, count = source count, size = 3.
 * The routine returns early when count is zero, and skips the copy loop when
 * the source and dest start pointers are equal (the bookkeeping above has
 * already been written at that point).
 *
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points3_identity)
|
||||
HIDDEN(_mesa_sse_transform_points3_identity)
|
||||
GLNAME( _mesa_sse_transform_points3_identity ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L ( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX) /* count == 0 ? */
|
||||
JZ( LLBL(K_GTPIR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
CMP_L( ESI, EDI ) /* source start == dest start ? */
|
||||
JE( LLBL(K_GTPIR_finish) ) /* yes -> nothing to copy */
|
||||
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTPIR_top):
|
||||
MOVLPS ( S(0), XMM0 ) /* oy | ox */
|
||||
MOVLPS ( XMM0, D(0) ) /* ->D(1) | ->D(0) */
|
||||
MOVSS ( S(2), XMM0 ) /* oz */
|
||||
MOVSS ( XMM0, D(2) ) /* ->D(2) */
|
||||
|
||||
LLBL(K_GTPIR_skip):
|
||||
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L ( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE ( LLBL(K_GTPIR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTPIR_finish):
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_3d_no_rot
 *
 * For each 3-component source vertex (s0, s1, s2) writes:
 *    D(0) = s0*m0  + m12
 *    D(1) = s1*m5  + m13
 *    D(2) = s2*m10 + m14
 * D(3) is not written.
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry); the matrix pointer comes from ARG_MATRIX.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_3, count = source count, size = 3.
 * Returns early when count is zero.
 *
 * Loop-invariant registers: XMM1 = m5 | m0, XMM2 = m13 | m12, XMM3 = m10,
 * XMM4 = m14.
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points3_3d_no_rot)
|
||||
HIDDEN(_mesa_sse_transform_points3_3d_no_rot)
|
||||
GLNAME(_mesa_sse_transform_points3_3d_no_rot):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX) /* count == 0 ? */
|
||||
JZ( LLBL(K_GTP3DNRR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
XORPS( XMM0, XMM0 ) /* clean the working register */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
|
||||
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
|
||||
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
|
||||
MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */
|
||||
MOVSS ( M(10), XMM3 ) /* - | - | - | m10 */
|
||||
MOVSS ( M(14), XMM4 ) /* - | - | - | m14 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP3DNRR_top):
|
||||
|
||||
MOVLPS ( S(0), XMM0 ) /* - | - | s1 | s0 */
|
||||
MULPS ( XMM1, XMM0 ) /* - | - | s1*m5 | s0*m0 */
|
||||
ADDPS ( XMM2, XMM0 ) /* - | - | +m13 | +m12 */
|
||||
MOVLPS ( XMM0, D(0) ) /* -> D(1) | -> D(0) */
|
||||
|
||||
MOVSS ( S(2), XMM0 ) /* sz */
|
||||
MULSS ( XMM3, XMM0 ) /* sz*m10 */
|
||||
ADDSS ( XMM4, XMM0 ) /* +m14 */
|
||||
MOVSS ( XMM0, D(2) ) /* -> D(2) */
|
||||
|
||||
LLBL(K_GTP3DNRR_skip):
|
||||
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L ( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE ( LLBL(K_GTP3DNRR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTP3DNRR_finish):
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_perspective
 *
 * For each 3-component source vertex (ox, oy, oz) writes:
 *    D(0) = ox*m0  + oz*m8
 *    D(1) = oy*m5  + oz*m9
 *    D(2) = oz*m10 + m14
 *    D(3) = -oz
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry); the matrix pointer comes from ARG_MATRIX.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_4, count = source count, size = 4.
 * Returns early when count is zero.
 *
 * Loop-invariant registers: XMM1 = m5 | m0, XMM2 = m9 | m8, XMM3 = m10,
 * XMM4 = m14, XMM6 = 0.
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points3_perspective)
|
||||
HIDDEN(_mesa_sse_transform_points3_perspective)
|
||||
GLNAME(_mesa_sse_transform_points3_perspective):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L ( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L ( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX) /* count == 0 ? */
|
||||
JZ( LLBL(K_GTP3PR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
|
||||
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
|
||||
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
|
||||
MOVLPS ( M(8), XMM2 ) /* - | - | m9 | m8 */
|
||||
MOVSS ( M(10), XMM3 ) /* m10 */
|
||||
MOVSS ( M(14), XMM4 ) /* m14 */
|
||||
XORPS ( XMM6, XMM6 ) /* 0 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP3PR_top):
|
||||
MOVLPS ( S(0), XMM0 ) /* oy | ox */
|
||||
MULPS ( XMM1, XMM0 ) /* oy*m5 | ox*m0 */
|
||||
MOVSS ( S(2), XMM5 ) /* oz */
|
||||
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* oz | oz */
|
||||
MULPS ( XMM2, XMM5 ) /* oz*m9 | oz*m8 */
|
||||
ADDPS ( XMM5, XMM0 ) /* oy*m5+oz*m9 | ox*m0+oz*m8 */
|
||||
MOVLPS ( XMM0, D(0) ) /* ->D(1) | ->D(0) */
|
||||
|
||||
MOVSS ( S(2), XMM0 ) /* oz */
|
||||
MULSS ( XMM3, XMM0 ) /* oz*m10 */
|
||||
ADDSS ( XMM4, XMM0 ) /* +m14 */
|
||||
MOVSS ( XMM0, D(2) ) /* ->D(2) */
|
||||
|
||||
MOVSS ( S(2), XMM0 ) /* oz */
|
||||
MOVSS ( XMM6, XMM5 ) /* 0 */
|
||||
SUBPS ( XMM0, XMM5 ) /* 0 - oz = -oz in the low lane (only that lane is stored) */
|
||||
MOVSS ( XMM5, D(3) ) /* ->D(3) */
|
||||
|
||||
LLBL(K_GTP3PR_skip):
|
||||
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE( LLBL(K_GTP3PR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTP3PR_finish):
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_2d
 *
 * For each 3-component source vertex (ox, oy, oz) writes:
 *    D(0) = ox*m0 + oy*m4 + m12
 *    D(1) = ox*m1 + oy*m5 + m13
 *    D(2) = oz                        (copied unchanged)
 * D(3) is not written.
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry); the matrix pointer comes from ARG_MATRIX.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_3, count = source count, size = 3.
 * Returns early when count is zero.
 *
 * Loop-invariant registers (low two lanes): XMM0 = m1 | m0, XMM1 = m5 | m4,
 * XMM2 = m13 | m12.
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points3_2d)
|
||||
HIDDEN(_mesa_sse_transform_points3_2d)
|
||||
GLNAME(_mesa_sse_transform_points3_2d):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX) /* count == 0 ? */
|
||||
JZ( LLBL(K_GTP3P2DR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVLPS( M(0), XMM0 ) /* m1 | m0 */
|
||||
MOVLPS( M(4), XMM1 ) /* m5 | m4 */
|
||||
MOVLPS( M(12), XMM2 ) /* m13 | m12 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP3P2DR_top):
|
||||
MOVSS ( S(0), XMM3 ) /* ox */
|
||||
SHUFPS ( CONST(0x0), XMM3, XMM3 ) /* ox | ox */
|
||||
MULPS ( XMM0, XMM3 ) /* ox*m1 | ox*m0 */
|
||||
MOVSS ( S(1), XMM4 ) /* oy */
|
||||
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* oy | oy */
|
||||
MULPS ( XMM1, XMM4 ) /* oy*m5 | oy*m4 */
|
||||
|
||||
ADDPS ( XMM4, XMM3 ) /* ox*m1+oy*m5 | ox*m0+oy*m4 */
|
||||
ADDPS ( XMM2, XMM3 ) /* +m13 | +m12 */
|
||||
MOVLPS ( XMM3, D(0) ) /* ->D(1) | ->D(0) */
|
||||
|
||||
MOVSS ( S(2), XMM3 ) /* oz */
|
||||
MOVSS ( XMM3, D(2) ) /* ->D(2) */
|
||||
|
||||
LLBL(K_GTP3P2DR_skip):
|
||||
ADD_L ( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L ( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L ( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE ( LLBL(K_GTP3P2DR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTP3P2DR_finish):
|
||||
POP_L ( EDI )
|
||||
|
||||
POP_L ( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_2d_no_rot
 *
 * For each 3-component source vertex (ox, oy, oz) writes:
 *    D(0) = ox*m0 + m12
 *    D(1) = oy*m5 + m13
 *    D(2) = oz                (copied unchanged)
 * D(3) is not written.
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry); the matrix pointer comes from ARG_MATRIX.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_3, count = source count, size = 3.
 * Returns early when count is zero.
 *
 * Loop-invariant registers (low two lanes): XMM1 = m5 | m0, XMM2 = m13 | m12.
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points3_2d_no_rot)
|
||||
HIDDEN(_mesa_sse_transform_points3_2d_no_rot)
|
||||
GLNAME(_mesa_sse_transform_points3_2d_no_rot):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX) /* count == 0 ? */
|
||||
JZ( LLBL(K_GTP3P2DNRR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVSS ( M(0), XMM1 ) /* m0 */
|
||||
MOVSS ( M(5), XMM2 ) /* m5 */
|
||||
UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */
|
||||
MOVLPS ( M(12), XMM2 ) /* m13 | m12 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP3P2DNRR_top):
|
||||
MOVLPS( S(0), XMM0 ) /* oy | ox */
|
||||
MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */
|
||||
ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */
|
||||
MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */
|
||||
|
||||
MOVSS( S(2), XMM0 ) /* oz */
|
||||
MOVSS( XMM0, D(2) ) /* ->D(2) */
|
||||
|
||||
LLBL(K_GTP3P2DNRR_skip):
|
||||
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE( LLBL(K_GTP3P2DNRR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTP3P2DNRR_finish):
|
||||
POP_L( EDI )
|
||||
|
||||
POP_L( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_sse_transform_points3_3d
 *
 * For each 3-component source vertex (ox, oy, oz) writes:
 *    D(0) = ox*m0 + oy*m4 + oz*m8  + m12
 *    D(1) = ox*m1 + oy*m5 + oz*m9  + m13
 *    D(2) = ox*m2 + oy*m6 + oz*m10 + m14
 * D(3) is not written.
 *
 * The source and dest GLvector4f pointers are read from the stack at
 * OFFSET_SOURCE+8 / OFFSET_DEST+8 (the +8 presumably skips the two registers
 * pushed on entry); the matrix pointer comes from ARG_MATRIX.
 * The matrix rows are loaded with MOVAPS, which requires the memory operand
 * to be 16-byte aligned.
 *
 * Dest bookkeeping: flags |= VEC_SIZE_3, count = source count, size = 3.
 * Returns early when count is zero.
 *
 * Loop-invariant registers: XMM0..XMM3 = the four matrix rows M(0..3),
 * M(4..7), M(8..11), M(12..15).
 * Registers in the loop: ESI = current source vertex (stepped by the source
 * stride held in EAX), EDI = current dest vertex (stepped by 16 bytes),
 * ECX = dest start + count*16 (the loop ends when EDI reaches it).
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME(_mesa_sse_transform_points3_3d)
|
||||
HIDDEN(_mesa_sse_transform_points3_3d)
|
||||
GLNAME(_mesa_sse_transform_points3_3d):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI ) /* saved here, restored before RET */
|
||||
PUSH_L( EDI ) /* saved here, restored before RET */
|
||||
|
||||
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX) /* count == 0 ? */
|
||||
JZ( LLBL(K_GTP3P3DR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
|
||||
ALIGNTEXT32
|
||||
MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */
|
||||
MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */
|
||||
MOVAPS( M(8), XMM2 ) /* m10 | m9 | m8 */
|
||||
MOVAPS( M(12), XMM3 ) /* m14 | m13 | m12 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL(K_GTP3P3DR_top):
|
||||
MOVSS( S(0), XMM4 ) /* ox */
|
||||
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox */
|
||||
MULPS( XMM0, XMM4 ) /* ox*m2 | ox*m1 | ox*m0 */
|
||||
|
||||
MOVSS( S(1), XMM5 ) /* oy */
|
||||
SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy */
|
||||
MULPS( XMM1, XMM5 ) /* oy*m6 | oy*m5 | oy*m4 */
|
||||
|
||||
MOVSS( S(2), XMM6 ) /* oz */
|
||||
SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz */
|
||||
MULPS( XMM2, XMM6 ) /* oz*m10 | oz*m9 | oz*m8 */
|
||||
|
||||
ADDPS( XMM5, XMM4 ) /* + | + | + */
|
||||
ADDPS( XMM6, XMM4 ) /* + | + | + */
|
||||
ADDPS( XMM3, XMM4 ) /* + | + | + */
|
||||
|
||||
MOVLPS( XMM4, D(0) ) /* => D(1) | => D(0) */
|
||||
UNPCKHPS( XMM4, XMM4 ) /* bring lane 2 down to lane 0 */
|
||||
MOVSS( XMM4, D(2) ) /* => D(2) */
|
||||
|
||||
LLBL(K_GTP3P3DR_skip):
|
||||
ADD_L( CONST(16), EDI ) /* next dest vertex (16 bytes apart) */
|
||||
ADD_L( EAX, ESI ) /* next source vertex (+= stride) */
|
||||
CMP_L( ECX, EDI ) /* reached end of dest ? */
|
||||
JNE( LLBL(K_GTP3P3DR_top) ) /* no -> next vertex */
|
||||
|
||||
LLBL(K_GTP3P3DR_finish):
|
||||
POP_L( EDI )
|
||||
|
||||
POP_L( ESI )
|
||||
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
#endif
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,235 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifdef USE_SSE_ASM
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/*
 * Defined once for the whole file.  Presumably consumed by the ARG_SOURCE /
 * ARG_DEST / ARG_MATRIX stack-argument macros used below (their definitions
 * are not visible in this chunk) to skip the two registers each routine
 * pushes on entry -- TODO confirm against xform_args.h.
 */
#define FRAME_OFFSET 8
|
||||
|
||||
/*
 * Operand shorthands: the i-th 32-bit element (byte offset i * 4) relative to
 * ESI (SRC), EDI (DST) and EDX (MAT).  The argument is pasted
 * unparenthesized, so pass a plain constant.
 */
#define SRC(i) REGOFF(i * 4, ESI)
|
||||
#define DST(i) REGOFF(i * 4, EDI)
|
||||
#define MAT(i) REGOFF(i * 4, EDX)
|
||||
|
||||
/*
 * Packs four selectors into one immediate: r0*64 + r1*16 + r2*4 + r3, i.e.
 * r0 lands in the highest position and r3 in the lowest when each selector
 * is in 0..3.  Presumably meant as a SHUFPS lane-select immediate; it is not
 * used in the part of the file visible here.
 */
#define SELECT(r0, r1, r2, r3) CONST( r0 * 64 + r1 * 16 + r2 * 4 + r3 )
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_sse_transform_points4_general )
|
||||
HIDDEN(_mesa_sse_transform_points4_general)
|
||||
GLNAME( _mesa_sse_transform_points4_general ):
|
||||
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX ) /* verify non-zero count */
|
||||
JE( LLBL( sse_general_done ) )
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
|
||||
PREFETCHT0( REGIND(ESI) )
|
||||
|
||||
MOVAPS( MAT(0), XMM4 ) /* m3 | m2 | m1 | m0 */
|
||||
MOVAPS( MAT(4), XMM5 ) /* m7 | m6 | m5 | m4 */
|
||||
MOVAPS( MAT(8), XMM6 ) /* m11 | m10 | m9 | m8 */
|
||||
MOVAPS( MAT(12), XMM7 ) /* m15 | m14 | m13 | m12 */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( sse_general_loop ):
|
||||
|
||||
MOVSS( SRC(0), XMM0 ) /* ox */
|
||||
SHUFPS( CONST(0x0), XMM0, XMM0 ) /* ox | ox | ox | ox */
|
||||
MULPS( XMM4, XMM0 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
|
||||
|
||||
MOVSS( SRC(1), XMM1 ) /* oy */
|
||||
SHUFPS( CONST(0x0), XMM1, XMM1 ) /* oy | oy | oy | oy */
|
||||
MULPS( XMM5, XMM1 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
|
||||
|
||||
MOVSS( SRC(2), XMM2 ) /* oz */
|
||||
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* oz | oz | oz | oz */
|
||||
MULPS( XMM6, XMM2 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
|
||||
|
||||
MOVSS( SRC(3), XMM3 ) /* ow */
|
||||
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ow | ow | ow | ow */
|
||||
MULPS( XMM7, XMM3 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
|
||||
|
||||
ADDPS( XMM1, XMM0 ) /* ox*m3+oy*m7 | ... */
|
||||
ADDPS( XMM2, XMM0 ) /* ox*m3+oy*m7+oz*m11 | ... */
|
||||
ADDPS( XMM3, XMM0 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
|
||||
MOVAPS( XMM0, DST(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
|
||||
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
|
||||
DEC_L( ECX )
|
||||
JNZ( LLBL( sse_general_loop ) )
|
||||
|
||||
LLBL( sse_general_done ):
|
||||
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT4
|
||||
GLOBL GLNAME( _mesa_sse_transform_points4_3d )
|
||||
HIDDEN(_mesa_sse_transform_points4_3d)
|
||||
GLNAME( _mesa_sse_transform_points4_3d ):
|
||||
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */
|
||||
MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
|
||||
|
||||
TEST_L( ECX, ECX)
|
||||
JZ( LLBL(K_GTP43P3DR_finish) ) /* count was zero; go to finish */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )/* set dest size */
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* count *= 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
ADD_L( EDI, ECX ) /* count += dest ptr */
|
||||
|
||||
MOVAPS( MAT(0), XMM0 ) /* m3 | m2 | m1 | m0 */
|
||||
MOVAPS( MAT(4), XMM1 ) /* m7 | m6 | m5 | m4 */
|
||||
MOVAPS( MAT(8), XMM2 ) /* m11 | m10 | m9 | m8 */
|
||||
MOVAPS( MAT(12), XMM3 ) /* m15 | m14 | m13 | m12 */
|
||||
|
||||
ALIGNTEXT32
|
||||
LLBL( K_GTP43P3DR_top ):
|
||||
MOVSS( SRC(0), XMM4 ) /* ox */
|
||||
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */
|
||||
MULPS( XMM0, XMM4 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
|
||||
|
||||
MOVSS( SRC(1), XMM5 ) /* oy */
|
||||
SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */
|
||||
MULPS( XMM1, XMM5 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
|
||||
|
||||
MOVSS( SRC(2), XMM6 ) /* oz */
|
||||
SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */
|
||||
MULPS( XMM2, XMM6 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
|
||||
|
||||
MOVSS( SRC(3), XMM7 ) /* ow */
|
||||
SHUFPS( CONST(0x0), XMM7, XMM7 ) /* ow | ow | ow | ow */
|
||||
MULPS( XMM3, XMM7 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
|
||||
|
||||
ADDPS( XMM5, XMM4 ) /* ox*m3+oy*m7 | ... */
|
||||
ADDPS( XMM6, XMM4 ) /* ox*m3+oy*m7+oz*m11 | ... */
|
||||
ADDPS( XMM7, XMM4 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
|
||||
MOVAPS( XMM4, DST(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
|
||||
|
||||
MOVSS( SRC(3), XMM4 ) /* ow */
|
||||
MOVSS( XMM4, DST(3) ) /* ->D(3) */
|
||||
|
||||
LLBL( K_GTP43P3DR_skip ):
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(K_GTP43P3DR_top) )
|
||||
|
||||
LLBL( K_GTP43P3DR_finish ):
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_sse_transform_points4_identity )
|
||||
HIDDEN(_mesa_sse_transform_points4_identity)
|
||||
GLNAME( _mesa_sse_transform_points4_identity ):
|
||||
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX ) /* verify non-zero count */
|
||||
JE( LLBL( sse_identity_done ) )
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */
|
||||
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL( sse_identity_loop ):
|
||||
|
||||
PREFETCHNTA( REGOFF(32, ESI) )
|
||||
|
||||
MOVAPS( REGIND(ESI), XMM0 )
|
||||
ADD_L( EAX, ESI )
|
||||
|
||||
MOVAPS( XMM0, REGIND(EDI) )
|
||||
ADD_L( CONST(16), EDI )
|
||||
|
||||
DEC_L( ECX )
|
||||
JNZ( LLBL( sse_identity_loop ) )
|
||||
|
||||
LLBL( sse_identity_done ):
|
||||
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#endif
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,407 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
|
||||
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
|
||||
* in there will break the build on some platforms.
|
||||
*/
|
||||
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "clip_args.h"
|
||||
|
||||
/* The four floats of the current source vertex (addressed through ESI). */
#define SRC0    REGOFF(0, ESI)
#define SRC1    REGOFF(4, ESI)
#define SRC2    REGOFF(8, ESI)
#define SRC3    REGOFF(12, ESI)

/* The four floats of the current destination vertex (addressed through EDI). */
#define DST0    REGOFF(0, EDI)
#define DST1    REGOFF(4, EDI)
#define DST2    REGOFF(8, EDI)
#define DST3    REGOFF(12, EDI)

/* Four consecutive floats addressed through EDX. */
#define MAT0    REGOFF(0, EDX)
#define MAT1    REGOFF(4, EDX)
#define MAT2    REGOFF(8, EDX)
#define MAT3    REGOFF(12, EDX)
|
||||
|
||||
|
||||
/*
|
||||
* Table for clip test.
|
||||
*
|
||||
* bit6 = SRC3 < 0
|
||||
* bit5 = SRC2 < 0
|
||||
* bit4 = abs(S(2)) > abs(S(3))
|
||||
* bit3 = SRC1 < 0
|
||||
* bit2 = abs(S(1)) > abs(S(3))
|
||||
* bit1 = SRC0 < 0
|
||||
* bit0 = abs(S(0)) > abs(S(3))
|
||||
*/
|
||||
|
||||
SEG_DATA
|
||||
|
||||
clip_table:
|
||||
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
|
||||
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
|
||||
D_BYTE 0x20, 0x21, 0x20, 0x22, 0x24, 0x25, 0x24, 0x26
|
||||
D_BYTE 0x20, 0x21, 0x20, 0x22, 0x28, 0x29, 0x28, 0x2a
|
||||
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
|
||||
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
|
||||
D_BYTE 0x10, 0x11, 0x10, 0x12, 0x14, 0x15, 0x14, 0x16
|
||||
D_BYTE 0x10, 0x11, 0x10, 0x12, 0x18, 0x19, 0x18, 0x1a
|
||||
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
|
||||
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
|
||||
D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x27, 0x25, 0x27, 0x26
|
||||
D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x2b, 0x29, 0x2b, 0x2a
|
||||
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
|
||||
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
|
||||
D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x17, 0x15, 0x17, 0x16
|
||||
D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x1b, 0x19, 0x1b, 0x1a
|
||||
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/*
|
||||
* _mesa_x86_cliptest_points4
|
||||
*
|
||||
* AL: ormask
|
||||
* AH: andmask
|
||||
* EBX: temp0
|
||||
* ECX: temp1
|
||||
* EDX: clipmask[]
|
||||
* ESI: clip[]
|
||||
* EDI: proj[]
|
||||
* EBP: temp2
|
||||
*/
|
||||
|
||||
#if defined(__ELF__) && defined(__PIC__) && defined(GNU_ASSEMBLER) && !defined(ELFPIC)
|
||||
#define ELFPIC
|
||||
#endif
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_cliptest_points4 )
|
||||
HIDDEN(_mesa_x86_cliptest_points4)
|
||||
GLNAME( _mesa_x86_cliptest_points4 ):
|
||||
|
||||
#ifdef ELFPIC
|
||||
#define FRAME_OFFSET 20
|
||||
#else
|
||||
#define FRAME_OFFSET 16
|
||||
#endif
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBP )
|
||||
PUSH_L( EBX )
|
||||
|
||||
#ifdef ELFPIC
|
||||
/* store pointer to clip_table on stack */
|
||||
CALL( LLBL(ctp4_get_eip) )
|
||||
ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )
|
||||
MOV_L( REGOFF(clip_table@GOT, EBX), EBX )
|
||||
PUSH_L( EBX )
|
||||
JMP( LLBL(ctp4_clip_table_ready) )
|
||||
|
||||
LLBL(ctp4_get_eip):
|
||||
/* store eip in ebx */
|
||||
MOV_L( REGIND(ESP), EBX )
|
||||
RET
|
||||
|
||||
LLBL(ctp4_clip_table_ready):
|
||||
#endif
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_CLIP, EDX )
|
||||
MOV_L( ARG_OR, EBX )
|
||||
|
||||
MOV_L( ARG_AND, EBP )
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
MOV_L( EAX, ARG_SOURCE ) /* put stride in ARG_SOURCE */
|
||||
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDX, ECX )
|
||||
|
||||
MOV_L( ECX, ARG_CLIP ) /* put clipmask + count in ARG_CLIP */
|
||||
CMP_L( ECX, EDX )
|
||||
|
||||
MOV_B( REGIND(EBX), AL )
|
||||
MOV_B( REGIND(EBP), AH )
|
||||
|
||||
JZ( LLBL(ctp4_finish) )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(ctp4_top):
|
||||
|
||||
FLD1 /* F3 */
|
||||
FDIV_S( SRC3 ) /* GH: don't care about div-by-zero */
|
||||
|
||||
MOV_L( SRC3, EBP )
|
||||
MOV_L( SRC2, EBX )
|
||||
|
||||
XOR_L( ECX, ECX )
|
||||
ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
MOV_L( SRC1, EBX )
|
||||
|
||||
ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
MOV_L( SRC0, EBX )
|
||||
|
||||
ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
|
||||
#ifdef ELFPIC
|
||||
MOV_L( REGIND(ESP), EBP ) /* clip_table */
|
||||
|
||||
MOV_B( REGBI(EBP, ECX), CL )
|
||||
#else
|
||||
MOV_B( REGOFF(clip_table,ECX), CL )
|
||||
#endif
|
||||
|
||||
OR_B( CL, AL )
|
||||
AND_B( CL, AH )
|
||||
|
||||
TEST_B( CL, CL )
|
||||
MOV_B( CL, REGIND(EDX) )
|
||||
|
||||
JZ( LLBL(ctp4_proj) )
|
||||
|
||||
LLBL(ctp4_noproj):
|
||||
|
||||
FSTP( ST(0) ) /* */
|
||||
|
||||
MOV_L( CONST(0), DST0 )
|
||||
MOV_L( CONST(0), DST1 )
|
||||
MOV_L( CONST(0), DST2 )
|
||||
MOV_L( CONST(0x3f800000), DST3 )
|
||||
|
||||
JMP( LLBL(ctp4_next) )
|
||||
|
||||
LLBL(ctp4_proj):
|
||||
|
||||
FLD_S( SRC0 ) /* F0 F3 */
|
||||
FMUL2( ST(1), ST0 )
|
||||
|
||||
FLD_S( SRC1 ) /* F1 F0 F3 */
|
||||
FMUL2( ST(2), ST0 )
|
||||
|
||||
FLD_S( SRC2 ) /* F2 F1 F0 F3 */
|
||||
FMUL2( ST(3), ST0 )
|
||||
|
||||
FXCH( ST(2) ) /* F0 F1 F2 F3 */
|
||||
FSTP_S( DST0 ) /* F1 F2 F3 */
|
||||
FSTP_S( DST1 ) /* F2 F3 */
|
||||
FSTP_S( DST2 ) /* F3 */
|
||||
FSTP_S( DST3 ) /* */
|
||||
|
||||
LLBL(ctp4_next):
|
||||
|
||||
INC_L( EDX )
|
||||
ADD_L( CONST(16), EDI )
|
||||
|
||||
ADD_L( ARG_SOURCE, ESI )
|
||||
CMP_L( EDX, ARG_CLIP )
|
||||
|
||||
JNZ( LLBL(ctp4_top) )
|
||||
|
||||
MOV_L( ARG_OR, ECX )
|
||||
MOV_L( ARG_AND, EDX )
|
||||
|
||||
MOV_B( AL, REGIND(ECX) )
|
||||
MOV_B( AH, REGIND(EDX) )
|
||||
|
||||
LLBL(ctp4_finish):
|
||||
|
||||
MOV_L( ARG_DEST, EAX )
|
||||
#ifdef ELFPIC
|
||||
POP_L( ESI ) /* discard ptr to clip_table */
|
||||
#endif
|
||||
POP_L( EBX )
|
||||
POP_L( EBP )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_cliptest_points4_np )
|
||||
HIDDEN(_mesa_x86_cliptest_points4_np)
|
||||
GLNAME( _mesa_x86_cliptest_points4_np ):
|
||||
|
||||
#ifdef ELFPIC
|
||||
#define FRAME_OFFSET 20
|
||||
#else
|
||||
#define FRAME_OFFSET 16
|
||||
#endif
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBP )
|
||||
PUSH_L( EBX )
|
||||
|
||||
#ifdef ELFPIC
|
||||
/* store pointer to clip_table on stack */
|
||||
CALL( LLBL(ctp4_np_get_eip) )
|
||||
ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )
|
||||
MOV_L( REGOFF(clip_table@GOT, EBX), EBX )
|
||||
PUSH_L( EBX )
|
||||
JMP( LLBL(ctp4_np_clip_table_ready) )
|
||||
|
||||
LLBL(ctp4_np_get_eip):
|
||||
/* store eip in ebx */
|
||||
MOV_L( REGIND(ESP), EBX )
|
||||
RET
|
||||
|
||||
LLBL(ctp4_np_clip_table_ready):
|
||||
#endif
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
/* slot */
|
||||
|
||||
MOV_L( ARG_CLIP, EDX )
|
||||
MOV_L( ARG_OR, EBX )
|
||||
|
||||
MOV_L( ARG_AND, EBP )
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( EAX, ARG_DEST ) /* put stride in ARG_DEST */
|
||||
ADD_L( EDX, ECX )
|
||||
|
||||
MOV_L( ECX, EDI ) /* put clipmask + count in EDI */
|
||||
CMP_L( ECX, EDX )
|
||||
|
||||
MOV_B( REGIND(EBX), AL )
|
||||
MOV_B( REGIND(EBP), AH )
|
||||
|
||||
JZ( LLBL(ctp4_np_finish) )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(ctp4_np_top):
|
||||
|
||||
MOV_L( SRC3, EBP )
|
||||
MOV_L( SRC2, EBX )
|
||||
|
||||
XOR_L( ECX, ECX )
|
||||
ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
MOV_L( SRC1, EBX )
|
||||
|
||||
ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
MOV_L( SRC0, EBX )
|
||||
|
||||
ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */
|
||||
|
||||
ADC_L( ECX, ECX )
|
||||
|
||||
#ifdef ELFPIC
|
||||
MOV_L( REGIND(ESP), EBP ) /* clip_table */
|
||||
|
||||
MOV_B( REGBI(EBP, ECX), CL )
|
||||
#else
|
||||
MOV_B( REGOFF(clip_table,ECX), CL )
|
||||
#endif
|
||||
|
||||
OR_B( CL, AL )
|
||||
AND_B( CL, AH )
|
||||
|
||||
TEST_B( CL, CL )
|
||||
MOV_B( CL, REGIND(EDX) )
|
||||
|
||||
INC_L( EDX )
|
||||
/* slot */
|
||||
|
||||
ADD_L( ARG_DEST, ESI )
|
||||
CMP_L( EDX, EDI )
|
||||
|
||||
JNZ( LLBL(ctp4_np_top) )
|
||||
|
||||
MOV_L( ARG_OR, ECX )
|
||||
MOV_L( ARG_AND, EDX )
|
||||
|
||||
MOV_B( AL, REGIND(ECX) )
|
||||
MOV_B( AH, REGIND(EDX) )
|
||||
|
||||
LLBL(ctp4_np_finish):
|
||||
|
||||
MOV_L( ARG_SOURCE, EAX )
|
||||
#ifdef ELFPIC
|
||||
POP_L( ESI ) /* discard ptr to clip_table */
|
||||
#endif
|
||||
POP_L( EBX )
|
||||
POP_L( EBP )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
|
||||
RET
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,126 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Intel x86 assembly code by Josh Vanderhoof
|
||||
*/
|
||||
|
||||
#include "main/glheader.h"
|
||||
#include "main/context.h"
|
||||
#include "math/m_xform.h"
|
||||
|
||||
#include "x86_xform.h"
|
||||
#include "common_x86_asm.h"
|
||||
|
||||
#ifdef USE_X86_ASM
|
||||
#ifdef USE_3DNOW_ASM
|
||||
#include "3dnow.h"
|
||||
#endif
|
||||
#ifdef USE_SSE_ASM
|
||||
#include "sse.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_MATH
|
||||
#include "math/m_debug.h"
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef USE_X86_ASM
|
||||
DECLARE_XFORM_GROUP( x86, 2 )
|
||||
DECLARE_XFORM_GROUP( x86, 3 )
|
||||
DECLARE_XFORM_GROUP( x86, 4 )
|
||||
|
||||
|
||||
extern GLvector4f * _ASMAPI
|
||||
_mesa_x86_cliptest_points4( GLvector4f *clip_vec,
|
||||
GLvector4f *proj_vec,
|
||||
GLubyte clipMask[],
|
||||
GLubyte *orMask,
|
||||
GLubyte *andMask,
|
||||
GLboolean viewport_z_clip );
|
||||
|
||||
extern GLvector4f * _ASMAPI
|
||||
_mesa_x86_cliptest_points4_np( GLvector4f *clip_vec,
|
||||
GLvector4f *proj_vec,
|
||||
GLubyte clipMask[],
|
||||
GLubyte *orMask,
|
||||
GLubyte *andMask,
|
||||
GLboolean viewport_z_clip );
|
||||
|
||||
extern void _ASMAPI
|
||||
_mesa_v16_x86_cliptest_points4( GLfloat *first_vert,
|
||||
GLfloat *last_vert,
|
||||
GLubyte *or_mask,
|
||||
GLubyte *and_mask,
|
||||
GLubyte *clip_mask,
|
||||
GLboolean viewport_z_clip );
|
||||
|
||||
extern void _ASMAPI
|
||||
_mesa_v16_x86_general_xform( GLfloat *dest,
|
||||
const GLfloat *m,
|
||||
const GLfloat *src,
|
||||
GLuint src_stride,
|
||||
GLuint count );
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef USE_X86_ASM
|
||||
static void _mesa_init_x86_transform_asm( void )
|
||||
{
|
||||
ASSIGN_XFORM_GROUP( x86, 2 );
|
||||
ASSIGN_XFORM_GROUP( x86, 3 );
|
||||
ASSIGN_XFORM_GROUP( x86, 4 );
|
||||
|
||||
_mesa_clip_tab[4] = _mesa_x86_cliptest_points4;
|
||||
_mesa_clip_np_tab[4] = _mesa_x86_cliptest_points4_np;
|
||||
|
||||
#ifdef DEBUG_MATH
|
||||
_math_test_all_transform_functions( "x86" );
|
||||
_math_test_all_cliptest_functions( "x86" );
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
 * Detect the CPU features and install every assembly code path that is
 * both compiled in and supported by the running CPU.
 *
 * The later groups are installed after the plain x86 group, so a later
 * group replaces table entries written by an earlier one.
 */
void _mesa_init_all_x86_transform_asm( void )
{
   _mesa_get_x86_features();

#ifdef USE_X86_ASM
   if ( _mesa_x86_cpu_features ) {
      _mesa_init_x86_transform_asm();
   }

   /* The 3DNow! and SSE init functions are declared in headers that
    * this file only includes under the matching USE_*_ASM macro, so the
    * calls are guarded the same way to avoid implicit declarations.
    */
#ifdef USE_3DNOW_ASM
   if ( cpu_has_3dnow ) {
      _mesa_init_3dnow_transform_asm();
   }
#endif

#ifdef USE_SSE_ASM
   if ( cpu_has_xmm ) {
      _mesa_init_sse_transform_asm();
   }
#endif

#endif
}
|
|
@ -0,0 +1,106 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Gareth Hughes
|
||||
*/
|
||||
|
||||
#ifndef X86_XFORM_H
|
||||
#define X86_XFORM_H
|
||||
|
||||
|
||||
/* =============================================================
|
||||
* Transformation function declarations:
|
||||
*/
|
||||
|
||||
/* Parameter list shared by every point transform routine. */
#define XFORM_ARGS \
   GLvector4f *to_vec, \
   const GLfloat m[16], \
   const GLvector4f *from_vec

/* Declares the seven transform routines (one per matrix type) of the
 * implementation "pfx" for input vectors of size "sz".  The expansion
 * carries its own terminating semicolon.
 */
#define DECLARE_XFORM_GROUP( pfx, sz ) \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS );
|
||||
|
||||
/* Installs the seven transform routines of the implementation "pfx" in
 * row "sz" of _mesa_transform_tab, one per matrix type.
 *
 * The stores are wrapped in do { } while (0) so that the macro expands
 * to a single statement and stays correct under an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define ASSIGN_XFORM_GROUP( pfx, sz ) \
   do { \
      _mesa_transform_tab[sz][MATRIX_GENERAL] = \
         _mesa_##pfx##_transform_points##sz##_general; \
      _mesa_transform_tab[sz][MATRIX_IDENTITY] = \
         _mesa_##pfx##_transform_points##sz##_identity; \
      _mesa_transform_tab[sz][MATRIX_3D_NO_ROT] = \
         _mesa_##pfx##_transform_points##sz##_3d_no_rot; \
      _mesa_transform_tab[sz][MATRIX_PERSPECTIVE] = \
         _mesa_##pfx##_transform_points##sz##_perspective; \
      _mesa_transform_tab[sz][MATRIX_2D] = \
         _mesa_##pfx##_transform_points##sz##_2d; \
      _mesa_transform_tab[sz][MATRIX_2D_NO_ROT] = \
         _mesa_##pfx##_transform_points##sz##_2d_no_rot; \
      _mesa_transform_tab[sz][MATRIX_3D] = \
         _mesa_##pfx##_transform_points##sz##_3d; \
   } while (0)
|
||||
|
||||
|
||||
/* =============================================================
|
||||
* Normal transformation function declarations:
|
||||
*/
|
||||
|
||||
/* Parameter list shared by every normal transform routine. */
#define NORM_ARGS \
   const GLmatrix *mat, \
   GLfloat scale, \
   const GLvector4f *in, \
   const GLfloat *lengths, \
   GLvector4f *dest

/* Declares the eight normal transform routines of the implementation
 * "pfx".  The expansion carries its own terminating semicolon.
 */
#define DECLARE_NORM_GROUP( pfx ) \
   extern void _ASMAPI \
   _mesa_##pfx##_rescale_normals( NORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_normalize_normals( NORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_normals( NORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_normals_no_rot( NORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_rescale_normals( NORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_rescale_normals_no_rot( NORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_normalize_normals( NORM_ARGS ); \
   extern void _ASMAPI \
   _mesa_##pfx##_transform_normalize_normals_no_rot( NORM_ARGS );
|
||||
|
||||
/* Installs the eight normal transform routines of the implementation
 * "pfx" in _mesa_normal_tab, indexed by the NORM_* flag combination.
 *
 * The stores are wrapped in do { } while (0) so that the macro expands
 * to a single statement and stays correct under an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define ASSIGN_NORM_GROUP( pfx ) \
   do { \
      _mesa_normal_tab[NORM_RESCALE] = \
         _mesa_##pfx##_rescale_normals; \
      _mesa_normal_tab[NORM_NORMALIZE] = \
         _mesa_##pfx##_normalize_normals; \
      _mesa_normal_tab[NORM_TRANSFORM] = \
         _mesa_##pfx##_transform_normals; \
      _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] = \
         _mesa_##pfx##_transform_normals_no_rot; \
      _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] = \
         _mesa_##pfx##_transform_rescale_normals; \
      _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] = \
         _mesa_##pfx##_transform_rescale_normals_no_rot; \
      _mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE] = \
         _mesa_##pfx##_transform_normalize_normals; \
      _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE] = \
         _mesa_##pfx##_transform_normalize_normals_no_rot; \
   } while (0)
|
||||
|
||||
|
||||
#endif
|
|
@ -0,0 +1,574 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
|
||||
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
|
||||
* in there will break the build on some platforms.
|
||||
*/
|
||||
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/* Bit patterns of the single-precision floats 1.0 and 0.0, for integer stores. */
#define FP_ONE  1065353216
#define FP_ZERO 0

/* The four floats of the current source vertex (addressed through ESI). */
#define SRC0    REGOFF(0, ESI)
#define SRC1    REGOFF(4, ESI)
#define SRC2    REGOFF(8, ESI)
#define SRC3    REGOFF(12, ESI)

/* The four floats of the current destination vertex (addressed through EDI). */
#define DST0    REGOFF(0, EDI)
#define DST1    REGOFF(4, EDI)
#define DST2    REGOFF(8, EDI)
#define DST3    REGOFF(12, EDI)

/* The sixteen floats of the matrix (addressed through EDX). */
#define MAT0    REGOFF(0, EDX)
#define MAT1    REGOFF(4, EDX)
#define MAT2    REGOFF(8, EDX)
#define MAT3    REGOFF(12, EDX)
#define MAT4    REGOFF(16, EDX)
#define MAT5    REGOFF(20, EDX)
#define MAT6    REGOFF(24, EDX)
#define MAT7    REGOFF(28, EDX)
#define MAT8    REGOFF(32, EDX)
#define MAT9    REGOFF(36, EDX)
#define MAT10   REGOFF(40, EDX)
#define MAT11   REGOFF(44, EDX)
#define MAT12   REGOFF(48, EDX)
#define MAT13   REGOFF(52, EDX)
#define MAT14   REGOFF(56, EDX)
#define MAT15   REGOFF(60, EDX)
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points2_general )
|
||||
HIDDEN(_mesa_x86_transform_points2_general)
|
||||
GLNAME( _mesa_x86_transform_points2_general ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p2_gr_done) )
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX )
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p2_gr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
FLD_S( SRC0 ) /* F5 F4 */
|
||||
FMUL_S( MAT1 )
|
||||
FLD_S( SRC0 ) /* F6 F5 F4 */
|
||||
FMUL_S( MAT2 )
|
||||
FLD_S( SRC0 ) /* F7 F6 F5 F4 */
|
||||
FMUL_S( MAT3 )
|
||||
|
||||
FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT4 )
|
||||
FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT6 )
|
||||
FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT7 )
|
||||
|
||||
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
|
||||
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
|
||||
|
||||
FXCH( ST(3) ) /* F4 F6 F5 F7 */
|
||||
FADD_S( MAT12 )
|
||||
FXCH( ST(2) ) /* F5 F6 F4 F7 */
|
||||
FADD_S( MAT13 )
|
||||
FXCH( ST(1) ) /* F6 F5 F4 F7 */
|
||||
FADD_S( MAT14 )
|
||||
FXCH( ST(3) ) /* F7 F5 F4 F6 */
|
||||
FADD_S( MAT15 )
|
||||
|
||||
FXCH( ST(2) ) /* F4 F5 F7 F6 */
|
||||
FSTP_S( DST0 ) /* F5 F7 F6 */
|
||||
FSTP_S( DST1 ) /* F7 F6 */
|
||||
FXCH( ST(1) ) /* F6 F7 */
|
||||
FSTP_S( DST2 ) /* F7 */
|
||||
FSTP_S( DST3 ) /* */
|
||||
|
||||
LLBL(x86_p2_gr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p2_gr_loop) )
|
||||
|
||||
LLBL(x86_p2_gr_done):
|
||||
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_perspective
 *
 * For every source element writes four 32-bit values to the destination:
 *   DST0 = SRC0 * MAT0
 *   DST1 = SRC1 * MAT5
 *   DST2 = MAT14 (copied as a raw 32-bit word through EBX)
 *   DST3 = FP_ZERO
 * The destination header receives V4F_COUNT = source count, V4F_SIZE = 4
 * and VEC_SIZE_4 or-ed into V4F_FLAGS.  Nothing is written when the source
 * count is zero.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EDX = matrix argument, EBX = MAT14.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (three registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.  SRCn/DSTn/MATn are defined earlier in this file,
 * outside this excerpt; presumably the n-th float at ESI/EDI/EDX.
 * The comments on the FPU instructions list the x87 stack after the
 * instruction, top of stack first.
 * The x86_p2_pr_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
|
||||
HIDDEN(_mesa_x86_transform_points2_perspective)
|
||||
GLNAME( _mesa_x86_transform_points2_perspective ):
|
||||
|
||||
#define FRAME_OFFSET 12
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p2_pr_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
MOV_L( MAT14, EBX ) /* loop invariant, stored to DST2 on every pass */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p2_pr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
|
||||
FLD_S( SRC1 ) /* F1 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FXCH( ST(1) ) /* F4 F1 */
|
||||
FSTP_S( DST0 ) /* F1 */
|
||||
FSTP_S( DST1 ) /* */
|
||||
MOV_L( EBX, DST2 )
|
||||
MOV_L( CONST(FP_ZERO), DST3 )
|
||||
|
||||
LLBL(x86_p2_pr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p2_pr_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p2_pr_done):
|
||||
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_3d
 *
 * For every source element writes three floats to the destination:
 *   DST0 = SRC0 * MAT0 + SRC1 * MAT4 + MAT12
 *   DST1 = SRC0 * MAT1 + SRC1 * MAT5 + MAT13
 *   DST2 = SRC0 * MAT2 + SRC1 * MAT6 + MAT14
 * The fourth destination word is not written.  The destination header
 * receives V4F_COUNT = source count, V4F_SIZE = 3 and VEC_SIZE_3 or-ed into
 * V4F_FLAGS.  Nothing is written when the source count is zero.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EDX = matrix argument.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (two registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.  SRCn/DSTn/MATn are defined earlier in this file,
 * outside this excerpt; presumably the n-th float at ESI/EDI/EDX.
 * The comments on the FPU instructions list the x87 stack after the
 * instruction, top of stack first.
 * The x86_p2_3dr_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points2_3d )
|
||||
HIDDEN(_mesa_x86_transform_points2_3d)
|
||||
GLNAME( _mesa_x86_transform_points2_3d ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p2_3dr_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p2_3dr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
FLD_S( SRC0 ) /* F5 F4 */
|
||||
FMUL_S( MAT1 )
|
||||
FLD_S( SRC0 ) /* F6 F5 F4 */
|
||||
FMUL_S( MAT2 )
|
||||
|
||||
FLD_S( SRC1 ) /* F0 F6 F5 F4 */
|
||||
FMUL_S( MAT4 )
|
||||
FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT6 )
|
||||
|
||||
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
|
||||
|
||||
FXCH( ST(2) ) /* F4 F5 F6 */
|
||||
FADD_S( MAT12 )
|
||||
FXCH( ST(1) ) /* F5 F4 F6 */
|
||||
FADD_S( MAT13 )
|
||||
FXCH( ST(2) ) /* F6 F4 F5 */
|
||||
FADD_S( MAT14 )
|
||||
|
||||
FXCH( ST(1) ) /* F4 F6 F5 */
|
||||
FSTP_S( DST0 ) /* F6 F5 */
|
||||
FXCH( ST(1) ) /* F5 F6 */
|
||||
FSTP_S( DST1 ) /* F6 */
|
||||
FSTP_S( DST2 ) /* */
|
||||
|
||||
LLBL(x86_p2_3dr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p2_3dr_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p2_3dr_done):
|
||||
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_3d_no_rot
 *
 * For every source element writes three 32-bit values to the destination:
 *   DST0 = SRC0 * MAT0 + MAT12
 *   DST1 = SRC1 * MAT5 + MAT13
 *   DST2 = MAT14 (copied as a raw 32-bit word through EBX)
 * The fourth destination word is not written.  The destination header
 * receives V4F_COUNT = source count, V4F_SIZE = 3 and VEC_SIZE_3 or-ed into
 * V4F_FLAGS.  Nothing is written when the source count is zero.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EDX = matrix argument, EBX = MAT14.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (three registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.  SRCn/DSTn/MATn are defined earlier in this file,
 * outside this excerpt; presumably the n-th float at ESI/EDI/EDX.
 * The comments on the FPU instructions list the x87 stack after the
 * instruction, top of stack first.
 * The x86_p2_3dnrr_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
|
||||
HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
|
||||
GLNAME( _mesa_x86_transform_points2_3d_no_rot ):
|
||||
|
||||
#define FRAME_OFFSET 12
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p2_3dnrr_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
MOV_L( MAT14, EBX ) /* loop invariant, stored to DST2 on every pass */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p2_3dnrr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
|
||||
FLD_S( SRC1 ) /* F1 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FXCH( ST(1) ) /* F4 F1 */
|
||||
FADD_S( MAT12 )
|
||||
FLD_S( MAT13 ) /* F5 F4 F1 */
|
||||
FXCH( ST(2) ) /* F1 F4 F5 */
|
||||
FADDP( ST0, ST(2) ) /* F4 F5 */
|
||||
|
||||
FSTP_S( DST0 ) /* F5 */
|
||||
FSTP_S( DST1 ) /* */
|
||||
MOV_L( EBX, DST2 )
|
||||
|
||||
LLBL(x86_p2_3dnrr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p2_3dnrr_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p2_3dnrr_done):
|
||||
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_2d
 *
 * For every source element writes two floats to the destination:
 *   DST0 = SRC0 * MAT0 + SRC1 * MAT4 + MAT12
 *   DST1 = SRC0 * MAT1 + SRC1 * MAT5 + MAT13
 * The third and fourth destination words are not written.  The destination
 * header receives V4F_COUNT = source count, V4F_SIZE = 2 and VEC_SIZE_2
 * or-ed into V4F_FLAGS.  Nothing is written when the source count is zero.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EDX = matrix argument.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (two registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.  SRCn/DSTn/MATn are defined earlier in this file,
 * outside this excerpt; presumably the n-th float at ESI/EDI/EDX.
 * The comments on the FPU instructions list the x87 stack after the
 * instruction, top of stack first.
 * The x86_p2_2dr_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points2_2d )
|
||||
HIDDEN(_mesa_x86_transform_points2_2d)
|
||||
GLNAME( _mesa_x86_transform_points2_2d ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p2_2dr_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p2_2dr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
FLD_S( SRC0 ) /* F5 F4 */
|
||||
FMUL_S( MAT1 )
|
||||
|
||||
FLD_S( SRC1 ) /* F0 F5 F4 */
|
||||
FMUL_S( MAT4 )
|
||||
FLD_S( SRC1 ) /* F1 F0 F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FXCH( ST(1) ) /* F0 F1 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F1 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F5 F4 */
|
||||
|
||||
FXCH( ST(1) ) /* F4 F5 */
|
||||
FADD_S( MAT12 )
|
||||
FXCH( ST(1) ) /* F5 F4 */
|
||||
FADD_S( MAT13 )
|
||||
|
||||
FXCH( ST(1) ) /* F4 F5 */
|
||||
FSTP_S( DST0 ) /* F5 */
|
||||
FSTP_S( DST1 ) /* */
|
||||
|
||||
LLBL(x86_p2_2dr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p2_2dr_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p2_2dr_done):
|
||||
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_2d_no_rot
 *
 * For every source element writes two floats to the destination:
 *   DST0 = SRC0 * MAT0 + MAT12
 *   DST1 = SRC1 * MAT5 + MAT13
 * The third and fourth destination words are not written.  The destination
 * header receives V4F_COUNT = source count, V4F_SIZE = 2 and VEC_SIZE_2
 * or-ed into V4F_FLAGS.  Nothing is written when the source count is zero.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EDX = matrix argument.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (two registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.  SRCn/DSTn/MATn are defined earlier in this file,
 * outside this excerpt; presumably the n-th float at ESI/EDI/EDX.
 * The comments on the FPU instructions list the x87 stack after the
 * instruction, top of stack first.
 * The x86_p2_2dnrr_skip label is not referenced inside this function.
 *
 * NOTE(review): this entry point is aligned with ALIGNTEXT4 while the
 * neighbouring entry points use ALIGNTEXT16 - confirm whether the smaller
 * alignment is intentional.
 */
ALIGNTEXT4
|
||||
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
|
||||
HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
|
||||
GLNAME( _mesa_x86_transform_points2_2d_no_rot ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p2_2dnrr_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p2_2dnrr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
|
||||
FLD_S( SRC1 ) /* F1 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FXCH( ST(1) ) /* F4 F1 */
|
||||
FADD_S( MAT12 )
|
||||
FLD_S( MAT13 ) /* F5 F4 F1 */
|
||||
FXCH( ST(2) ) /* F1 F4 F5 */
|
||||
FADDP( ST0, ST(2) ) /* F4 F5 */
|
||||
|
||||
FSTP_S( DST0 ) /* F5 */
|
||||
FSTP_S( DST1 ) /* */
|
||||
|
||||
LLBL(x86_p2_2dnrr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p2_2dnrr_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p2_2dnrr_done):
|
||||
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points2_identity
 *
 * Copies the first two 32-bit words of every source element to the
 * destination with integer moves (DST0 = SRC0, DST1 = SRC1).  The third
 * and fourth destination words are not written.  The destination header
 * receives V4F_COUNT = source count, V4F_SIZE = 2 and VEC_SIZE_2 or-ed into
 * V4F_FLAGS.
 *
 * Nothing is written when the source count is zero.  When the source and
 * destination data pointers are equal the header is still updated but the
 * copy loop is skipped.
 *
 * The matrix argument is not needed for an identity transform, so it is
 * not loaded; EDX serves only as a scratch register for SRC1.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EBX/EDX = scratch.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (three registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.  SRCn/DSTn are defined earlier in this file,
 * outside this excerpt; presumably the n-th 32-bit word at ESI/EDI.
 * The x86_p2_ir_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points2_identity )
|
||||
HIDDEN(_mesa_x86_transform_points2_identity)
|
||||
GLNAME( _mesa_x86_transform_points2_identity ):
|
||||
|
||||
#define FRAME_OFFSET 12
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p2_ir_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
CMP_L( ESI, EDI )
|
||||
JE( LLBL(x86_p2_ir_done) ) /* same data pointer: no copy needed */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p2_ir_loop):
|
||||
|
||||
MOV_L( SRC0, EBX )
|
||||
MOV_L( SRC1, EDX )
|
||||
|
||||
MOV_L( EBX, DST0 )
|
||||
MOV_L( EDX, DST1 )
|
||||
|
||||
LLBL(x86_p2_ir_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p2_ir_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p2_ir_done):
|
||||
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
/*
 * On ELF/Linux builds, emit an empty .note.GNU-stack section.  By GNU
 * toolchain convention this marks the object as not requiring an
 * executable stack.
 */
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,644 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
|
||||
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
|
||||
* in there will break the build on some platforms.
|
||||
*/
|
||||
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/*
 * 32-bit integer constants meant to be stored with integer moves.
 * 1065353216 is 0x3F800000, the IEEE-754 single-precision encoding of 1.0;
 * 0 is the encoding of +0.0.
 */
#define FP_ONE 1065353216
|
||||
#define FP_ZERO 0
|
||||
|
||||
/*
 * SRCn: operand at byte offset 4*n from ESI (n-th 32-bit word of the
 * current source element).  REGOFF comes from assyntax.h; presumably it
 * forms a register-plus-displacement memory operand.
 */
#define SRC0 REGOFF(0, ESI)
|
||||
#define SRC1 REGOFF(4, ESI)
|
||||
#define SRC2 REGOFF(8, ESI)
|
||||
#define SRC3 REGOFF(12, ESI)
|
||||
/* DSTn: operand at byte offset 4*n from EDI (current destination element). */
#define DST0 REGOFF(0, EDI)
|
||||
#define DST1 REGOFF(4, EDI)
|
||||
#define DST2 REGOFF(8, EDI)
|
||||
#define DST3 REGOFF(12, EDI)
|
||||
/* MATn: operand at byte offset 4*n from EDX (n-th of 16 matrix words). */
#define MAT0 REGOFF(0, EDX)
|
||||
#define MAT1 REGOFF(4, EDX)
|
||||
#define MAT2 REGOFF(8, EDX)
|
||||
#define MAT3 REGOFF(12, EDX)
|
||||
#define MAT4 REGOFF(16, EDX)
|
||||
#define MAT5 REGOFF(20, EDX)
|
||||
#define MAT6 REGOFF(24, EDX)
|
||||
#define MAT7 REGOFF(28, EDX)
|
||||
#define MAT8 REGOFF(32, EDX)
|
||||
#define MAT9 REGOFF(36, EDX)
|
||||
#define MAT10 REGOFF(40, EDX)
|
||||
#define MAT11 REGOFF(44, EDX)
|
||||
#define MAT12 REGOFF(48, EDX)
|
||||
#define MAT13 REGOFF(52, EDX)
|
||||
#define MAT14 REGOFF(56, EDX)
|
||||
#define MAT15 REGOFF(60, EDX)
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_general
 *
 * For every source element writes four floats to the destination; for
 * k = 0..3:
 *   DSTk = SRC0 * MATk + SRC1 * MAT(4+k) + SRC2 * MAT(8+k) + MAT(12+k)
 * The destination header receives V4F_COUNT = source count, V4F_SIZE = 4
 * and VEC_SIZE_4 or-ed into V4F_FLAGS.  Nothing is written when the source
 * count is zero.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EDX = matrix argument.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (two registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.
 * The comments on the FPU instructions list the x87 stack after the
 * instruction, top of stack first; all eight x87 registers are in use at
 * the deepest points.
 * The x86_p3_gr_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points3_general )
|
||||
HIDDEN(_mesa_x86_transform_points3_general)
|
||||
GLNAME( _mesa_x86_transform_points3_general ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p3_gr_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p3_gr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
FLD_S( SRC0 ) /* F5 F4 */
|
||||
FMUL_S( MAT1 )
|
||||
FLD_S( SRC0 ) /* F6 F5 F4 */
|
||||
FMUL_S( MAT2 )
|
||||
FLD_S( SRC0 ) /* F7 F6 F5 F4 */
|
||||
FMUL_S( MAT3 )
|
||||
|
||||
FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT4 )
|
||||
FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT6 )
|
||||
FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT7 )
|
||||
|
||||
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
|
||||
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
|
||||
|
||||
FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT8 )
|
||||
FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT9 )
|
||||
FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT10 )
|
||||
FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT11 )
|
||||
|
||||
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
|
||||
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
|
||||
|
||||
FXCH( ST(3) ) /* F4 F6 F5 F7 */
|
||||
FADD_S( MAT12 )
|
||||
FXCH( ST(2) ) /* F5 F6 F4 F7 */
|
||||
FADD_S( MAT13 )
|
||||
FXCH( ST(1) ) /* F6 F5 F4 F7 */
|
||||
FADD_S( MAT14 )
|
||||
FXCH( ST(3) ) /* F7 F5 F4 F6 */
|
||||
FADD_S( MAT15 )
|
||||
|
||||
FXCH( ST(2) ) /* F4 F5 F7 F6 */
|
||||
FSTP_S( DST0 ) /* F5 F7 F6 */
|
||||
FSTP_S( DST1 ) /* F7 F6 */
|
||||
FXCH( ST(1) ) /* F6 F7 */
|
||||
FSTP_S( DST2 ) /* F7 */
|
||||
FSTP_S( DST3 ) /* */
|
||||
|
||||
LLBL(x86_p3_gr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p3_gr_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p3_gr_done):
|
||||
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_perspective
 *
 * For every source element writes four 32-bit values to the destination:
 *   DST0 = SRC0 * MAT0 + SRC2 * MAT8
 *   DST1 = SRC1 * MAT5 + SRC2 * MAT9
 *   DST2 = SRC2 * MAT10 + MAT14
 *   DST3 = SRC2 with bit 31 toggled (integer XOR with -2147483648, i.e.
 *          0x80000000), which flips the sign of a single-precision float
 * The destination header receives V4F_COUNT = source count, V4F_SIZE = 4
 * and VEC_SIZE_4 or-ed into V4F_FLAGS.  Nothing is written when the source
 * count is zero.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EDX = matrix argument, EBX = scratch for
 * the sign-flipped SRC2 word.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (three registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.
 * The comments on the FPU instructions list the x87 stack after the
 * instruction, top of stack first.
 * The x86_p3_pr_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
|
||||
HIDDEN(_mesa_x86_transform_points3_perspective)
|
||||
GLNAME( _mesa_x86_transform_points3_perspective ):
|
||||
|
||||
#define FRAME_OFFSET 12
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p3_pr_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p3_pr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
|
||||
FLD_S( SRC1 ) /* F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FLD_S( SRC2 ) /* F0 F5 F4 */
|
||||
FMUL_S( MAT8 )
|
||||
FLD_S( SRC2 ) /* F1 F0 F5 F4 */
|
||||
FMUL_S( MAT9 )
|
||||
FLD_S( SRC2 ) /* F2 F1 F0 F5 F4 */
|
||||
FMUL_S( MAT10 )
|
||||
|
||||
FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */
|
||||
FADDP( ST0, ST(4) ) /* F1 F2 F5 F4 */
|
||||
FADDP( ST0, ST(2) ) /* F2 F5 F4 */
|
||||
FLD_S( MAT14 ) /* F6 F2 F5 F4 */
|
||||
FXCH( ST(1) ) /* F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
|
||||
|
||||
MOV_L( SRC2, EBX ) /* raw SRC2 word, fetched before any DST store */
|
||||
XOR_L( CONST(-2147483648), EBX )/* change sign */
|
||||
|
||||
FXCH( ST(2) ) /* F4 F5 F6 */
|
||||
FSTP_S( DST0 ) /* F5 F6 */
|
||||
FSTP_S( DST1 ) /* F6 */
|
||||
FSTP_S( DST2 ) /* */
|
||||
MOV_L( EBX, DST3 )
|
||||
|
||||
LLBL(x86_p3_pr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p3_pr_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p3_pr_done):
|
||||
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_3d
 *
 * For every source element writes three floats to the destination; for
 * k = 0..2:
 *   DSTk = SRC0 * MATk + SRC1 * MAT(4+k) + SRC2 * MAT(8+k) + MAT(12+k)
 * The fourth destination word is not written.  The destination header
 * receives V4F_COUNT = source count, V4F_SIZE = 3 and VEC_SIZE_3 or-ed into
 * V4F_FLAGS.  Nothing is written when the source count is zero.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EDX = matrix argument.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (two registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.
 * The comments on the FPU instructions list the x87 stack after the
 * instruction, top of stack first.
 * The x86_p3_3dr_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points3_3d )
|
||||
HIDDEN(_mesa_x86_transform_points3_3d)
|
||||
GLNAME( _mesa_x86_transform_points3_3d ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p3_3dr_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p3_3dr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
FLD_S( SRC0 ) /* F5 F4 */
|
||||
FMUL_S( MAT1 )
|
||||
FLD_S( SRC0 ) /* F6 F5 F4 */
|
||||
FMUL_S( MAT2 )
|
||||
|
||||
FLD_S( SRC1 ) /* F0 F6 F5 F4 */
|
||||
FMUL_S( MAT4 )
|
||||
FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT6 )
|
||||
|
||||
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
|
||||
|
||||
FLD_S( SRC2 ) /* F0 F6 F5 F4 */
|
||||
FMUL_S( MAT8 )
|
||||
FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT9 )
|
||||
FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT10 )
|
||||
|
||||
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
|
||||
|
||||
FXCH( ST(2) ) /* F4 F5 F6 */
|
||||
FADD_S( MAT12 )
|
||||
FXCH( ST(1) ) /* F5 F4 F6 */
|
||||
FADD_S( MAT13 )
|
||||
FXCH( ST(2) ) /* F6 F4 F5 */
|
||||
FADD_S( MAT14 )
|
||||
|
||||
FXCH( ST(1) ) /* F4 F6 F5 */
|
||||
FSTP_S( DST0 ) /* F6 F5 */
|
||||
FXCH( ST(1) ) /* F5 F6 */
|
||||
FSTP_S( DST1 ) /* F6 */
|
||||
FSTP_S( DST2 ) /* */
|
||||
|
||||
LLBL(x86_p3_3dr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p3_3dr_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p3_3dr_done):
|
||||
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_3d_no_rot
 *
 * For every source element writes three floats to the destination:
 *   DST0 = SRC0 * MAT0 + MAT12
 *   DST1 = SRC1 * MAT5 + MAT13
 *   DST2 = SRC2 * MAT10 + MAT14
 * The fourth destination word is not written.  The destination header
 * receives V4F_COUNT = source count, V4F_SIZE = 3 and VEC_SIZE_3 or-ed into
 * V4F_FLAGS.  Nothing is written when the source count is zero.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EDX = matrix argument.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (two registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.
 * The comments on the FPU instructions list the x87 stack after the
 * instruction, top of stack first.
 * The x86_p3_3dnrr_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
|
||||
HIDDEN(_mesa_x86_transform_points3_3d_no_rot)
|
||||
GLNAME( _mesa_x86_transform_points3_3d_no_rot ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p3_3dnrr_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p3_3dnrr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
|
||||
FLD_S( SRC1 ) /* F1 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FLD_S( SRC2 ) /* F2 F1 F4 */
|
||||
FMUL_S( MAT10 )
|
||||
|
||||
FXCH( ST(2) ) /* F4 F1 F2 */
|
||||
FADD_S( MAT12 )
|
||||
FLD_S( MAT13 ) /* F5 F4 F1 F2 */
|
||||
FXCH( ST(2) ) /* F1 F4 F5 F2 */
|
||||
FADDP( ST0, ST(2) ) /* F4 F5 F2 */
|
||||
FLD_S( MAT14 ) /* F6 F4 F5 F2 */
|
||||
FXCH( ST(3) ) /* F2 F4 F5 F6 */
|
||||
FADDP( ST0, ST(3) ) /* F4 F5 F6 */
|
||||
|
||||
FSTP_S( DST0 ) /* F5 F6 */
|
||||
FSTP_S( DST1 ) /* F6 */
|
||||
FSTP_S( DST2 ) /* */
|
||||
|
||||
LLBL(x86_p3_3dnrr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p3_3dnrr_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p3_3dnrr_done):
|
||||
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_2d
 *
 * For every source element writes three 32-bit values to the destination:
 *   DST0 = SRC0 * MAT0 + SRC1 * MAT4 + MAT12
 *   DST1 = SRC0 * MAT1 + SRC1 * MAT5 + MAT13
 *   DST2 = SRC2 (copied as a raw 32-bit word through EBX)
 * The fourth destination word is not written.  The destination header
 * receives V4F_COUNT = source count, V4F_SIZE = 3 and VEC_SIZE_3 or-ed into
 * V4F_FLAGS.  Nothing is written when the source count is zero.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EDX = matrix argument, EBX = scratch for
 * the SRC2 word.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (three registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.
 * The comments on the FPU instructions list the x87 stack after the
 * instruction, top of stack first.
 * The x86_p3_2dr_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points3_2d )
|
||||
HIDDEN(_mesa_x86_transform_points3_2d)
|
||||
GLNAME( _mesa_x86_transform_points3_2d ):
|
||||
|
||||
#define FRAME_OFFSET 12
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p3_2dr_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p3_2dr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
FLD_S( SRC0 ) /* F5 F4 */
|
||||
FMUL_S( MAT1 )
|
||||
|
||||
FLD_S( SRC1 ) /* F0 F5 F4 */
|
||||
FMUL_S( MAT4 )
|
||||
FLD_S( SRC1 ) /* F1 F0 F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FXCH( ST(1) ) /* F0 F1 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F1 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F5 F4 */
|
||||
|
||||
FXCH( ST(1) ) /* F4 F5 */
|
||||
FADD_S( MAT12 )
|
||||
FXCH( ST(1) ) /* F5 F4 */
|
||||
FADD_S( MAT13 )
|
||||
|
||||
MOV_L( SRC2, EBX ) /* raw SRC2 word, fetched before any DST store */
|
||||
|
||||
FXCH( ST(1) ) /* F4 F5 */
|
||||
FSTP_S( DST0 ) /* F5 */
|
||||
FSTP_S( DST1 ) /* */
|
||||
MOV_L( EBX, DST2 )
|
||||
|
||||
LLBL(x86_p3_2dr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p3_2dr_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p3_2dr_done):
|
||||
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_2d_no_rot
 *
 * For every source element writes three 32-bit values to the destination:
 *   DST0 = SRC0 * MAT0 + MAT12
 *   DST1 = SRC1 * MAT5 + MAT13
 *   DST2 = SRC2 (copied as a raw 32-bit word through EBX)
 * The fourth destination word is not written.  The destination header
 * receives V4F_COUNT = source count, V4F_SIZE = 3 and VEC_SIZE_3 or-ed into
 * V4F_FLAGS.  Nothing is written when the source count is zero.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EDX = matrix argument, EBX = scratch for
 * the SRC2 word.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (three registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.
 * The comments on the FPU instructions list the x87 stack after the
 * instruction, top of stack first.
 * The x86_p3_2dnrr_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
|
||||
HIDDEN(_mesa_x86_transform_points3_2d_no_rot)
|
||||
GLNAME( _mesa_x86_transform_points3_2d_no_rot ):
|
||||
|
||||
#define FRAME_OFFSET 12
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p3_2dnrr_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p3_2dnrr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
|
||||
FLD_S( SRC1 ) /* F1 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FXCH( ST(1) ) /* F4 F1 */
|
||||
FADD_S( MAT12 )
|
||||
FLD_S( MAT13 ) /* F5 F4 F1 */
|
||||
|
||||
FXCH( ST(2) ) /* F1 F4 F5 */
|
||||
FADDP( ST0, ST(2) ) /* F4 F5 */
|
||||
|
||||
MOV_L( SRC2, EBX ) /* raw SRC2 word, fetched before any DST store */
|
||||
|
||||
FSTP_S( DST0 ) /* F5 */
|
||||
FSTP_S( DST1 ) /* */
|
||||
MOV_L( EBX, DST2 )
|
||||
|
||||
LLBL(x86_p3_2dnrr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p3_2dnrr_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p3_2dnrr_done):
|
||||
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * _mesa_x86_transform_points3_identity
 *
 * Copies the first three 32-bit words of every source element to the
 * destination with integer moves (DST0 = SRC0, DST1 = SRC1, DST2 = SRC2).
 * The fourth destination word is not written.  The destination header
 * receives V4F_COUNT = source count, V4F_SIZE = 3 and VEC_SIZE_3 or-ed into
 * V4F_FLAGS.
 *
 * Nothing is written when the source count is zero.  When the source and
 * destination data pointers are equal the header is still updated but the
 * copy loop is skipped.
 *
 * The matrix argument is not needed for an identity transform, so it is
 * not loaded; EDX serves only as a scratch register for SRC2.
 *
 * Registers in the loop: ESI = current source element, EAX = source
 * V4F_STRIDE, EDI = current destination element (16 bytes each),
 * ECX = destination end pointer, EBX/EBP/EDX = scratch.
 *
 * FRAME_OFFSET equals the bytes pushed by the prologue (four registers);
 * presumably the ARG_* macros use it to find the stack arguments - confirm
 * against xform_args.h.  It is undefined again after RET, as in the other
 * functions of this file.
 * The x86_p3_ir_skip label is not referenced inside this function.
 */
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points3_identity )
|
||||
HIDDEN(_mesa_x86_transform_points3_identity)
|
||||
GLNAME(_mesa_x86_transform_points3_identity ):
|
||||
|
||||
#define FRAME_OFFSET 16
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
PUSH_L( EBP )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p3_ir_done) ) /* empty source: nothing to do */
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX ) /* ECX = destination end pointer */
|
||||
|
||||
CMP_L( ESI, EDI )
|
||||
JE( LLBL(x86_p3_ir_done) ) /* same data pointer: no copy needed */
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p3_ir_loop):
|
||||
|
||||
MOV_L( SRC0, EBX )
|
||||
MOV_L( SRC1, EBP )
|
||||
MOV_L( SRC2, EDX )
|
||||
|
||||
MOV_L( EBX, DST0 )
|
||||
MOV_L( EBP, DST1 )
|
||||
MOV_L( EDX, DST2 )
|
||||
|
||||
LLBL(x86_p3_ir_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI ) /* next destination element */
|
||||
ADD_L( EAX, ESI ) /* advance source by its stride */
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p3_ir_loop) ) /* loop until EDI reaches the end pointer */
|
||||
|
||||
LLBL(x86_p3_ir_done):
|
||||
|
||||
POP_L( EBP )
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
/*
 * On ELF/Linux builds, emit an empty .note.GNU-stack section.  By GNU
 * toolchain convention this marks the object as not requiring an
 * executable stack.
 */
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,677 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
|
||||
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
|
||||
* in there will break the build on some platforms.
|
||||
*/
|
||||
|
||||
#include "assyntax.h"
|
||||
#include "matypes.h"
|
||||
#include "xform_args.h"
|
||||
|
||||
SEG_TEXT
|
||||
|
||||
/*
 * 32-bit integer constants meant to be stored with integer moves.
 * 1065353216 is 0x3F800000, the IEEE-754 single-precision encoding of 1.0;
 * 0 is the encoding of +0.0.
 */
#define FP_ONE 1065353216
|
||||
#define FP_ZERO 0
|
||||
|
||||
/*
 * SRCn: operand at byte offset 4*n from ESI (n-th 32-bit word of the
 * current source element).  REGOFF comes from assyntax.h; presumably it
 * forms a register-plus-displacement memory operand.
 */
#define SRC0 REGOFF(0, ESI)
|
||||
#define SRC1 REGOFF(4, ESI)
|
||||
#define SRC2 REGOFF(8, ESI)
|
||||
#define SRC3 REGOFF(12, ESI)
|
||||
/* DSTn: operand at byte offset 4*n from EDI (current destination element). */
#define DST0 REGOFF(0, EDI)
|
||||
#define DST1 REGOFF(4, EDI)
|
||||
#define DST2 REGOFF(8, EDI)
|
||||
#define DST3 REGOFF(12, EDI)
|
||||
/* MATn: operand at byte offset 4*n from EDX (n-th of 16 matrix words). */
#define MAT0 REGOFF(0, EDX)
|
||||
#define MAT1 REGOFF(4, EDX)
|
||||
#define MAT2 REGOFF(8, EDX)
|
||||
#define MAT3 REGOFF(12, EDX)
|
||||
#define MAT4 REGOFF(16, EDX)
|
||||
#define MAT5 REGOFF(20, EDX)
|
||||
#define MAT6 REGOFF(24, EDX)
|
||||
#define MAT7 REGOFF(28, EDX)
|
||||
#define MAT8 REGOFF(32, EDX)
|
||||
#define MAT9 REGOFF(36, EDX)
|
||||
#define MAT10 REGOFF(40, EDX)
|
||||
#define MAT11 REGOFF(44, EDX)
|
||||
#define MAT12 REGOFF(48, EDX)
|
||||
#define MAT13 REGOFF(52, EDX)
|
||||
#define MAT14 REGOFF(56, EDX)
|
||||
#define MAT15 REGOFF(60, EDX)
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points4_general )
|
||||
HIDDEN(_mesa_x86_transform_points4_general)
|
||||
GLNAME( _mesa_x86_transform_points4_general ):
|
||||
|
||||
#define FRAME_OFFSET 8
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p4_gr_done) )
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX )
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p4_gr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
FLD_S( SRC0 ) /* F5 F4 */
|
||||
FMUL_S( MAT1 )
|
||||
FLD_S( SRC0 ) /* F6 F5 F4 */
|
||||
FMUL_S( MAT2 )
|
||||
FLD_S( SRC0 ) /* F7 F6 F5 F4 */
|
||||
FMUL_S( MAT3 )
|
||||
|
||||
FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT4 )
|
||||
FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT6 )
|
||||
FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT7 )
|
||||
|
||||
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
|
||||
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
|
||||
|
||||
FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT8 )
|
||||
FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT9 )
|
||||
FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT10 )
|
||||
FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT11 )
|
||||
|
||||
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
|
||||
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
|
||||
|
||||
FLD_S( SRC3 ) /* F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT12 )
|
||||
FLD_S( SRC3 ) /* F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT13 )
|
||||
FLD_S( SRC3 ) /* F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT14 )
|
||||
FLD_S( SRC3 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
|
||||
FMUL_S( MAT15 )
|
||||
|
||||
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
|
||||
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
|
||||
|
||||
FXCH( ST(3) ) /* F4 F6 F5 F7 */
|
||||
FSTP_S( DST0 ) /* F6 F5 F7 */
|
||||
FXCH( ST(1) ) /* F5 F6 F7 */
|
||||
FSTP_S( DST1 ) /* F6 F7 */
|
||||
FSTP_S( DST2 ) /* F7 */
|
||||
FSTP_S( DST3 ) /* */
|
||||
|
||||
LLBL(x86_p4_gr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p4_gr_loop) )
|
||||
|
||||
LLBL(x86_p4_gr_done):
|
||||
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
|
||||
HIDDEN(_mesa_x86_transform_points4_perspective)
|
||||
GLNAME( _mesa_x86_transform_points4_perspective ):
|
||||
|
||||
#define FRAME_OFFSET 12
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p4_pr_done) )
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX )
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p4_pr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
|
||||
FLD_S( SRC1 ) /* F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FLD_S( SRC2 ) /* F0 F5 F4 */
|
||||
FMUL_S( MAT8 )
|
||||
FLD_S( SRC2 ) /* F1 F0 F5 F4 */
|
||||
FMUL_S( MAT9 )
|
||||
FLD_S( SRC2 ) /* F6 F1 F0 F5 F4 */
|
||||
FMUL_S( MAT10 )
|
||||
|
||||
FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */
|
||||
FADDP( ST0, ST(4) ) /* F1 F6 F5 F4 */
|
||||
FADDP( ST0, ST(2) ) /* F6 F5 F4 */
|
||||
|
||||
FLD_S( SRC3 ) /* F2 F6 F5 F4 */
|
||||
FMUL_S( MAT14 )
|
||||
|
||||
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
|
||||
|
||||
MOV_L( SRC2, EBX )
|
||||
XOR_L( CONST(-2147483648), EBX )/* change sign */
|
||||
|
||||
FXCH( ST(2) ) /* F4 F5 F6 */
|
||||
FSTP_S( DST0 ) /* F5 F6 */
|
||||
FSTP_S( DST1 ) /* F6 */
|
||||
FSTP_S( DST2 ) /* */
|
||||
MOV_L( EBX, DST3 )
|
||||
|
||||
LLBL(x86_p4_pr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p4_pr_loop) )
|
||||
|
||||
LLBL(x86_p4_pr_done):
|
||||
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points4_3d )
|
||||
HIDDEN(_mesa_x86_transform_points4_3d)
|
||||
GLNAME( _mesa_x86_transform_points4_3d ):
|
||||
|
||||
#define FRAME_OFFSET 12
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p4_3dr_done) )
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX )
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p4_3dr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
FLD_S( SRC0 ) /* F5 F4 */
|
||||
FMUL_S( MAT1 )
|
||||
FLD_S( SRC0 ) /* F6 F5 F4 */
|
||||
FMUL_S( MAT2 )
|
||||
|
||||
FLD_S( SRC1 ) /* F0 F6 F5 F4 */
|
||||
FMUL_S( MAT4 )
|
||||
FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT6 )
|
||||
|
||||
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
|
||||
|
||||
FLD_S( SRC2 ) /* F0 F6 F5 F4 */
|
||||
FMUL_S( MAT8 )
|
||||
FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT9 )
|
||||
FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT10 )
|
||||
|
||||
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
|
||||
|
||||
FLD_S( SRC3 ) /* F0 F6 F5 F4 */
|
||||
FMUL_S( MAT12 )
|
||||
FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT13 )
|
||||
FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT14 )
|
||||
|
||||
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
|
||||
|
||||
MOV_L( SRC3, EBX )
|
||||
|
||||
FXCH( ST(2) ) /* F4 F5 F6 */
|
||||
FSTP_S( DST0 ) /* F5 F6 */
|
||||
FSTP_S( DST1 ) /* F6 */
|
||||
FSTP_S( DST2 ) /* */
|
||||
MOV_L( EBX, DST3 )
|
||||
|
||||
LLBL(x86_p4_3dr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p4_3dr_loop) )
|
||||
|
||||
LLBL(x86_p4_3dr_done):
|
||||
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
|
||||
HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
|
||||
GLNAME(_mesa_x86_transform_points4_3d_no_rot):
|
||||
|
||||
#define FRAME_OFFSET 12
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p4_3dnrr_done) )
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX )
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p4_3dnrr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
|
||||
FLD_S( SRC1 ) /* F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FLD_S( SRC2 ) /* F6 F5 F4 */
|
||||
FMUL_S( MAT10 )
|
||||
|
||||
FLD_S( SRC3 ) /* F0 F6 F5 F4 */
|
||||
FMUL_S( MAT12 )
|
||||
FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT13 )
|
||||
FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
|
||||
FMUL_S( MAT14 )
|
||||
|
||||
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
|
||||
|
||||
MOV_L( SRC3, EBX )
|
||||
|
||||
FXCH( ST(2) ) /* F4 F5 F6 */
|
||||
FSTP_S( DST0 ) /* F5 F6 */
|
||||
FSTP_S( DST1 ) /* F6 */
|
||||
FSTP_S( DST2 ) /* */
|
||||
MOV_L( EBX, DST3 )
|
||||
|
||||
LLBL(x86_p4_3dnrr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p4_3dnrr_loop) )
|
||||
|
||||
LLBL(x86_p4_3dnrr_done):
|
||||
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points4_2d )
|
||||
HIDDEN(_mesa_x86_transform_points4_2d)
|
||||
GLNAME( _mesa_x86_transform_points4_2d ):
|
||||
|
||||
#define FRAME_OFFSET 16
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
PUSH_L( EBP )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p4_2dr_done) )
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX )
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p4_2dr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
FLD_S( SRC0 ) /* F5 F4 */
|
||||
FMUL_S( MAT1 )
|
||||
|
||||
FLD_S( SRC1 ) /* F0 F5 F4 */
|
||||
FMUL_S( MAT4 )
|
||||
FLD_S( SRC1 ) /* F1 F0 F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FXCH( ST(1) ) /* F0 F1 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F1 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F5 F4 */
|
||||
|
||||
FLD_S( SRC3 ) /* F0 F5 F4 */
|
||||
FMUL_S( MAT12 )
|
||||
FLD_S( SRC3 ) /* F1 F0 F5 F4 */
|
||||
FMUL_S( MAT13 )
|
||||
|
||||
FXCH( ST(1) ) /* F0 F1 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F1 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F5 F4 */
|
||||
|
||||
MOV_L( SRC2, EBX )
|
||||
MOV_L( SRC3, EBP )
|
||||
|
||||
FXCH( ST(1) ) /* F4 F5 */
|
||||
FSTP_S( DST0 ) /* F5 */
|
||||
FSTP_S( DST1 ) /* */
|
||||
MOV_L( EBX, DST2 )
|
||||
MOV_L( EBP, DST3 )
|
||||
|
||||
LLBL(x86_p4_2dr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p4_2dr_loop) )
|
||||
|
||||
LLBL(x86_p4_2dr_done):
|
||||
|
||||
POP_L( EBP )
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
|
||||
HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
|
||||
GLNAME( _mesa_x86_transform_points4_2d_no_rot ):
|
||||
|
||||
#define FRAME_OFFSET 16
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
PUSH_L( EBP )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p4_2dnrr_done) )
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX )
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p4_2dnrr_loop):
|
||||
|
||||
FLD_S( SRC0 ) /* F4 */
|
||||
FMUL_S( MAT0 )
|
||||
|
||||
FLD_S( SRC1 ) /* F5 F4 */
|
||||
FMUL_S( MAT5 )
|
||||
|
||||
FLD_S( SRC3 ) /* F0 F5 F4 */
|
||||
FMUL_S( MAT12 )
|
||||
FLD_S( SRC3 ) /* F1 F0 F5 F4 */
|
||||
FMUL_S( MAT13 )
|
||||
|
||||
FXCH( ST(1) ) /* F0 F1 F5 F4 */
|
||||
FADDP( ST0, ST(3) ) /* F1 F5 F4 */
|
||||
FADDP( ST0, ST(1) ) /* F5 F4 */
|
||||
|
||||
MOV_L( SRC2, EBX )
|
||||
MOV_L( SRC3, EBP )
|
||||
|
||||
FXCH( ST(1) ) /* F4 F5 */
|
||||
FSTP_S( DST0 ) /* F5 */
|
||||
FSTP_S( DST1 ) /* */
|
||||
MOV_L( EBX, DST2 )
|
||||
MOV_L( EBP, DST3 )
|
||||
|
||||
LLBL(x86_p4_2dnrr_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p4_2dnrr_loop) )
|
||||
|
||||
LLBL(x86_p4_2dnrr_done):
|
||||
|
||||
POP_L( EBP )
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
#undef FRAME_OFFSET
|
||||
|
||||
|
||||
|
||||
|
||||
ALIGNTEXT16
|
||||
GLOBL GLNAME( _mesa_x86_transform_points4_identity )
|
||||
HIDDEN(_mesa_x86_transform_points4_identity)
|
||||
GLNAME( _mesa_x86_transform_points4_identity ):
|
||||
|
||||
#define FRAME_OFFSET 12
|
||||
PUSH_L( ESI )
|
||||
PUSH_L( EDI )
|
||||
PUSH_L( EBX )
|
||||
|
||||
MOV_L( ARG_SOURCE, ESI )
|
||||
MOV_L( ARG_DEST, EDI )
|
||||
|
||||
MOV_L( ARG_MATRIX, EDX )
|
||||
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
|
||||
|
||||
TEST_L( ECX, ECX )
|
||||
JZ( LLBL(x86_p4_ir_done) )
|
||||
|
||||
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
|
||||
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
|
||||
|
||||
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
|
||||
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
|
||||
|
||||
SHL_L( CONST(4), ECX )
|
||||
MOV_L( REGOFF(V4F_START, ESI), ESI )
|
||||
|
||||
MOV_L( REGOFF(V4F_START, EDI), EDI )
|
||||
ADD_L( EDI, ECX )
|
||||
|
||||
CMP_L( ESI, EDI )
|
||||
JE( LLBL(x86_p4_ir_done) )
|
||||
|
||||
ALIGNTEXT16
|
||||
LLBL(x86_p4_ir_loop):
|
||||
|
||||
MOV_L( SRC0, EBX )
|
||||
MOV_L( SRC1, EDX )
|
||||
|
||||
MOV_L( EBX, DST0 )
|
||||
MOV_L( EDX, DST1 )
|
||||
|
||||
MOV_L( SRC2, EBX )
|
||||
MOV_L( SRC3, EDX )
|
||||
|
||||
MOV_L( EBX, DST2 )
|
||||
MOV_L( EDX, DST3 )
|
||||
|
||||
LLBL(x86_p4_ir_skip):
|
||||
|
||||
ADD_L( CONST(16), EDI )
|
||||
ADD_L( EAX, ESI )
|
||||
CMP_L( ECX, EDI )
|
||||
JNE( LLBL(x86_p4_ir_loop) )
|
||||
|
||||
LLBL(x86_p4_ir_done):
|
||||
|
||||
POP_L( EBX )
|
||||
POP_L( EDI )
|
||||
POP_L( ESI )
|
||||
RET
|
||||
|
||||
#if defined (__ELF__) && defined (__linux__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
|
@ -0,0 +1,51 @@
|
|||
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
* Version: 3.5
|
||||
*
|
||||
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Transform function interface for assembly code. Simply define
|
||||
* FRAME_OFFSET to the number of bytes pushed onto the stack before
|
||||
* using the ARG_* argument macros.
|
||||
*
|
||||
* Gareth Hughes
|
||||
*/
|
||||
|
||||
#ifndef __XFORM_ARGS_H__
#define __XFORM_ARGS_H__

/*
 * Byte offsets of the transform_func arguments relative to the stack
 * pointer at function entry, in declaration order:
 *
 *	typedef void (*transform_func)( GLvector4f *to_vec,
 *					const GLfloat m[16],
 *					const GLvector4f *from_vec );
 *
 * to_vec is at +4, m at +8 and from_vec at +12.
 */
#define OFFSET_DEST	4
#define OFFSET_MATRIX	8
#define OFFSET_SOURCE	12

/*
 * Stack operands for the three arguments.  The including file must define
 * FRAME_OFFSET as the number of bytes it has pushed since entry, so that
 * the ESP-relative addresses still land on the arguments.
 */
#define ARG_DEST	REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
#define ARG_MATRIX	REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP)
#define ARG_SOURCE	REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)

#endif /* __XFORM_ARGS_H__ */
|
Loading…
Reference in New Issue