Add mesa headers from git commit fa7829c36b78b8ecc42238cbc0a02d1059320c77

This commit is contained in:
Brian Nguyen 2013-08-12 13:12:09 -07:00 committed by brnguyen
parent 810c434324
commit 535b1cb0ab
52 changed files with 17371 additions and 0 deletions

145
include/c99_compat.h Normal file
View File

@ -0,0 +1,145 @@
/**************************************************************************
*
* Copyright 2007-2013 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
#ifndef _C99_COMPAT_H_
#define _C99_COMPAT_H_
/*
* MSVC hacks.
*
* Everything in this block is compiled only when _MSC_VER is defined.
*/
#if defined(_MSC_VER)
/*
* Visual Studio 2012 will complain if we define the `inline` keyword, but
* actually it only supports the keyword on C++.
*
* To avoid this the _ALLOW_KEYWORD_MACROS must be set.
* (_MSC_VER 1700 is the version threshold used for that check below.)
*/
# if (_MSC_VER >= 1700) && !defined(_ALLOW_KEYWORD_MACROS)
# define _ALLOW_KEYWORD_MACROS
# endif
/*
* XXX: MSVC has a `__restrict` keyword, but it also has a
* `__declspec(restrict)` modifier, so it is impossible to define a
* `restrict` macro without interfering with the latter. Furthermore the
* MSVC standard library uses __declspec(restrict) under the _CRTRESTRICT
* macro. For now resolve this issue by redefining _CRTRESTRICT to nothing,
* but going forward we should probably stop using restrict, especially
* considering that our code does not obey strict aliasing rules anyway.
*/
/* crtdefs.h is pulled in first so that the empty _CRTRESTRICT below replaces
* whatever definition that header provides. */
# include <crtdefs.h>
# undef _CRTRESTRICT
# define _CRTRESTRICT
#endif
/*
* C99 inline keyword
*
* Provides an `inline` macro on compilers that lack the keyword in C mode.
* Nothing is done when `inline` is already a macro. The branches are tried
* in order; the first matching compiler test wins, and compilers that match
* none of them get an empty definition (the keyword is simply dropped).
*/
#ifndef inline
# ifdef __cplusplus
/* C++ supports inline keyword */
# elif defined(__GNUC__)
# define inline __inline__
# elif defined(_MSC_VER)
# define inline __inline
# elif defined(__ICL)
# define inline __inline
# elif defined(__INTEL_COMPILER)
/* Intel compiler supports inline keyword */
# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
# define inline __inline
# elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
/* C99 supports inline keyword */
# elif (__STDC_VERSION__ >= 199901L)
/* C99 supports inline keyword */
/* NOTE(review): __STDC_VERSION__ is evaluated without a defined() check, so
* a compiler reaching this branch without defining it sees 0 here. */
# else
/* Unknown compiler: drop the keyword entirely. */
# define inline
# endif
#endif
/*
 * C99 restrict keyword
 *
 * `restrict` is left alone on compilers that implement C99 (or Sun's C99
 * feature set), mapped to the vendor spelling on GCC-compatible compilers
 * and MSVC, and defined away everywhere else.
 *
 * __STDC_VERSION__ is tested with defined() first: C89 and C++ translation
 * units do not define it, and evaluating an undefined macro inside #if
 * triggers -Wundef diagnostics.
 *
 * See also:
 * - http://cellperformance.beyond3d.com/articles/2006/05/demystifying-the-restrict-keyword.html
 */
#ifndef restrict
#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
     /* C99: native keyword */
#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
     /* C99: native keyword */
#  elif defined(__GNUC__)
#    define restrict __restrict__
#  elif defined(_MSC_VER)
#    define restrict __restrict
#  else
     /* Unknown compiler: drop the qualifier. */
#    define restrict /* */
#  endif
#endif
/*
 * C99 __func__ macro
 *
 * Compilers that provide __func__ natively (C99 and later, C++11 and later,
 * Sun C with C99 features) are left alone. Older GCC and MSVC releases get
 * it mapped to __FUNCTION__; anything else falls back to a placeholder
 * string.
 *
 * The version macros are tested with defined() first so that translation
 * units which do not define them (C89, C++) do not evaluate an undefined
 * macro inside #if. The explicit C++11 branch keeps the fallback from
 * replacing a conforming compiler's __func__ with "<unknown>".
 */
#ifndef __func__
#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
     /* C99 */
#  elif defined(__cplusplus) && (__cplusplus >= 201103L)
     /* C++11 */
#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
     /* C99 */
#  elif defined(__GNUC__)
#    if __GNUC__ >= 2
#      define __func__ __FUNCTION__
#    else
#      define __func__ "<unknown>"
#    endif
#  elif defined(_MSC_VER)
#    if _MSC_VER >= 1300
#      define __func__ __FUNCTION__
#    else
#      define __func__ "<unknown>"
#    endif
#  else
#    define __func__ "<unknown>"
#  endif
#endif
/* Simple test case for debugging.
* Compiled out with #if 0; switch it to 1 to check that `inline`, `restrict`
* and `__func__` all resolve to something the compiler accepts. */
#if 0
static inline const char *
test_c99_compat_h(const void * restrict a,
const void * restrict b)
{
return __func__;
}
#endif
#endif /* _C99_COMPAT_H_ */

445
include/compiler.h Normal file
View File

@ -0,0 +1,445 @@
/*
* Mesa 3-D graphics library
*
* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file compiler.h
* Compiler-related stuff.
*/
#ifndef COMPILER_H
#define COMPILER_H
#include <assert.h>
#include <ctype.h>
#if defined(__alpha__) && defined(CCPML)
#include <cpml.h> /* use Compaq's Fast Math Library on Alpha */
#else
#include <math.h>
#endif
#include <limits.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <float.h>
#include <stdarg.h>
#include "c99_compat.h" /* inline, __func__, etc. */
#ifdef __cplusplus
extern "C" {
#endif
/**
* Get standard integer types
*/
#include <stdint.h>
/**
* Sun compilers define __i386 instead of the gcc-style __i386__
*
* When building with Sun's compiler, define the gcc-style architecture
* macro that matches the Sun-style one, and provide __volatile as an alias
* for volatile if nothing else has defined it.
*/
#ifdef __SUNPRO_C
/* The #elif chain defines at most one architecture macro. */
# if !defined(__i386__) && defined(__i386)
# define __i386__
# elif !defined(__amd64__) && defined(__amd64)
# define __amd64__
# elif !defined(__sparc__) && defined(__sparc)
# define __sparc__
# endif
# if !defined(__volatile)
# define __volatile volatile
# endif
#endif
/**
* finite macro.
*
* Maps finite() to _finite() when compiling with MSVC or Watcom; other
* compilers are left untouched.
*/
#if defined(_MSC_VER)
# define finite _finite
#elif defined(__WATCOMC__)
# define finite _finite
#endif
/**
* Disable assorted warnings
*
* The `#pragma warning` lines apply to native Win32 builds (not OPENSTEP,
* not Cygwin, not BUILD_FOR_SNAP) with a non-GNU compiler. A further set is
* disabled only when MESA_MINWARN is defined. Watcom gets its own pragma.
*/
#if !defined(OPENSTEP) && (defined(_WIN32) && !defined(__CYGWIN__)) && !defined(BUILD_FOR_SNAP)
# if !defined(__GNUC__) /* i.e. not the mingw environment */
# pragma warning( disable : 4068 ) /* unknown pragma */
# pragma warning( disable : 4710 ) /* function 'foo' not inlined */
# pragma warning( disable : 4711 ) /* function 'foo' selected for automatic inline expansion */
# pragma warning( disable : 4127 ) /* conditional expression is constant */
# if defined(MESA_MINWARN)
# pragma warning( disable : 4244 ) /* '=' : conversion from 'const double ' to 'float ', possible loss of data */
# pragma warning( disable : 4018 ) /* '<' : signed/unsigned mismatch */
# pragma warning( disable : 4305 ) /* '=' : truncation from 'const double ' to 'float ' */
# pragma warning( disable : 4550 ) /* 'function' undefined; assuming extern returning int */
# pragma warning( disable : 4761 ) /* integral size mismatch in argument; conversion supplied */
# endif
# endif
#endif
#if defined(__WATCOMC__)
# pragma disable_message(201) /* Disable unreachable code warnings */
#endif
/* XXX: Use standard `inline` keyword instead.
* INLINE is an alias for `inline`, defined only when nothing else has
* already defined INLINE. */
#ifndef INLINE
# define INLINE inline
#endif
/**
 * PUBLIC/USED macros
 *
 * If we build the library with gcc's -fvisibility=hidden flag, we'll
 * use the PUBLIC macro to mark functions that are to be exported.
 *
 * We also need to define a USED attribute, so the optimizer doesn't
 * inline a static function that we later use in an alias. - ajax
 *
 * The two macros are guarded independently: a build that pre-defines
 * PUBLIC (for example on the compiler command line) still gets a USED
 * definition, and a pre-existing USED is never overridden.
 */
#if (defined(__GNUC__) && __GNUC__ >= 4) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
#  ifndef PUBLIC
#    define PUBLIC __attribute__((visibility("default")))
#  endif
#  ifndef USED
#    define USED __attribute__((used))
#  endif
#else
   /* No attribute support known: both macros expand to nothing. */
#  ifndef PUBLIC
#    define PUBLIC
#  endif
#  ifndef USED
#    define USED
#  endif
#endif
/**
 * Branch-prediction hints.
 *
 * GNU-compatible compilers get likely()/unlikely() built on
 * __builtin_expect(), with the condition normalised to 0/1 via `!!`.
 * Other compilers get a pass-through __builtin_expect(x, y) that yields x,
 * and likely()/unlikely() that yield their argument unchanged.
 * An existing `likely` macro suppresses both likely and unlikely.
 */
#ifdef __GNUC__
#  ifndef likely
#    define likely(x)   __builtin_expect(!!(x), 1)
#    define unlikely(x) __builtin_expect(!!(x), 0)
#  endif
#else
#  define __builtin_expect(x, y) (x)
#  ifndef likely
#    define likely(x)   (x)
#    define unlikely(x) (x)
#  endif
#endif
/* XXX: Use standard `__func__` instead.
 * Legacy spelling: __FUNCTION__ becomes an alias for __func__ unless it is
 * already a macro. */
#if !defined(__FUNCTION__)
#  define __FUNCTION__ __func__
#endif
/**
 * Either define MESA_BIG_ENDIAN or MESA_LITTLE_ENDIAN, and CPU_TO_LE32.
 * Do not use these unless absolutely necessary!
 * Try to use a runtime test instead.
 * For now, only used by some DRI hardware drivers for color/texel packing.
 *
 * Big-endian is selected only when BYTE_ORDER and BIG_ENDIAN are both
 * defined and equal; in every other case (including when no header has
 * defined BYTE_ORDER) the little-endian branch is taken and CPU_TO_LE32 is
 * the identity.
 *
 * LE32_TO_CPU is the same operation as CPU_TO_LE32 (a 32-bit byte swap is
 * its own inverse).
 */
#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
#  if defined(__linux__)
#    include <byteswap.h>
#    define CPU_TO_LE32( x ) bswap_32( x )
#  elif defined(__APPLE__)
#    include <CoreFoundation/CFByteOrder.h>
#    define CPU_TO_LE32( x ) CFSwapInt32HostToLittle( x )
#  elif (defined(_AIX) || defined(__blrts))
/* Hand-rolled 32-bit byte swap. Uses uint32_t from <stdint.h> rather than
 * GLuint, because this header does not pull in any GL type definitions. */
static INLINE uint32_t CPU_TO_LE32(uint32_t x)
{
   return (((x & 0x000000ff) << 24) |
           ((x & 0x0000ff00) <<  8) |
           ((x & 0x00ff0000) >>  8) |
           ((x & 0xff000000) >> 24));
}
#  elif defined(__OpenBSD__)
#    include <sys/types.h>
#    define CPU_TO_LE32( x ) htole32( x )
#  else /* other big-endian systems */
#    include <sys/endian.h>
#    define CPU_TO_LE32( x ) bswap32( x )
#  endif
#  define MESA_BIG_ENDIAN 1
#else
#  define CPU_TO_LE32( x ) ( x )
#  define MESA_LITTLE_ENDIAN 1
#endif
#define LE32_TO_CPU( x ) CPU_TO_LE32( x )
/* CAPI: calling-convention tag, set to _cdecl on Win32 builds (other than
* BUILD_FOR_SNAP) unless CAPI is already defined. It is left undefined on
* every other platform. */
#if !defined(CAPI) && defined(_WIN32) && !defined(BUILD_FOR_SNAP)
#define CAPI _cdecl
#endif
/**
* Create a macro so that asm functions can be linked into compilers other
* than GNU C
*
* _ASMAPI  - calling-convention tag: __cdecl on Win32 (other than
*            BUILD_FOR_SNAP), empty elsewhere.
* _ASMAPIP - the same tag combined with `*`, for declaring pointers; the
*            order of the two depends on PTR_DECL_IN_FRONT.
* Both are skipped entirely when _ASMAPI is already defined.
*/
#ifndef _ASMAPI
#if defined(_WIN32) && !defined(BUILD_FOR_SNAP)/* was: !defined( __GNUC__ ) && !defined( VMS ) && !defined( __INTEL_COMPILER )*/
#define _ASMAPI __cdecl
#else
#define _ASMAPI
#endif
#ifdef PTR_DECL_IN_FRONT
#define _ASMAPIP * _ASMAPI
#else
#define _ASMAPIP _ASMAPI *
#endif
#endif
/* _NORMAPI/_NORMAPIP mirror _ASMAPI/_ASMAPIP when USE_X86_ASM is defined;
* otherwise they reduce to nothing and a plain `*`. */
#ifdef USE_X86_ASM
#define _NORMAPI _ASMAPI
#define _NORMAPIP _ASMAPIP
#else
#define _NORMAPI
#define _NORMAPIP *
#endif
/* Turn off macro checking systems used by other libraries.
 * #undef is a no-op when CHECK is not a macro, so no guard is needed. */
#undef CHECK
/**
 * ASSERT macro
 *
 * Selection, in priority order:
 *  - BUILD_FOR_SNAP with CHECKED: forwards to _CHECK(X)
 *  - release build (DEBUG not defined): expands to nothing, so X is
 *    not evaluated
 *  - DEBUG build: forwards to the standard assert(X)
 *
 * Nothing is defined on Windows CE (_WIN32_WCE).
 */
#ifndef _WIN32_WCE
#  if defined(BUILD_FOR_SNAP) && defined(CHECKED)
#    define ASSERT(X) _CHECK(X)
#  elif !defined(DEBUG)
#    define ASSERT(X)
#  else
#    define ASSERT(X) assert(X)
#  endif
#endif
/**
 * Compile-time assertion.
 *
 * COND selects the dimension of a char array type: 1 when COND is true,
 * -1 when it is false. A negative array size is rejected by the compiler,
 * so a failing condition stops the build. Only sizeof is applied to the
 * type, so no object is created.
 *
 * Must be used where a statement is allowed (it expands to do/while).
 */
#define STATIC_ASSERT(COND) \
   do { \
      (void) sizeof(char [(COND) ? 1 : -1]); \
   } while (0)
/* PRINTFLIKE(f, a): marks a function as taking a printf-style format string
 * in argument f with the variadic arguments starting at argument a.
 * Expands to nothing unless the compiler reports __GNUC__ >= 3. */
#if !defined(__GNUC__) || (__GNUC__ < 3)
#define PRINTFLIKE(f, a)
#else
#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a)))
#endif
/* Fallback NULL for environments whose headers did not provide one. */
#if !defined(NULL)
#  define NULL 0
#endif
/**
 * LONGSTRING macro
 *
 * Prefix for very long string literals. Under GNU-compatible compilers it
 * expands to __extension__, which silences the -pedantic warning about
 * string length; elsewhere it expands to nothing.
 */
#ifdef __GNUC__
#  define LONGSTRING __extension__
#else
#  define LONGSTRING
#endif
/*
 * Math constants, each supplied only when the system headers have not
 * already defined it.
 */
#if !defined(M_PI)
#  define M_PI (3.14159265358979323846)
#endif

#if !defined(M_E)
#  define M_E (2.7182818284590452354)
#endif

#if !defined(M_LOG2E)
#  define M_LOG2E (1.4426950408889634074)
#endif

/* 1 / sqrt(ln 2) */
#if !defined(ONE_DIV_SQRT_LN2)
#  define ONE_DIV_SQRT_LN2 (1.201122408786449815)
#endif

/* Normally provided by <float.h>. */
#if !defined(FLT_MAX_EXP)
#  define FLT_MAX_EXP 128
#endif
/**
* USE_IEEE: Determine if we're using IEEE floating point
*
* USE_IEEE and IEEE_ONE are defined only when one of the architecture
* macros listed below is set; on any other target both stay undefined.
*/
#if defined(__i386__) || defined(__386__) || defined(__sparc__) || \
defined(__s390__) || defined(__s390x__) || defined(__powerpc__) || \
defined(__x86_64__) || \
defined(__m68k__) || \
defined(ia64) || defined(__ia64__) || \
defined(__hppa__) || defined(hpux) || \
defined(__mips) || defined(_MIPS_ARCH) || \
defined(__arm__) || \
defined(__sh__) || defined(__m32r__) || \
(defined(__sun) && defined(_IEEE_754)) || \
defined(__alpha__)
#define USE_IEEE
/* Bit pattern of the single-precision value 1.0f. */
#define IEEE_ONE 0x3f800000
#endif
/**
* START/END_FAST_MATH macros:
*
* START_FAST_MATH: Set x86 FPU to faster, 32-bit precision mode (and save
* original mode to a temporary).
* END_FAST_MATH: Restore x86 FPU to original mode.
*
* `x` is the caller-supplied temporary that holds the saved control word.
* Three x86 implementations follow (GCC, Watcom, MSVC); every other
* configuration gets the no-op fallback at the end. When NO_FAST_MATH is
* defined, START_FAST_MATH loads DEFAULT_X86_FPU instead of FAST_X86_FPU.
*/
#if defined(__GNUC__) && defined(__i386__)
/*
* Set the x86 FPU control word to guarantee only 32 bits of precision
* are stored in registers. Allowing the FPU to store more introduces
* differences between situations where numbers are pulled out of memory
* vs. situations where the compiler is able to optimize register usage.
*
* In the worst case, we force the compiler to use a memory access to
* truncate the float, by specifying the 'volatile' keyword.
*/
/* Hardware default: All exceptions masked, extended double precision,
* round to nearest (IEEE compliant):
*/
#define DEFAULT_X86_FPU 0x037f
/* All exceptions masked, single precision, round to nearest:
*/
#define FAST_X86_FPU 0x003f
/* The fldcw instruction will cause any pending FP exceptions to be
* raised prior to entering the block, and we clear any pending
* exceptions before exiting the block. Hence, asm code has free
* rein over the FPU while in the fast math block.
*/
/* NOTE(review): `mask` is declared GLuint, a type this header does not
* declare itself, so it presumably comes from a GL header included first. */
#if defined(NO_FAST_MATH)
#define START_FAST_MATH(x) \
do { \
static GLuint mask = DEFAULT_X86_FPU; \
__asm__ ( "fnstcw %0" : "=m" (*&(x)) ); \
__asm__ ( "fldcw %0" : : "m" (mask) ); \
} while (0)
#else
#define START_FAST_MATH(x) \
do { \
static GLuint mask = FAST_X86_FPU; \
__asm__ ( "fnstcw %0" : "=m" (*&(x)) ); \
__asm__ ( "fldcw %0" : : "m" (mask) ); \
} while (0)
#endif
/* Restore original FPU mode, and clear any exceptions that may have
* occurred in the FAST_MATH block.
*/
#define END_FAST_MATH(x) \
do { \
__asm__ ( "fnclex ; fldcw %0" : : "m" (*&(x)) ); \
} while (0)
#elif defined(__WATCOMC__) && defined(__386__)
/* Watcom: the same two operations, implemented as `#pragma aux` inline
* assembly helpers taking their pointer arguments in eax/ecx. */
#define DEFAULT_X86_FPU 0x037f /* See GCC comments above */
#define FAST_X86_FPU 0x003f /* See GCC comments above */
void _watcom_start_fast_math(unsigned short *x,unsigned short *mask);
#pragma aux _watcom_start_fast_math = \
"fnstcw word ptr [eax]" \
"fldcw word ptr [ecx]" \
parm [eax] [ecx] \
modify exact [];
void _watcom_end_fast_math(unsigned short *x);
#pragma aux _watcom_end_fast_math = \
"fnclex" \
"fldcw word ptr [eax]" \
parm [eax] \
modify exact [];
#if defined(NO_FAST_MATH)
#define START_FAST_MATH(x) \
do { \
static GLushort mask = DEFAULT_X86_FPU; \
_watcom_start_fast_math(&x,&mask); \
} while (0)
#else
#define START_FAST_MATH(x) \
do { \
static GLushort mask = FAST_X86_FPU; \
_watcom_start_fast_math(&x,&mask); \
} while (0)
#endif
#define END_FAST_MATH(x) _watcom_end_fast_math(&x)
#elif defined(_MSC_VER) && defined(_M_IX86)
/* MSVC (32-bit x86 only): the same sequence using __asm statements. */
#define DEFAULT_X86_FPU 0x037f /* See GCC comments above */
#define FAST_X86_FPU 0x003f /* See GCC comments above */
#if defined(NO_FAST_MATH)
#define START_FAST_MATH(x) do {\
static GLuint mask = DEFAULT_X86_FPU;\
__asm fnstcw word ptr [x]\
__asm fldcw word ptr [mask]\
} while(0)
#else
#define START_FAST_MATH(x) do {\
static GLuint mask = FAST_X86_FPU;\
__asm fnstcw word ptr [x]\
__asm fldcw word ptr [mask]\
} while(0)
#endif
#define END_FAST_MATH(x) do {\
__asm fnclex\
__asm fldcw word ptr [x]\
} while(0)
#else
/* Fallback: no FPU mode switching. START just zeroes the temporary and END
* only references it. */
#define START_FAST_MATH(x) x = 0
#define END_FAST_MATH(x) (void)(x)
#endif
/* Elements(x): number of elements in the array x.
 * Only meaningful when x is a real array; for a pointer it divides the
 * pointer size by the pointee size. */
#if !defined(Elements)
#  define Elements(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifdef __cplusplus
}
#endif
#endif /* COMPILER_H */

196
include/glheader.h Normal file
View File

@ -0,0 +1,196 @@
/*
* Mesa 3-D graphics library
* Version: 7.5
*
* Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file glheader.h
* Wrapper for GL/gl.h and GL/glext.h
*/
#ifndef GLHEADER_H
#define GLHEADER_H
/* WGLAPI: DLL import/export decoration for Windows builds. Any previous
* definition is discarded first. The macro is only (re)defined for __WIN32__
* builds that are not OPENSTEP, Cygwin or BUILD_FOR_SNAP; elsewhere it stays
* undefined after this block. */
#ifdef WGLAPI
#undef WGLAPI
#endif
#if !defined(OPENSTEP) && (defined(__WIN32__) && !defined(__CYGWIN__)) && !defined(BUILD_FOR_SNAP)
# if (defined(_MSC_VER) || defined(__MINGW32__)) && defined(BUILD_GL32) /* tag specify we're building mesa as a DLL */
# define WGLAPI __declspec(dllexport)
# elif (defined(_MSC_VER) || defined(__MINGW32__)) && defined(_DLL) /* tag specifying we're building for DLL runtime support */
# define WGLAPI __declspec(dllimport)
# else /* for use with static link lib build of Win32 edition only */
/* NOTE(review): this branch expands to dllimport exactly like the one above,
* despite the "static link" wording — confirm that is intended. */
# define WGLAPI __declspec(dllimport)
# endif /* _STATIC_MESA support */
#endif /* WIN32 / CYGWIN bracket */
#define GL_GLEXT_PROTOTYPES
#include "GL/gl.h"
#include "GL/glext.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Fixed-point types.
 *
 * GL_FIXED is defined in glext.h version 64 but these typedefs aren't (yet),
 * so they are supplied here as plain int.
 */
typedef int GLfixed;
typedef int GLclampx;

/* Opaque image handle; only supplied when the GL_OES_EGL_image extension
 * macro has not been defined by the GL headers. */
#if !defined(GL_OES_EGL_image)
typedef void *GLeglImageOES;
#endif
/*
 * Extension and GLES tokens that the included GL headers may lack.
 * Each group is supplied only when its guarding macro is not yet defined.
 */

#if !defined(GL_OES_EGL_image_external)
#  define GL_TEXTURE_EXTERNAL_OES 0x8D65
#  define GL_SAMPLER_EXTERNAL_OES 0x8D66
#  define GL_TEXTURE_BINDING_EXTERNAL_OES 0x8D67
#  define GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES 0x8D68
#endif

#if !defined(GL_OES_point_size_array)
#  define GL_POINT_SIZE_ARRAY_OES 0x8B9C
#  define GL_POINT_SIZE_ARRAY_TYPE_OES 0x898A
#  define GL_POINT_SIZE_ARRAY_STRIDE_OES 0x898B
#  define GL_POINT_SIZE_ARRAY_POINTER_OES 0x898C
#  define GL_POINT_SIZE_ARRAY_BUFFER_BINDING_OES 0x8B9F
#endif

#if !defined(GL_OES_draw_texture)
#  define GL_TEXTURE_CROP_RECT_OES 0x8B9D
#endif

#if !defined(GL_PROGRAM_BINARY_LENGTH_OES)
#  define GL_PROGRAM_BINARY_LENGTH_OES 0x8741
#endif

/* GLES 2.0 tokens */
#if !defined(GL_RGB565)
#  define GL_RGB565 0x8D62
#endif

#if !defined(GL_TEXTURE_GEN_STR_OES)
#  define GL_TEXTURE_GEN_STR_OES 0x8D60
#endif

#if !defined(GL_OES_compressed_paletted_texture)
#  define GL_PALETTE4_RGB8_OES 0x8B90
#  define GL_PALETTE4_RGBA8_OES 0x8B91
#  define GL_PALETTE4_R5_G6_B5_OES 0x8B92
#  define GL_PALETTE4_RGBA4_OES 0x8B93
#  define GL_PALETTE4_RGB5_A1_OES 0x8B94
#  define GL_PALETTE8_RGB8_OES 0x8B95
#  define GL_PALETTE8_RGBA8_OES 0x8B96
#  define GL_PALETTE8_R5_G6_B5_OES 0x8B97
#  define GL_PALETTE8_RGBA4_OES 0x8B98
#  define GL_PALETTE8_RGB5_A1_OES 0x8B99
#endif

#if !defined(GL_OES_matrix_get)
#  define GL_MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES 0x898D
#  define GL_PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES 0x898E
#  define GL_TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES 0x898F
#endif

#if !defined(GL_ES_VERSION_2_0)
#  define GL_SHADER_BINARY_FORMATS 0x8DF8
#  define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9
#  define GL_SHADER_COMPILER 0x8DFA
#  define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB
#  define GL_MAX_VARYING_VECTORS 0x8DFC
#  define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD
#endif

/* This group also defines the extension macro itself. */
#if !defined(GL_ATI_texture_compression_3dc)
#  define GL_ATI_texture_compression_3dc 1
#  define GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI 0x8837
#endif

#if !defined(GL_OES_compressed_ETC1_RGB8_texture)
#  define GL_ETC1_RGB8_OES 0x8D64
#endif

/* Inexplicably, GL_HALF_FLOAT_OES has a different value than GL_HALF_FLOAT.
 */
#if !defined(GL_HALF_FLOAT_OES)
#  define GL_HALF_FLOAT_OES 0x8D61
#endif
/*
 * Mesa-internal tokens (always defined here, no guards).
 */

/**
 * Internal token to represent a GLSL shader program (a collection of
 * one or more shaders that get linked together). GLSL shaders and
 * shader programs share one name space (one hash table), so the value
 * has to differ from all of the GL_VERTEX/FRAGMENT/GEOMETRY_PROGRAM
 * tokens.
 */
#define GL_SHADER_PROGRAM_MESA 0x9999

/**
 * Internal token for geometry programs.
 * For now this reuses the value of GL_GEOMETRY_PROGRAM_NV.
 */
#define MESA_GEOMETRY_PROGRAM 0x8c26
/* Several fields of struct gl_config can take these as values. Since
* GLX header files may not be available everywhere they need to be used,
* redefine them here.
*
* The definitions are unguarded, so if a GLX header is also included the
* spellings here must stay identical to that header's for the
* redefinition to be accepted.
*/
#define GLX_NONE 0x8000
#define GLX_SLOW_CONFIG 0x8001
#define GLX_TRUE_COLOR 0x8002
#define GLX_DIRECT_COLOR 0x8003
#define GLX_PSEUDO_COLOR 0x8004
#define GLX_STATIC_COLOR 0x8005
#define GLX_GRAY_SCALE 0x8006
#define GLX_STATIC_GRAY 0x8007
#define GLX_TRANSPARENT_RGB 0x8008
#define GLX_TRANSPARENT_INDEX 0x8009
#define GLX_NON_CONFORMANT_CONFIG 0x800D
#define GLX_SWAP_EXCHANGE_OML 0x8061
#define GLX_SWAP_COPY_OML 0x8062
#define GLX_SWAP_UNDEFINED_OML 0x8063
#define GLX_DONT_CARE 0xFFFFFFFF
#ifdef __cplusplus
}
#endif
#endif /* GLHEADER_H */

1
include/main/glheader.h Symbolic link
View File

@ -0,0 +1 @@
../glheader.h

605
src/arch/sparc/norm.S Normal file
View File

@ -0,0 +1,605 @@
/*
* SPARC assembly versions of the normal-vector transform routines.
* sparc_matrix.h is expected to supply the names used below that are not
* defined in this file (LDPTR, LDMATRIX_*, M0..M10, MAT_INV, V4F_*).
*/
#include "sparc_matrix.h"
/* Declare %g2 and %g3 as scratch registers for the assembler. */
.register %g2, #scratch
.register %g3, #scratch
.text
/* Offset from %sp of the temporary stack slot that the routines use to move
* values from integer registers into floating-point registers.
* NOTE(review): 2047 looks like the 64-bit ABI stack bias, with the remaining
* term skipping the register save area — confirm against the ABI. */
#ifdef __arch64__
#define STACK_VAR_OFF (2047 + (8 * 16))
#else
#define STACK_VAR_OFF (4 * 16)
#endif
/* Newton-Raphson approximation turns out to be slower
* (and less accurate) than direct fsqrts/fdivs.
*/
/* Single-precision bit pattern loaded into %f12 as 1.0f by the normalizing
* routines. The low 10 bits are zero, so one sethi %hi() builds all of it. */
#define ONE_DOT_ZERO 0x3f800000
/*
* _mesa_sparc_transform_normalize_normals
*
* Transforms each input normal by the 3x3 part (elements 0,1,2 / 4,5,6 /
* 8,9,10) of mat->inv and writes a scaled result to dest.
*
* lengths == NULL: each transformed vector is multiplied by
* 1.0 / sqrt(tx*tx + ty*ty + tz*tz).
* lengths != NULL: the matrix elements are pre-multiplied by `scale`
* once, and each transformed vector is multiplied by lengths[i].
*
* dest->count is set to in->count. Nothing else is done when count < 1.
* Input is walked with in->stride; output advances 16 bytes per normal.
*
* NOTE(review): the lengths == NULL path divides by sqrt(len) without
* testing len for zero.
*/
.globl _mesa_sparc_transform_normalize_normals
_mesa_sparc_transform_normalize_normals:
/* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Move 1.0f and scale into FP registers through a temporary stack slot. */
sethi %hi(ONE_DOT_ZERO), %g2
sub %sp, 16, %sp
st %g2, [%sp + STACK_VAR_OFF+0x0]
st %o1, [%sp + STACK_VAR_OFF+0x4]
ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
add %sp, 16, %sp
LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
/* dest->count = in->count */
st %g1, [%o4 + V4F_COUNT]
/* Return if count < 1. The cmp after `bl` sits in its delay slot and sets
* the condition codes for the following `bne`. */
cmp %g1, 1
bl 7f
cmp %o3, 0
bne 4f
clr %o4 ! 'i' for STRIDE_LOOP
1: /* LENGTHS == NULL */
ld [%o5 + 0x00], %f0 ! ux = from[0]
ld [%o5 + 0x04], %f1 ! uy = from[1]
ld [%o5 + 0x08], %f2 ! uz = from[2]
add %o5, %g2, %o5 ! STRIDE_F(from, stride)
add %o4, 1, %o4 ! i++
/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
* ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
* tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
*/
fmuls %f0, M0, %f3 ! FGM Group
fmuls %f1, M1, %f4 ! FGM Group
fmuls %f0, M4, %f5 ! FGM Group
fmuls %f1, M5, %f6 ! FGM Group
fmuls %f0, M8, %f7 ! FGM Group f3 available
fmuls %f1, M9, %f8 ! FGM Group f4 available
fadds %f3, %f4, %f3 ! FGA
fmuls %f2, M2, %f10 ! FGM Group f5 available
fmuls %f2, M6, %f0 ! FGM Group f6 available
fadds %f5, %f6, %f5 ! FGA
fmuls %f2, M10, %f4 ! FGM Group f7 available
fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
fadds %f3, %f10, %f3 ! FGA Group f10 available
fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
/* f3=tx, f5=ty, f7=tz */
/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
fmuls %f3, %f3, %f6 ! FGM Group f3 available
fmuls %f5, %f5, %f8 ! FGM Group f5 available
fmuls %f7, %f7, %f10 ! FGM Group f7 available
fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
/* scale (f6) = 1.0 / sqrt(len) */
fsqrts %f6, %f6 ! FDIV 20 cycles
fdivs %f12, %f6, %f6 ! FDIV 14 cycles
fmuls %f3, %f6, %f3
st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
fmuls %f5, %f6, %f5
st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
fmuls %f7, %f6, %f7
st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
cmp %o4, %g1 ! continue if (i < count)
bl 1b
add %g3, 0x10, %g3 ! advance out vector pointer
ba 7f
nop
4: /* LENGTHS != NULL */
/* Fold `scale` into the nine matrix elements once, before the loop. */
fmuls M0, %f15, M0
fmuls M1, %f15, M1
fmuls M2, %f15, M2
fmuls M4, %f15, M4
fmuls M5, %f15, M5
fmuls M6, %f15, M6
fmuls M8, %f15, M8
fmuls M9, %f15, M9
fmuls M10, %f15, M10
5:
ld [%o5 + 0x00], %f0 ! ux = from[0]
ld [%o5 + 0x04], %f1 ! uy = from[1]
ld [%o5 + 0x08], %f2 ! uz = from[2]
add %o5, %g2, %o5 ! STRIDE_F(from, stride)
add %o4, 1, %o4 ! i++
/* tx (f3) = (ux * m0) + (uy * m1) + (uz * m2)
* ty (f5) = (ux * m4) + (uy * m5) + (uz * m6)
* tz (f7) = (ux * m8) + (uy * m9) + (uz * m10)
*/
fmuls %f0, M0, %f3 ! FGM Group
fmuls %f1, M1, %f4 ! FGM Group
fmuls %f0, M4, %f5 ! FGM Group
fmuls %f1, M5, %f6 ! FGM Group
fmuls %f0, M8, %f7 ! FGM Group f3 available
fmuls %f1, M9, %f8 ! FGM Group f4 available
fadds %f3, %f4, %f3 ! FGA
fmuls %f2, M2, %f10 ! FGM Group f5 available
fmuls %f2, M6, %f0 ! FGM Group f6 available
fadds %f5, %f6, %f5 ! FGA
fmuls %f2, M10, %f4 ! FGM Group f7 available
fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
fadds %f3, %f10, %f3 ! FGA Group f10 available
ld [%o3], %f13 ! LSU
fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
add %o3, 4, %o3 ! IEU0
fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
fmuls %f3, %f13, %f3
st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
fmuls %f5, %f13, %f5
st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
fmuls %f7, %f13, %f7
st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
cmp %o4, %g1 ! continue if (i < count)
bl 5b
add %g3, 0x10, %g3 ! advance out vector pointer
7: retl
nop
/*
* _mesa_sparc_transform_normalize_normals_no_rot
*
* Like the transform-and-normalize routine, but only the diagonal elements
* 0, 5 and 10 of mat->inv are loaded and used: tx = ux*m0, ty = uy*m5,
* tz = uz*m10.
*
* lengths == NULL: result is multiplied by 1.0 / sqrt(tx*tx+ty*ty+tz*tz).
* lengths != NULL: m0/m5/m10 are pre-multiplied by `scale`, and each
* result is multiplied by lengths[i].
*
* dest->count is set to in->count. Nothing else is done when count < 1.
*
* NOTE(review): the lengths == NULL path divides by sqrt(len) without
* testing len for zero.
*/
.globl _mesa_sparc_transform_normalize_normals_no_rot
_mesa_sparc_transform_normalize_normals_no_rot:
/* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Move 1.0f and scale into FP registers through a temporary stack slot. */
sethi %hi(ONE_DOT_ZERO), %g2
sub %sp, 16, %sp
st %g2, [%sp + STACK_VAR_OFF+0x0]
st %o1, [%sp + STACK_VAR_OFF+0x4]
ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
ld [%sp + STACK_VAR_OFF+0x4], %f15 ! f15 = scale
add %sp, 16, %sp
LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
LDMATRIX_0_5_10(%o0)
/* dest->count = in->count */
st %g1, [%o4 + V4F_COUNT]
/* Return if count < 1; the cmp in the delay slot feeds the `bne`. */
cmp %g1, 1
bl 7f
cmp %o3, 0
bne 4f
clr %o4 ! 'i' for STRIDE_LOOP
1: /* LENGTHS == NULL */
ld [%o5 + 0x00], %f0 ! ux = from[0]
ld [%o5 + 0x04], %f1 ! uy = from[1]
ld [%o5 + 0x08], %f2 ! uz = from[2]
add %o5, %g2, %o5 ! STRIDE_F(from, stride)
add %o4, 1, %o4 ! i++
/* tx (f3) = (ux * m0)
* ty (f5) = (uy * m5)
* tz (f7) = (uz * m10)
*/
fmuls %f0, M0, %f3 ! FGM Group
fmuls %f1, M5, %f5 ! FGM Group
fmuls %f2, M10, %f7 ! FGM Group
/* f3=tx, f5=ty, f7=tz */
/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
fmuls %f3, %f3, %f6 ! FGM Group stall, f3 available
fmuls %f5, %f5, %f8 ! FGM Group f5 available
fmuls %f7, %f7, %f10 ! FGM Group f7 available
fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
/* scale (f6) = 1.0 / sqrt(len) */
fsqrts %f6, %f6 ! FDIV 20 cycles
fdivs %f12, %f6, %f6 ! FDIV 14 cycles
fmuls %f3, %f6, %f3
st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
fmuls %f5, %f6, %f5
st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
fmuls %f7, %f6, %f7
st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
cmp %o4, %g1 ! continue if (i < count)
bl 1b
add %g3, 0x10, %g3 ! advance out vector pointer
ba 7f
nop
4: /* LENGTHS != NULL */
/* Fold `scale` into the three diagonal elements once, before the loop. */
fmuls M0, %f15, M0
fmuls M5, %f15, M5
fmuls M10, %f15, M10
5:
ld [%o5 + 0x00], %f0 ! ux = from[0]
ld [%o5 + 0x04], %f1 ! uy = from[1]
ld [%o5 + 0x08], %f2 ! uz = from[2]
add %o5, %g2, %o5 ! STRIDE_F(from, stride)
add %o4, 1, %o4 ! i++
/* tx (f3) = (ux * m0)
* ty (f5) = (uy * m5)
* tz (f7) = (uz * m10)
*/
fmuls %f0, M0, %f3 ! FGM Group
ld [%o3], %f13 ! LSU
fmuls %f1, M5, %f5 ! FGM Group
add %o3, 4, %o3 ! IEU0
fmuls %f2, M10, %f7 ! FGM Group
/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
fmuls %f3, %f13, %f3
st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
fmuls %f5, %f13, %f5
st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
fmuls %f7, %f13, %f7
st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
cmp %o4, %g1 ! continue if (i < count)
bl 5b
add %g3, 0x10, %g3 ! advance out vector pointer
7: retl
nop
/*
* _mesa_sparc_transform_rescale_normals_no_rot
*
* Multiplies each input normal component-wise by the diagonal elements
* 0, 5 and 10 of mat->inv after those have been pre-multiplied by `scale`:
* out = (ux*m0*scale, uy*m5*scale, uz*m10*scale).
*
* dest->count is set to in->count. Nothing else is done when count < 1.
* The `lengths` argument (o3) is not read by this routine.
*/
.globl _mesa_sparc_transform_rescale_normals_no_rot
_mesa_sparc_transform_rescale_normals_no_rot:
/* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Move scale into an FP register through a temporary stack slot. */
sub %sp, 16, %sp
st %o1, [%sp + STACK_VAR_OFF+0x0]
ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
add %sp, 16, %sp
LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
LDMATRIX_0_5_10(%o0)
/* dest->count = in->count */
st %g1, [%o4 + V4F_COUNT]
/* Return if count < 1; the clr executes in the branch delay slot. */
cmp %g1, 1
bl 7f
clr %o4 ! 'i' for STRIDE_LOOP
fmuls M0, %f15, M0
fmuls M5, %f15, M5
fmuls M10, %f15, M10
1: ld [%o5 + 0x00], %f0 ! ux = from[0]
ld [%o5 + 0x04], %f1 ! uy = from[1]
ld [%o5 + 0x08], %f2 ! uz = from[2]
add %o5, %g2, %o5 ! STRIDE_F(from, stride)
add %o4, 1, %o4 ! i++
/* tx (f3) = (ux * m0)
* ty (f5) = (uy * m5)
* tz (f7) = (uz * m10)
*/
fmuls %f0, M0, %f3 ! FGM Group
st %f3, [%g3 + 0x00] ! LSU
fmuls %f1, M5, %f5 ! FGM Group
st %f5, [%g3 + 0x04] ! LSU
fmuls %f2, M10, %f7 ! FGM Group
st %f7, [%g3 + 0x08] ! LSU
cmp %o4, %g1 ! continue if (i < count)
bl 1b
add %g3, 0x10, %g3 ! advance out vector pointer
7: retl
nop
/*
* _mesa_sparc_transform_rescale_normals
*
* Transforms each input normal by the 3x3 part (elements 0,1,2 / 4,5,6 /
* 8,9,10) of mat->inv, after all nine elements have been pre-multiplied by
* `scale`, and stores the result to dest.
*
* dest->count is set to in->count. Nothing else is done when count < 1.
* The `lengths` argument (o3) is not read by this routine.
*/
.globl _mesa_sparc_transform_rescale_normals
_mesa_sparc_transform_rescale_normals:
/* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Move scale into an FP register through a temporary stack slot. */
sub %sp, 16, %sp
st %o1, [%sp + STACK_VAR_OFF+0x0]
ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
add %sp, 16, %sp
LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
/* dest->count = in->count */
st %g1, [%o4 + V4F_COUNT]
/* Return if count < 1; the clr executes in the branch delay slot. */
cmp %g1, 1
bl 7f
clr %o4 ! 'i' for STRIDE_LOOP
fmuls M0, %f15, M0
fmuls M1, %f15, M1
fmuls M2, %f15, M2
fmuls M4, %f15, M4
fmuls M5, %f15, M5
fmuls M6, %f15, M6
fmuls M8, %f15, M8
fmuls M9, %f15, M9
fmuls M10, %f15, M10
1: ld [%o5 + 0x00], %f0 ! ux = from[0]
ld [%o5 + 0x04], %f1 ! uy = from[1]
ld [%o5 + 0x08], %f2 ! uz = from[2]
add %o5, %g2, %o5 ! STRIDE_F(from, stride)
add %o4, 1, %o4 ! i++
/* f3 = ux*m0 + uy*m1 + uz*m2, f5 = ux*m4 + uy*m5 + uz*m6,
* f7 = ux*m8 + uy*m9 + uz*m10 */
fmuls %f0, M0, %f3 ! FGM Group
fmuls %f1, M1, %f4 ! FGM Group
fmuls %f0, M4, %f5 ! FGM Group
fmuls %f1, M5, %f6 ! FGM Group
fmuls %f0, M8, %f7 ! FGM Group f3 available
fmuls %f1, M9, %f8 ! FGM Group f4 available
fadds %f3, %f4, %f3 ! FGA
fmuls %f2, M2, %f10 ! FGM Group f5 available
fmuls %f2, M6, %f0 ! FGM Group f6 available
fadds %f5, %f6, %f5 ! FGA
fmuls %f2, M10, %f4 ! FGM Group f7 available
fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
fadds %f3, %f10, %f3 ! FGA Group f10 available
st %f3, [%g3 + 0x00] ! LSU
fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
st %f5, [%g3 + 0x04] ! LSU
fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
st %f7, [%g3 + 0x08] ! LSU
cmp %o4, %g1 ! continue if (i < count)
bl 1b
add %g3, 0x10, %g3 ! advance out vector pointer
7: retl
nop
/*
* _mesa_sparc_transform_normals_no_rot
*
* Multiplies each input normal component-wise by the diagonal elements
* 0, 5 and 10 of mat->inv: out = (ux*m0, uy*m5, uz*m10). No scaling or
* normalization is applied.
*
* dest->count is set to in->count. Nothing else is done when count < 1.
* The `scale` (o1) and `lengths` (o3) arguments are not read.
*/
.globl _mesa_sparc_transform_normals_no_rot
_mesa_sparc_transform_normals_no_rot:
/* o0=mat o1=scale o2=in o3=lengths o4=dest */
LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
LDMATRIX_0_5_10(%o0)
/* dest->count = in->count */
st %g1, [%o4 + V4F_COUNT]
/* Return if count < 1; the clr executes in the branch delay slot. */
cmp %g1, 1
bl 7f
clr %o4 ! 'i' for STRIDE_LOOP
1: ld [%o5 + 0x00], %f0 ! ux = from[0]
ld [%o5 + 0x04], %f1 ! uy = from[1]
ld [%o5 + 0x08], %f2 ! uz = from[2]
add %o5, %g2, %o5 ! STRIDE_F(from, stride)
add %o4, 1, %o4 ! i++
/* tx (f3) = (ux * m0)
* ty (f5) = (uy * m5)
* tz (f7) = (uz * m10)
*/
fmuls %f0, M0, %f3 ! FGM Group
st %f3, [%g3 + 0x00] ! LSU
fmuls %f1, M5, %f5 ! FGM Group
st %f5, [%g3 + 0x04] ! LSU
fmuls %f2, M10, %f7 ! FGM Group
st %f7, [%g3 + 0x08] ! LSU
cmp %o4, %g1 ! continue if (i < count)
bl 1b
add %g3, 0x10, %g3 ! advance out vector pointer
7: retl
nop
/*
 * _mesa_sparc_transform_normals
 *
 * Multiplies every input normal u by nine elements of mat->inv:
 *   tx = ux * m0 + uy * m1 + uz * m2
 *   ty = ux * m4 + uy * m5 + uz * m6
 *   tz = ux * m8 + uy * m9 + uz * m10
 * and stores (tx, ty, tz) to the output.  Also sets dest->count = in->count.
 * No vertex is processed when in->count < 1.  The scale (%o1) and lengths
 * (%o3) arguments are not read here.
 */
.globl _mesa_sparc_transform_normals
_mesa_sparc_transform_normals:
/* o0=mat o1=scale o2=in o3=lengths o4=dest */
LDPTR [%o0 + MAT_INV], %o0 ! o0 = mat->inv
LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
LDMATRIX_0_1_2_4_5_6_8_9_10(%o0)
/* dest->count = in->count */
st %g1, [%o4 + V4F_COUNT]
cmp %g1, 1
bl 7f ! return if count < 1
clr %o4 ! 'i' for STRIDE_LOOP (branch delay slot)
1: ld [%o5 + 0x00], %f0 ! ux = from[0]
ld [%o5 + 0x04], %f1 ! uy = from[1]
ld [%o5 + 0x08], %f2 ! uz = from[2]
add %o5, %g2, %o5 ! STRIDE_F(from, stride)
add %o4, 1, %o4 ! i++
/* Partial products; f0 and f4 are reused as temporaries further down. */
fmuls %f0, M0, %f3 ! FGM Group
fmuls %f1, M1, %f4 ! FGM Group
fmuls %f0, M4, %f5 ! FGM Group
fmuls %f1, M5, %f6 ! FGM Group
fmuls %f0, M8, %f7 ! FGM Group f3 available
fmuls %f1, M9, %f8 ! FGM Group f4 available
fadds %f3, %f4, %f3 ! FGA
fmuls %f2, M2, %f10 ! FGM Group f5 available
fmuls %f2, M6, %f0 ! FGM Group f6 available
fadds %f5, %f6, %f5 ! FGA
fmuls %f2, M10, %f4 ! FGM Group f7 available
fadds %f7, %f8, %f7 ! FGA Group f8,f3 available
fadds %f3, %f10, %f3 ! FGA Group f10 available
st %f3, [%g3 + 0x00] ! LSU
fadds %f5, %f0, %f5 ! FGA Group stall f0,f5 available
st %f5, [%g3 + 0x04] ! LSU
fadds %f7, %f4, %f7 ! FGA Group stall f4,f7 available
st %f7, [%g3 + 0x08] ! LSU
cmp %o4, %g1 ! continue if (i < count)
bl 1b
add %g3, 0x10, %g3 ! advance out vector pointer (branch delay slot)
7: retl
nop
/*
 * _mesa_sparc_normalize_normals
 *
 * Scales every input normal and stores the result; no matrix is applied
 * (mat in %o0 and scale in %o1 are not read).  Sets dest->count = in->count
 * and processes nothing when in->count < 1.
 *
 *   lengths == NULL: out = u * (1.0 / sqrt(ux*ux + uy*uy + uz*uz))
 *   lengths != NULL: out = u * lengths[i], reading one float per vertex
 *
 * NOTE(review): the lengths == NULL path has no visible guard for a
 * zero-length normal, so fdivs would divide by zero there -- confirm that
 * callers never pass such input or that the result is acceptable.
 */
.globl _mesa_sparc_normalize_normals
_mesa_sparc_normalize_normals:
/* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* Move the ONE_DOT_ZERO bit pattern from an integer register into %f12 by
 * bouncing it through a temporary stack slot.  ONE_DOT_ZERO and
 * STACK_VAR_OFF are defined outside this view. */
sethi %hi(ONE_DOT_ZERO), %g2
sub %sp, 16, %sp
st %g2, [%sp + STACK_VAR_OFF+0x0]
ld [%sp + STACK_VAR_OFF+0x0], %f12 ! f12 = 1.0f
add %sp, 16, %sp
LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* dest->count = in->count */
st %g1, [%o4 + V4F_COUNT]
cmp %g1, 1
bl 7f ! return if count < 1
cmp %o3, 0 ! lengths == NULL ? (branch delay slot)
bne 4f ! use the lengths[] path when non-NULL
clr %o4 ! 'i' for STRIDE_LOOP (branch delay slot)
1: /* LENGTHS == NULL */
ld [%o5 + 0x00], %f3 ! ux = from[0]
ld [%o5 + 0x04], %f5 ! uy = from[1]
ld [%o5 + 0x08], %f7 ! uz = from[2]
add %o5, %g2, %o5 ! STRIDE_F(from, stride)
add %o4, 1, %o4 ! i++
/* f3=tx, f5=ty, f7=tz */
/* len (f6) = (tx * tx) + (ty * ty) + (tz * tz) */
fmuls %f3, %f3, %f6 ! FGM Group f3 available
fmuls %f5, %f5, %f8 ! FGM Group f5 available
fmuls %f7, %f7, %f10 ! FGM Group f7 available
fadds %f6, %f8, %f6 ! FGA Group 2cyc stall f6,f8 available
fadds %f6, %f10, %f6 ! FGA Group 4cyc stall f6,f10 available
/* scale (f6) = 1.0 / sqrt(len) */
fsqrts %f6, %f6 ! FDIV 20 cycles
fdivs %f12, %f6, %f6 ! FDIV 14 cycles
fmuls %f3, %f6, %f3
st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
fmuls %f5, %f6, %f5
st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
fmuls %f7, %f6, %f7
st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
cmp %o4, %g1 ! continue if (i < count)
bl 1b
add %g3, 0x10, %g3 ! advance out vector pointer (branch delay slot)
ba 7f ! skip the lengths[] loop
nop
4: /* LENGTHS != NULL */
5:
ld [%o5 + 0x00], %f3 ! ux = from[0]
ld [%o5 + 0x04], %f5 ! uy = from[1]
ld [%o5 + 0x08], %f7 ! uz = from[2]
add %o5, %g2, %o5 ! STRIDE_F(from, stride)
add %o4, 1, %o4 ! i++
ld [%o3], %f13 ! LSU f13 = *lengths
add %o3, 4, %o3 ! IEU0 lengths++
/* f3=tx, f5=ty, f7=tz, f13=lengths[i] */
fmuls %f3, %f13, %f3
st %f3, [%g3 + 0x00] ! out[i][0] = tx * len
fmuls %f5, %f13, %f5
st %f5, [%g3 + 0x04] ! out[i][1] = ty * len
fmuls %f7, %f13, %f7
st %f7, [%g3 + 0x08] ! out[i][2] = tz * len
cmp %o4, %g1 ! continue if (i < count)
bl 5b
add %g3, 0x10, %g3 ! advance out vector pointer (branch delay slot)
7: retl
nop
/*
 * _mesa_sparc_rescale_normals
 *
 * Multiplies every input normal by the scalar scale argument and stores the
 * result: out = (ux * scale, uy * scale, uz * scale).  Sets
 * dest->count = in->count and processes nothing when in->count < 1.
 * mat (%o0) and lengths (%o3) are not read here.
 *
 * NOTE(review): scale is taken from integer register %o1 and moved to %f15
 * through a stack slot.  This presumably relies on the float argument
 * arriving in %o1 -- confirm for every ABI this file is built for.
 */
.globl _mesa_sparc_rescale_normals
_mesa_sparc_rescale_normals:
/* o0=mat o1=scale o2=in o3=lengths o4=dest */
/* NOTE(review): the value placed in %g2 here is never used; %g2 is
 * overwritten with in->stride below before anything reads it. */
sethi %hi(ONE_DOT_ZERO), %g2
sub %sp, 16, %sp
st %o1, [%sp + STACK_VAR_OFF+0x0]
ld [%sp + STACK_VAR_OFF+0x0], %f15 ! f15 = scale
add %sp, 16, %sp
LDPTR [%o2 + V4F_START], %o5 ! o5 = 'from' in->start
ld [%o2 + V4F_COUNT], %g1 ! g1 = in->count
ld [%o2 + V4F_STRIDE], %g2 ! g2 = in->stride
LDPTR [%o4 + V4F_START], %g3 ! g3 = 'out' dest->start
/* dest->count = in->count */
st %g1, [%o4 + V4F_COUNT]
cmp %g1, 1
bl 7f ! return if count < 1
clr %o4 ! 'i' for STRIDE_LOOP (branch delay slot)
1:
ld [%o5 + 0x00], %f3 ! ux = from[0]
ld [%o5 + 0x04], %f5 ! uy = from[1]
ld [%o5 + 0x08], %f7 ! uz = from[2]
add %o5, %g2, %o5 ! STRIDE_F(from, stride)
add %o4, 1, %o4 ! i++
/* f3=tx, f5=ty, f7=tz */
fmuls %f3, %f15, %f3
st %f3, [%g3 + 0x00] ! out[i][0] = tx * scale
fmuls %f5, %f15, %f5
st %f5, [%g3 + 0x04] ! out[i][1] = ty * scale
fmuls %f7, %f15, %f7
st %f7, [%g3 + 0x08] ! out[i][2] = tz * scale
cmp %o4, %g1 ! continue if (i < count)
bl 1b
add %g3, 0x10, %g3 ! advance out vector pointer (branch delay slot)
7: retl
nop

142
src/arch/sparc/sparc.c Normal file
View File

@ -0,0 +1,142 @@
/*
* Mesa 3-D graphics library
* Version: 6.3
*
* Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Sparc assembly code by David S. Miller
*/
#include "sparc.h"
#ifdef USE_SPARC_ASM
#include "main/context.h"
#include "math/m_xform.h"
#include "tnl/t_context.h"
#ifdef DEBUG
#include "math/m_debug.h"
#endif
/* Parameter list shared by every point-transform prototype in this file:
 * destination vector, 16-float matrix, source vector. */
#define XFORM_ARGS GLvector4f *to_vec, \
const GLfloat m[16], \
const GLvector4f *from_vec
/*
 * Emit the prototypes of the seven point-transform routines named
 * _mesa_<pfx>_transform_points<sz>_<kind>, all taking XFORM_ARGS.
 * The expansion ends in ';', so no semicolon is needed at the use site.
 */
#define DECLARE_XFORM_GROUP(pfx, sz) \
extern void _mesa_##pfx##_transform_points##sz##_identity(XFORM_ARGS); \
extern void _mesa_##pfx##_transform_points##sz##_general(XFORM_ARGS); \
extern void _mesa_##pfx##_transform_points##sz##_perspective(XFORM_ARGS); \
extern void _mesa_##pfx##_transform_points##sz##_2d_no_rot(XFORM_ARGS); \
extern void _mesa_##pfx##_transform_points##sz##_2d(XFORM_ARGS); \
extern void _mesa_##pfx##_transform_points##sz##_3d_no_rot(XFORM_ARGS); \
extern void _mesa_##pfx##_transform_points##sz##_3d(XFORM_ARGS);
/*
 * Fill row `sz` of _mesa_transform_tab with the seven `pfx` routines, one
 * per MATRIX_* type.  Expands to plain assignment statements, the last of
 * which already ends in ';', so a semicolon at the use site is not required.
 */
#define ASSIGN_XFORM_GROUP(pfx, sz) \
_mesa_transform_tab[sz][MATRIX_GENERAL] = _mesa_##pfx##_transform_points##sz##_general; \
_mesa_transform_tab[sz][MATRIX_IDENTITY] = _mesa_##pfx##_transform_points##sz##_identity; \
_mesa_transform_tab[sz][MATRIX_3D_NO_ROT] = _mesa_##pfx##_transform_points##sz##_3d_no_rot; \
_mesa_transform_tab[sz][MATRIX_PERSPECTIVE] = _mesa_##pfx##_transform_points##sz##_perspective; \
_mesa_transform_tab[sz][MATRIX_2D] = _mesa_##pfx##_transform_points##sz##_2d; \
_mesa_transform_tab[sz][MATRIX_2D_NO_ROT] = _mesa_##pfx##_transform_points##sz##_2d_no_rot; \
_mesa_transform_tab[sz][MATRIX_3D] = _mesa_##pfx##_transform_points##sz##_3d;
/* Prototypes of the assembly point transforms for vector sizes 1 to 4. */
DECLARE_XFORM_GROUP(sparc, 1)
DECLARE_XFORM_GROUP(sparc, 2)
DECLARE_XFORM_GROUP(sparc, 3)
DECLARE_XFORM_GROUP(sparc, 4)
/* Assembly clip test for 4-component points; installed in _mesa_clip_tab[4]
 * by _mesa_init_all_sparc_transform_asm(). */
extern GLvector4f *_mesa_sparc_cliptest_points4(GLvector4f *clip_vec,
GLvector4f *proj_vec,
GLubyte clipMask[],
GLubyte *orMask,
GLubyte *andMask,
GLboolean viewport_z_clip);
/* Same signature as above; installed in _mesa_clip_np_tab[4]. */
extern GLvector4f *_mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec,
GLvector4f *proj_vec,
GLubyte clipMask[],
GLubyte *orMask,
GLubyte *andMask,
GLboolean viewport_z_clip);
/* Parameter list shared by every normal-transform prototype below. */
#define NORM_ARGS const GLmatrix *mat, \
GLfloat scale, \
const GLvector4f *in, \
const GLfloat *lengths, \
GLvector4f *dest
/* Assembly normal transforms; each is installed in one slot of
 * _mesa_normal_tab by _mesa_init_all_sparc_transform_asm(). */
extern void _mesa_sparc_transform_normalize_normals(NORM_ARGS);
extern void _mesa_sparc_transform_normalize_normals_no_rot(NORM_ARGS);
extern void _mesa_sparc_transform_rescale_normals_no_rot(NORM_ARGS);
extern void _mesa_sparc_transform_rescale_normals(NORM_ARGS);
extern void _mesa_sparc_transform_normals_no_rot(NORM_ARGS);
extern void _mesa_sparc_transform_normals(NORM_ARGS);
extern void _mesa_sparc_normalize_normals(NORM_ARGS);
extern void _mesa_sparc_rescale_normals(NORM_ARGS);
/**
 * Point the shared point-transform, clip-test and normal-transform dispatch
 * tables at the SPARC assembly routines declared in this file.  When
 * DEBUG_MATH is defined, the math self-tests are run afterwards against the
 * freshly installed routines.
 */
void _mesa_init_all_sparc_transform_asm(void)
{
   /* Point transforms: one table row per vector size. */
   ASSIGN_XFORM_GROUP(sparc, 1)
   ASSIGN_XFORM_GROUP(sparc, 2)
   ASSIGN_XFORM_GROUP(sparc, 3)
   ASSIGN_XFORM_GROUP(sparc, 4)

   /* Clip testing is only provided for 4-component points. */
   _mesa_clip_tab[4] = _mesa_sparc_cliptest_points4;
   _mesa_clip_np_tab[4] = _mesa_sparc_cliptest_points4_np;

   /* Normal transforms, indexed by the NORM_* flag combination. */
   _mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE] = _mesa_sparc_transform_normalize_normals;
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE] = _mesa_sparc_transform_normalize_normals_no_rot;
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] = _mesa_sparc_transform_rescale_normals_no_rot;
   _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] = _mesa_sparc_transform_rescale_normals;
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] = _mesa_sparc_transform_normals_no_rot;
   _mesa_normal_tab[NORM_TRANSFORM] = _mesa_sparc_transform_normals;
   _mesa_normal_tab[NORM_NORMALIZE] = _mesa_sparc_normalize_normals;
   _mesa_normal_tab[NORM_RESCALE] = _mesa_sparc_rescale_normals;

#ifdef DEBUG_MATH
   _math_test_all_transform_functions("sparc");
   _math_test_all_cliptest_functions("sparc");
   _math_test_all_normal_transform_functions("sparc");
#endif
}
#endif /* USE_SPARC_ASM */

36
src/arch/sparc/sparc.h Normal file
View File

@ -0,0 +1,36 @@
/*
* Mesa 3-D graphics library
* Version: 3.1
*
* Copyright (C) 1999 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Sparc assembly code by David S. Miller
*/
#ifndef SPARC_H
#define SPARC_H
/* Installs the SPARC assembly routines into the transform, clip-test and
 * normal-transform dispatch tables (defined in sparc.c). */
extern void _mesa_init_all_sparc_transform_asm(void);
#endif /* !(SPARC_H) */

233
src/arch/sparc/sparc_clip.S Normal file
View File

@ -0,0 +1,233 @@
/*
* Clip testing in SPARC assembly
*/
/*
 * LDPTR is the pointer-sized load (ldx when __arch64__ is set, ld otherwise).
 * The V4F_* values are byte offsets used below to reach the data, start,
 * count, stride, size and flags members of the vector arguments.
 * NOTE(review): these are hand-written constants -- presumably they must
 * match the C layout of GLvector4f; confirm whenever that struct changes.
 */
#if __arch64__
#define LDPTR ldx
#define V4F_DATA 0x00
#define V4F_START 0x08
#define V4F_COUNT 0x10
#define V4F_STRIDE 0x14
#define V4F_SIZE 0x18
#define V4F_FLAGS 0x1c
#else
#define LDPTR ld
#define V4F_DATA 0x00
#define V4F_START 0x04
#define V4F_COUNT 0x08
#define V4F_STRIDE 0x0c
#define V4F_SIZE 0x10
#define V4F_FLAGS 0x14
#endif
/* Bit masks OR-ed into the flags member; VEC_SIZE_n has the low n bits set. */
#define VEC_SIZE_1 1
#define VEC_SIZE_2 3
#define VEC_SIZE_3 7
#define VEC_SIZE_4 15
/* Tell the assembler that %g2 and %g3 are used as scratch registers. */
.register %g2, #scratch
.register %g3, #scratch
.text
.align 64
/* 1.0f constant.  clip_table must stay immediately after it: the clip-test
 * routines compute this address and then add 4 to reach the table. */
one_dot_zero:
.word 0x3f800000 /* 1.0f */
/* This trick is shamelessly stolen from the x86
* Mesa asm. Very clever, and we can do it too
* since we have the necessary add with carry
* instructions on Sparc.
*
* The table has 128 one-byte entries and is indexed by a 7-bit value that
* the clip-test loops build with addcc/subcc/addx from the raw bit patterns
* of x, y, z and w.  From most to least significant bit:
*   sign(w), sign(z), (w<<1) < (z<<1), sign(y), (w<<1) < (y<<1),
*   sign(x), (w<<1) < (x<<1)
* where the comparisons are unsigned compares of the bit patterns.
* Each entry is the clip mask byte stored to clipMask[i].
*/
clip_table:
.byte 0, 1, 0, 2, 4, 5, 4, 6
.byte 0, 1, 0, 2, 8, 9, 8, 10
.byte 32, 33, 32, 34, 36, 37, 36, 38
.byte 32, 33, 32, 34, 40, 41, 40, 42
.byte 0, 1, 0, 2, 4, 5, 4, 6
.byte 0, 1, 0, 2, 8, 9, 8, 10
.byte 16, 17, 16, 18, 20, 21, 20, 22
.byte 16, 17, 16, 18, 24, 25, 24, 26
.byte 63, 61, 63, 62, 55, 53, 55, 54
.byte 63, 61, 63, 62, 59, 57, 59, 58
.byte 47, 45, 47, 46, 39, 37, 39, 38
.byte 47, 45, 47, 46, 43, 41, 43, 42
.byte 63, 61, 63, 62, 55, 53, 55, 54
.byte 63, 61, 63, 62, 59, 57, 59, 58
.byte 31, 29, 31, 30, 23, 21, 23, 22
.byte 31, 29, 31, 30, 27, 25, 27, 26
/* GLvector4f *clip_vec, GLvector4f *proj_vec,
GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask,
GLboolean viewport_z_enable */
.align 64
/* Empty leaf routine that returns immediately.  The clip-test routines
 * below "call" it only so that %o7 receives the address of the call
 * instruction, from which they derive the address of one_dot_zero. */
__pc_tramp:
retl
nop
/*
 * _mesa_sparc_cliptest_points4
 *
 * Arguments after the save: i0=clip_vec i1=proj_vec i2=clipMask i3=orMask
 * i4=andMask.  %i5 (the sixth argument) is overwritten with
 * proj_vec->start without being read.
 *
 * For every vertex of clip_vec a clip mask is looked up in clip_table and
 * stored to clipMask[i].  If the mask is non-zero the projected vertex is
 * written as (0, 0, 0, 1.0) and the clipped-vertex counter c is bumped;
 * otherwise the projected vertex is (x/w, y/w, z/w, 1/w).
 * On exit *orMask holds the OR of the incoming *orMask and all masks, and
 * *andMask holds 0 if c < count, else the accumulated AND value.
 * Returns the incoming proj_vec pointer.
 *
 * NOTE(review): proj_vec->size is set to 3 while VEC_SIZE_4 is OR-ed into
 * proj_vec->flags -- confirm that 3 is the intended size.
 * NOTE(review): the loop tests i != count only after the first iteration,
 * so a count of 0 is not handled -- confirm callers never pass it.
 */
.globl _mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
save %sp, -64, %sp
call __pc_tramp ! %o7 = address of this call
sub %o7, (. - one_dot_zero - 4), %g1 ! delay slot: g1 = &one_dot_zero
ld [%g1 + 0x0], %f4 ! f4 = 1.0f
add %g1, 0x4, %g1 ! g1 = clip_table
ld [%i0 + V4F_STRIDE], %l1
ld [%i0 + V4F_COUNT], %l3
LDPTR [%i0 + V4F_START], %i0
LDPTR [%i1 + V4F_START], %i5
/* g2 = (*andMask << 8) | *orMask */
ldub [%i3], %g2
ldub [%i4], %g3
sll %g3, 8, %g3
or %g2, %g3, %g2
/* proj_vec->flags |= VEC_SIZE_4 */
ld [%i1 + V4F_FLAGS], %g3
or %g3, VEC_SIZE_4, %g3
st %g3, [%i1 + V4F_FLAGS]
/* proj_vec->size = 3; proj_vec->count = clip_vec->count */
mov 3, %g3
st %g3, [%i1 + V4F_SIZE]
st %l3, [%i1 + V4F_COUNT]
clr %l2
clr %l0
/* l0: i
* l3: count
* l1: stride
* l2: c
* g2: (tmpAndMask << 8) | tmpOrMask
* g1: clip_table
* i0: from[stride][i]
* i2: clipMask
* i5: vProj[4][i]
*/
/* Each addcc doubles a raw float bit pattern, moving its sign bit into the
 * carry; each subcc sets the carry when (w<<1) < (coord<<1) as unsigned
 * values.  The addx instructions shift those carries into the table index
 * in %g3.  Meanwhile f8 = 1.0 / w is computed for every vertex. */
1: ld [%i0 + 0x0c], %f3 ! LSU Group
ld [%i0 + 0x0c], %g5 ! LSU Group
ld [%i0 + 0x08], %g4 ! LSU Group
fdivs %f4, %f3, %f8 ! FGM
addcc %g5, %g5, %g5 ! IEU1 Group
addx %g0, 0x0, %g3 ! IEU1 Group
addcc %g4, %g4, %g4 ! IEU1 Group
addx %g3, %g3, %g3 ! IEU1 Group
subcc %g5, %g4, %g0 ! IEU1 Group
ld [%i0 + 0x04], %g4 ! LSU Group
addx %g3, %g3, %g3 ! IEU1 Group
addcc %g4, %g4, %g4 ! IEU1 Group
addx %g3, %g3, %g3 ! IEU1 Group
subcc %g5, %g4, %g0 ! IEU1 Group
ld [%i0 + 0x00], %g4 ! LSU Group
addx %g3, %g3, %g3 ! IEU1 Group
addcc %g4, %g4, %g4 ! IEU1 Group
addx %g3, %g3, %g3 ! IEU1 Group
subcc %g5, %g4, %g0 ! IEU1 Group
addx %g3, %g3, %g3 ! IEU1 Group
ldub [%g1 + %g3], %g3 ! LSU Group
cmp %g3, 0 ! IEU1 Group, stall
be 2f ! CTI
stb %g3, [%i2] ! LSU
/* Clipped vertex: c++, g2 = (g2 | mask) & ((mask << 8) | 0xff), and the
 * projected vertex becomes (0, 0, 0, 1.0). */
sll %g3, 8, %g4 ! IEU1 Group
add %l2, 1, %l2 ! IEU0
st %g0, [%i5 + 0x00] ! LSU
or %g4, 0xff, %g4 ! IEU0 Group
or %g2, %g3, %g2 ! IEU1
st %g0, [%i5 + 0x04] ! LSU
and %g2, %g4, %g2 ! IEU0 Group
st %g0, [%i5 + 0x08] ! LSU
b 3f ! CTI
st %f4, [%i5 + 0x0c] ! LSU Group
/* Unclipped vertex: store (x, y, z) * (1/w) and 1/w. */
2: ld [%i0 + 0x00], %f0 ! LSU Group
ld [%i0 + 0x04], %f1 ! LSU Group
ld [%i0 + 0x08], %f2 ! LSU Group
fmuls %f0, %f8, %f0 ! FGM
st %f0, [%i5 + 0x00] ! LSU Group
fmuls %f1, %f8, %f1 ! FGM
st %f1, [%i5 + 0x04] ! LSU Group
fmuls %f2, %f8, %f2 ! FGM
st %f2, [%i5 + 0x08] ! LSU Group
st %f8, [%i5 + 0x0c] ! LSU Group
3: add %i5, 0x10, %i5 ! IEU1
add %l0, 1, %l0 ! IEU0 Group
add %i2, 1, %i2 ! IEU0 Group
cmp %l0, %l3 ! IEU1 Group
bne 1b ! CTI
add %i0, %l1, %i0 ! IEU0 Group
/* *orMask = low byte of g2; *andMask = (c < count) ? 0 : high byte.
 * The annulled branch executes its delay slot only when it is taken. */
stb %g2, [%i3] ! LSU
srl %g2, 8, %g3 ! IEU0 Group
cmp %l2, %l3 ! IEU1 Group
bl,a 1f ! CTI
clr %g3 ! IEU0
1: stb %g3, [%i4] ! LSU Group
ret ! CTI Group
restore %i1, 0x0, %o0
/*
 * _mesa_sparc_cliptest_points4_np
 *
 * Arguments after the save: i0=clip_vec i1=proj_vec i2=clipMask i3=orMask
 * i4=andMask.  The sixth argument (%i5) is not referenced.
 *
 * Same mask computation as _mesa_sparc_cliptest_points4, but no projected
 * vertices are written and proj_vec is not modified.  For every vertex the
 * clip mask is stored to clipMask[i]; on exit *orMask holds the OR of the
 * incoming *orMask and all masks, and *andMask holds 0 if the number of
 * clipped vertices c is below count, else the accumulated AND value.
 * Returns the incoming second argument (%i1) in %o0.
 *
 * NOTE(review): the loop tests i != count only after the first iteration,
 * so a count of 0 is not handled -- confirm callers never pass it.
 */
.globl _mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
save %sp, -64, %sp
call __pc_tramp ! %o7 = address of this call
sub %o7, (. - one_dot_zero - 4), %g1 ! delay slot: g1 = &one_dot_zero
add %g1, 0x4, %g1 ! g1 = clip_table
ld [%i0 + V4F_STRIDE], %l1
ld [%i0 + V4F_COUNT], %l3
LDPTR [%i0 + V4F_START], %i0
/* g2 = (*andMask << 8) | *orMask */
ldub [%i3], %g2
ldub [%i4], %g3
sll %g3, 8, %g3
or %g2, %g3, %g2
clr %l2
clr %l0
/* l0: i
* l3: count
* l1: stride
* l2: c
* g2: (tmpAndMask << 8) | tmpOrMask
* g1: clip_table
* i0: from[stride][i]
* i2: clipMask
*/
/* Each addcc doubles a raw float bit pattern, moving its sign bit into the
 * carry; each subcc sets the carry when (w<<1) < (coord<<1) as unsigned
 * values.  The addx instructions shift those carries into the table index
 * in %g3. */
1: ld [%i0 + 0x0c], %g5 ! LSU Group
ld [%i0 + 0x08], %g4 ! LSU Group
addcc %g5, %g5, %g5 ! IEU1 Group
addx %g0, 0x0, %g3 ! IEU1 Group
addcc %g4, %g4, %g4 ! IEU1 Group
addx %g3, %g3, %g3 ! IEU1 Group
subcc %g5, %g4, %g0 ! IEU1 Group
ld [%i0 + 0x04], %g4 ! LSU Group
addx %g3, %g3, %g3 ! IEU1 Group
addcc %g4, %g4, %g4 ! IEU1 Group
addx %g3, %g3, %g3 ! IEU1 Group
subcc %g5, %g4, %g0 ! IEU1 Group
ld [%i0 + 0x00], %g4 ! LSU Group
addx %g3, %g3, %g3 ! IEU1 Group
addcc %g4, %g4, %g4 ! IEU1 Group
addx %g3, %g3, %g3 ! IEU1 Group
subcc %g5, %g4, %g0 ! IEU1 Group
addx %g3, %g3, %g3 ! IEU1 Group
ldub [%g1 + %g3], %g3 ! LSU Group
cmp %g3, 0 ! IEU1 Group, stall
be 2f ! CTI
stb %g3, [%i2] ! LSU
/* Clipped vertex: c++, g2 = (g2 | mask) & ((mask << 8) | 0xff). */
sll %g3, 8, %g4 ! IEU1 Group
add %l2, 1, %l2 ! IEU0
or %g4, 0xff, %g4 ! IEU0 Group
or %g2, %g3, %g2 ! IEU1
and %g2, %g4, %g2 ! IEU0 Group
2: add %l0, 1, %l0 ! IEU0 Group
add %i2, 1, %i2 ! IEU0 Group
cmp %l0, %l3 ! IEU1 Group
bne 1b ! CTI
add %i0, %l1, %i0 ! IEU0 Group
/* *orMask = low byte of g2; *andMask = (c < count) ? 0 : high byte.
 * The annulled branch executes its delay slot only when it is taken. */
stb %g2, [%i3] ! LSU
srl %g2, 8, %g3 ! IEU0 Group
cmp %l2, %l3 ! IEU1 Group
bl,a 1f ! CTI
clr %g3 ! IEU0
1: stb %g3, [%i4] ! LSU Group
ret ! CTI Group
restore %i1, 0x0, %o0

View File

@ -0,0 +1,170 @@
/*
* SPARC assembly matrix code.
*/
#ifndef _SPARC_MATRIX_H
#define _SPARC_MATRIX_H
/*
 * LDPTR is the pointer-sized load (ldx when __arch64__ is defined, ld
 * otherwise).  MAT_* and V4F_* are byte offsets that the assembly uses to
 * reach members of its matrix and vector arguments.
 * NOTE(review): these are hand-written constants -- presumably they must
 * match the C layouts of GLmatrix and GLvector4f; confirm whenever those
 * structs change.
 */
#ifdef __arch64__
#define LDPTR ldx
#define MAT_M 0x00
#define MAT_INV 0x08
#define V4F_DATA 0x00
#define V4F_START 0x08
#define V4F_COUNT 0x10
#define V4F_STRIDE 0x14
#define V4F_SIZE 0x18
#define V4F_FLAGS 0x1c
#else
#define LDPTR ld
#define MAT_M 0x00
#define MAT_INV 0x04
#define V4F_DATA 0x00
#define V4F_START 0x04
#define V4F_COUNT 0x08
#define V4F_STRIDE 0x0c
#define V4F_SIZE 0x10
#define V4F_FLAGS 0x14
#endif
/* Bit masks for the flags member; VEC_SIZE_n has the low n bits set. */
#define VEC_SIZE_1 1
#define VEC_SIZE_2 3
#define VEC_SIZE_3 7
#define VEC_SIZE_4 15
/* Floating-point registers that hold the matrix: element n lives in Mn,
 * mapped onto the consecutive registers %f16 .. %f31. */
#define M0 %f16
#define M1 %f17
#define M2 %f18
#define M3 %f19
#define M4 %f20
#define M5 %f21
#define M6 %f22
#define M7 %f23
#define M8 %f24
#define M9 %f25
#define M10 %f26
#define M11 %f27
#define M12 %f28
#define M13 %f29
#define M14 %f30
#define M15 %f31
/*
 * LDMATRIX_<indices>(BASE): load the listed elements of the 4-byte-element
 * matrix at BASE into their Mn registers.  The digits in each macro name
 * are the element indices it loads.  A single element is fetched with ld;
 * an even/odd pair (n, n+1) is fetched with one ldd into Mn, which fills
 * the Mn/Mn+1 register pair.
 * NOTE(review): ldd presumably requires BASE to be 8-byte aligned --
 * confirm the alignment of the matrices passed in.
 * The expansions do not end in ';', so each macro is used on a line of
 * its own.
 */
#define LDMATRIX_0_1_2_3_12_13_14_15(BASE) \
ldd [BASE + ( 0 * 0x4)], M0; \
ldd [BASE + ( 2 * 0x4)], M2; \
ldd [BASE + (12 * 0x4)], M12; \
ldd [BASE + (14 * 0x4)], M14
#define LDMATRIX_0_1_12_13(BASE) \
ldd [BASE + ( 0 * 0x4)], M0; \
ldd [BASE + (12 * 0x4)], M12
#define LDMATRIX_0_12_13(BASE) \
ld [BASE + ( 0 * 0x4)], M0; \
ldd [BASE + (12 * 0x4)], M12
#define LDMATRIX_0_1_2_12_13_14(BASE) \
ldd [BASE + ( 0 * 0x4)], M0; \
ld [BASE + ( 2 * 0x4)], M2; \
ldd [BASE + (12 * 0x4)], M12; \
ld [BASE + (14 * 0x4)], M14
#define LDMATRIX_0_12_13_14(BASE) \
ld [BASE + ( 0 * 0x4)], M0; \
ldd [BASE + (12 * 0x4)], M12; \
ld [BASE + (14 * 0x4)], M14
#define LDMATRIX_0_14(BASE) \
ld [BASE + ( 0 * 0x4)], M0; \
ld [BASE + (14 * 0x4)], M14
#define LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(BASE) \
ldd [BASE + ( 0 * 0x4)], M0; \
ldd [BASE + ( 2 * 0x4)], M2; \
ldd [BASE + ( 4 * 0x4)], M4; \
ldd [BASE + ( 6 * 0x4)], M6; \
ldd [BASE + (12 * 0x4)], M12; \
ldd [BASE + (14 * 0x4)], M14
#define LDMATRIX_0_5_12_13(BASE) \
ld [BASE + ( 0 * 0x4)], M0; \
ld [BASE + ( 5 * 0x4)], M5; \
ldd [BASE + (12 * 0x4)], M12
#define LDMATRIX_0_1_2_3_4_5_6_12_13_14(BASE) \
ldd [BASE + ( 0 * 0x4)], M0; \
ldd [BASE + ( 2 * 0x4)], M2; \
ldd [BASE + ( 4 * 0x4)], M4; \
ld [BASE + ( 6 * 0x4)], M6; \
ldd [BASE + (12 * 0x4)], M12; \
ld [BASE + (14 * 0x4)], M14
#define LDMATRIX_0_5_12_13_14(BASE) \
ld [BASE + ( 0 * 0x4)], M0; \
ld [BASE + ( 5 * 0x4)], M5; \
ldd [BASE + (12 * 0x4)], M12; \
ld [BASE + (14 * 0x4)], M14
#define LDMATRIX_0_5_14(BASE) \
ld [BASE + ( 0 * 0x4)], M0; \
ld [BASE + ( 5 * 0x4)], M5; \
ld [BASE + (14 * 0x4)], M14
#define LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(BASE) \
ldd [BASE + ( 0 * 0x4)], M0; \
ldd [BASE + ( 2 * 0x4)], M2; \
ldd [BASE + ( 4 * 0x4)], M4; \
ldd [BASE + ( 6 * 0x4)], M6; \
ldd [BASE + ( 8 * 0x4)], M8; \
ldd [BASE + (10 * 0x4)], M10; \
ldd [BASE + (12 * 0x4)], M12; \
ldd [BASE + (14 * 0x4)], M14
#define LDMATRIX_0_1_4_5_12_13(BASE) \
ldd [BASE + ( 0 * 0x4)], M0; \
ldd [BASE + ( 4 * 0x4)], M4; \
ldd [BASE + (12 * 0x4)], M12
/* NOTE(review): LDMATRIX_0_5_12_13 is already defined above with an
 * identical body; this repeat is redundant but harmless. */
#define LDMATRIX_0_5_12_13(BASE) \
ld [BASE + ( 0 * 0x4)], M0; \
ld [BASE + ( 5 * 0x4)], M5; \
ldd [BASE + (12 * 0x4)], M12
#define LDMATRIX_0_1_2_4_5_6_8_9_10(BASE) \
ldd [BASE + ( 0 * 0x4)], M0; \
ld [BASE + ( 2 * 0x4)], M2; \
ldd [BASE + ( 4 * 0x4)], M4; \
ld [BASE + ( 6 * 0x4)], M6; \
ldd [BASE + ( 8 * 0x4)], M8; \
ld [BASE + (10 * 0x4)], M10
#define LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(BASE) \
ldd [BASE + ( 0 * 0x4)], M0; \
ld [BASE + ( 2 * 0x4)], M2; \
ldd [BASE + ( 4 * 0x4)], M4; \
ld [BASE + ( 6 * 0x4)], M6; \
ldd [BASE + ( 8 * 0x4)], M8; \
ld [BASE + (10 * 0x4)], M10; \
ldd [BASE + (12 * 0x4)], M12; \
ld [BASE + (14 * 0x4)], M14
/*
 * Load the three diagonal elements 0, 5 and 10 of the 4-byte-element matrix
 * at BASE into M0, M5 and M10 with single-word loads.
 * The last line deliberately carries no trailing "; \": a line continuation
 * there would splice whatever line follows into this macro body.
 */
#define LDMATRIX_0_5_10(BASE) \
ld [BASE + ( 0 * 0x4)], M0; \
ld [BASE + ( 5 * 0x4)], M5; \
ld [BASE + (10 * 0x4)], M10
#define LDMATRIX_0_5_10_12_13_14(BASE) \
ld [BASE + ( 0 * 0x4)], M0; \
ld [BASE + ( 5 * 0x4)], M5; \
ld [BASE + (10 * 0x4)], M10; \
ldd [BASE + (12 * 0x4)], M12; \
ld [BASE + (14 * 0x4)], M14
/* Loads elements 0, 5, 8, 9, 10 and 14; the 8/9 pair uses one ldd into M8. */
#define LDMATRIX_0_5_8_9_10_14(BASE) \
ld [BASE + ( 0 * 0x4)], M0; \
ld [BASE + ( 5 * 0x4)], M5; \
ldd [BASE + ( 8 * 0x4)], M8; \
ld [BASE + (10 * 0x4)], M10; \
ld [BASE + (14 * 0x4)], M14
#endif /* !(_SPARC_MATRIX_H) */

1392
src/arch/sparc/xform.S Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,40 @@
# Copyright © 2012 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
# Everything below applies only when x86-64 assembly is enabled: build the
# gen_matypes helper from the x86 sources and run it to produce matypes.h.
if HAVE_X86_64_ASM
AM_CPPFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/mesa \
-I$(top_srcdir)/src/GLdispatch/mapi \
$(API_DEFINES) \
$(DEFINES)
# gen_matypes is a build-time helper only; it is not installed.
noinst_PROGRAMS = gen_matypes
gen_matypes_SOURCES = ../x86/gen_matypes.c
# matypes.h is generated, so list it as a built source and remove it on clean.
BUILT_SOURCES = matypes.h
CLEANFILES = matypes.h
# The header is whatever gen_matypes prints on standard output.
matypes.h: gen_matypes
$(AM_V_GEN)./gen_matypes > $@
endif

View File

@ -0,0 +1,50 @@
Register Usage
rax temporary register; with variable arguments passes information
about the number of SSE registers used; 1st return register
rbx* callee-saved register; optionally used as base pointer
rcx used to pass 4th integer argument to functions
rdx used to pass 3rd argument to functions; 2nd return register
rsp* stack pointer
rbp* callee-saved register; optionally used as frame pointer
rsi used to pass 2nd argument to functions
rdi used to pass 1st argument to functions
r8 used to pass 5th argument to functions
r9 used to pass 6th argument to functions
r10 temporary register, used for passing a function's static chain pointer
r11 temporary register
r12-15* callee-saved registers
xmm0-1 used to pass and return floating point arguments
xmm2-7 used to pass floating point arguments
xmm8-15 temporary registers
mmx0-7 temporary registers
st0 temporary register; used to return long double arguments
st1 temporary register; used to return long double arguments
st2-7 temporary registers
fs Reserved for system use (as thread specific data register)
*) must be preserved across function calls
Integer arguments from list: rdi,rsi,rdx,rcx,r8,r9,stack
Floating point arguments from list: xmm0-xmm7

119
src/arch/x86-64/x86-64.c Normal file
View File

@ -0,0 +1,119 @@
/*
* Mesa 3-D graphics library
* Version: 6.3
*
* Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* x86-64 optimizations shamelessly converted from x86/sse/3dnow assembly by
* Mikko Tiihonen
*/
#ifdef USE_X86_64_ASM
#include "main/glheader.h"
#include "main/context.h"
#include "math/m_xform.h"
#include "tnl/t_context.h"
#include "x86-64.h"
#include "../x86/x86_xform.h"
#ifdef DEBUG
#include "math/m_debug.h"
#endif
/* Assembly helper: executes CPUID and returns the result registers through
 * regs.  The caller in this file seeds regs[0] with the leaf number and
 * tests regs[3] afterwards. */
extern void _mesa_x86_64_cpuid(unsigned int *regs);
/* Prototypes for the size-4 point transforms of the x86_64 and 3dnow
 * families.  DECLARE_XFORM_GROUP is not defined in this file; presumably it
 * comes from ../x86/x86_xform.h -- confirm. */
DECLARE_XFORM_GROUP( x86_64, 4 )
DECLARE_XFORM_GROUP( 3dnow, 4 )
#else
/* just to silence warning below */
#include "x86-64.h"
#endif
/*
extern void _mesa_x86_64_transform_points4_general( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_identity( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_perspective( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_3d( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_3d_no_rot( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_2d_no_rot( XFORM_ARGS );
extern void _mesa_x86_64_transform_points4_2d( XFORM_ARGS );
*/
#ifdef USE_X86_64_ASM
/* Forward msg to _mesa_debug, but only when the MESA_DEBUG environment
 * variable is set; otherwise stay silent. */
static void message( const char *msg )
{
   if (!_mesa_getenv("MESA_DEBUG"))
      return;

   _mesa_debug( NULL, "%s", msg );
}
#endif
/**
 * Install the x86-64 assembly point transforms for 4-component vectors.
 *
 * Does nothing when USE_X86_64_ASM is not defined or when the MESA_NO_ASM
 * environment variable is set.  The general, identity and 3d slots always
 * receive the x86_64 routines; the 3d_no_rot, perspective, 2d_no_rot and 2d
 * slots receive the 3dnow routines only when CPUID leaf 0x80000001 reports
 * bit 31 of EDX.  With DEBUG_MATH defined the math self-tests run afterwards.
 */
void _mesa_init_all_x86_64_transform_asm(void)
{
#ifdef USE_X86_64_ASM
   /* CPUID request block: leaf 0x80000001 in slot 0, the rest zeroed. */
   unsigned int cpuid_regs[4] = { 0x80000001, 0x00000000, 0x00000000, 0x00000000 };

   if (_mesa_getenv("MESA_NO_ASM")) {
      return;
   }

   message("Initializing x86-64 optimizations\n");

   _mesa_transform_tab[4][MATRIX_GENERAL] = _mesa_x86_64_transform_points4_general;
   _mesa_transform_tab[4][MATRIX_IDENTITY] = _mesa_x86_64_transform_points4_identity;
   _mesa_transform_tab[4][MATRIX_3D] = _mesa_x86_64_transform_points4_3d;

   _mesa_x86_64_cpuid(cpuid_regs);

   /* Slot 3 holds EDX on return; its top bit selects the 3dnow routines. */
   if ((cpuid_regs[3] & 0x80000000U) != 0) {
      message("3Dnow! detected\n");
      _mesa_transform_tab[4][MATRIX_3D_NO_ROT] = _mesa_3dnow_transform_points4_3d_no_rot;
      _mesa_transform_tab[4][MATRIX_PERSPECTIVE] = _mesa_3dnow_transform_points4_perspective;
      _mesa_transform_tab[4][MATRIX_2D_NO_ROT] = _mesa_3dnow_transform_points4_2d_no_rot;
      _mesa_transform_tab[4][MATRIX_2D] = _mesa_3dnow_transform_points4_2d;
   }

#ifdef DEBUG_MATH
   _math_test_all_transform_functions("x86_64");
   _math_test_all_cliptest_functions("x86_64");
   _math_test_all_normal_transform_functions("x86_64");
#endif
#endif
}

31
src/arch/x86-64/x86-64.h Normal file
View File

@ -0,0 +1,31 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef __X86_64_ASM_H__
#define __X86_64_ASM_H__
/* Installs the x86-64 assembly point transforms into the dispatch table
 * (defined in x86-64.c; a no-op there when USE_X86_64_ASM is not defined). */
extern void _mesa_init_all_x86_64_transform_asm( void );
#endif

483
src/arch/x86-64/xform4.S Normal file
View File

@ -0,0 +1,483 @@
/*
* Mesa 3-D graphics library
* Version: 7.1
*
* Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifdef USE_X86_64_ASM
#include "matypes.h"
.text
.align 16
.globl _mesa_x86_64_cpuid
.hidden _mesa_x86_64_cpuid
/*
 * _mesa_x86_64_cpuid: run CPUID on a four-dword buffer addressed by %rdi.
 *
 * In:  0(%rdi) is loaded into %eax and 8(%rdi) into %ecx before CPUID.
 * Out: %eax, %ebx, %ecx and %edx are stored to 0, 4, 8 and 12(%rdi).
 */
_mesa_x86_64_cpuid:
pushq %rbx /* cpuid overwrites %ebx; keep the caller's %rbx */
movl (%rdi), %eax /* input value for %eax */
movl 8(%rdi), %ecx /* input value for %ecx */
cpuid
movl %ebx, 4(%rdi) /* store results back into the buffer */
movl %eax, (%rdi)
movl %ecx, 8(%rdi)
movl %edx, 12(%rdi)
popq %rbx /* restore the caller's %rbx */
ret
.align 16
.globl _mesa_x86_64_transform_points4_general
.hidden _mesa_x86_64_transform_points4_general
/*
 * _mesa_x86_64_transform_points4_general: transform every 4-component
 * source vertex by a full 4x4 matrix using SSE.
 *
 * For each vertex (ox, oy, oz, ow) the result lane i is
 *   ox*m[i] + oy*m[4+i] + oz*m[8+i] + ow*m[12+i]     (i = 0..3)
 */
_mesa_x86_64_transform_points4_general:
/*
 * rdi = dest   (count, size and flags fields are updated)
 * rsi = matrix (16 floats, read with movaps, so it must be 16-byte aligned)
 * rdx = source
 *
 * Destination vertices are written with movaps at a fixed 16-byte pitch,
 * so the dest start pointer must be 16-byte aligned as well.  Source
 * vertices are read with movups and advanced by the source stride.
 */
movl V4F_COUNT(%rdx), %ecx /* count */
/* NOTE(review): movzbl reads only the low byte of the stride field, so a
 * stride above 255 would be truncated - confirm against the field width. */
movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
.byte 0x66, 0x66, 0x66, 0x90 /* manual align += 4 (prefixed one-byte nop) */
orl $VEC_SIZE_4, V4F_FLAGS(%rdi) /* set dest flags */
testl %ecx, %ecx /* verify non-zero count */
prefetchnta 64(%rsi) /* does not modify the flags set by testl */
jz p4_general_done
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
prefetch 16(%rdx)
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
movaps 32(%rsi), %xmm6 /* m11 | m10 | m9 | m8 */
movaps 48(%rsi), %xmm7 /* m15 | m14 | m13 | m12 */
p4_general_loop:
movups (%rdx), %xmm8 /* ow | oz | oy | ox */
prefetchw 16(%rdi)
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx /* advance src by stride */
pshufd $0x55, %xmm8, %xmm1 /* oy | oy | oy | oy */
mulps %xmm4, %xmm0 /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
pshufd $0xAA, %xmm8, %xmm2 /* oz | oz | oz | oz */
mulps %xmm5, %xmm1 /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
pshufd $0xFF, %xmm8, %xmm3 /* ow | ow | ow | ow */
mulps %xmm6, %xmm2 /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
addps %xmm1, %xmm0 /* ox*m3+oy*m7 | ... */
mulps %xmm7, %xmm3 /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* ox*m3+oy*m7+oz*m11 | ... */
prefetch 16(%rdx)
addps %xmm3, %xmm0 /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
addq $16, %rdi /* dest vertices are packed, 16 bytes each */
decl %ecx
jnz p4_general_loop
p4_general_done:
.byte 0xf3 /* REP prefix: together with the next line this encodes "rep ret" */
ret
.section .rodata
.align 16
/*
 * Two 16-byte constants used by _mesa_x86_64_transform_points4_3d:
 *   bytes  0..15: AND mask, all ones in lanes 0-2 and zero in lane 3
 *   bytes 16..31: OR value, zero in lanes 0-2 and 1.0f in lane 3
 */
p4_constants:
.byte 0xff, 0xff, 0xff, 0xff /* mask lane 0: keep */
.byte 0xff, 0xff, 0xff, 0xff /* mask lane 1: keep */
.byte 0xff, 0xff, 0xff, 0xff /* mask lane 2: keep */
.byte 0x00, 0x00, 0x00, 0x00 /* mask lane 3: clear */
.byte 0x00, 0x00, 0x00, 0x00 /* OR lane 0: 0 */
.byte 0x00, 0x00, 0x00, 0x00 /* OR lane 1: 0 */
.byte 0x00, 0x00, 0x00, 0x00 /* OR lane 2: 0 */
.float 1.0 /* OR lane 3: 1.0 */
.text
.align 16
.globl _mesa_x86_64_transform_points4_3d
.hidden _mesa_x86_64_transform_points4_3d
/*
 * _mesa_x86_64_transform_points4_3d: same computation as
 * _mesa_x86_64_transform_points4_general, but the matrix registers are
 * first patched so that lane 3 of each of the four loaded vectors
 * (m3, m7, m11, m15) is forced to 0, 0, 0, 1.
 *
 * This is slower than _mesa_x86_64_transform_points4_general because of
 * that extra masking step.
 *
 * rdi = dest, rsi = matrix (16-byte aligned), rdx = source.
 */
_mesa_x86_64_transform_points4_3d:
leaq p4_constants(%rip), %rax
prefetchnta 64(%rsi)
movaps (%rax), %xmm9 /* AND mask: 0 | ~0 | ~0 | ~0 */
movaps 16(%rax), %xmm10 /* OR value: 1.0 | 0 | 0 | 0 */
movl V4F_COUNT(%rdx), %ecx /* count */
/* NOTE(review): only the low byte of the stride field is loaded - confirm
 * that strides above 255 cannot occur. */
movzbl V4F_STRIDE(%rdx), %eax /* stride (reuses %eax after the constant loads) */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
orl $VEC_SIZE_4, V4F_FLAGS(%rdi) /* set dest flags */
testl %ecx, %ecx /* verify non-zero count */
jz p4_3d_done
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
prefetch 16(%rdx)
movaps 0(%rsi), %xmm4 /* m3 | m2 | m1 | m0 */
movaps 16(%rsi), %xmm5 /* m7 | m6 | m5 | m4 */
andps %xmm9, %xmm4 /* 0.0 | m2 | m1 | m0 */
movaps 32(%rsi), %xmm6 /* m11 | m10 | m9 | m8 */
andps %xmm9, %xmm5 /* 0.0 | m6 | m5 | m4 */
movaps 48(%rsi), %xmm7 /* m15 | m14 | m13 | m12 */
andps %xmm9, %xmm6 /* 0.0 | m10 | m9 | m8 */
andps %xmm9, %xmm7 /* 0.0 | m14 | m13 | m12 */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
orps %xmm10, %xmm7 /* 1.0 | m14 | m13 | m12 */
p4_3d_loop:
movups (%rdx), %xmm8 /* ow | oz | oy | ox */
prefetchw 16(%rdi)
pshufd $0x00, %xmm8, %xmm0 /* ox | ox | ox | ox */
addq %rax, %rdx /* advance src by stride */
pshufd $0x55, %xmm8, %xmm1 /* oy | oy | oy | oy */
mulps %xmm4, %xmm0 /* ox*0.0 | ox*m2 | ox*m1 | ox*m0 */
pshufd $0xAA, %xmm8, %xmm2 /* oz | oz | oz | oz */
mulps %xmm5, %xmm1 /* oy*0.0 | oy*m6 | oy*m5 | oy*m4 */
pshufd $0xFF, %xmm8, %xmm3 /* ow | ow | ow | ow */
mulps %xmm6, %xmm2 /* oz*0.0 | oz*m10 | oz*m9 | oz*m8 */
addps %xmm1, %xmm0 /* accumulate the oy terms */
mulps %xmm7, %xmm3 /* ow*1.0 | ow*m14 | ow*m13 | ow*m12 */
addps %xmm2, %xmm0 /* accumulate the oz terms */
prefetch 16(%rdx)
addps %xmm3, %xmm0 /* accumulate the ow terms */
movaps %xmm0, (%rdi) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
addq $16, %rdi /* dest vertices are packed, 16 bytes each */
dec %ecx
jnz p4_3d_loop
p4_3d_done:
.byte 0xf3 /* REP prefix: together with the next line this encodes "rep ret" */
ret
.align 16
.globl _mesa_x86_64_transform_points4_identity
.hidden _mesa_x86_64_transform_points4_identity
/*
 * _mesa_x86_64_transform_points4_identity: copy the source vertices to the
 * destination unchanged.  The matrix pointer passed in %rsi is not read;
 * %rsi is reused as the copy source.
 *
 * rdi = dest, rdx = source.
 *
 * NOTE(review): the stride loaded into %eax is never used.  The copy moves
 * 2*count quadwords contiguously, i.e. it treats the source as tightly
 * packed 16-byte vertices - confirm callers never pass another stride.
 * rep movsq also relies on the direction flag being clear on entry.
 */
_mesa_x86_64_transform_points4_identity:
movl V4F_COUNT(%rdx), %ecx /* count */
movzbl V4F_STRIDE(%rdx), %eax /* stride (unused below) */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
orl $VEC_SIZE_4, V4F_FLAGS(%rdi) /* set dest flags */
test %ecx, %ecx
jz p4_identity_done
movq V4F_START(%rdx), %rsi /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
prefetch 64(%rsi)
prefetchw 64(%rdi)
add %ecx, %ecx /* two quadwords (16 bytes) per vertex */
rep movsq /* copy %rcx quadwords from (%rsi) to (%rdi) */
p4_identity_done:
.byte 0xf3 /* REP prefix: together with the next line this encodes "rep ret" */
ret
.align 16
.globl _mesa_3dnow_transform_points4_3d_no_rot
.hidden _mesa_3dnow_transform_points4_3d_no_rot
/*
 * _mesa_3dnow_transform_points4_3d_no_rot: 3DNow! transform that reads only
 * the matrix elements at float indices 0, 5, 10 and 12..14.
 *
 * rdi = dest, rsi = matrix, rdx = source.  Per vertex (x0, x1, x2, x3):
 *   r0 = x0*m00 + x3*m30
 *   r1 = x1*m11 + x3*m31
 *   r2 = x2*m22 + x3*m32
 *   r3 = x3
 */
_mesa_3dnow_transform_points4_3d_no_rot:
movl V4F_COUNT(%rdx), %ecx /* count */
/* NOTE(review): only the low byte of the stride field is loaded - confirm
 * that strides above 255 cannot occur. */
movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
orl $VEC_SIZE_4, V4F_FLAGS(%rdi) /* set dest flags */
test %ecx, %ecx
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
jz p4_3d_no_rot_done
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
prefetch (%rdx)
movd (%rsi), %mm0 /* | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movd 40(%rsi), %mm2 /* | m22 */
movq 48(%rsi), %mm1 /* m31 | m30 */
punpckldq 56(%rsi), %mm2 /* m32 | m22 */
p4_3d_no_rot_loop:
prefetchw 32(%rdi)
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
movd 12(%rdx), %mm7 /* | x3 */
movq %mm5, %mm6 /* x3 | x2 */
pfmul %mm0, %mm4 /* x1*m11 | x0*m00 */
punpckhdq %mm6, %mm6 /* x3 | x3 */
pfmul %mm2, %mm5 /* x3*m32 | x2*m22 */
pfmul %mm1, %mm6 /* x3*m31 | x3*m30 */
pfacc %mm7, %mm5 /* x3 | x2*m22+x3*m32 */
pfadd %mm6, %mm4 /* x1*m11+x3*m31 | x0*m00+x3*m30 */
addq %rax, %rdx /* advance src by stride */
movq %mm4, (%rdi) /* write r0, r1 */
movq %mm5, 8(%rdi) /* write r2, r3 */
addq $16, %rdi
decl %ecx
prefetch 32(%rdx) /* does not modify the flags set by decl */
jnz p4_3d_no_rot_loop
p4_3d_no_rot_done:
femms /* leave MMX/3DNow! state before returning */
ret
.align 16
.globl _mesa_3dnow_transform_points4_perspective
.hidden _mesa_3dnow_transform_points4_perspective
/*
 * _mesa_3dnow_transform_points4_perspective: 3DNow! transform that reads
 * only the matrix elements at float indices 0, 5, 8, 9, 10 and 14.
 *
 * rdi = dest, rsi = matrix, rdx = source.  Per vertex (x0, x1, x2, x3):
 *   r0 = x0*m00 + x2*m20
 *   r1 = x1*m11 + x2*m21
 *   r2 = x2*m22 + x3*m32
 *   r3 = -x2
 */
_mesa_3dnow_transform_points4_perspective:
movl V4F_COUNT(%rdx), %ecx /* count */
/* NOTE(review): only the low byte of the stride field is loaded - confirm
 * that strides above 255 cannot occur. */
movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
orl $VEC_SIZE_4, V4F_FLAGS(%rdi) /* set dest flags */
test %ecx, %ecx
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
jz p4_perspective_done
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movd (%rsi), %mm0 /* | m00 */
pxor %mm7, %mm7 /* 0 | 0 */
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 32(%rsi), %mm2 /* m21 | m20 */
prefetch (%rdx)
movd 40(%rsi), %mm1 /* | m22 */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
punpckldq 56(%rsi), %mm1 /* m32 | m22 */
p4_perspective_loop:
prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
movd 8(%rdx), %mm3 /* | x2 */
movq %mm5, %mm6 /* x3 | x2 */
pfmul %mm0, %mm4 /* x1*m11 | x0*m00 */
punpckldq %mm5, %mm5 /* x2 | x2 */
pfmul %mm2, %mm5 /* x2*m21 | x2*m20 */
pfsubr %mm7, %mm3 /* | -x2 (computed as 0 - x2) */
pfmul %mm1, %mm6 /* x3*m32 | x2*m22 */
pfadd %mm4, %mm5 /* x1*m11+x2*m21 | x0*m00+x2*m20 */
pfacc %mm3, %mm6 /* -x2 | x2*m22+x3*m32 */
movq %mm5, (%rdi) /* write r0, r1 */
addq %rax, %rdx /* advance src by stride */
movq %mm6, 8(%rdi) /* write r2, r3 */
addq $16, %rdi
decl %ecx
prefetch 32(%rdx) /* hopefully stride is zero */
jnz p4_perspective_loop
p4_perspective_done:
femms /* leave MMX/3DNow! state before returning */
ret
.align 16
.globl _mesa_3dnow_transform_points4_2d_no_rot
.hidden _mesa_3dnow_transform_points4_2d_no_rot
/*
 * _mesa_3dnow_transform_points4_2d_no_rot: 3DNow! transform that reads only
 * the matrix elements at float indices 0, 5, 12 and 13.
 *
 * rdi = dest, rsi = matrix, rdx = source.  Per vertex (x0, x1, x2, x3):
 *   r0 = x0*m00 + x3*m30
 *   r1 = x1*m11 + x3*m31
 *   r2 = x2
 *   r3 = x3
 */
_mesa_3dnow_transform_points4_2d_no_rot:
movl V4F_COUNT(%rdx), %ecx /* count */
/* NOTE(review): only the low byte of the stride field is loaded - confirm
 * that strides above 255 cannot occur. */
movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
orl $VEC_SIZE_4, V4F_FLAGS(%rdi) /* set dest flags */
test %ecx, %ecx
.byte 0x90 /* manual align += 1 */
jz p4_2d_no_rot_done
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movd (%rsi), %mm0 /* | m00 */
prefetch (%rdx)
punpckldq 20(%rsi), %mm0 /* m11 | m00 */
movq 48(%rsi), %mm1 /* m31 | m30 */
p4_2d_no_rot_loop:
prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm4 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
pfmul %mm0, %mm4 /* x1*m11 | x0*m00 */
movq %mm5, %mm6 /* x3 | x2 */
punpckhdq %mm6, %mm6 /* x3 | x3 */
addq %rax, %rdx /* advance src by stride */
pfmul %mm1, %mm6 /* x3*m31 | x3*m30 */
prefetch 32(%rdx) /* hopefully stride is zero */
pfadd %mm4, %mm6 /* x1*m11+x3*m31 | x0*m00+x3*m30 */
movq %mm6, (%rdi) /* write r0, r1 */
movq %mm5, 8(%rdi) /* write r2, r3 (copied unchanged) */
addq $16, %rdi
decl %ecx
jnz p4_2d_no_rot_loop
p4_2d_no_rot_done:
femms /* leave MMX/3DNow! state before returning */
ret
.align 16
.globl _mesa_3dnow_transform_points4_2d
.hidden _mesa_3dnow_transform_points4_2d
/*
 * _mesa_3dnow_transform_points4_2d: 3DNow! transform that reads only the
 * matrix elements at float indices 0, 1, 4, 5, 12 and 13.
 *
 * rdi = dest, rsi = matrix, rdx = source.  Per vertex (x0, x1, x2, x3):
 *   r0 = x0*m00 + x1*m10 + x3*m30
 *   r1 = x0*m01 + x1*m11 + x3*m31
 *   r2 = x2
 *   r3 = x3
 */
_mesa_3dnow_transform_points4_2d:
movl V4F_COUNT(%rdx), %ecx /* count */
/* NOTE(review): only the low byte of the stride field is loaded - confirm
 * that strides above 255 cannot occur. */
movzbl V4F_STRIDE(%rdx), %eax /* stride */
movl %ecx, V4F_COUNT(%rdi) /* set dest count */
movl $4, V4F_SIZE(%rdi) /* set dest size */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
orl $VEC_SIZE_4, V4F_FLAGS(%rdi) /* set dest flags */
test %ecx, %ecx
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
jz p4_2d_done
movq V4F_START(%rdx), %rdx /* ptr to first src vertex */
movq V4F_START(%rdi), %rdi /* ptr to first dest vertex */
movd (%rsi), %mm0 /* | m00 */
movd 4(%rsi), %mm1 /* | m01 */
prefetch (%rdx)
punpckldq 16(%rsi), %mm0 /* m10 | m00 */
.byte 0x66, 0x66, 0x90 /* manual align += 3 */
punpckldq 20(%rsi), %mm1 /* m11 | m01 */
movq 48(%rsi), %mm2 /* m31 | m30 */
p4_2d_loop:
prefetchw 32(%rdi) /* prefetch 2 vertices ahead */
movq (%rdx), %mm3 /* x1 | x0 */
movq 8(%rdx), %mm5 /* x3 | x2 */
movq %mm3, %mm4 /* x1 | x0 */
movq %mm5, %mm6 /* x3 | x2 */
pfmul %mm1, %mm4 /* x1*m11 | x0*m01 */
punpckhdq %mm6, %mm6 /* x3 | x3 */
pfmul %mm0, %mm3 /* x1*m10 | x0*m00 */
addq %rax, %rdx /* advance src by stride */
pfacc %mm4, %mm3 /* x0*m01+x1*m11 | x0*m00+x1*m10 */
pfmul %mm2, %mm6 /* x3*m31 | x3*m30 */
prefetch 32(%rdx) /* hopefully stride is zero */
pfadd %mm6, %mm3 /* r1 | r0 */
movq %mm3, (%rdi) /* write r0, r1 */
movq %mm5, 8(%rdi) /* write r2, r3 (copied unchanged) */
addq $16, %rdi
decl %ecx
jnz p4_2d_loop
p4_2d_done:
femms /* leave MMX/3DNow! state before returning */
ret
#endif
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

91
src/arch/x86/3dnow.c Normal file
View File

@ -0,0 +1,91 @@
/*
* Mesa 3-D graphics library
* Version: 5.0.1
*
* Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* 3DNow! optimizations contributed by
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
*/
#include "main/glheader.h"
#include "main/context.h"
#include "math/m_xform.h"
#include "tnl/t_context.h"
#include "3dnow.h"
#include "x86_xform.h"
#ifdef DEBUG_MATH
#include "math/m_debug.h"
#endif
#ifdef USE_3DNOW_ASM
/*
 * Declarations for the 3DNow! routines; everything here is compiled only
 * when USE_3DNOW_ASM is defined.  The DECLARE_* macros are not defined in
 * this file (presumably they come from x86_xform.h - confirm).
 */
DECLARE_XFORM_GROUP( 3dnow, 2 )
DECLARE_XFORM_GROUP( 3dnow, 3 )
DECLARE_XFORM_GROUP( 3dnow, 4 )
DECLARE_NORM_GROUP( 3dnow )
/*
 * NOTE(review): the three prototypes below are not referenced anywhere in
 * the visible part of this file; check whether they are still needed.
 */
extern void _ASMAPI
_mesa_v16_3dnow_general_xform( GLfloat *first_vert,
const GLfloat *m,
const GLfloat *src,
GLuint src_stride,
GLuint count );
extern void _ASMAPI
_mesa_3dnow_project_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride );
extern void _ASMAPI
_mesa_3dnow_project_clipped_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride,
const GLubyte *clipmask );
#endif
/**
 * Set up the 3DNow! transform routines.
 *
 * With USE_3DNOW_ASM defined, this invokes ASSIGN_XFORM_GROUP for the
 * 2-, 3- and 4-component groups; the normal-transform group is left
 * unassigned on purpose (see the comment in the body).  With DEBUG_MATH
 * also defined, the math self-tests are run afterwards.
 *
 * Without USE_3DNOW_ASM the function does nothing.
 */
void _mesa_init_3dnow_transform_asm( void )
{
#ifdef USE_3DNOW_ASM
ASSIGN_XFORM_GROUP( 3dnow, 2 );
ASSIGN_XFORM_GROUP( 3dnow, 3 );
ASSIGN_XFORM_GROUP( 3dnow, 4 );
/* There's a bug somewhere in the 3dnow_normal.S file that causes
* bad shading. Disable for now.
ASSIGN_NORM_GROUP( 3dnow );
*/
#ifdef DEBUG_MATH
/* The string argument is the label passed to the test routines. */
_math_test_all_transform_functions( "3DNow!" );
_math_test_all_normal_transform_functions( "3DNow!" );
#endif
#endif
}

36
src/arch/x86/3dnow.h Normal file
View File

@ -0,0 +1,36 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* 3DNow! optimizations contributed by
* Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
*/
/* Interface of the 3DNow! transform setup code. */
#ifndef __3DNOW_H__
#define __3DNOW_H__
/* Takes no arguments and returns nothing. */
void _mesa_init_3dnow_transform_asm( void );
#endif

852
src/arch/x86/3dnow_normal.S Normal file
View File

@ -0,0 +1,852 @@
/*
* Mesa 3-D graphics library
* Version: 5.1
*
* Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* 3Dnow assembly code by Holger Waechtler
*/
#ifdef USE_3DNOW_ASM
#include "assyntax.h"
#include "matypes.h"
#include "norm_args.h"
SEG_TEXT
/* M(i): the i-th 4-byte element of the float array addressed by ECX. */
#define M(i) REGOFF(i * 4, ECX)
/*
 * STRIDE: the dword at byte offset 12 from ESI, added to the source pointer
 * once per normal.
 * NOTE(review): 12 is a hard-coded offset; presumably it equals V4F_STRIDE
 * from matypes.h - confirm, and consider using the symbolic name.
 */
#define STRIDE REGOFF(12, ESI)
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals)
HIDDEN(_mesa_3dnow_transform_normalize_normals)
/*
 * _mesa_3dnow_transform_normalize_normals
 *
 * Two passes over the normals:
 *   1. transform: r = (x0*m0+x1*m1+x2*m2, x0*m4+x1*m5+x2*m6,
 *                      x0*m8+x1*m9+x2*m10), written to dest at a 16-byte
 *      pitch while the source advances by STRIDE;
 *   2. normalize dest in place, either by multiplying with the caller's
 *      lengths[] values (lengths != 0) or by 1/sqrt(r.r) computed here.
 *
 * Arguments are read through the ARG_* macros (presumably defined in
 * norm_args.h in terms of FRAME_OFFSET - confirm), which is why
 * FRAME_OFFSET is redefined after every group of pushes.
 *
 * NOTE(review): the scale factor is folded into the matrix only when
 * lengths == 0, the case where the result is renormalized anyway; when
 * lengths != 0 the scale is skipped.  Confirm this is intended - it looks
 * inverted.
 */
GLNAME(_mesa_3dnow_transform_normalize_normals):
#define FRAME_OFFSET 12
PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )
MOV_L ( ARG_LENGTHS, EDI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
JE ( LLBL (G3TN_end) )
MOV_L ( REGOFF (V4F_COUNT, ESI), EBP ) /* reloads the count already in EBP */
FEMMS
PUSH_L ( EBP )
PUSH_L ( EAX )
PUSH_L ( EDX ) /* save counter & pointer for */
/* the normalize pass */
#undef FRAME_OFFSET
#define FRAME_OFFSET 24
MOVQ ( M(0), MM3 ) /* m1 | m0 */
MOVQ ( M(4), MM4 ) /* m5 | m4 */
MOVD ( M(2), MM5 ) /* | m2 */
PUNPCKLDQ ( M(6), MM5 ) /* m6 | m2 */
MOVQ ( M(8), MM6 ) /* m9 | m8 */
MOVQ ( M(10), MM7 ) /* m11 | m10 (only the low half affects the stored result) */
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
JNE ( LLBL (G3TN_scale_end ) ) /* lengths given: skip the scaling */
MOVD ( ARG_SCALE, MM0 ) /* | scale */
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
PFMUL ( MM0, MM3 ) /* scale * m1 | scale * m0 */
PFMUL ( MM0, MM4 ) /* scale * m5 | scale * m4 */
PFMUL ( MM0, MM5 ) /* scale * m6 | scale * m2 */
PFMUL ( MM0, MM6 ) /* scale * m9 | scale * m8 */
PFMUL ( MM0, MM7 ) /* scale * m11 | scale * m10 */
ALIGNTEXT32
LLBL (G3TN_scale_end):
LLBL (G3TN_transform):
MOVQ ( REGIND (EDX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */
MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */
PREFETCHW ( REGIND(EAX) )
PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
PFADD ( MM2, MM0 ) /* x0*m4+x1*m5+x2*m6 | x0*m0+x1*m1+x2*m2 */
MOVQ ( REGIND (EDX), MM1 ) /* x1 | x0 */
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
MOVD ( REGOFF (8, EDX), MM2 ) /* | x2 */
PFMUL ( MM7, MM2 ) /* | x2*m10 */
PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
JNZ ( LLBL (G3TN_transform) )
POP_L ( EDX ) /* end of transform --- */
POP_L ( EAX ) /* now normalizing ... */
POP_L ( EBP )
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
JE ( LLBL (G3TN_norm ) ) /* calculate lengths */
ALIGNTEXT32
LLBL (G3TN_norm_w_lengths):
PREFETCHW ( REGOFF(12,EAX) )
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
MOVD ( REGIND (EDI), MM3 ) /* | length (x) */
PFMUL ( MM3, MM1 ) /* | x2 (normalized) */
PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */
ADD_L ( STRIDE, EDX ) /* next normal (EDX is not read again in this loop) */
ADD_L ( CONST(4), EDI ) /* next length */
PREFETCH ( REGIND(EDI) )
MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */
MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */
ADD_L ( CONST(16), EAX ) /* next r */
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
JNZ ( LLBL (G3TN_norm_w_lengths) )
JMP ( LLBL (G3TN_exit_3dnow) )
ALIGNTEXT32
LLBL (G3TN_norm):
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND (EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
MOVQ ( MM0, MM3 ) /* x1 | x0 */
MOVQ ( MM1, MM4 ) /* | x2 */
PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM1, MM4 ) /* | x2*x2 */
PFADD ( MM4, MM3 ) /* x1*x1 | x0*x0+x2*x2 */
PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1+x2*x2 */
PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
MOVQ ( MM5, MM4 ) /* keep the first estimate for PFRCPIT2 */
PUNPCKLDQ ( MM3, MM3 )
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
PFMUL ( MM5, MM5 ) /* PFRSQRT/PFRSQIT1/PFRCPIT2: refinement of the estimate */
PFRSQIT1 ( MM3, MM5 )
PFRCPIT2 ( MM4, MM5 )
PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */
PFMUL ( MM5, MM1 ) /* | x2 (normalized) */
MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */
JNZ ( LLBL (G3TN_norm) ) /* tests the flags left by the SUB_L above */
LLBL (G3TN_exit_3dnow):
FEMMS
LLBL (G3TN_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normalize_normals_no_rot)
/*
 * _mesa_3dnow_transform_normalize_normals_no_rot
 *
 * Transform each normal by the diagonal elements m0, m5, m10 of mat->inv
 * and normalize it in the same pass:
 *   lengths != 0: r = (x0*m0, x1*m5, x2*m10) * lengths[i]
 *   lengths == 0: r = (x0*m0, x1*m5, x2*m10) / |(x0*m0, x1*m5, x2*m10)|
 * The source advances by STRIDE, dest by 16 bytes per normal.
 *
 * Arguments are read through the ARG_* macros (presumably defined in
 * norm_args.h in terms of FRAME_OFFSET - confirm).
 *
 * The length for each normal is loaded at the top of the
 * G3TNNR_norm_w_lengths loop, as in G3TN_norm_w_lengths.  The previous
 * version reloaded (EDI) after advancing it and before the loop-exit
 * branch, so the last iteration read one float past the end of lengths[].
 *
 * NOTE(review): the scale factor is folded into the matrix only when
 * lengths == 0, the case where the result is renormalized anyway; when
 * lengths != 0 the scale is skipped.  Confirm this is intended - it looks
 * inverted.
 */
GLNAME(_mesa_3dnow_transform_normalize_normals_no_rot):
#undef FRAME_OFFSET
#define FRAME_OFFSET 12
PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )
MOV_L ( ARG_LENGTHS, EDI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
CMP_L ( CONST(0), EBP ) /* count > 0 ?? */
JE ( LLBL (G3TNNR_end) )
FEMMS
MOVD ( M(0), MM0 ) /* | m0 */
PUNPCKLDQ ( M(5), MM0 ) /* m5 | m0 */
MOVD ( M(10), MM2 ) /* | m10 */
PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
JNE ( LLBL (G3TNNR_scale_end ) ) /* lengths given: skip the scaling */
MOVD ( ARG_SCALE, MM7 ) /* | scale */
PUNPCKLDQ ( MM7, MM7 ) /* scale | scale */
PFMUL ( MM7, MM0 ) /* scale * m5 | scale * m0 */
PFMUL ( MM7, MM2 ) /* scale * m10 | scale * m10 */
ALIGNTEXT32
LLBL (G3TNNR_scale_end):
CMP_L ( CONST(0), EDI ) /* lengths == 0 ? */
JE ( LLBL (G3TNNR_norm) ) /* need to calculate lengths */
ALIGNTEXT32
LLBL (G3TNNR_norm_w_lengths): /* use precalculated lengths */
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */
MOVD ( REGIND(EDI), MM3 ) /* | length (x), current normal only */
PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
PFMUL ( MM2, MM7 ) /* | x2*m10 */
ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM3, MM7 ) /* | x2 (normalized) */
PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
ADD_L ( CONST(4), EDI ) /* next length */
PFMUL ( MM3, MM6 ) /* x1 (normalized) | x0 (normalized) */
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */
MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */
JNZ ( LLBL (G3TNNR_norm_w_lengths) ) /* tests the flags left by the SUB_L above */
JMP ( LLBL (G3TNNR_exit_3dnow) )
ALIGNTEXT32
LLBL (G3TNNR_norm): /* need to calculate lengths */
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM6 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM7 ) /* | x2 */
PFMUL ( MM0, MM6 ) /* x1*m5 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM2, MM7 ) /* | x2*m10 */
MOVQ ( MM6, MM3 ) /* x1 (transformed)| x0 (transformed) */
MOVQ ( MM7, MM4 ) /* | x2 (transformed) */
PFMUL ( MM6, MM3 ) /* x1*x1 | x0*x0 */
PFMUL ( MM7, MM4 ) /* | x2*x2 */
PFACC ( MM3, MM3 ) /* **not used** | x0*x0+x1*x1 */
PFADD ( MM4, MM3 ) /* | x0*x0+x1*x1+x2*x2*/
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2) */
MOVQ ( MM5, MM4 ) /* keep the first estimate for PFRCPIT2 */
PUNPCKLDQ ( MM3, MM3 )
PFMUL ( MM5, MM5 ) /* PFRSQRT/PFRSQIT1/PFRCPIT2: refinement of the estimate */
PFRSQIT1 ( MM3, MM5 )
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
PFRCPIT2 ( MM4, MM5 )
PFMUL ( MM5, MM6 ) /* x1 (normalized) | x0 (normalized) */
MOVQ ( MM6, REGOFF(-16, EAX) ) /* write r0, r1 */
PFMUL ( MM5, MM7 ) /* | x2 (normalized) */
MOVD ( MM7, REGOFF(-8, EAX) ) /* write r2 */
JNZ ( LLBL (G3TNNR_norm) ) /* tests the flags left by the SUB_L above */
LLBL (G3TNNR_exit_3dnow):
FEMMS
LLBL (G3TNNR_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_rescale_normals_no_rot)
/*
 * _mesa_3dnow_transform_rescale_normals_no_rot
 *
 * For every source normal (x0, x1, x2) write
 *   r = (x0*scale*m0, x1*scale*m5, x2*scale*m10)
 * where m is mat->inv.  The source advances by STRIDE, dest by 16 bytes.
 *
 * Arguments are read through the ARG_* macros (presumably defined in
 * norm_args.h in terms of FRAME_OFFSET - confirm).  EDI is saved and
 * restored but not otherwise used here.
 */
GLNAME(_mesa_3dnow_transform_rescale_normals_no_rot):
#undef FRAME_OFFSET
#define FRAME_OFFSET 12
PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )
MOV_L ( ARG_IN, EAX )
MOV_L ( ARG_DEST, EDX )
MOV_L ( REGOFF(V4F_COUNT, EAX), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V4F_COUNT, EDX) )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
MOV_L ( REGOFF(V4F_START, EDX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
CMP_L ( CONST(0), EBP ) /* count == 0 ? nothing to do */
JE ( LLBL (G3TRNR_end) )
FEMMS
MOVD ( ARG_SCALE, MM6 ) /* | scale */
PUNPCKLDQ ( MM6, MM6 ) /* scale | scale */
MOVD ( REGIND(ECX), MM0 ) /* | m0 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */
PFMUL ( MM6, MM0 ) /* scale*m5 | scale*m0 */
MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */
PFMUL ( MM6, MM2 ) /* | scale*m10 */
ALIGNTEXT32
LLBL (G3TRNR_rescale):
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
PFMUL ( MM2, MM5 ) /* | x2*m10 */
ADD_L ( CONST(16), EAX ) /* next r */
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */
MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */
JNZ ( LLBL (G3TRNR_rescale) ) /* cnt > 0 ? -> process next normal */
FEMMS
LLBL (G3TRNR_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_rescale_normals)
HIDDEN(_mesa_3dnow_transform_rescale_normals)
/*
 * _mesa_3dnow_transform_rescale_normals
 *
 * For every source normal (x0, x1, x2) write, with m = scale * mat->inv,
 *   r0 = x0*m0 + x1*m1 + x2*m2
 *   r1 = x0*m4 + x1*m5 + x2*m6
 *   r2 = x0*m8 + x1*m9 + x2*m10
 * The source advances by STRIDE, dest by 16 bytes per normal.
 *
 * Arguments are read through the ARG_* macros (presumably defined in
 * norm_args.h in terms of FRAME_OFFSET - confirm).
 */
GLNAME(_mesa_3dnow_transform_rescale_normals):
#undef FRAME_OFFSET
#define FRAME_OFFSET 8
PUSH_L ( EDI )
PUSH_L ( ESI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
CMP_L ( CONST(0), EDI ) /* count == 0 ? nothing to do */
JE ( LLBL (G3TR_end) )
FEMMS
MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */
MOVQ ( REGOFF(16,ECX), MM4 ) /* m5 | m4 */
MOVD ( ARG_SCALE, MM0 ) /* | scale */
MOVD ( REGOFF(8,ECX), MM5 ) /* | m2 */
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */
PFMUL ( MM0, MM3 ) /* scale*m1 | scale*m0 */
MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8*/
PFMUL ( MM0, MM4 ) /* scale*m5 | scale*m4 */
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
PFMUL ( MM0, MM5 ) /* scale*m6 | scale*m2 */
PFMUL ( MM0, MM6 ) /* scale*m9 | scale*m8 */
PFMUL ( MM0, MM7 ) /* | scale*m10 */
ALIGNTEXT32
LLBL (G3TR_rescale):
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */
PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
PFMUL ( MM7, MM2 ) /* | x2*m10 */
PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
SUB_L ( CONST(1), EDI ) /* decrement normal counter */
JNZ ( LLBL (G3TR_rescale) )
FEMMS
LLBL (G3TR_end):
POP_L ( ESI )
POP_L ( EDI )
RET
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals_no_rot)
HIDDEN(_mesa_3dnow_transform_normals_no_rot)
/*
 * _mesa_3dnow_transform_normals_no_rot
 *
 * For every source normal (x0, x1, x2) write
 *   r = (x0*m0, x1*m5, x2*m10)
 * where m is mat->inv.  No scaling or normalization is applied.  The
 * source advances by STRIDE, dest by 16 bytes per normal.
 *
 * Arguments are read through the ARG_* macros (presumably defined in
 * norm_args.h in terms of FRAME_OFFSET - confirm).
 */
GLNAME(_mesa_3dnow_transform_normals_no_rot):
#undef FRAME_OFFSET
#define FRAME_OFFSET 8
PUSH_L ( EDI )
PUSH_L ( ESI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
CMP_L ( CONST(0), EDI ) /* count == 0 ? nothing to do */
JE ( LLBL (G3TNR_end) )
FEMMS
MOVD ( REGIND(ECX), MM0 ) /* | m0 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m5 | m0 */
MOVD ( REGOFF(40, ECX), MM2 ) /* | m10 */
PUNPCKLDQ ( MM2, MM2 ) /* m10 | m10 */
ALIGNTEXT32
LLBL (G3TNR_transform):
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM5 ) /* | x2 */
PFMUL ( MM0, MM4 ) /* x1*m5 | x0*m0 */
ADD_L ( STRIDE, EDX) /* next normal */
PREFETCH ( REGIND(EDX) )
PFMUL ( MM2, MM5 ) /* | x2*m10 */
ADD_L ( CONST(16), EAX ) /* next r */
SUB_L ( CONST(1), EDI ) /* decrement normal counter */
MOVQ ( MM4, REGOFF(-16, EAX) ) /* write r0, r1 */
MOVD ( MM5, REGOFF(-8, EAX) ) /* write r2 */
JNZ ( LLBL (G3TNR_transform) ) /* tests the flags left by the SUB_L above */
FEMMS
LLBL (G3TNR_end):
POP_L ( ESI )
POP_L ( EDI )
RET
ALIGNTEXT16
GLOBL GLNAME(_mesa_3dnow_transform_normals)
HIDDEN(_mesa_3dnow_transform_normals)
/*
 * _mesa_3dnow_transform_normals
 *
 * For every source normal (x0, x1, x2) write, with m = mat->inv,
 *   r0 = x0*m0 + x1*m1 + x2*m2
 *   r1 = x0*m4 + x1*m5 + x2*m6
 *   r2 = x0*m8 + x1*m9 + x2*m10
 * No scaling or normalization is applied.  The source advances by STRIDE,
 * dest by 16 bytes per normal.
 *
 * Arguments are read through the ARG_* macros (presumably defined in
 * norm_args.h in terms of FRAME_OFFSET - confirm).
 */
GLNAME(_mesa_3dnow_transform_normals):
#undef FRAME_OFFSET
#define FRAME_OFFSET 8
PUSH_L ( EDI )
PUSH_L ( ESI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( ARG_MAT, ECX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EDI ) /* dest->count = in->count */
MOV_L ( EDI, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), EDX ) /* in->start */
MOV_L ( REGOFF(MATRIX_INV, ECX), ECX ) /* mat->inv */
CMP_L ( CONST(0), EDI ) /* count > 0 ?? */
JE ( LLBL (G3T_end) )
FEMMS
MOVQ ( REGIND(ECX), MM3 ) /* m1 | m0 */
MOVQ ( REGOFF(16, ECX), MM4 ) /* m5 | m4 */
MOVD ( REGOFF(8, ECX), MM5 ) /* | m2 */
PUNPCKLDQ ( REGOFF(24, ECX), MM5 ) /* m6 | m2 */
MOVQ ( REGOFF(32, ECX), MM6 ) /* m9 | m8 */
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
ALIGNTEXT32
LLBL (G3T_transform):
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
PFMUL ( MM3, MM0 ) /* x1*m1 | x0*m0 */
ADD_L ( CONST(16), EAX ) /* next r */
PFMUL ( MM4, MM1 ) /* x1*m5 | x0*m4 */
PFACC ( MM1, MM0 ) /* x0*m4+x1*m5 | x0*m0+x1*m1 */
PFMUL ( MM5, MM2 ) /* x2*m6 | x2*m2 */
PFADD ( MM2, MM0 ) /* x0*m4...+x2*m6| x0*m0+x1*m1+x2*m2 */
MOVQ ( REGIND(EDX), MM1 ) /* x1 | x0 */
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write r0, r1 */
PFMUL ( MM6, MM1 ) /* x1*m9 | x0*m8 */
MOVD ( REGOFF(8, EDX), MM2 ) /* | x2 */
PFMUL ( MM7, MM2 ) /* | x2*m10 */
ADD_L ( STRIDE, EDX ) /* next normal */
PREFETCH ( REGIND(EDX) )
PFACC ( MM1, MM1 ) /* *not used* | x0*m8+x1*m9 */
PFADD ( MM2, MM1 ) /* *not used* | x0*m8+x1*m9+x2*m10 */
MOVD ( MM1, REGOFF(-8, EAX) ) /* write r2 */
SUB_L ( CONST(1), EDI ) /* decrement normal counter */
JNZ ( LLBL (G3T_transform) )
FEMMS
LLBL (G3T_end):
POP_L ( ESI )
POP_L ( EDI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_normalize_normals
 *
 * Scales every input normal (x0, x1, x2) and stores three floats per normal
 * in the destination vector (output advances 16 bytes, input by STRIDE).
 * dest->count is set to in->count before the zero-count check.
 *
 * Two loops, selected once by testing the ARG_LENGTHS pointer:
 *   G3N_norm1 - pointer is non-zero: each normal is multiplied by the float
 *               read from the lengths array (one float per normal).
 *   G3N_norm2 - pointer is zero: the factor 1/sqrt(x0*x0+x1*x1+x2*x2) is
 *               computed with PFRSQRT and refined with PFRSQIT1/PFRCPIT2.
 *
 * ARG_IN, ARG_DEST, ARG_LENGTHS and STRIDE are macros defined outside this
 * excerpt; presumably the ARG_* ones address stack slots relative to
 * FRAME_OFFSET (the three pushes below) -- confirm against their definitions.
 */
GLOBL GLNAME(_mesa_3dnow_normalize_normals)
HIDDEN(_mesa_3dnow_normalize_normals)
GLNAME(_mesa_3dnow_normalize_normals):
#undef FRAME_OFFSET
#define FRAME_OFFSET 12
PUSH_L ( EDI )
PUSH_L ( ESI )
PUSH_L ( EBP )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EBP ) /* dest->count = in->count */
MOV_L ( EBP, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), ECX ) /* in->start */
MOV_L ( ARG_LENGTHS, EDX )
CMP_L ( CONST(0), EBP ) /* count == 0 ? -> nothing to do */
JE ( LLBL (G3N_end) )
FEMMS
CMP_L ( CONST(0), EDX ) /* lengths == 0 ? */
JE ( LLBL (G3N_norm2) ) /* calculate lengths */
ALIGNTEXT32
LLBL (G3N_norm1): /* use precalculated lengths */
PREFETCH ( REGIND(EAX) )
MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */
MOVD ( REGIND(EDX), MM3 ) /* | length (x) */
PFMUL ( MM3, MM1 ) /* | x2 (normalized) */
PUNPCKLDQ ( MM3, MM3 ) /* length (x) | length (x) */
ADD_L ( STRIDE, ECX ) /* next normal */
PREFETCH ( REGIND(ECX) )
PFMUL ( MM3, MM0 ) /* x1 (normalized) | x0 (normalized) */
MOVQ ( MM0, REGIND(EAX) ) /* write new x0, x1 */
MOVD ( MM1, REGOFF(8, EAX) ) /* write new x2 */
ADD_L ( CONST(16), EAX ) /* next r */
ADD_L ( CONST(4), EDX ) /* next length */
SUB_L ( CONST(1), EBP ) /* decrement normal counter */
JNZ ( LLBL (G3N_norm1) )
JMP ( LLBL (G3N_end1) ) /* skip the second loop, share its FEMMS */
ALIGNTEXT32
LLBL (G3N_norm2): /* need to calculate lengths */
PREFETCHW ( REGIND(EAX) )
PREFETCH ( REGIND(ECX) )
MOVQ ( REGIND(ECX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, ECX), MM1 ) /* | x2 */
MOVQ ( MM0, MM3 ) /* x1 | x0 */
ADD_L ( STRIDE, ECX ) /* next normal */
PFMUL ( MM0, MM3 ) /* x1*x1 | x0*x0 */
MOVQ ( MM1, MM4 ) /* | x2 */
ADD_L ( CONST(16), EAX ) /* next r (stores below use negative offsets) */
PFMUL ( MM1, MM4 ) /* | x2*x2 */
PFADD ( MM4, MM3 ) /* x1*x1 | x0*x0+x2*x2 */
PFACC ( MM3, MM3 ) /* x0*x0+x1*x1+x2*x2 | x0*x0+x1*x1+x2*x2 */
PFRSQRT ( MM3, MM5 ) /* 1/sqrt (x0*x0+x1*x1+x2*x2), first approximation */
MOVQ ( MM5, MM4 ) /* keep the approximation for PFRCPIT2 */
PUNPCKLDQ ( MM3, MM3 )
PFMUL ( MM5, MM5 ) /* approximation squared, input to PFRSQIT1 */
PFRSQIT1 ( MM3, MM5 ) /* first refinement step */
SUB_L ( CONST(1), EBP ) /* decrement normal counter; flags are used by the JNZ below */
PFRCPIT2 ( MM4, MM5 ) /* second refinement step -> refined 1/sqrt */
PFMUL ( MM5, MM0 ) /* x1 (normalized) | x0 (normalized) */
MOVQ ( MM0, REGOFF(-16, EAX) ) /* write new x0, x1 */
PFMUL ( MM5, MM1 ) /* | x2 (normalized) */
MOVD ( MM1, REGOFF(-8, EAX) ) /* write new x2 */
JNZ ( LLBL (G3N_norm2) )
LLBL (G3N_end1):
FEMMS
LLBL (G3N_end):
POP_L ( EBP )
POP_L ( ESI )
POP_L ( EDI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_rescale_normals
 *
 * Multiplies every input normal (x0, x1, x2) by the single float read from
 * ARG_SCALE and stores three floats per normal in the destination vector
 * (output advances 16 bytes, input by STRIDE).  dest->count is set to
 * in->count before the zero-count check.
 *
 * ARG_IN, ARG_DEST, ARG_SCALE and STRIDE are macros defined outside this
 * excerpt; presumably the ARG_* ones address stack slots relative to
 * FRAME_OFFSET (the two pushes below) -- confirm against their definitions.
 */
GLOBL GLNAME(_mesa_3dnow_rescale_normals)
HIDDEN(_mesa_3dnow_rescale_normals)
GLNAME(_mesa_3dnow_rescale_normals):
#undef FRAME_OFFSET
#define FRAME_OFFSET 8
PUSH_L ( EDI )
PUSH_L ( ESI )
MOV_L ( ARG_IN, ESI )
MOV_L ( ARG_DEST, EAX )
MOV_L ( REGOFF(V4F_COUNT, ESI), EDX ) /* dest->count = in->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, EAX) )
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* dest->start */
MOV_L ( REGOFF(V4F_START, ESI), ECX ) /* in->start */
CMP_L ( CONST(0), EDX ) /* count == 0 ? -> nothing to do */
JE ( LLBL (G3R_end) )
FEMMS
MOVD ( ARG_SCALE, MM0 ) /* | scale */
PUNPCKLDQ ( MM0, MM0 ) /* scale | scale */
ALIGNTEXT32
LLBL (G3R_rescale):
PREFETCHW ( REGIND(EAX) )
MOVQ ( REGIND(ECX), MM1 ) /* x1 | x0 */
MOVD ( REGOFF(8, ECX), MM2 ) /* | x2 */
PFMUL ( MM0, MM1 ) /* x1*scale | x0*scale */
ADD_L ( STRIDE, ECX ) /* next normal */
PREFETCH ( REGIND(ECX) )
PFMUL ( MM0, MM2 ) /* | x2*scale */
ADD_L ( CONST(16), EAX ) /* next r (stores below use negative offsets) */
MOVQ ( MM1, REGOFF(-16, EAX) ) /* write r0, r1 */
MOVD ( MM2, REGOFF(-8, EAX) ) /* write r2 */
SUB_L ( CONST(1), EDX ) /* decrement normal counter */
JNZ ( LLBL (G3R_rescale) )
FEMMS
LLBL (G3R_end):
POP_L ( ESI )
POP_L ( EDI )
RET
#endif
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

437
src/arch/x86/3dnow_xform1.S Normal file
View File

@ -0,0 +1,437 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifdef USE_3DNOW_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
#define FRAME_OFFSET 4
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points1_general
 *
 * Transforms 1-component input points (x0) by a general matrix:
 *   r0 = x0*m00 + m30    r1 = x0*m01 + m31
 *   r2 = x0*m02 + m32    r3 = x0*m03 + m33
 * Sets dest size to 4, ORs VEC_SIZE_4 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_general )
HIDDEN(_mesa_3dnow_transform_points1_general)
GLNAME( _mesa_3dnow_transform_points1_general ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start (named offset, not a literal) */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_3 ) ) /* count == 0 -> skip the loop */
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVQ ( REGOFF(8, ECX), MM1 ) /* m03 | m02 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
MOVQ ( REGOFF(56, ECX), MM3 ) /* m33 | m32 */
ALIGNTEXT16
LLBL( G3TPGR_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
MOVQ ( MM4, MM5 ) /* x0 | x0 */
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
PFMUL ( MM1, MM5 ) /* x0*m03 | x0*m02 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
PFADD ( MM3, MM5 ) /* x0*m03+m33 | x0*m02+m32 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVQ ( MM5, REGOFF(8, EDX) ) /* write r3, r2 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPGR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points1_identity
 *
 * Copies the single component x0 of each input point to r0 of the
 * corresponding output slot; no other output component is written.
 * Sets dest size to 1, ORs VEC_SIZE_1 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.  The matrix pointer is loaded but its contents
 * are never read here.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_identity )
HIDDEN(_mesa_3dnow_transform_points1_identity)
GLNAME( _mesa_3dnow_transform_points1_identity ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(1), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 1 */
OR_B ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start (named offset, not a literal) */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPIR_4) ) /* count == 0 -> skip the loop */
ALIGNTEXT16
LLBL( G3TPIR_3 ):
MOVD ( REGIND(EAX), MM0 ) /* | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
MOVD ( MM0, REGIND(EDX) ) /* | r0 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPIR_4 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points1_3d_no_rot
 *
 * Transforms 1-component input points (x0) using only m00, m30, m31, m32:
 *   r0 = x0*m00 + m30    r1 = m31    r2 = m32
 * Sets dest size to 3, ORs VEC_SIZE_3 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points1_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points1_3d_no_rot ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start (named offset, not a literal) */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_3 ) ) /* count == 0 -> skip the loop */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
ALIGNTEXT16
LLBL( G3TP3NRR_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
PFMUL ( MM0, MM4 ) /* | x0*m00 */
PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3NRR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points1_perspective
 *
 * Transforms 1-component input points (x0) using only m00 and m32:
 *   r0 = x0*m00    r1 = 0    r2 = m32    r3 = 0
 * Sets dest size to 4, ORs VEC_SIZE_4 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_perspective )
HIDDEN(_mesa_3dnow_transform_points1_perspective)
GLNAME( _mesa_3dnow_transform_points1_perspective ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start (named offset, not a literal) */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_3 ) ) /* count == 0 -> skip the loop */
MOVD ( REGIND(ECX), MM0 ) /* 0 | m00 */
MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 */
ALIGNTEXT16
LLBL( G3TPPR_2 ):
MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */
PFMUL ( MM0, MM4 ) /* 0 | x0*m00 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPPR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points1_2d
 *
 * Transforms 1-component input points (x0) using m00, m01, m30, m31:
 *   r0 = x0*m00 + m30    r1 = x0*m01 + m31
 * Sets dest size to 2, ORs VEC_SIZE_2 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d )
HIDDEN(_mesa_3dnow_transform_points1_2d)
GLNAME( _mesa_3dnow_transform_points1_2d ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start (named offset, not a literal) */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_3 ) ) /* count == 0 -> skip the loop */
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2R_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2R_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points1_2d_no_rot
 *
 * Transforms 1-component input points (x0) using only m00, m30, m31:
 *   r0 = x0*m00 + m30    r1 = m31
 * Sets dest size to 2, ORs VEC_SIZE_2 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points1_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points1_2d_no_rot ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start (named offset, not a literal) */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_3 ) ) /* count == 0 -> skip the loop */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2NRR_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
PFMUL ( MM0, MM4 ) /* | x0*m00 */
PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2NRR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points1_3d
 *
 * Transforms 1-component input points (x0) using m00..m02 and m30..m32:
 *   r0 = x0*m00 + m30    r1 = x0*m01 + m31    r2 = x0*m02 + m32
 * Sets dest size to 3, ORs VEC_SIZE_3 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points1_3d )
HIDDEN(_mesa_3dnow_transform_points1_3d)
GLNAME( _mesa_3dnow_transform_points1_3d ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start (named offset, not a literal) */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_3 ) ) /* count == 0 -> skip the loop */
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVD ( REGOFF(8, ECX), MM1 ) /* | m02 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
ALIGNTEXT16
LLBL( G3TP3R_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
MOVQ ( MM4, MM5 ) /* x0 | x0 (only the low half is stored later) */
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
PFMUL ( MM1, MM5 ) /* | x0*m02 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
PFADD ( MM3, MM5 ) /* | x0*m02+m32 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3R_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
#endif
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

477
src/arch/x86/3dnow_xform2.S Normal file
View File

@ -0,0 +1,477 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifdef USE_3DNOW_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
#define FRAME_OFFSET 4
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_general
 *
 * Transforms 2-component input points (x0, x1) by a general matrix:
 *   r0 = x0*m00 + x1*m10 + m30    r1 = x0*m01 + x1*m11 + m31
 *   r2 = x0*m02 + x1*m12 + m32    r3 = x0*m03 + x1*m13 + m33
 * Sets dest size to 4, ORs VEC_SIZE_4 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_general )
HIDDEN(_mesa_3dnow_transform_points2_general)
GLNAME( _mesa_3dnow_transform_points2_general ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_3 ) ) /* count == 0 -> skip the loop */
/* Pair up matrix elements that multiply x0 (low) and x1 (high). */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */
MOVD ( REGOFF(12, ECX), MM3 ) /* | m03 */
PUNPCKLDQ ( REGOFF(28, ECX), MM3 ) /* m13 | m03 */
MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */
MOVQ ( REGOFF(56, ECX), MM5 ) /* m33 | m32 */
ALIGNTEXT16
LLBL( G3TPGR_2 ):
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */
MOVQ ( MM6, MM7 ) /* x1 | x0 */
PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */
PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */
MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 (reloaded for r2, r3) */
MOVQ ( MM6, MM7 ) /* x1 | x0 */
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */
PFMUL ( MM3, MM7 ) /* x1*m13 | x0*m03 */
ADD_L ( EDI, EAX ) /* next vertex */
PFACC ( MM7, MM6 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */
PFADD ( MM5, MM6 ) /* x0*...*m13+m33 | x0*...*m12+m32 */
MOVQ ( MM6, REGOFF(8, EDX) ) /* write r3, r2 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPGR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_perspective
 *
 * Transforms 2-component input points (x0, x1) using only m00, m11, m32:
 *   r0 = x0*m00    r1 = x1*m11    r2 = m32    r3 = 0
 * Sets dest size to 4, ORs VEC_SIZE_4 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_perspective )
HIDDEN(_mesa_3dnow_transform_points2_perspective)
GLNAME( _mesa_3dnow_transform_points2_perspective ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_3 ) ) /* count == 0 -> skip the loop */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVD ( REGOFF(56, ECX), MM3 ) /* 0 | m32 */
ALIGNTEXT16
LLBL( G3TPPR_2 ):
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPPR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_3d
 *
 * Transforms 2-component input points (x0, x1):
 *   r0 = x0*m00 + x1*m10 + m30
 *   r1 = x0*m01 + x1*m11 + m31
 *   r2 = x0*m02 + x1*m12 + m32
 * Sets dest size to 3, ORs VEC_SIZE_3 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d )
HIDDEN(_mesa_3dnow_transform_points2_3d)
GLNAME( _mesa_3dnow_transform_points2_3d ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_3 ) ) /* count == 0 -> skip the loop */
/* Pair up matrix elements that multiply x0 (low) and x1 (high). */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */
MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM5 ) /* | m32 */
ALIGNTEXT16
LLBL( G3TP3R_2 ):
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */
MOVQ ( MM6, MM7 ) /* x1 | x0 */
PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */
PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */
PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */
MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */
MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 (reloaded for r2) */
MOVQ ( MM6, MM7 ) /* x1 | x0 (only feeds the unused high half below) */
PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */
PFACC ( MM7, MM6 ) /* ***trash*** | x0*m02+x1*m12 */
PFADD ( MM5, MM6 ) /* ***trash*** | x0*...*m12+m32 */
MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3R_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_3d_no_rot
 *
 * Transforms 2-component input points (x0, x1) using m00, m11, m30..m32:
 *   r0 = x0*m00 + m30    r1 = x1*m11 + m31    r2 = m32
 * Sets dest size to 3, ORs VEC_SIZE_3 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points2_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points2_3d_no_rot ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 3 */
OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_3 ) ) /* count == 0 -> skip the loop */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
ALIGNTEXT16
LLBL( G3TP3NRR_2 ):
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3NRR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_2d
 *
 * Transforms 2-component input points (x0, x1):
 *   r0 = x0*m00 + x1*m10 + m30    r1 = x0*m01 + x1*m11 + m31
 * Sets dest size to 2, ORs VEC_SIZE_2 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d )
HIDDEN(_mesa_3dnow_transform_points2_2d)
GLNAME( _mesa_3dnow_transform_points2_2d ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_3 ) ) /* count == 0 -> skip the loop */
MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */
MOVQ ( REGOFF(16, ECX), MM1 ) /* m11 | m10 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2R_2 ):
MOVD ( REGIND(EAX), MM4 ) /* | x0 */
MOVD ( REGOFF(4, EAX), MM5 ) /* | x1 */
PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */
PUNPCKLDQ ( MM5, MM5 ) /* x1 | x1 */
PFMUL ( MM1, MM5 ) /* x1*m11 | x1*m10 */
PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */
PFADD ( MM5, MM4 ) /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2R_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_2d_no_rot
 *
 * Transforms 2-component input points (x0, x1) using m00, m11, m30, m31:
 *   r0 = x0*m00 + m30    r1 = x1*m11 + m31
 * Sets dest size to 2, ORs VEC_SIZE_2 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points2_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points2_2d_no_rot ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_3 ) ) /* count == 0 -> skip the loop */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2NRR_2 ):
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2NRR_3 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points2_identity
 *
 * Copies the two components (x0, x1) of each input point to (r0, r1) of the
 * corresponding output slot; no other output component is written.
 * Sets dest size to 2, ORs VEC_SIZE_2 into dest flags and copies the source
 * count to dest.  Input advances by the source stride, output by 16 bytes.
 *
 * ARG_DEST, ARG_MATRIX and ARG_SOURCE are macros not defined in this file
 * (presumably from xform_args.h, relative to FRAME_OFFSET) -- they are read
 * after the first push only.  The matrix pointer is loaded but its contents
 * are never read here.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points2_identity )
HIDDEN(_mesa_3dnow_transform_points2_identity)
GLNAME( _mesa_3dnow_transform_points2_identity ):
PUSH_L ( ESI )
MOV_L ( ARG_DEST, ECX )
MOV_L ( ARG_MATRIX, ESI )
MOV_L ( ARG_SOURCE, EAX )
MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) /* dest->size = 2 */
OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) )
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) /* dest->count = source->count */
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) )
PUSH_L ( EDI )
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* dest->start */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* source->start */
TEST_L ( ESI, ESI )
/*
 * The zero-count guard must leave through the exit label.  Branching to the
 * loop label would run the body with ESI == 0, DEC_L would wrap the counter
 * and the copy would run far past both buffers.
 */
JZ ( LLBL( G3TPIR_4 ) ) /* count == 0 -> skip the loop */
ALIGNTEXT16
LLBL( G3TPIR_3 ):
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPIR_4 ):
FEMMS
POP_L ( EDI )
POP_L ( ESI )
RET
#endif
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

561
src/arch/x86/3dnow_xform3.S Normal file
View File

@ -0,0 +1,561 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifdef USE_3DNOW_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
#define FRAME_OFFSET 4
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_general
 *
 * Multiplies every 3-component source vertex (x0, x1, x2) by a full 4x4
 * matrix and stores a 4-component result (r0..r3) per vertex.  With mN
 * denoting the float at byte offset 4*N from the matrix pointer:
 *
 *    r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *    r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *    r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *    r3 = x0*m3 + x1*m7 + x2*m11 + m15
 *
 * Operands are written (source, destination).  Comments of the form
 * "a | b" show the high | low 32-bit halves of an MMX register.
 *
 * Register use inside the loop:
 *    EAX = current source vertex, EDI = source stride in bytes,
 *    EDX = current output vertex (advanced by 16 bytes per vertex),
 *    ECX = matrix, ESI = remaining vertex count.
 *
 * NOTE(review): one register (ESI) is pushed before the ARG_* macros are
 * used, which presumably is what FRAME_OFFSET 4 accounts for -- confirm
 * against xform_args.h.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_general )
HIDDEN(_mesa_3dnow_transform_points3_general)
GLNAME( _mesa_3dnow_transform_points3_general ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_2 ) ) /* count == 0 -> skip the loop */
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */
ALIGNTEXT16
LLBL( G3TPGR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next source vertex (read only) */
MOVQ ( MM0, MM1 ) /* x1 | x0 */
PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
MOVQ ( MM2, MM5 ) /* x2 | x2 */
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
PFMUL ( REGOFF(32, ECX), MM2 ) /* x2*m9 | x2*m8 */
MOVQ ( MM0, MM3 ) /* x0 | x0 */
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
MOVQ ( MM1, MM4 ) /* x1 | x1 */
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
PFADD ( REGOFF(48, ECX), MM2 ) /* x2*m9+m13 | x2*m8+m12 */
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
PFADD ( REGOFF(56, ECX), MM5 ) /* x2*m11+m15 | x2*m10+m14 */
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
PFMUL ( REGOFF(8, ECX), MM3 ) /* x0*m3 | x0*m2 */
PFADD ( MM1, MM2 ) /* r1 | r0 */
PFMUL ( REGOFF(24, ECX), MM4 ) /* x1*m7 | x1*m6 */
ADD_L ( CONST(16), EDX ) /* next output vertex */
PFADD ( MM3, MM4 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */
MOVQ ( MM2, REGOFF(-16, EDX) ) /* write r0, r1 */
PFADD ( MM4, MM5 ) /* r3 | r2 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPGR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_perspective
 *
 * Transforms 3-component source vertices with a matrix of which only the
 * entries m00, m11, m20, m21, m22 and m32 are read, and stores a
 * 4-component result.  With mRC denoting the float at byte offset
 * 4*(4*R + C) from the matrix pointer:
 *
 *    r0 = x0*m00 + x2*m20
 *    r1 = x1*m11 + x2*m21
 *    r2 = x2*m22 + m32
 *    r3 = -x2
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.  The matrix entries are loaded
 * into MM0..MM3 once, before the loop.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_perspective )
HIDDEN(_mesa_3dnow_transform_points3_perspective)
GLNAME( _mesa_3dnow_transform_points3_perspective ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_2 ) ) /* count == 0 -> skip the loop */
PREFETCH ( REGIND(EAX) ) /* first source vertex (read only) */
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
ALIGNTEXT16
LLBL( G3TPPR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next source vertex (read only) */
PXOR ( MM7, MM7 ) /* 0 | 0 */
MOVQ ( MM5, MM6 ) /* | x2 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PFSUB ( MM5, MM7 ) /* | -x2 (0 - x2) */
PFMUL ( MM2, MM6 ) /* | x2*m22 */
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */
PFADD ( MM3, MM6 ) /* | x2*m22+m32 */
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVD ( MM6, REGOFF(-8, EDX) ) /* write r2 */
MOVD ( MM7, REGOFF(-4, EDX) ) /* write r3 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPPR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_3d
 *
 * Transforms 3-component source vertices by the first three columns of
 * the matrix and stores a 3-component result in 16-byte output slots
 * (the fourth float of each slot is not written).  With mN denoting the
 * float at byte offset 4*N from the matrix pointer:
 *
 *    r0 = x0*m0 + x1*m4 + x2*m8  + m12
 *    r1 = x0*m1 + x1*m5 + x2*m9  + m13
 *    r2 = x0*m2 + x1*m6 + x2*m10 + m14
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.  MM7 holds m6 | m2 for the whole
 * loop.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d )
HIDDEN(_mesa_3dnow_transform_points3_3d)
GLNAME( _mesa_3dnow_transform_points3_3d ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_3 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_2 ) ) /* count == 0 -> skip the loop */
PREFETCH ( REGIND(EAX) ) /* first source vertex (read only) */
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written, so use the write-intent hint like the sibling routines */
MOVD ( REGOFF(8, ECX), MM7 ) /* | m2 */
PUNPCKLDQ ( REGOFF(24, ECX), MM7 ) /* m6 | m2 */
ALIGNTEXT16
LLBL( G3TP3R_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next source vertex (read only) */
MOVQ ( MM0, MM2 ) /* x1 | x0 */
ADD_L ( CONST(16), EDX ) /* next r */
PUNPCKLDQ ( MM2, MM2 ) /* x0 | x0 */
MOVQ ( MM0, MM3 ) /* x1 | x0 */
PFMUL ( REGIND(ECX), MM2 ) /* x0*m1 | x0*m0 */
PUNPCKHDQ ( MM3, MM3 ) /* x1 | x1 */
MOVQ ( MM1, MM4 ) /* | x2 */
PFMUL ( REGOFF(16, ECX), MM3 ) /* x1*m5 | x1*m4 */
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
PFADD ( MM2, MM3 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
PFADD ( REGOFF(48, ECX), MM3 ) /* x0*m1+x1*m5+m13 | x0*m0+x1*m4+m12 */
PFMUL ( MM7, MM0 ) /* x1*m6 | x0*m2 */
PFADD ( MM4, MM3 ) /* r1 | r0 */
PFMUL ( REGOFF(40, ECX), MM1 ) /* | x2*m10 (high half replaced below) */
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m14 | x2*m10 */
PFACC ( MM0, MM1 ) /* x0*m2+x1*m6 | x2*m10+m14 */
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
PFACC ( MM1, MM1 ) /* | r2 */
MOVD ( MM1, REGOFF(-8, EDX) ) /* write r2 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3R_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_3d_no_rot
 *
 * Transforms 3-component source vertices with a matrix of which only the
 * diagonal entries m00, m11, m22 and the entries m30, m31, m32 are read,
 * and stores a 3-component result in 16-byte output slots (the fourth
 * float of each slot is not written).  With mRC denoting the float at
 * byte offset 4*(4*R + C) from the matrix pointer:
 *
 *    r0 = x0*m00 + m30
 *    r1 = x1*m11 + m31
 *    r2 = x2*m22 + m32
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.  The matrix entries are loaded
 * into MM0..MM3 once, before the loop.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points3_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points3_3d_no_rot ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_3 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_2 ) ) /* count == 0 -> skip the loop */
PREFETCH ( REGIND(EAX) ) /* first source vertex (read only) */
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */
PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */
ALIGNTEXT16
LLBL( G3TP3NRR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next source vertex is only read, so use the plain read hint like the sibling routines */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
PFMUL ( MM2, MM5 ) /* | x2*m22 */
PFADD ( MM3, MM5 ) /* | x2*m22+m32 */
MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (MOVD leaves the flags set by DEC_L intact) */
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3NRR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_2d
 *
 * Transforms the x0/x1 components of 3-component source vertices with
 * the matrix entries m00, m01, m10, m11, m30, m31 and copies x2 through
 * unchanged.  Results go to 16-byte output slots (the fourth float of
 * each slot is not written).  With mRC denoting the float at byte offset
 * 4*(4*R + C) from the matrix pointer:
 *
 *    r0 = x0*m00 + x1*m10 + m30
 *    r1 = x0*m01 + x1*m11 + m31
 *    r2 = x2
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.  The matrix entries are loaded
 * into MM0..MM2 once, before the loop.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d )
HIDDEN(_mesa_3dnow_transform_points3_2d)
GLNAME( _mesa_3dnow_transform_points3_2d ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_3 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_3) ) /* count == 0 -> skip the loop */
PREFETCH ( REGIND(EAX) ) /* first source vertex (read only) */
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2R_2 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next source vertex (read only) */
MOVQ ( MM3, MM4 ) /* x1 | x0 */
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */
PFADD ( MM2, MM3 ) /* x0*m01+x1*m11+m31 | x0*m00+x1*m10+m30 */
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2R_3 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_2d_no_rot
 *
 * Scales and offsets the x0/x1 components of 3-component source vertices
 * using the matrix entries m00, m11, m30, m31 and copies x2 through
 * unchanged.  Results go to 16-byte output slots (the fourth float of
 * each slot is not written).  With mRC denoting the float at byte offset
 * 4*(4*R + C) from the matrix pointer:
 *
 *    r0 = x0*m00 + m30
 *    r1 = x1*m11 + m31
 *    r2 = x2
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points3_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points3_2d_no_rot ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_3 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_2 ) ) /* count == 0 -> skip the loop */
PREFETCH ( REGIND(EAX) ) /* first source vertex (read only) */
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2NRR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next source vertex (read only) */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
ADD_L ( CONST(16), EDX ) /* next r */
PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2NRR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points3_identity
 *
 * Copies the three components of every source vertex to the output
 * unchanged (r0 = x0, r1 = x1, r2 = x2).  Source vertices are read with
 * the source stride; results go to 16-byte output slots (the fourth
 * float of each slot is not written).  The matrix argument is loaded
 * into ECX but never dereferenced.
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points3_identity )
HIDDEN(_mesa_3dnow_transform_points3_identity)
GLNAME( _mesa_3dnow_transform_points3_identity ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (unused by this routine) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 3 */
OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_3 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix (not read afterwards) */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPIR_2 ) ) /* count == 0 -> skip the loop */
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */
ALIGNTEXT16
LLBL( G3TPIR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 output vertices ahead */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
ADD_L ( CONST(16), EDX ) /* next r */
DEC_L ( ESI ) /* decrement vertex counter */
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */
MOVD ( MM1, REGOFF(-8, EDX) ) /* | r2 */
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex (the MMX stores leave the DEC_L flags intact) */
LLBL( G3TPIR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
#endif
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

570
src/arch/x86/3dnow_xform4.S Normal file
View File

@ -0,0 +1,570 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifdef USE_3DNOW_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
#define FRAME_OFFSET 4
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points4_general
 *
 * Multiplies every 4-component source vertex (x0..x3) by a full 4x4
 * matrix and stores a 4-component result (r0..r3) per vertex.  With mN
 * denoting the float at byte offset 4*N from the matrix pointer:
 *
 *    r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *    r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *    r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *    r3 = x0*m3 + x1*m7 + x2*m11 + x3*m15
 *
 * Operands are written (source, destination).  Comments of the form
 * "a | b" show the high | low 32-bit halves of an MMX register.
 *
 * Register use inside the loop:
 *    EAX = current source vertex, EDI = source stride in bytes,
 *    EDX = current output vertex (advanced by 16 bytes per vertex),
 *    ECX = matrix, ESI = remaining vertex count.
 *
 * NOTE(review): one register (ESI) is pushed before the ARG_* macros are
 * used, which presumably is what FRAME_OFFSET 4 accounts for -- confirm
 * against xform_args.h.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_general )
HIDDEN(_mesa_3dnow_transform_points4_general)
GLNAME( _mesa_3dnow_transform_points4_general ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPGR_2 ) ) /* count == 0 -> skip the loop */
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */
ALIGNTEXT16
LLBL( G3TPGR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next source vertex (read only) */
MOVQ ( MM0, MM2 ) /* x1 | x0 */
MOVQ ( MM4, MM6 ) /* x3 | x2 */
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */
MOVQ ( MM0, MM1 ) /* x0 | x0 */
ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
MOVQ ( MM2, MM3 ) /* x1 | x1 */
PFMUL ( REGOFF(8, ECX), MM1 ) /* x0*m3 | x0*m2 */
PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */
PFMUL ( REGOFF(16, ECX), MM2 ) /* x1*m5 | x1*m4 */
MOVQ ( MM4, MM5 ) /* x2 | x2 */
PFMUL ( REGOFF(24, ECX), MM3 ) /* x1*m7 | x1*m6 */
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */
MOVQ ( MM6, MM7 ) /* x3 | x3 */
PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */
PFADD ( MM0, MM2 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
PFMUL ( REGOFF(48, ECX), MM6 ) /* x3*m13 | x3*m12 */
PFADD ( MM1, MM3 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */
PFMUL ( REGOFF(56, ECX), MM7 ) /* x3*m15 | x3*m14 */
PFADD ( MM4, MM6 ) /* x2*m9+x3*m13 | x2*m8+x3*m12 */
PFADD ( MM5, MM7 ) /* x2*m11+x3*m15 | x2*m10+x3*m14 */
PFADD ( MM2, MM6 ) /* r1 | r0 */
PFADD ( MM3, MM7 ) /* r3 | r2 */
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM7, REGOFF(-8, EDX) ) /* write r2, r3 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPGR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points4_perspective
 *
 * Transforms 4-component source vertices with a matrix of which only the
 * entries m00, m11, m20, m21, m22 and m32 are read, and stores a
 * 4-component result.  With mRC denoting the float at byte offset
 * 4*(4*R + C) from the matrix pointer:
 *
 *    r0 = x0*m00 + x2*m20
 *    r1 = x1*m11 + x2*m21
 *    r2 = x2*m22 + x3*m32
 *    r3 = -x2
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.  MM0..MM2 hold matrix entries
 * and MM7 holds zero for the whole loop.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_perspective )
HIDDEN(_mesa_3dnow_transform_points4_perspective)
GLNAME( _mesa_3dnow_transform_points4_perspective ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPPR_2 ) ) /* count == 0 -> skip the loop */
PREFETCH ( REGIND(EAX) ) /* first source vertex (read only) */
PREFETCHW ( REGIND(EDX) ) /* first output vertex will be written */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */
PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m32 | m22 */
MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */
PXOR ( MM7, MM7 ) /* 0 | 0 */
ALIGNTEXT16
LLBL( G3TPPR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGOFF(32, EAX) ) /* 32 bytes past the next vertex; original note: "hopefully stride is zero" */
MOVQ ( MM5, MM6 ) /* x3 | x2 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */
ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */
PFSUBR ( MM7, MM3 ) /* | -x2 (reverse subtract: 0 - x2) */
PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */
PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */
PFACC ( MM3, MM6 ) /* -x2 | x2*m22+x3*m32 */
MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM6, REGOFF(-8, EDX) ) /* write r2, r3 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPPR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points4_3d
 *
 * Transforms 4-component source vertices by the first three columns of
 * the matrix and passes x3 through unchanged.  With mN denoting the
 * float at byte offset 4*N from the matrix pointer:
 *
 *    r0 = x0*m0 + x1*m4 + x2*m8  + x3*m12
 *    r1 = x0*m1 + x1*m5 + x2*m9  + x3*m13
 *    r2 = x0*m2 + x1*m6 + x2*m10 + x3*m14
 *    r3 = x3
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.  MM6 (m6 | m2) and MM7
 * (m14 | m10) are loaded once, before the loop.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d )
HIDDEN(_mesa_3dnow_transform_points4_3d)
GLNAME( _mesa_3dnow_transform_points4_3d ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3R_2 ) ) /* count == 0 -> skip the loop */
MOVD ( REGOFF(8, ECX), MM6 ) /* | m2 */
PUNPCKLDQ ( REGOFF(24, ECX), MM6 ) /* m6 | m2 */
MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */
PUNPCKLDQ ( REGOFF(56, ECX), MM7 ) /* m14 | m10 */
ALIGNTEXT16
LLBL( G3TP3R_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
PREFETCH ( REGOFF(32, EAX) ) /* hopefully array is tightly packed */
MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */
MOVQ ( MM2, MM0 ) /* x1 | x0 */
MOVQ ( MM3, MM4 ) /* x3 | x2 */
MOVQ ( MM0, MM1 ) /* x1 | x0 */
MOVQ ( MM4, MM5 ) /* x3 | x2 */
PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */
PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */
PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */
PUNPCKLDQ ( MM3, MM3 ) /* x2 | x2 */
PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */
PUNPCKHDQ ( MM4, MM4 ) /* x3 | x3 */
PFMUL ( MM6, MM2 ) /* x1*m6 | x0*m2 */
PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */
PFMUL ( REGOFF(32, ECX), MM3 ) /* x2*m9 | x2*m8 */
ADD_L ( CONST(16), EDX ) /* next r */
PFMUL ( REGOFF(48, ECX), MM4 ) /* x3*m13 | x3*m12 */
PFADD ( MM1, MM3 ) /* x0*m1+..+x2*m9 | x0*m0+...+x2*m8 */
PFMUL ( MM7, MM5 ) /* x3*m14 | x2*m10 */
PFADD ( MM3, MM4 ) /* r1 | r0 */
PFACC ( MM2, MM5 ) /* x0*m2+x1*m6 | x2*m10+x3*m14 */
MOVD ( REGOFF(12, EAX), MM0 ) /* | x3 (read before EAX is advanced) */
ADD_L ( EDI, EAX ) /* next vertex */
PFACC ( MM0, MM5 ) /* r3 | r2 (r3 = x3 + 0) */
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3R_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points4_3d_no_rot
 *
 * Transforms 4-component source vertices with a matrix of which only the
 * diagonal entries m00, m11, m22 and the entries m30, m31, m32 are read,
 * and passes x3 through unchanged.  With mRC denoting the float at byte
 * offset 4*(4*R + C) from the matrix pointer:
 *
 *    r0 = x0*m00 + x3*m30
 *    r1 = x1*m11 + x3*m31
 *    r2 = x2*m22 + x3*m32
 *    r3 = x3
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.  The matrix entries are loaded
 * into MM0..MM2 once, before the loop.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_3d_no_rot )
HIDDEN(_mesa_3dnow_transform_points4_3d_no_rot)
GLNAME( _mesa_3dnow_transform_points4_3d_no_rot ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP3NRR_2 ) ) /* count == 0 -> skip the loop */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */
PUNPCKLDQ ( REGOFF(56, ECX), MM2 ) /* m32 | m22 */
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP3NRR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGOFF(32, EAX) ) /* 32 bytes past the next vertex; original note: "hopefully stride is zero" */
MOVQ ( MM5, MM6 ) /* x3 | x2 */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */
PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
ADD_L ( CONST(16), EDX ) /* next r */
MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP3NRR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points4_2d
 *
 * Transforms the x0/x1 components of 4-component source vertices with
 * the matrix entries m00, m01, m10, m11, m30, m31 and copies x2 and x3
 * through unchanged.  With mRC denoting the float at byte offset
 * 4*(4*R + C) from the matrix pointer:
 *
 *    r0 = x0*m00 + x1*m10 + x3*m30
 *    r1 = x0*m01 + x1*m11 + x3*m31
 *    r2 = x2
 *    r3 = x3
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.  The matrix entries are loaded
 * into MM0..MM2 once, before the loop.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d )
HIDDEN(_mesa_3dnow_transform_points4_2d)
GLNAME( _mesa_3dnow_transform_points4_2d ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2R_2 ) ) /* count == 0 -> skip the loop */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */
MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */
PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */
MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2R_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next source vertex (read only) */
MOVQ ( MM3, MM4 ) /* x1 | x0 */
MOVQ ( MM5, MM6 ) /* x3 | x2 */
PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */
ADD_L ( CONST(16), EDX ) /* next r */
PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */
PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */
PFADD ( MM6, MM3 ) /* r1 | r0 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2R_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2R_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points4_2d_no_rot
 *
 * Scales the x0/x1 components of 4-component source vertices by m00/m11,
 * adds x3 times m30/m31, and copies x2 and x3 through unchanged.  With
 * mRC denoting the float at byte offset 4*(4*R + C) from the matrix
 * pointer:
 *
 *    r0 = x0*m00 + x3*m30
 *    r1 = x1*m11 + x3*m31
 *    r2 = x2
 *    r3 = x3
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_2d_no_rot )
HIDDEN(_mesa_3dnow_transform_points4_2d_no_rot)
GLNAME( _mesa_3dnow_transform_points4_2d_no_rot ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (moved to ECX below) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TP2NRR_3 ) ) /* count == 0 -> skip the loop */
MOVD ( REGIND(ECX), MM0 ) /* | m00 */
PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */
MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */
ALIGNTEXT16
LLBL( G3TP2NRR_2 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next source vertex (read only) */
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */
MOVQ ( MM5, MM6 ) /* x3 | x2 */
ADD_L ( CONST(16), EDX ) /* next r */
PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */
PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */
PFADD ( MM4, MM6 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */
MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */
MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TP2NRR_3 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
ALIGNTEXT16
/*
 * _mesa_3dnow_transform_points4_identity
 *
 * Copies all four components of every source vertex to the output
 * unchanged (r0..r3 = x0..x3).  Source vertices are read with the source
 * stride; results are written to consecutive 16-byte output slots.  The
 * matrix argument is loaded into ECX but never dereferenced.
 *
 * Operands are written (source, destination); "a | b" comments show the
 * high | low halves of an MMX register.
 */
GLOBL GLNAME( _mesa_3dnow_transform_points4_identity )
HIDDEN(_mesa_3dnow_transform_points4_identity)
GLNAME( _mesa_3dnow_transform_points4_identity ):
PUSH_L ( ESI ) /* save ESI; restored before RET */
MOV_L ( ARG_DEST, ECX ) /* ECX = dest vector */
MOV_L ( ARG_MATRIX, ESI ) /* ESI = matrix (unused by this routine) */
MOV_L ( ARG_SOURCE, EAX ) /* EAX = source vector */
MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) /* dest size field = 4 */
OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) /* OR VEC_SIZE_4 into dest flags */
MOV_L ( REGOFF(V4F_COUNT, EAX), EDX )
MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) /* dest count = source count */
PUSH_L ( EDI ) /* save EDI; restored before RET */
MOV_L ( REGOFF(V4F_START, ECX), EDX ) /* EDX = dest data pointer */
MOV_L ( ESI, ECX ) /* ECX = matrix (not read afterwards) */
MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) /* ESI = vertex counter */
MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) /* EDI = source stride */
MOV_L ( REGOFF(V4F_START, EAX), EAX ) /* EAX = source data pointer */
TEST_L ( ESI, ESI )
JZ ( LLBL( G3TPIR_2 ) ) /* count == 0 -> skip the loop */
ALIGNTEXT16
LLBL( G3TPIR_1 ):
PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */
MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */
MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */
ADD_L ( EDI, EAX ) /* next vertex */
PREFETCH ( REGIND(EAX) ) /* next source vertex (read only) */
ADD_L ( CONST(16), EDX ) /* next r */
MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */
MOVQ ( MM1, REGOFF(-8, EDX) ) /* r3 | r2 */
DEC_L ( ESI ) /* decrement vertex counter */
JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */
LLBL( G3TPIR_2 ):
FEMMS /* leave MMX/3DNow! state before returning */
POP_L ( EDI )
POP_L ( ESI )
RET
#endif
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

40
src/arch/x86/Makefile.am Normal file
View File

@ -0,0 +1,40 @@
# Copyright © 2012 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
# Everything in this file is guarded by the HAVE_X86_ASM automake
# conditional; when it is false this Makefile.am defines nothing.
if HAVE_X86_ASM
# Include paths and preprocessor defines used when compiling gen_matypes.
AM_CPPFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/mesa \
-I$(top_srcdir)/src/GLdispatch/mapi \
$(API_DEFINES) \
$(DEFINES)
# gen_matypes is a build-time helper only (noinst: it is never installed).
noinst_PROGRAMS = gen_matypes
gen_matypes_SOURCES = gen_matypes.c
# matypes.h is generated, so it is listed in BUILT_SOURCES (built before
# the other targets) and in CLEANFILES (removed by "make clean").
BUILT_SOURCES = matypes.h
CLEANFILES = matypes.h
# matypes.h is whatever gen_matypes prints on stdout.
# NOTE(review): this runs gen_matypes on the build machine, which
# presumably does not work when cross-compiling -- confirm.
matypes.h: gen_matypes
$(AM_V_GEN)./gen_matypes > $@
endif

1747
src/arch/x86/assyntax.h Normal file

File diff suppressed because it is too large Load Diff

59
src/arch/x86/clip_args.h Normal file
View File

@ -0,0 +1,59 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Clip test function interface for assembly code. Simply define
* FRAME_OFFSET to the number of bytes pushed onto the stack before
* using the ARG_* argument macros.
*
* Gareth Hughes
*/
#ifndef __CLIP_ARGS_H__
#define __CLIP_ARGS_H__
/*
* Offsets for clip_func arguments
*
* typedef GLvector4f *(*clip_func)( GLvector4f *clip_vec,
* GLvector4f *proj_vec,
* GLubyte clipMask[],
* GLubyte *orMask,
* GLubyte *andMask );
*/
/*
* Byte offsets of the five clip_func arguments, in declaration order and
* 4 bytes apart.  They are relative to the stack pointer before any
* register has been pushed.
* NOTE(review): starting at 4 presumably leaves room for the return
* address at 0(ESP) -- confirm against the calling convention in use.
*/
#define OFFSET_SOURCE 4
#define OFFSET_DEST 8
#define OFFSET_CLIP 12
#define OFFSET_OR 16
#define OFFSET_AND 20
/*
* ESP-relative operands for each argument.  FRAME_OFFSET must be defined
* by the including file as the number of bytes it has pushed onto the
* stack before the ARG_* macro is used.
*/
#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)
#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP)
#define ARG_OR REGOFF(FRAME_OFFSET+OFFSET_OR, ESP)
#define ARG_AND REGOFF(FRAME_OFFSET+OFFSET_AND, ESP)
#endif

336
src/arch/x86/common_x86.c Normal file
View File

@ -0,0 +1,336 @@
/*
* Mesa 3-D graphics library
* Version: 6.5.1
*
* Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file common_x86.c
*
* Check CPU capabilities & initialize optimized functions for this particular
* processor.
*
* Changed by Andre Werthmann for using the new SSE functions.
*
* \author Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
* \author Andre Werthmann <wertmann@cs.uni-potsdam.de>
*/
/* XXX these includes should probably go into imports.h or glheader.h */
#if defined(USE_SSE_ASM) && defined(__linux__)
#include <linux/version.h>
#endif
#if defined(USE_SSE_ASM) && defined(__FreeBSD__)
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
#if defined(USE_SSE_ASM) && defined(__OpenBSD__)
#include <sys/param.h>
#include <sys/sysctl.h>
#include <machine/cpu.h>
#endif
#include "main/imports.h"
#include "common_x86_asm.h"
/**
 * Bitmask of X86_FEATURE_x bits.
 *
 * Starts out empty.  The SSE checks in this file clear X86_FEATURE_XMM
 * again when the operating system turns out not to support SSE.
 */
int _mesa_x86_cpu_features = 0x0;
/* Debug switch, off by default.
 * NOTE(review): presumably enables extra output during CPU detection;
 * no user of it is visible in this part of the file -- confirm.
 */
static int detection_debug = GL_FALSE;
/* No reason for this to be public.
*/
/* CPUID helpers declared with the _ASMAPI calling convention; their
 * definitions are not in this file.
 * NOTE(review): judging by the names, the _eax/_ebx/_ecx/_edx variants
 * return a single result register for the given CPUID op -- confirm
 * against the assembly implementation.
 */
extern GLuint _ASMAPI _mesa_x86_has_cpuid(void);
extern void _ASMAPI _mesa_x86_cpuid(GLuint op, GLuint *reg_eax, GLuint *reg_ebx, GLuint *reg_ecx, GLuint *reg_edx);
extern GLuint _ASMAPI _mesa_x86_cpuid_eax(GLuint op);
extern GLuint _ASMAPI _mesa_x86_cpuid_ebx(GLuint op);
extern GLuint _ASMAPI _mesa_x86_cpuid_ecx(GLuint op);
extern GLuint _ASMAPI _mesa_x86_cpuid_edx(GLuint op);
#if defined(USE_SSE_ASM)
/*
* We must verify that the Streaming SIMD Extensions are truly supported
* on this processor before we go ahead and hook out the optimized code.
*
* However, I have been told by Alan Cox that all 2.4 (and later) Linux
* kernels provide full SSE support on all processors that expose SSE via
* the CPUID mechanism.
*/
/* These are assembly functions: */
/* In this file they are only called from the _WIN32 branch of
 * _mesa_check_os_sse_support(), while ExceptionFilter is installed to catch
 * the faults they may raise. */
extern void _mesa_test_os_sse_support( void );
extern void _mesa_test_os_sse_exception_support( void );
#if defined(_WIN32)
#ifndef STATUS_FLOAT_MULTIPLE_TRAPS
# define STATUS_FLOAT_MULTIPLE_TRAPS (0xC00002B5L)
#endif
/**
 * Unhandled-exception filter installed while the SSE probe routines run.
 *
 * - EXCEPTION_ILLEGAL_INSTRUCTION: the SSE instruction could not be executed,
 *   so X86_FEATURE_XMM is cleared from _mesa_x86_cpu_features.
 * - STATUS_FLOAT_MULTIPLE_TRAPS: only logged.
 * - anything else: logged with its exception code and handed on by
 *   returning EXCEPTION_EXECUTE_HANDLER.
 *
 * For the two expected exceptions the saved instruction pointer is advanced
 * past the faulting instruction and execution is resumed.
 *
 * \param exp  exception record and thread context supplied by Windows.
 * \return EXCEPTION_CONTINUE_EXECUTION when the fault was handled here,
 *         EXCEPTION_EXECUTE_HANDLER otherwise.
 */
static LONG WINAPI ExceptionFilter(LPEXCEPTION_POINTERS exp)
{
   PEXCEPTION_RECORD rec = exp->ExceptionRecord;
   PCONTEXT ctx = exp->ContextRecord;

   if ( rec->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION ) {
      _mesa_debug(NULL, "EXCEPTION_ILLEGAL_INSTRUCTION\n" );
      _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
   } else if ( rec->ExceptionCode == STATUS_FLOAT_MULTIPLE_TRAPS ) {
      _mesa_debug(NULL, "STATUS_FLOAT_MULTIPLE_TRAPS\n");
      /* Windows seems to clear the exception flag itself, we just have to increment Eip */
   } else {
      /* The format string has a %08x conversion, so the exception code must
       * actually be passed; omitting it is undefined behaviour. */
      _mesa_debug(NULL, "UNEXPECTED EXCEPTION (0x%08x), terminating!\n",
                  (unsigned int) rec->ExceptionCode);
      return EXCEPTION_EXECUTE_HANDLER;
   }

   /* Eip can only be patched when the context carries control registers. */
   if ( (ctx->ContextFlags & CONTEXT_CONTROL) != CONTEXT_CONTROL ) {
      _mesa_debug(NULL, "Context does not contain control registers, terminating!\n");
      return EXCEPTION_EXECUTE_HANDLER;
   }

   /* Skip the faulting instruction.
    * NOTE(review): 3 is presumably the encoded length of the XORPS/DIVPS
    * instructions used by the probe routines -- confirm against the asm. */
   ctx->Eip += 3;

   return EXCEPTION_CONTINUE_EXECUTION;
}
#endif /* _WIN32 */
/**
 * Check if SSE is supported.
 * If not, turn off the X86_FEATURE_XMM flag in _mesa_x86_cpu_features.
 *
 * The check is platform specific:
 * - FreeBSD, NetBSD and OpenBSD query a sysctl; a failed query or a zero
 *   value clears the flag.
 * - Windows executes the assembly probe routines with ExceptionFilter
 *   installed as the unhandled-exception filter; the filter clears the flag
 *   when an SSE instruction faults.
 * - Every other platform leaves the flag untouched.
 */
void _mesa_check_os_sse_support( void )
{
#if defined(__FreeBSD__)
   {
      int ret, enabled;
      /* sysctlbyname() takes the buffer length through a size_t pointer,
       * so use size_t here as the NetBSD and OpenBSD branches do. */
      size_t len = sizeof(enabled);

      ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0);
      if (ret || !enabled)
         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
   }
#elif defined (__NetBSD__)
   {
      int ret, enabled;
      size_t len = sizeof(enabled);

      ret = sysctlbyname("machdep.sse", &enabled, &len, (void *)NULL, 0);
      if (ret || !enabled)
         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
   }
#elif defined(__OpenBSD__)
   {
      int mib[2];
      int ret, enabled;
      size_t len = sizeof(enabled);

      mib[0] = CTL_MACHDEP;
      mib[1] = CPU_SSE;

      ret = sysctl(mib, 2, &enabled, &len, NULL, 0);
      if (ret || !enabled)
         _mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
   }
#elif defined(_WIN32)
   LPTOP_LEVEL_EXCEPTION_FILTER oldFilter;

   /* Install our ExceptionFilter */
   oldFilter = SetUnhandledExceptionFilter( ExceptionFilter );

   /* cpu_has_xmm is re-read after each probe because ExceptionFilter may
    * have cleared X86_FEATURE_XMM while the probe ran. */
   if ( cpu_has_xmm ) {
      _mesa_debug(NULL, "Testing OS support for SSE...\n");

      _mesa_test_os_sse_support();

      if ( cpu_has_xmm ) {
         _mesa_debug(NULL, "Yes.\n");
      } else {
         _mesa_debug(NULL, "No!\n");
      }
   }

   if ( cpu_has_xmm ) {
      _mesa_debug(NULL, "Testing OS support for SSE unmasked exceptions...\n");

      _mesa_test_os_sse_exception_support();

      if ( cpu_has_xmm ) {
         _mesa_debug(NULL, "Yes.\n");
      } else {
         _mesa_debug(NULL, "No!\n");
      }
   }

   /* Restore previous exception filter */
   SetUnhandledExceptionFilter( oldFilter );

   if ( cpu_has_xmm ) {
      _mesa_debug(NULL, "Tests of OS support for SSE passed.\n");
   } else {
      _mesa_debug(NULL, "Tests of OS support for SSE failed!\n");
   }
#else
   /* Do nothing on other platforms for now.
    */
   if (detection_debug)
      _mesa_debug(NULL, "Not testing OS support for SSE, leaving enabled.\n");
#endif /* __FreeBSD__ */
}
#endif /* USE_SSE_ASM */
/**
 * Initialize the _mesa_x86_cpu_features bitfield.
 * This is a no-op if called more than once.
 *
 * When USE_X86_ASM is defined the function:
 *  1. returns with no features set if the MESA_NO_ASM environment variable
 *     is set;
 *  2. reads the standard feature bits (CPUID leaf 1, EDX) and, when
 *     available, the extended feature bits (leaf 0x80000001, EDX) and
 *     translates them into X86_FEATURE_* flags;
 *  3. lets MESA_NO_MMX, MESA_NO_3DNOW and MESA_NO_SSE switch individual
 *     features back off, and calls _mesa_check_os_sse_support() unless
 *     MESA_FORCE_SSE is set.
 *
 * Without USE_X86_ASM only the "called" flag is updated.
 */
void
_mesa_get_x86_features(void)
{
/* Guard that makes every call after the first return immediately. */
static int called = 0;
if (called)
return;
called = 1;
#ifdef USE_X86_ASM
_mesa_x86_cpu_features = 0x0;
if (_mesa_getenv( "MESA_NO_ASM")) {
return;
}
if (!_mesa_x86_has_cpuid()) {
_mesa_debug(NULL, "CPUID not detected\n");
}
else {
GLuint cpu_features;
GLuint cpu_ext_features;
GLuint cpu_ext_info;
char cpu_vendor[13];
GLuint result;
/* get vendor name */
/* The pointer arguments are (eax, ebx, ecx, edx), so EBX lands in bytes
 * 0-3, EDX in bytes 4-7 and ECX in bytes 8-11 of cpu_vendor. */
_mesa_x86_cpuid(0, &result, (GLuint *)(cpu_vendor + 0), (GLuint *)(cpu_vendor + 8), (GLuint *)(cpu_vendor + 4));
cpu_vendor[12] = '\0';
if (detection_debug)
_mesa_debug(NULL, "CPU vendor: %s\n", cpu_vendor);
/* get cpu features */
cpu_features = _mesa_x86_cpuid_edx(1);
if (cpu_features & X86_CPU_FPU)
_mesa_x86_cpu_features |= X86_FEATURE_FPU;
if (cpu_features & X86_CPU_CMOV)
_mesa_x86_cpu_features |= X86_FEATURE_CMOV;
#ifdef USE_MMX_ASM
if (cpu_features & X86_CPU_MMX)
_mesa_x86_cpu_features |= X86_FEATURE_MMX;
#endif
#ifdef USE_SSE_ASM
if (cpu_features & X86_CPU_XMM)
_mesa_x86_cpu_features |= X86_FEATURE_XMM;
if (cpu_features & X86_CPU_XMM2)
_mesa_x86_cpu_features |= X86_FEATURE_XMM2;
#endif
/* query extended cpu features */
/* The EAX result of leaf 0x80000000 is treated as the highest extended
 * leaf number that may be queried. */
if ((cpu_ext_info = _mesa_x86_cpuid_eax(0x80000000)) > 0x80000000) {
if (cpu_ext_info >= 0x80000001) {
cpu_ext_features = _mesa_x86_cpuid_edx(0x80000001);
/* The extended bits are only considered when the standard MMX bit is set. */
if (cpu_features & X86_CPU_MMX) {
#ifdef USE_3DNOW_ASM
if (cpu_ext_features & X86_CPUEXT_3DNOW)
_mesa_x86_cpu_features |= X86_FEATURE_3DNOW;
if (cpu_ext_features & X86_CPUEXT_3DNOW_EXT)
_mesa_x86_cpu_features |= X86_FEATURE_3DNOWEXT;
#endif
#ifdef USE_MMX_ASM
if (cpu_ext_features & X86_CPUEXT_MMX_EXT)
_mesa_x86_cpu_features |= X86_FEATURE_MMXEXT;
#endif
}
}
/* query cpu name */
/* NOTE(review): the guard only requires leaf 0x80000002, but the loop
 * below reads leaves 0x80000002 through 0x80000004. */
if (cpu_ext_info >= 0x80000002) {
GLuint ofs;
char cpu_name[49];
/* Each leaf fills 16 bytes of the name: EAX, EBX, ECX, EDX in order. */
for (ofs = 0; ofs < 3; ofs++)
_mesa_x86_cpuid(0x80000002+ofs, (GLuint *)(cpu_name + (16*ofs)+0), (GLuint *)(cpu_name + (16*ofs)+4), (GLuint *)(cpu_name + (16*ofs)+8), (GLuint *)(cpu_name + (16*ofs)+12));
cpu_name[48] = '\0'; /* the name should be NULL terminated, but just to be sure */
if (detection_debug)
_mesa_debug(NULL, "CPU name: %s\n", cpu_name);
}
}
}
/* Environment overrides. NOTE(review): each override clears only the base
 * flag (MMX, 3DNOW, XMM); the MMXEXT, 3DNOWEXT and XMM2 flags stay set. */
#ifdef USE_MMX_ASM
if ( cpu_has_mmx ) {
if ( _mesa_getenv( "MESA_NO_MMX" ) == 0 ) {
if (detection_debug)
_mesa_debug(NULL, "MMX cpu detected.\n");
} else {
_mesa_x86_cpu_features &= ~(X86_FEATURE_MMX);
}
}
#endif
#ifdef USE_3DNOW_ASM
if ( cpu_has_3dnow ) {
if ( _mesa_getenv( "MESA_NO_3DNOW" ) == 0 ) {
if (detection_debug)
_mesa_debug(NULL, "3DNow! cpu detected.\n");
} else {
_mesa_x86_cpu_features &= ~(X86_FEATURE_3DNOW);
}
}
#endif
#ifdef USE_SSE_ASM
if ( cpu_has_xmm ) {
if ( _mesa_getenv( "MESA_NO_SSE" ) == 0 ) {
if (detection_debug)
_mesa_debug(NULL, "SSE cpu detected.\n");
/* MESA_FORCE_SSE skips the operating-system check and keeps SSE enabled. */
if ( _mesa_getenv( "MESA_FORCE_SSE" ) == 0 ) {
_mesa_check_os_sse_support();
}
} else {
_mesa_debug(NULL, "SSE cpu detected, but switched off by user.\n");
_mesa_x86_cpu_features &= ~(X86_FEATURE_XMM);
}
}
#endif
#endif /* USE_X86_ASM */
/* Presumably keeps the compiler from warning about an unused variable when
 * none of the blocks above that read detection_debug are compiled in. */
(void) detection_debug;
}

View File

@ -0,0 +1,220 @@
/*
* Mesa 3-D graphics library
* Version: 6.3
*
* Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Check extended CPU capabilities. Now just returns the raw CPUID
* feature information, allowing the higher level code to interpret the
* results.
*
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
*
* Cleaned up and simplified by Gareth Hughes <gareth@valinux.com>
*
*/
/*
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
* in there will break the build on some platforms.
*/
#include "matypes.h"
#include "assyntax.h"
#include "common_x86_features.h"
SEG_TEXT
ALIGNTEXT4
/* GLuint _mesa_x86_has_cpuid(void)
 *
 * Returns 1 in EAX when the CPUID instruction is available, 0 otherwise.
 * ECX is used as scratch.
 */
GLOBL GLNAME(_mesa_x86_has_cpuid)
HIDDEN(_mesa_x86_has_cpuid)
GLNAME(_mesa_x86_has_cpuid):
/* Test for the CPUID command. If the ID Flag bit in EFLAGS
* (bit 21) is writable, the CPUID command is present */
PUSHF_L
POP_L (EAX)
MOV_L (EAX, ECX) /* keep the original EFLAGS value */
XOR_L (CONST(0x00200000), EAX) /* toggle the ID bit */
PUSH_L (EAX)
POPF_L
PUSHF_L
POP_L (EAX)
/* Verify the ID Flag bit has been written. */
CMP_L (ECX, EAX)
SETNE (AL) /* AL = 1 if EFLAGS changed, i.e. CPUID exists */
/* Keep only AL. The previous XOR with 0xff left 0xfe/0xff in the low byte
* and EFLAGS bits above it, so the result was always non-zero. */
AND_L (CONST(0xff), EAX)
RET
ALIGNTEXT4
/* Execute CPUID for the leaf passed as the first stack argument and store
 * EAX, EBX, ECX and EDX through the four pointer arguments that follow it.
 * EDI and EBX are saved and restored; after the two pushes the pointer
 * arguments sit at ESP+16 .. ESP+28.
 */
GLOBL GLNAME(_mesa_x86_cpuid)
HIDDEN(_mesa_x86_cpuid)
GLNAME(_mesa_x86_cpuid):
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
PUSH_L (EDI)
PUSH_L (EBX)
CPUID
MOV_L (REGOFF(16, ESP), EDI) /* *eax */
MOV_L (EAX, REGIND(EDI))
MOV_L (REGOFF(20, ESP), EDI) /* *ebx */
MOV_L (EBX, REGIND(EDI))
MOV_L (REGOFF(24, ESP), EDI) /* *ecx */
MOV_L (ECX, REGIND(EDI))
MOV_L (REGOFF(28, ESP), EDI) /* *edx */
MOV_L (EDX, REGIND(EDI))
POP_L (EBX)
POP_L (EDI)
RET
ALIGNTEXT4
/* Execute CPUID for the leaf passed on the stack and return the resulting
 * EAX value. EBX is saved and restored around the instruction.
 */
GLOBL GLNAME(_mesa_x86_cpuid_eax)
HIDDEN(_mesa_x86_cpuid_eax)
GLNAME(_mesa_x86_cpuid_eax):
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
PUSH_L (EBX)
CPUID
POP_L (EBX)
RET
ALIGNTEXT4
/* Execute CPUID for the leaf passed on the stack and return the resulting
 * EBX value in EAX. The caller's EBX is saved and restored.
 */
GLOBL GLNAME(_mesa_x86_cpuid_ebx)
HIDDEN(_mesa_x86_cpuid_ebx)
GLNAME(_mesa_x86_cpuid_ebx):
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
PUSH_L (EBX)
CPUID
MOV_L (EBX, EAX) /* return EBX */
POP_L (EBX)
RET
ALIGNTEXT4
/* Execute CPUID for the leaf passed on the stack and return the resulting
 * ECX value in EAX. EBX is saved and restored around the instruction.
 */
GLOBL GLNAME(_mesa_x86_cpuid_ecx)
HIDDEN(_mesa_x86_cpuid_ecx)
GLNAME(_mesa_x86_cpuid_ecx):
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
PUSH_L (EBX)
CPUID
MOV_L (ECX, EAX) /* return ECX */
POP_L (EBX)
RET
ALIGNTEXT4
/* Execute CPUID for the leaf passed on the stack and return the resulting
 * EDX value in EAX. EBX is saved and restored around the instruction.
 */
GLOBL GLNAME(_mesa_x86_cpuid_edx)
HIDDEN(_mesa_x86_cpuid_edx)
GLNAME(_mesa_x86_cpuid_edx):
MOV_L (REGOFF(4, ESP), EAX) /* cpuid op */
PUSH_L (EBX)
CPUID
MOV_L (EDX, EAX) /* return EDX */
POP_L (EBX)
RET
#ifdef USE_SSE_ASM
/* Execute an SSE instruction to see if the operating system correctly
* supports SSE. A signal handler for SIGILL should have been set
* before calling this function, otherwise this could kill the client
* application.
*
* -----> !!!! ATTENTION DEVELOPERS !!!! <-----
*
* If you're debugging with gdb and you get stopped in this function,
* just type 'continue'! Execution will proceed normally.
* See freedesktop.org bug #1709 for more info.
*/
ALIGNTEXT4
GLOBL GLNAME( _mesa_test_os_sse_support )
HIDDEN(_mesa_test_os_sse_support)
GLNAME( _mesa_test_os_sse_support ):
/* A single SSE instruction is the whole test: it either executes or faults. */
XORPS ( XMM0, XMM0 )
RET
/* Perform an SSE divide-by-zero to see if the operating system
* correctly supports unmasked SIMD FPU exceptions. Signal handlers for
* SIGILL and SIGFPE should have been set before calling this function,
* otherwise this could kill the client application.
*/
ALIGNTEXT4
GLOBL GLNAME( _mesa_test_os_sse_exception_support )
HIDDEN(_mesa_test_os_sse_exception_support)
GLNAME( _mesa_test_os_sse_exception_support ):
/* Frame layout: -4(EBP) holds the original MXCSR, -8(EBP) the modified copy. */
PUSH_L ( EBP )
MOV_L ( ESP, EBP )
SUB_L ( CONST( 8 ), ESP )
/* Save the original MXCSR register value.
*/
STMXCSR ( REGOFF( -4, EBP ) )
/* Unmask the divide-by-zero exception and perform one.
*/
STMXCSR ( REGOFF( -8, EBP ) )
AND_L ( CONST( 0xfffffdff ), REGOFF( -8, EBP ) ) /* clear bit 9 of the copy */
LDMXCSR ( REGOFF( -8, EBP ) )
XORPS ( XMM0, XMM0 ) /* XMM0 = four zeros (the divisor) */
/* Push four copies of 0x3f800000, the single-precision bit pattern of 1.0,
* and load them unaligned into XMM1 (the dividend). */
PUSH_L ( CONST( 0x3f800000 ) )
PUSH_L ( CONST( 0x3f800000 ) )
PUSH_L ( CONST( 0x3f800000 ) )
PUSH_L ( CONST( 0x3f800000 ) )
MOVUPS ( REGIND( ESP ), XMM1 )
DIVPS ( XMM0, XMM1 )
/* Restore the original MXCSR register value.
*/
LDMXCSR ( REGOFF( -4, EBP ) )
/* LEAVE resets ESP from EBP, which also discards the four pushed values. */
LEAVE
RET
#endif
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -0,0 +1,53 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Check CPU capabilities & initialize optimized functions for this particular
* processor.
*
* Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de>
* Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the
* new SSE functions
*
* Reimplemented by Gareth Hughes in a more
* future-proof manner, based on code in the Linux kernel.
*/
#ifndef __COMMON_X86_ASM_H__
#define __COMMON_X86_ASM_H__
/* Do not reference mtypes.h from this file.
*/
#include "common_x86_features.h"
/* Bitmask of X86_FEATURE_* flags; tested through the cpu_has_* macros. */
extern int _mesa_x86_cpu_features;
/* Fills in _mesa_x86_cpu_features. */
extern void _mesa_get_x86_features(void);
/* Clears X86_FEATURE_XMM when SSE is found not to be usable. */
extern void _mesa_check_os_sse_support(void);
/* NOTE(review): not defined in the code visible here; presumably installs
 * the x86 optimized transform routines -- confirm against its definition. */
extern void _mesa_init_all_x86_transform_asm( void );
#endif

View File

@ -0,0 +1,67 @@
/*
* Mesa 3-D graphics library
* Version: 5.1
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* x86 CPUID feature information. The feature bits are stored in
* _mesa_x86_cpu_features by _mesa_get_x86_features() and interpreted with
* the cpu_has_* helper macros.
*
* Gareth Hughes
*/
#ifndef __COMMON_X86_FEATURES_H__
#define __COMMON_X86_FEATURES_H__

/* Flags stored in the _mesa_x86_cpu_features bitmask. */
#define X86_FEATURE_FPU (1<<0)
#define X86_FEATURE_CMOV (1<<1)
#define X86_FEATURE_MMXEXT (1<<2)
#define X86_FEATURE_MMX (1<<3)
#define X86_FEATURE_FXSR (1<<4)
#define X86_FEATURE_XMM (1<<5)
#define X86_FEATURE_XMM2 (1<<6)
#define X86_FEATURE_3DNOWEXT (1<<7)
#define X86_FEATURE_3DNOW (1<<8)

/* standard X86 CPU features */
#define X86_CPU_FPU (1<<0)
#define X86_CPU_CMOV (1<<15)
#define X86_CPU_MMX (1<<23)
#define X86_CPU_XMM (1<<25)
#define X86_CPU_XMM2 (1<<26)

/* extended X86 CPU features */
#define X86_CPUEXT_MMX_EXT (1<<22)
#define X86_CPUEXT_3DNOW_EXT (1<<30)
/* Bit 31 needs an unsigned constant: shifting 1 into the sign bit of a
 * signed int is undefined behaviour and yields a negative value in practice. */
#define X86_CPUEXT_3DNOW (1U<<31)

/* Non-zero when the corresponding flag is set in _mesa_x86_cpu_features. */
#define cpu_has_mmx (_mesa_x86_cpu_features & X86_FEATURE_MMX)
#define cpu_has_mmxext (_mesa_x86_cpu_features & X86_FEATURE_MMXEXT)
#define cpu_has_xmm (_mesa_x86_cpu_features & X86_FEATURE_XMM)
#define cpu_has_xmm2 (_mesa_x86_cpu_features & X86_FEATURE_XMM2)
#define cpu_has_3dnow (_mesa_x86_cpu_features & X86_FEATURE_3DNOW)
#define cpu_has_3dnowext (_mesa_x86_cpu_features & X86_FEATURE_3DNOWEXT)

#endif

240
src/arch/x86/gen_matypes.c Normal file
View File

@ -0,0 +1,240 @@
/*
* Mesa 3-D graphics library
* Version: 6.5.1
*
* Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Gareth Hughes
*/
/*
* This generates an asm version of mtypes.h (called matypes.h), so that
* Mesa's x86 assembly code can access the internal structures easily.
* This will be particularly useful when developing new x86 asm code for
* Mesa, including lighting, clipping, texture image conversion etc.
*/
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include "main/glheader.h"
#include "main/mtypes.h"
#include "tnl/t_context.h"
/* Replace whatever offsetof the included headers provide with a plain
 * address computation on a null base pointer. */
#undef offsetof
#define offsetof( type, member ) ((size_t) &((type *)0)->member)

/* Print the banner comment that opens a group of offset definitions for
 * the structure named by x. */
#define OFFSET_HEADER( x ) \
do { \
   printf( "\n" \
           "\n" \
           "/* =====================================================" \
           "========\n" \
           " * Offsets for %s\n" \
           " */\n" \
           "\n", x ); \
} while (0)

/* Print the comment that opens a group of flag definitions for the
 * structure named by x. */
#define DEFINE_HEADER( x ) \
do { \
   printf( "\n" \
           "/*\n" \
           " * Flags for %s\n" \
           " */\n" \
           "\n", x ); \
} while (0)

/* Print "#define <s> <byte offset of member m inside type t>". */
#define OFFSET( s, t, m ) \
   printf( "#define %s\t%lu\n", s, (unsigned long) offsetof( t, m ) );

/* Print "#define <s> <sizeof(t)>". */
#define SIZEOF( s, t ) \
   printf( "#define %s\t%lu\n", s, (unsigned long) sizeof(t) );

/* Print "#define <s> 0x<d in hexadecimal>". */
#define DEFINE( s, d ) \
   printf( "#define %s\t0x%" PRIx64 "\n", s, (uint64_t) d );
/*
 * Write the generated header to standard output: an include guard
 * (__ASM_TYPES_H__) wrapped around one "#define NAME value" line per
 * structure offset, structure size or flag value listed below.
 * argc and argv are not used. Always returns 0.
 */
int main( int argc, char **argv )
{
printf( "/*\n" );
printf( " * This file is automatically generated from the Mesa internal type\n" );
printf( " * definitions. Do not edit directly.\n" );
printf( " */\n" );
printf( "\n" );
printf( "#ifndef __ASM_TYPES_H__\n" );
printf( "#define __ASM_TYPES_H__\n" );
printf( "\n" );
/* struct gl_context offsets:
*/
OFFSET_HEADER( "struct gl_context" );
printf( "\n" );
OFFSET( "CTX_LIGHT_ENABLED ", struct gl_context, Light.Enabled );
OFFSET( "CTX_LIGHT_SHADE_MODEL ", struct gl_context, Light.ShadeModel );
OFFSET( "CTX_LIGHT_COLOR_MAT_FACE ", struct gl_context, Light.ColorMaterialFace );
OFFSET( "CTX_LIGHT_COLOR_MAT_MODE ", struct gl_context, Light.ColorMaterialMode );
OFFSET( "CTX_LIGHT_COLOR_MAT_MASK ", struct gl_context, Light._ColorMaterialBitmask );
OFFSET( "CTX_LIGHT_COLOR_MAT_ENABLED ", struct gl_context, Light.ColorMaterialEnabled );
OFFSET( "CTX_LIGHT_ENABLED_LIST ", struct gl_context, Light.EnabledList );
OFFSET( "CTX_LIGHT_NEED_VERTS ", struct gl_context, Light._NeedVertices );
OFFSET( "CTX_LIGHT_BASE_COLOR ", struct gl_context, Light._BaseColor );
/* struct vertex_buffer offsets:
*/
OFFSET_HEADER( "struct vertex_buffer" );
OFFSET( "VB_SIZE ", struct vertex_buffer, Size );
OFFSET( "VB_COUNT ", struct vertex_buffer, Count );
printf( "\n" );
OFFSET( "VB_ELTS ", struct vertex_buffer, Elts );
OFFSET( "VB_OBJ_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_POS] );
OFFSET( "VB_EYE_PTR ", struct vertex_buffer, EyePtr );
OFFSET( "VB_CLIP_PTR ", struct vertex_buffer, ClipPtr );
OFFSET( "VB_PROJ_CLIP_PTR ", struct vertex_buffer, NdcPtr );
OFFSET( "VB_CLIP_OR_MASK ", struct vertex_buffer, ClipOrMask );
OFFSET( "VB_CLIP_MASK ", struct vertex_buffer, ClipMask );
OFFSET( "VB_NORMAL_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_NORMAL] );
OFFSET( "VB_EDGE_FLAG ", struct vertex_buffer, EdgeFlag );
OFFSET( "VB_TEX0_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX0] );
OFFSET( "VB_TEX1_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX1] );
OFFSET( "VB_TEX2_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX2] );
OFFSET( "VB_TEX3_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_TEX3] );
OFFSET( "VB_INDEX_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR_INDEX] );
OFFSET( "VB_COLOR_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR0] );
OFFSET( "VB_SECONDARY_COLOR_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_COLOR1] );
OFFSET( "VB_FOG_COORD_PTR ", struct vertex_buffer, AttribPtr[_TNL_ATTRIB_FOG] );
OFFSET( "VB_PRIMITIVE ", struct vertex_buffer, Primitive );
printf( "\n" );
DEFINE_HEADER( "struct vertex_buffer" );
/* XXX use new labels here someday after vertex program is done */
/* The generated names on the left differ from the VERT_BIT_* constants they
 * take their values from for the first five entries. */
DEFINE( "VERT_BIT_OBJ ", VERT_BIT_POS );
DEFINE( "VERT_BIT_NORM ", VERT_BIT_NORMAL );
DEFINE( "VERT_BIT_RGBA ", VERT_BIT_COLOR0 );
DEFINE( "VERT_BIT_SPEC_RGB ", VERT_BIT_COLOR1 );
DEFINE( "VERT_BIT_FOG_COORD ", VERT_BIT_FOG );
DEFINE( "VERT_BIT_TEX0 ", VERT_BIT_TEX0 );
DEFINE( "VERT_BIT_TEX1 ", VERT_BIT_TEX1 );
DEFINE( "VERT_BIT_TEX2 ", VERT_BIT_TEX2 );
DEFINE( "VERT_BIT_TEX3 ", VERT_BIT_TEX3 );
/* GLvector4f offsets:
*/
OFFSET_HEADER( "GLvector4f" );
OFFSET( "V4F_DATA ", GLvector4f, data );
OFFSET( "V4F_START ", GLvector4f, start );
OFFSET( "V4F_COUNT ", GLvector4f, count );
OFFSET( "V4F_STRIDE ", GLvector4f, stride );
OFFSET( "V4F_SIZE ", GLvector4f, size );
OFFSET( "V4F_FLAGS ", GLvector4f, flags );
DEFINE_HEADER( "GLvector4f" );
DEFINE( "VEC_MALLOC ", VEC_MALLOC );
DEFINE( "VEC_NOT_WRITEABLE ", VEC_NOT_WRITEABLE );
DEFINE( "VEC_BAD_STRIDE ", VEC_BAD_STRIDE );
printf( "\n" );
DEFINE( "VEC_SIZE_1 ", VEC_SIZE_1 );
DEFINE( "VEC_SIZE_2 ", VEC_SIZE_2 );
DEFINE( "VEC_SIZE_3 ", VEC_SIZE_3 );
DEFINE( "VEC_SIZE_4 ", VEC_SIZE_4 );
/* GLmatrix offsets:
*/
OFFSET_HEADER( "GLmatrix" );
OFFSET( "MATRIX_DATA ", GLmatrix, m );
OFFSET( "MATRIX_INV ", GLmatrix, inv );
OFFSET( "MATRIX_FLAGS ", GLmatrix, flags );
OFFSET( "MATRIX_TYPE ", GLmatrix, type );
/* struct gl_light offsets:
*/
OFFSET_HEADER( "struct gl_light" );
OFFSET( "LIGHT_NEXT ", struct gl_light, next );
OFFSET( "LIGHT_PREV ", struct gl_light, prev );
printf( "\n" );
OFFSET( "LIGHT_AMBIENT ", struct gl_light, Ambient );
OFFSET( "LIGHT_DIFFUSE ", struct gl_light, Diffuse );
OFFSET( "LIGHT_SPECULAR ", struct gl_light, Specular );
OFFSET( "LIGHT_EYE_POSITION ", struct gl_light, EyePosition );
OFFSET( "LIGHT_SPOT_DIRECTION ", struct gl_light, SpotDirection );
OFFSET( "LIGHT_SPOT_EXPONENT ", struct gl_light, SpotExponent );
OFFSET( "LIGHT_SPOT_CUTOFF ", struct gl_light, SpotCutoff );
OFFSET( "LIGHT_COS_CUTOFF ", struct gl_light, _CosCutoff );
OFFSET( "LIGHT_CONST_ATTEN ", struct gl_light, ConstantAttenuation );
OFFSET( "LIGHT_LINEAR_ATTEN ", struct gl_light, LinearAttenuation );
OFFSET( "LIGHT_QUADRATIC_ATTEN ", struct gl_light, QuadraticAttenuation );
OFFSET( "LIGHT_ENABLED ", struct gl_light, Enabled );
printf( "\n" );
OFFSET( "LIGHT_FLAGS ", struct gl_light, _Flags );
printf( "\n" );
OFFSET( "LIGHT_POSITION ", struct gl_light, _Position );
OFFSET( "LIGHT_VP_INF_NORM ", struct gl_light, _VP_inf_norm );
OFFSET( "LIGHT_H_INF_NORM ", struct gl_light, _h_inf_norm );
OFFSET( "LIGHT_NORM_DIRECTION ", struct gl_light, _NormSpotDirection );
OFFSET( "LIGHT_VP_INF_SPOT_ATTEN ", struct gl_light, _VP_inf_spot_attenuation );
printf( "\n" );
OFFSET( "LIGHT_MAT_AMBIENT ", struct gl_light, _MatAmbient );
OFFSET( "LIGHT_MAT_DIFFUSE ", struct gl_light, _MatDiffuse );
OFFSET( "LIGHT_MAT_SPECULAR ", struct gl_light, _MatSpecular );
printf( "\n" );
SIZEOF( "SIZEOF_GL_LIGHT ", struct gl_light );
DEFINE_HEADER( "struct gl_light" );
DEFINE( "LIGHT_SPOT ", LIGHT_SPOT );
DEFINE( "LIGHT_LOCAL_VIEWER ", LIGHT_LOCAL_VIEWER );
DEFINE( "LIGHT_POSITIONAL ", LIGHT_POSITIONAL );
printf( "\n" );
DEFINE( "LIGHT_NEED_VERTICES ", LIGHT_NEED_VERTICES );
/* struct gl_lightmodel offsets:
*/
OFFSET_HEADER( "struct gl_lightmodel" );
OFFSET( "LIGHT_MODEL_AMBIENT ", struct gl_lightmodel, Ambient );
OFFSET( "LIGHT_MODEL_LOCAL_VIEWER ", struct gl_lightmodel, LocalViewer );
OFFSET( "LIGHT_MODEL_TWO_SIDE ", struct gl_lightmodel, TwoSide );
OFFSET( "LIGHT_MODEL_COLOR_CONTROL ", struct gl_lightmodel, ColorControl );
printf( "\n" );
printf( "\n" );
/* Close the include guard opened at the top of the generated file. */
printf( "#endif /* __ASM_TYPES_H__ */\n" );
return 0;
}

59
src/arch/x86/mmx.h Normal file
View File

@ -0,0 +1,59 @@
/*
* Mesa 3-D graphics library
* Version: 6.5.2
*
* Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef ASM_MMX_H
#define ASM_MMX_H
#include "main/compiler.h"
#include "main/glheader.h"
/* Forward declaration: the prototypes below only pass a pointer to it. */
struct gl_context;
/*
 * Blend routines declared with the _ASMAPI calling convention. All five
 * share one signature.
 * NOTE(review): the parameter meanings are not visible from this header;
 * presumably n is the pixel count, mask selects the pixels to blend, rgba
 * holds the incoming colors (written in place) and dest the existing
 * destination colors -- confirm against the assembly implementation.
 */
extern void _ASMAPI
_mesa_mmx_blend_transparency( struct gl_context *ctx, GLuint n, const GLubyte mask[],
GLvoid *rgba, const GLvoid *dest,
GLenum chanType );
extern void _ASMAPI
_mesa_mmx_blend_add( struct gl_context *ctx, GLuint n, const GLubyte mask[],
GLvoid *rgba, const GLvoid *dest,
GLenum chanType );
extern void _ASMAPI
_mesa_mmx_blend_min( struct gl_context *ctx, GLuint n, const GLubyte mask[],
GLvoid *rgba, const GLvoid *dest,
GLenum chanType );
extern void _ASMAPI
_mesa_mmx_blend_max( struct gl_context *ctx, GLuint n, const GLubyte mask[],
GLvoid *rgba, const GLvoid *dest,
GLenum chanType );
extern void _ASMAPI
_mesa_mmx_blend_modulate( struct gl_context *ctx, GLuint n, const GLubyte mask[],
GLvoid *rgba, const GLvoid *dest,
GLenum chanType );
#endif

402
src/arch/x86/mmx_blend.S Normal file
View File

@ -0,0 +1,402 @@
;
/*
* Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
*/
#ifdef USE_MMX_ASM
#include "assyntax.h"
#include "matypes.h"
/* integer multiplication - alpha plus one
*
* makes the following approximation to the division (Sree)
*
* rgb*a/255 ~= (rgb*(a+1)) >> 8
*
* which is the fastest method that satisfies the following OpenGL criteria
*
* 0*0 = 0 and 255*255 = 255
*
* note that MX1 is a register with the 0xffffffffffffffff constant, which
* can be easily obtained with
*
* PCMPEQW ( MX1, MX1 )
*
* The results are left in MA1 (and in MA2 for the TWO() variant).
*/
#define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \
PSUBW ( MX1, MA1 ) /* a1 + 1 | a1 + 1 | a1 + 1 | a1 + 1 */ ;\
PMULLW ( MP1, MA1 ) /* t1 = p1*(a1 + 1) */ ;\
;\
TWO(PSUBW ( MX1, MA2 )) /* a2 + 1 | a2 + 1 | a2 + 1 | a2 + 1 */ ;\
TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*(a2 + 1) */ ;\
;\
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 ~= t1/255 */ ;\
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 ~= t2/255 */
/* integer multiplication - geometric series
*
* takes the geometric series approximation to the division
*
* t/255 = (t >> 8) + (t >> 16) + (t >> 24) ..
*
* in this case just the first two terms to fit in 16bit arithmetic
*
* t/255 ~= (t + (t >> 8)) >> 8
*
* note that just by itself it doesn't satisfy the OpenGL criteria, as 255*255 = 254,
* so the special case a = 255 must be accounted for or roundoff must be used
*
* MP1/MP2 are overwritten with copies of the products; the results are left
* in MA1/MA2.
*/
#define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \
PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\
TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\
;\
MOVQ ( MA1, MP1 ) ;\
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
;\
TWO(MOVQ ( MA2, MP2 )) ;\
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
;\
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
;\
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
/* integer multiplication - geometric series plus rounding
*
* when using a geometric series division, instead of truncating the result
* use roundoff in the approximation (Jim Blinn)
*
* t = rgb*a + 0x80
*
* achieving the exact results
*
* note that M80 is a register with the 0x0080008000800080 constant
*
* MP1/MP2 are overwritten with copies of the biased products; the results
* are left in MA1/MA2.
*/
#define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \
PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\
PADDW ( M80, MA1 ) /* t1 += 0x80 */ ;\
;\
TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\
TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\
;\
MOVQ ( MA1, MP1 ) ;\
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
;\
TWO(MOVQ ( MA2, MP2 )) ;\
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
;\
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
;\
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
/* linear interpolation - geometric series
*
* computes q + (p - q)*a/255 per channel, with the division approximated by
* t/255 ~= (t + (t >> 8)) >> 8; MP and MQ are overwritten and the results
* are left in MA1/MA2
*/
#define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \
PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\
PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\
;\
TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\
TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\
;\
MOVQ ( MA1, MP1 ) ;\
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
;\
TWO(MOVQ ( MA2, MP2 )) ;\
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
;\
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
;\
PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\
TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\
;\
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
/* linear interpolation - geometric series with roundoff
*
* this is a generalization of Blinn's formula to signed arithmetic
*
* note that M80 is a register with the 0x0080008000800080 constant
*
* MP and MQ are overwritten; the results are left in MA1/MA2
*/
#define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \
PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\
PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\
;\
TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\
TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\
;\
PSRLW ( CONST(15), MP1 ) /* q1 > p1 ? 1 : 0 (sign bit of p1 - q1) */ ;\
TWO(PSRLW ( CONST(15), MP2 )) /* q2 > p2 ? 1 : 0 (sign bit of p2 - q2) */ ;\
;\
PSLLW ( CONST(8), MP1 ) /* q1 > p1 ? 0x100 : 0 */ ;\
TWO(PSLLW ( CONST(8), MP2 )) /* q2 > p2 ? 0x100 : 0 */ ;\
;\
PSUBW ( MP1, MA1 ) /* t1 -=? 0x100 */ ;\
TWO(PSUBW ( MP2, MA2 )) /* t2 -=? 0x100 */ ;\
;\
PADDW ( M80, MA1 ) /* t1 += 0x80 */ ;\
TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\
;\
MOVQ ( MA1, MP1 ) ;\
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
;\
TWO(MOVQ ( MA2, MP2 )) ;\
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
;\
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
;\
PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\
TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\
;\
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
/* linear interpolation - geometric series with correction
*
* instead of the roundoff this adds a small correction to satisfy the OpenGL criteria
*
* t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8
*
* note that although it is faster than rounding off, it doesn't always give the exact results
*
* MP and MQ are overwritten; the results are left in MA1/MA2
*/
#define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \
PSUBW ( MQ1, MP1 ) /* pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\
PSLLW ( CONST(8), MQ1 ) /* q1 << 8 */ ;\
PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\
;\
TWO(PSUBW ( MQ2, MP2 )) /* pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\
TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8 */ ;\
TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\
;\
MOVQ ( MA1, MP1 ) ;\
PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\
;\
TWO(MOVQ ( MA2, MP2 )) ;\
TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\
;\
PADDW ( MA1, MP1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\
PSRLW ( CONST(7), MA1 ) /* (t1 >> 8) >> 7 = t1 >> 15 */ ;\
;\
TWO(PADDW ( MA2, MP2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\
TWO(PSRLW ( CONST(7), MA2 )) /* (t2 >> 8) >> 7 = t2 >> 15 */ ;\
;\
PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) + (t1 >>15) ~= (t1/255) << 8 */ ;\
TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) + (t2 >>15) ~= (t2/255) << 8 */ ;\
;\
PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\
TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\
;\
PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\
TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */
/* common blending setup code
*
* GMB_LOAD fetches the pixel(s) addressed by the rgba and dest registers into MPP and MQQ:
* a single 32-bit pixel (MOVD) when ONE() is enabled, a pair of pixels (MOVQ) when TWO()
* is enabled. ONE and TWO are defined by the including template around each use of MAIN.
*
* note that M00 (used by GMB_UNPACK) is a register with the 0x0000000000000000 constant,
* which can be easily obtained with
*
* PXOR ( M00, M00 )
*/
#define GMB_LOAD(rgba, dest, MPP, MQQ) \
ONE(MOVD ( REGIND(rgba), MPP )) /* | | | | qa1 | qb1 | qg1 | qr1 */ ;\
ONE(MOVD ( REGIND(dest), MQQ )) /* | | | | pa1 | pb1 | pg1 | pr1 */ ;\
;\
TWO(MOVQ ( REGIND(rgba), MPP )) /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */ ;\
TWO(MOVQ ( REGIND(dest), MQQ )) /* pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */
/* widen the packed bytes loaded by GMB_LOAD to 16-bit words
*
* each byte is interleaved with a byte of M00 (all zeros): the low four bytes of MP1/MQ1
* stay in MP1/MQ1 and, when TWO() is enabled, the high four bytes are first copied to
* MP2/MQ2 and unpacked there
*/
#define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \
TWO(MOVQ ( MP1, MP2 )) ;\
TWO(MOVQ ( MQ1, MQ2 )) ;\
;\
PUNPCKLBW ( M00, MQ1 ) /* qa1 | qb1 | qg1 | qr1 */ ;\
TWO(PUNPCKHBW ( M00, MQ2 )) /* qa2 | qb2 | qg2 | qr2 */ ;\
PUNPCKLBW ( M00, MP1 ) /* pa1 | pb1 | pg1 | pr1 */ ;\
TWO(PUNPCKHBW ( M00, MP2 )) /* pa2 | pb2 | pg2 | pr2 */
/* replicate the alpha word of an unpacked pixel into all four words
*
* MP1/MP2 hold pixels already widened to 16-bit words (alpha in the highest word) and are
* left untouched; MA1/MA2 receive the broadcast alpha. The second pixel is only handled
* when TWO() is enabled.
*/
#define GMB_ALPHA(MP1, MA1, MP2, MA2) \
MOVQ ( MP1, MA1 ) ;\
TWO(MOVQ ( MP2, MA2 )) ;\
;\
PUNPCKHWD ( MA1, MA1 ) /* pa1 | pa1 | | */ ;\
TWO(PUNPCKHWD ( MA2, MA2 )) /* pa2 | pa2 | | */ ;\
PUNPCKHDQ ( MA1, MA1 ) /* pa1 | pa1 | pa1 | pa1 */ ;\
TWO(PUNPCKHDQ ( MA2, MA2 )) /* pa2 | pa2 | pa2 | pa2 */
/* pack two registers of 16-bit words back into bytes with unsigned saturation
*
* the words of MS1 become the low four bytes and the words of MS2 the high four bytes of
* MS1. The instruction is issued unconditionally (it is not wrapped in ONE()/TWO()).
*
* the expansion deliberately ends without a trailing `;` or line continuation, like the
* other GMB_* macros: callers supply their own separator, and a dangling backslash would
* splice whatever line follows the macro into its body.
*/
#define GMB_PACK( MS1, MS2 ) \
PACKUSWB ( MS2, MS1 ) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */
/* write the packed result in MSS back to the memory addressed by the rgba register:
* one 32-bit pixel (MOVD) when ONE() is enabled, two pixels (MOVQ) when TWO() is enabled
*/
#define GMB_STORE(rgba, MSS ) \
ONE(MOVD ( MSS, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\
TWO(MOVQ ( MSS, REGIND(rgba) )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */
/* Kevin F. Quinn <kevquinn@gentoo.org> 2 July 2006
* Replace data segment constants with text-segment
* constants (via pushl/movq)
*
* The original data-segment definitions are kept here for reference:
SEG_DATA
ALIGNDATA8
const_0080:
D_LONG 0x00800080, 0x00800080
const_80:
D_LONG 0x80808080, 0x80808080
*/
/* Low (_l) and high (_h) 32-bit halves of the 64-bit constants. The INIT macros below
* push both halves on the stack and load them into an MMX register with one MOVQ.
* const_0080: 0x0080 in every 16-bit word; const_80: 0x80 in every byte.
*/
#define const_0080_l 0x00800080
#define const_0080_h 0x00800080
#define const_80_l 0x80808080
#define const_80_h 0x80808080
SEG_TEXT
/* Blend transparency function
*
* Instantiates the mmx_blendtmp.h template with the _transparency suffix.
* INIT clears MM0 so GMB_UNPACK can use it as the zero register.
* MAIN interpolates between the dest pixel and the rgba pixel, weighted by the alpha
* channel taken from the rgba pixel (GMB_ALPHA reads MM1, which GMB_LOAD fills from rgba),
* and stores the result back through rgba. In GMB_LERP_GSC terms rgba is p and dest is q.
*/
#define TAG(x) CONCAT(x,_transparency)
#define LLTAG(x) LLBL2(x,_transparency)
#define INIT \
PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */
#define MAIN( rgba, dest ) \
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\
GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) ;\
GMB_ALPHA( MM1, MM3, MM4, MM6 ) ;\
GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 ) ;\
GMB_PACK( MM3, MM6 ) ;\
GMB_STORE( rgba, MM3 )
#include "mmx_blendtmp.h"
/* Blend add function
*
* Instantiates the mmx_blendtmp.h template with the _add suffix. No setup is needed, so
* INIT is empty. MAIN adds the dest bytes to the rgba bytes with unsigned saturation
* (PADDUSB) and stores the sum back through rgba; the ONE() lines handle a single pixel,
* the TWO() lines a pair.
*
* FIXME: Add some loop unrolling here...
*/
#define TAG(x) CONCAT(x,_add)
#define LLTAG(x) LLBL2(x,_add)
#define INIT
#define MAIN( rgba, dest ) \
ONE(MOVD ( REGIND(rgba), MM1 )) /* | | | | qa1 | qb1 | qg1 | qr1 */ ;\
ONE(MOVD ( REGIND(dest), MM2 )) /* | | | | pa1 | pb1 | pg1 | pr1 */ ;\
ONE(PADDUSB ( MM2, MM1 )) ;\
ONE(MOVD ( MM1, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\
;\
TWO(MOVQ ( REGIND(rgba), MM1 )) /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */ ;\
TWO(PADDUSB ( REGIND(dest), MM1 )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\
TWO(MOVQ ( MM1, REGIND(rgba) ))
#include "mmx_blendtmp.h"
/* Blend min function
*
* Instantiates the mmx_blendtmp.h template with the _min suffix and stores the per-byte
* minimum of the rgba and dest pixels back through rgba.
*
* PCMPGTB is a signed compare, so both operands are first XORed with 0x80 in every byte
* (MM7) to turn the unsigned ordering into a signed one. In the comments of MAIN, p is the
* rgba pixel (MM1) and q is the dest pixel (MM2).
*/
#define TAG(x) CONCAT(x,_min)
#define LLTAG(x) LLBL2(x,_min)
/* Kevin F. Quinn 2nd July 2006
* Replace data segment constants with text-segment instructions
#define INIT \
MOVQ ( CONTENT(const_80), MM7 )
*/
/* Build the 64-bit constant on the stack, load it into MM7, then release the stack space. */
#define INIT \
PUSH_L ( CONST(const_80_h) ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\
PUSH_L ( CONST(const_80_l) ) ;\
MOVQ ( REGIND(ESP), MM7 ) ;\
ADD_L ( CONST(8), ESP)
#define MAIN( rgba, dest ) \
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\
MOVQ ( MM1, MM3 ) ;\
MOVQ ( MM2, MM4 ) ;\
PXOR ( MM7, MM3 ) /* unsigned -> signed */ ;\
PXOR ( MM7, MM4 ) /* unsigned -> signed */ ;\
PCMPGTB ( MM3, MM4 ) /* q > p ? 0xff : 0x00 */ ;\
PAND ( MM4, MM1 ) /* q > p ? p : 0 */ ;\
PANDN ( MM2, MM4 ) /* q > p ? 0 : q */ ;\
POR ( MM1, MM4 ) /* q > p ? p : q */ ;\
GMB_STORE( rgba, MM4 )
#include "mmx_blendtmp.h"
/* Blend max function
*
* Instantiates the mmx_blendtmp.h template with the _max suffix and stores the per-byte
* maximum of the rgba and dest pixels back through rgba.
*
* PCMPGTB is a signed compare, so both operands are first XORed with 0x80 in every byte
* (MM7) to turn the unsigned ordering into a signed one. In the comments of MAIN, p is the
* rgba pixel (MM1) and q is the dest pixel (MM2).
*/
#define TAG(x) CONCAT(x,_max)
#define LLTAG(x) LLBL2(x,_max)
/* Kevin F. Quinn 2nd July 2006
* Replace data segment constants with text-segment instructions
#define INIT \
MOVQ ( CONTENT(const_80), MM7 )
*/
/* Build the 64-bit constant on the stack, load it into MM7, then release the stack space.
* NOTE(review): the halves are pushed low-first here but high-first in the min variant;
* this is harmless because const_80_l and const_80_h have the same value.
*/
#define INIT \
PUSH_L ( CONST(const_80_l) ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\
PUSH_L ( CONST(const_80_h) ) ;\
MOVQ ( REGIND(ESP), MM7 ) ;\
ADD_L ( CONST(8), ESP)
#define MAIN( rgba, dest ) \
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\
MOVQ ( MM1, MM3 ) ;\
MOVQ ( MM2, MM4 ) ;\
PXOR ( MM7, MM3 ) /* unsigned -> signed */ ;\
PXOR ( MM7, MM4 ) /* unsigned -> signed */ ;\
PCMPGTB ( MM3, MM4 ) /* q > p ? 0xff : 0x00 */ ;\
PAND ( MM4, MM2 ) /* q > p ? q : 0 */ ;\
PANDN ( MM1, MM4 ) /* q > p ? 0 : p */ ;\
POR ( MM2, MM4 ) /* q > p ? q : p */ ;\
GMB_STORE( rgba, MM4 )
#include "mmx_blendtmp.h"
/* Blend modulate function
*
* Instantiates the mmx_blendtmp.h template with the _modulate suffix. MAIN unpacks the
* rgba and dest pixels to 16-bit words, combines them with GMB_MULT_GSR (defined earlier
* in this file), then packs MM2/MM5 and stores the result back through rgba.
*/
#define TAG(x) CONCAT(x,_modulate)
#define LLTAG(x) LLBL2(x,_modulate)
/* Kevin F. Quinn 2nd July 2006
* Replace data segment constants with text-segment instructions
#define INIT \
MOVQ ( CONTENT(const_0080), MM7 )
*/
/* Clear MM0 (zero register for GMB_UNPACK) and load 0x0080 in every word into MM7 via the
* stack. NOTE(review): the halves are pushed low-first, unlike the min variant; this is
* harmless because const_0080_l and const_0080_h have the same value.
*/
#define INIT \
PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */ ;\
PUSH_L ( CONST(const_0080_l) ) /* 0x0080 | 0x0080 | 0x0080 | 0x0080 */ ;\
PUSH_L ( CONST(const_0080_h) ) ;\
MOVQ ( REGIND(ESP), MM7 ) ;\
ADD_L ( CONST(8), ESP)
#define MAIN( rgba, dest ) \
GMB_LOAD( rgba, dest, MM1, MM2 ) ;\
GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) ;\
GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 ) ;\
GMB_PACK( MM2, MM5 ) ;\
GMB_STORE( rgba, MM2 )
#include "mmx_blendtmp.h"
#endif
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

114
src/arch/x86/mmx_blendtmp.h Normal file
View File

@ -0,0 +1,114 @@
/*
* Written by José Fonseca <j_r_fonseca@yahoo.co.uk>
*/
/*
* void _mesa_mmx_blend( struct gl_context *ctx,
* GLuint n,
* const GLubyte mask[],
* GLchan rgba[][4],
* CONST GLchan dest[][4] )
*
* Template for the MMX blend loops. Before including this file the includer must define
* TAG(x) - builds the exported function name
* LLTAG(x) - builds the local label names
* INIT - one-time register setup, expanded once before the loops
* MAIN(rgba, dest) - blend body, written with ONE()/TWO() wrappers
* All four are #undef'd at the end of this file.
*
* MAIN is expanded three times: for one pixel to align rgba on 8 bytes (runin), for two
* pixels per iteration (main loop), and for one trailing pixel (runout). ONE(x)/TWO(x)
* are redefined around each expansion to select the single- or double-pixel instructions.
*
* Register use: ECX = n, EBX = mask, EDI = rgba, ESI = dest.
*/
ALIGNTEXT16
GLOBL GLNAME( TAG(_mesa_mmx_blend) )
HIDDEN( TAG(_mesa_mmx_blend) )
GLNAME( TAG(_mesa_mmx_blend) ):
PUSH_L ( EBP )
MOV_L ( ESP, EBP )
PUSH_L ( ESI )
PUSH_L ( EDI )
PUSH_L ( EBX )
MOV_L ( REGOFF(12, EBP), ECX ) /* n */
CMP_L ( CONST(0), ECX)
JE ( LLTAG(GMB_return) ) /* nothing to do; no MMX state touched, so EMMS is skipped */
MOV_L ( REGOFF(16, EBP), EBX ) /* mask */
MOV_L ( REGOFF(20, EBP), EDI ) /* rgba */
MOV_L ( REGOFF(24, EBP), ESI ) /* dest */
INIT
TEST_L ( CONST(4), EDI ) /* align rgba on an 8-byte boundary */
JZ ( LLTAG(GMB_align_end) )
CMP_B ( CONST(0), REGIND(EBX) ) /* *mask == 0 */
JE ( LLTAG(GMB_align_continue) )
/* runin */
#define ONE(x) x
#define TWO(x)
MAIN ( EDI, ESI )
#undef ONE
#undef TWO
LLTAG(GMB_align_continue):
DEC_L ( ECX ) /* n -= 1 */
INC_L ( EBX ) /* mask += 1 */
ADD_L ( CONST(4), EDI ) /* rgba += 1 */
ADD_L ( CONST(4), ESI ) /* dest += 1 */
LLTAG(GMB_align_end):
CMP_L ( CONST(2), ECX)
JB ( LLTAG(GMB_loop_end) )
ALIGNTEXT16
LLTAG(GMB_loop_begin):
/* Both mask bytes are tested with one 16-bit compare: the pair is skipped only when both
* are zero, so a pair with a single mask byte set is blended as a whole.
*/
CMP_W ( CONST(0), REGIND(EBX) ) /* *mask == 0 && *(mask + 1) == 0 */
JE ( LLTAG(GMB_loop_continue) )
/* main loop */
#define ONE(x)
#define TWO(x) x
MAIN ( EDI, ESI )
#undef ONE
#undef TWO
LLTAG(GMB_loop_continue):
DEC_L ( ECX )
DEC_L ( ECX ) /* n -= 2 */
ADD_L ( CONST(2), EBX ) /* mask += 2 */
ADD_L ( CONST(8), EDI ) /* rgba += 2 */
ADD_L ( CONST(8), ESI ) /* dest += 2 */
CMP_L ( CONST(2), ECX )
JAE ( LLTAG(GMB_loop_begin) )
LLTAG(GMB_loop_end):
CMP_L ( CONST(1), ECX ) /* n == 0: no trailing pixel */
JB ( LLTAG(GMB_done) )
CMP_B ( CONST(0), REGIND(EBX) ) /* *mask == 0 */
JE ( LLTAG(GMB_done) )
/* runout */
#define ONE(x) x
#define TWO(x)
MAIN ( EDI, ESI )
#undef ONE
#undef TWO
LLTAG(GMB_done):
EMMS
LLTAG(GMB_return):
/* restore the callee-saved registers in reverse push order */
POP_L ( EBX )
POP_L ( EDI )
POP_L ( ESI )
MOV_L ( EBP, ESP )
POP_L ( EBP )
RET
#undef TAG
#undef LLTAG
#undef INIT
#undef MAIN

57
src/arch/x86/norm_args.h Normal file
View File

@ -0,0 +1,57 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Normal transform function interface for assembly code. Simply define
* FRAME_OFFSET to the number of bytes pushed onto the stack before
* using the ARG_* argument macros.
*
* Gareth Hughes
*/
#ifndef __NORM_ARGS_H__
#define __NORM_ARGS_H__
/* Offsets for normal_func arguments
*
* typedef void (*normal_func)( const GLmatrix *mat,
* GLfloat scale,
* const GLvector4f *in,
* const GLfloat lengths[],
* GLvector4f *dest );
*
* Each OFFSET_* is the byte offset of the argument from the stack pointer at function
* entry (the arguments are 4 bytes apart; offset 0 presumably holds the return address).
*/
#define OFFSET_MAT 4
#define OFFSET_SCALE 8
#define OFFSET_IN 12
#define OFFSET_LENGTHS 16
#define OFFSET_DEST 20
/* ESP-relative operands for each argument. FRAME_OFFSET must be defined by the user of
* this header as the number of bytes pushed onto the stack since function entry.
*/
#define ARG_MAT REGOFF(FRAME_OFFSET+OFFSET_MAT, ESP)
#define ARG_SCALE REGOFF(FRAME_OFFSET+OFFSET_SCALE, ESP)
#define ARG_IN REGOFF(FRAME_OFFSET+OFFSET_IN, ESP)
#define ARG_LENGTHS REGOFF(FRAME_OFFSET+OFFSET_LENGTHS, ESP)
#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
#endif

View File

@ -0,0 +1,686 @@
/*
* (C) Copyright IBM Corporation 2004
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file read_rgba_span_x86.S
* Optimized routines to transfer pixel data from the framebuffer to a
* buffer in main memory.
*
* \author Ian Romanick <idr@us.ibm.com>
*/
.file "read_rgba_span_x86.S"
#if !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) /* this one cries for assyntax.h */
/* Kevin F. Quinn 2nd July 2006
* Replaced data segment constants with text-segment instructions.
*
* LOAD_MASK(mvins, m1, m2) builds the two byte masks on the stack and loads them with the
* move instruction mvins:
* m1 = 0xff00ff00 repeated (bytes 1 and 3 of every pixel)
* m2 = 0x00ff0000 repeated (byte 2 of every pixel)
* Sixteen bytes are pushed per mask so the same macro can fill a 64-bit or a 128-bit
* register. The final addl releases all 32 bytes, so %esp is unchanged afterwards.
*/
#define LOAD_MASK(mvins,m1,m2) \
pushl $0xff00ff00 ;\
pushl $0xff00ff00 ;\
pushl $0xff00ff00 ;\
pushl $0xff00ff00 ;\
mvins (%esp), m1 ;\
pushl $0x00ff0000 ;\
pushl $0x00ff0000 ;\
pushl $0x00ff0000 ;\
pushl $0x00ff0000 ;\
mvins (%esp), m2 ;\
addl $32, %esp
/* I implemented these as macros because they appear in several places,
* and I've tweaked them a number of times. I got tired of changing every
* place they appear. :)
*
* DO_ONE_PIXEL() converts the 32-bit pixel at (%ebx) and stores it at (%ecx), then
* advances both pointers by 4 bytes. The net effect of bswap + rorl $8 is to swap bytes
* 0 and 2 of the pixel while leaving bytes 1 and 3 in place. %eax is clobbered.
*/
#define DO_ONE_PIXEL() \
movl (%ebx), %eax ; \
addl $4, %ebx ; \
bswap %eax /* ARGB -> BGRA */ ; \
rorl $8, %eax /* BGRA -> ABGR */ ; \
movl %eax, (%ecx) /* ABGR -> R, G, B, A */ ; \
addl $4, %ecx
/* DO_ONE_LAST_PIXEL() converts the 32-bit pixel at (%ebx) and stores it at (%ecx) exactly
* like DO_ONE_PIXEL(), but leaves both pointers where they are because no further pixel
* follows. %eax is clobbered.
*
* The expansion ends on the store: a trailing "; \" here would splice whatever line
* follows the macro definition into its body.
*/
#define DO_ONE_LAST_PIXEL() \
movl (%ebx), %eax ; \
bswap %eax /* ARGB -> BGRA */ ; \
rorl $8, %eax /* BGRA -> ABGR */ ; \
movl %eax, (%ecx) /* ABGR -> R, G, B, A */
/**
* MMX optimized version of the BGRA8888_REV to RGBA copy routine.
*
* Stack arguments, in order: source pointer, destination pointer, number of
* pixels to copy. Each pixel has bytes 0 and 2 swapped on the way through.
*
* \warning
* This function assumes that the caller will issue the EMMS instruction
* at the correct places.
*/
.globl _generic_read_RGBA_span_BGRA8888_REV_MMX
#ifndef USE_DRICORE
.hidden _generic_read_RGBA_span_BGRA8888_REV_MMX
#endif
.type _generic_read_RGBA_span_BGRA8888_REV_MMX, @function
_generic_read_RGBA_span_BGRA8888_REV_MMX:
pushl %ebx
#ifdef USE_INNER_EMMS
emms
#endif
/* %mm1 = mask for bytes 1 and 3, %mm2 = mask for byte 2 of each pixel. */
LOAD_MASK(movq,%mm1,%mm2)
/* Argument offsets include the 4 bytes of the saved %ebx. */
movl 8(%esp), %ebx /* source pointer */
movl 16(%esp), %edx /* number of pixels to copy */
movl 12(%esp), %ecx /* destination pointer */
testl %edx, %edx
jle .L20 /* Bail if there's nothing to do. */
/* %eax = 1 when bit 2 of the source address is set, i.e. one pixel must be
* converted first to reach an 8-byte boundary (assumes a 4-byte aligned source).
*/
movl %ebx, %eax
negl %eax
sarl $2, %eax
andl $1, %eax
je .L17
subl %eax, %edx
DO_ONE_PIXEL()
.L17:
/* Would it be faster to unroll this loop once and process 4 pixels
* per pass, instead of just two?
*/
/* %eax = number of pixel pairs. The jmp does not alter the flags, so the jne at
* .L18 first tests the zero flag left by shrl and afterwards the one left by subl.
*/
movl %edx, %eax
shrl %eax
jmp .L18
.L19:
movq (%ebx), %mm0
addl $8, %ebx
/* These 9 instructions do what PSHUFB (if there were such an
* instruction) could do in 1. :(
*/
/* %mm3 = byte 2 moved down to byte 0, %mm4 = byte 0 moved up to byte 2,
* %mm0 = bytes 1 and 3 kept in place; OR them together.
*/
movq %mm0, %mm3
movq %mm0, %mm4
pand %mm2, %mm3
psllq $16, %mm4
psrlq $16, %mm3
pand %mm2, %mm4
pand %mm1, %mm0
por %mm4, %mm3
por %mm3, %mm0
movq %mm0, (%ecx)
addl $8, %ecx
subl $1, %eax
.L18:
jne .L19
#ifdef USE_INNER_EMMS
emms
#endif
/* At this point there are either 1 or 0 pixels remaining to be
* converted. Convert the last pixel, if needed.
*/
testl $1, %edx
je .L20
DO_ONE_LAST_PIXEL()
.L20:
popl %ebx
ret
.size _generic_read_RGBA_span_BGRA8888_REV_MMX, .-_generic_read_RGBA_span_BGRA8888_REV_MMX
/**
* SSE optimized version of the BGRA8888_REV to RGBA copy routine. SSE
* instructions are only actually used to read data from the framebuffer.
* In practice, the speed-up is pretty small.
*
* Stack arguments, in order: source pointer, destination pointer, number of
* pixels to copy.
*
* \todo
* Do some more testing and determine if there's any reason to have this
* function in addition to the MMX version.
*
* \warning
* This function assumes that the caller will issue the EMMS instruction
* at the correct places.
*/
.globl _generic_read_RGBA_span_BGRA8888_REV_SSE
#ifndef USE_DRICORE
.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE
#endif
.type _generic_read_RGBA_span_BGRA8888_REV_SSE, @function
_generic_read_RGBA_span_BGRA8888_REV_SSE:
pushl %esi
pushl %ebx
pushl %ebp
#ifdef USE_INNER_EMMS
emms
#endif
/* %mm1 = mask for bytes 1 and 3, %mm2 = mask for byte 2 of each pixel. */
LOAD_MASK(movq,%mm1,%mm2)
/* Argument offsets include the 12 bytes of saved registers. */
movl 16(%esp), %ebx /* source pointer */
movl 24(%esp), %edx /* number of pixels to copy */
movl 20(%esp), %ecx /* destination pointer */
testl %edx, %edx
jle .L35 /* Bail if there's nothing to do. */
/* Reserve a 16-byte aligned scratch area on the stack; %ebp remembers the
* original stack pointer so it can be restored after the main loop.
*/
movl %esp, %ebp
subl $16, %esp
andl $0xfffffff0, %esp
/* %esi = number of leading pixels (0-3) needed to reach a 16-byte aligned source,
* limited to the pixel count; %edx = pixels left after those.
*/
movl %ebx, %eax
movl %edx, %esi
negl %eax
andl $15, %eax
sarl $2, %eax
cmpl %edx, %eax
cmovle %eax, %esi
subl %esi, %edx
testl $1, %esi
je .L32
DO_ONE_PIXEL()
.L32:
testl $2, %esi
je .L31
/* Convert two leading pixels with the MMX byte shuffle. */
movq (%ebx), %mm0
addl $8, %ebx
movq %mm0, %mm3
movq %mm0, %mm4
pand %mm2, %mm3
psllq $16, %mm4
psrlq $16, %mm3
pand %mm2, %mm4
pand %mm1, %mm0
por %mm4, %mm3
por %mm3, %mm0
movq %mm0, (%ecx)
addl $8, %ecx
.L31:
/* %eax = number of 4-pixel groups. The jmp does not alter the flags, so the jne at
* .L33 first tests the zero flag left by shrl and afterwards the one left by subl.
*/
movl %edx, %eax
shrl $2, %eax
jmp .L33
.L34:
movaps (%ebx), %xmm0
addl $16, %ebx
/* This would be so much better if we could just move directly from
* an SSE register to an MMX register. Unfortunately, that
* functionality wasn't introduced until SSE2 with the MOVDQ2Q
* instruction.
*/
/* Bounce the four pixels through the aligned scratch area into %mm0 and %mm5,
* then shuffle both halves in parallel.
*/
movaps %xmm0, (%esp)
movq (%esp), %mm0
movq 8(%esp), %mm5
movq %mm0, %mm3
movq %mm0, %mm4
movq %mm5, %mm6
movq %mm5, %mm7
pand %mm2, %mm3
pand %mm2, %mm6
psllq $16, %mm4
psllq $16, %mm7
psrlq $16, %mm3
psrlq $16, %mm6
pand %mm2, %mm4
pand %mm2, %mm7
pand %mm1, %mm0
pand %mm1, %mm5
por %mm4, %mm3
por %mm7, %mm6
por %mm3, %mm0
por %mm6, %mm5
movq %mm0, (%ecx)
movq %mm5, 8(%ecx)
addl $16, %ecx
subl $1, %eax
.L33:
jne .L34
#ifdef USE_INNER_EMMS
emms
#endif
/* Release the scratch area. */
movl %ebp, %esp
/* At this point there are either [0, 3] pixels remaining to be
* converted.
*/
testl $2, %edx
je .L36
movq (%ebx), %mm0
addl $8, %ebx
movq %mm0, %mm3
movq %mm0, %mm4
pand %mm2, %mm3
psllq $16, %mm4
psrlq $16, %mm3
pand %mm2, %mm4
pand %mm1, %mm0
por %mm4, %mm3
por %mm3, %mm0
movq %mm0, (%ecx)
addl $8, %ecx
.L36:
testl $1, %edx
je .L35
DO_ONE_LAST_PIXEL()
.L35:
popl %ebp
popl %ebx
popl %esi
ret
.size _generic_read_RGBA_span_BGRA8888_REV_SSE, .-_generic_read_RGBA_span_BGRA8888_REV_SSE
/**
* SSE2 optimized version of the BGRA8888_REV to RGBA copy routine.
*
* Stack arguments, in order: source pointer, destination pointer, number of
* pixels to copy. Only XMM and integer registers are used.
*/
.text
.globl _generic_read_RGBA_span_BGRA8888_REV_SSE2
#ifndef USE_DRICORE
.hidden _generic_read_RGBA_span_BGRA8888_REV_SSE2
#endif
.type _generic_read_RGBA_span_BGRA8888_REV_SSE2, @function
_generic_read_RGBA_span_BGRA8888_REV_SSE2:
pushl %esi
pushl %ebx
/* %xmm1 = mask for bytes 1 and 3, %xmm2 = mask for byte 2 of each pixel. */
LOAD_MASK(movdqu,%xmm1,%xmm2)
/* Argument offsets include the 8 bytes of saved registers. */
movl 12(%esp), %ebx /* source pointer */
movl 20(%esp), %edx /* number of pixels to copy */
movl 16(%esp), %ecx /* destination pointer */
movl %ebx, %eax
movl %edx, %esi
testl %edx, %edx
jle .L46 /* Bail if there's nothing to do. */
/* If the source pointer isn't a multiple of 16 we have to process
* a few pixels the "slow" way to get the address aligned for
* the SSE fetch instructions.
*/
/* %esi = number of leading pixels (0-3), limited to the pixel count;
* %edx = pixels left after those.
*/
negl %eax
andl $15, %eax
sarl $2, %eax
cmpl %edx, %eax
cmovbe %eax, %esi
subl %esi, %edx
testl $1, %esi
je .L41
DO_ONE_PIXEL()
.L41:
testl $2, %esi
je .L40
/* Convert two leading pixels: %xmm3 = byte 2 moved down to byte 0,
* %xmm4 = byte 0 moved up to byte 2, %xmm0 = bytes 1 and 3 kept in place.
*/
movq (%ebx), %xmm0
addl $8, %ebx
movdqa %xmm0, %xmm3
movdqa %xmm0, %xmm4
andps %xmm1, %xmm0
andps %xmm2, %xmm3
pslldq $2, %xmm4
psrldq $2, %xmm3
andps %xmm2, %xmm4
orps %xmm4, %xmm3
orps %xmm3, %xmm0
movq %xmm0, (%ecx)
addl $8, %ecx
.L40:
/* Would it be worth having a specialized version of this loop for
* the case where the destination is 16-byte aligned? That version
* would be identical except that it could use movdqa instead of
* movdqu.
*/
/* %eax = number of 4-pixel groups. The jmp does not alter the flags, so the jne at
* .L42 first tests the zero flag left by shrl and afterwards the one left by subl.
*/
movl %edx, %eax
shrl $2, %eax
jmp .L42
.L43:
movdqa (%ebx), %xmm0
addl $16, %ebx
movdqa %xmm0, %xmm3
movdqa %xmm0, %xmm4
andps %xmm1, %xmm0
andps %xmm2, %xmm3
pslldq $2, %xmm4
psrldq $2, %xmm3
andps %xmm2, %xmm4
orps %xmm4, %xmm3
orps %xmm3, %xmm0
movdqu %xmm0, (%ecx)
addl $16, %ecx
subl $1, %eax
.L42:
jne .L43
/* There may be up to 3 pixels remaining to be copied. Take care
* of them now. We do the 2 pixel case first because the data
* will be aligned.
*/
testl $2, %edx
je .L47
movq (%ebx), %xmm0
addl $8, %ebx
movdqa %xmm0, %xmm3
movdqa %xmm0, %xmm4
andps %xmm1, %xmm0
andps %xmm2, %xmm3
pslldq $2, %xmm4
psrldq $2, %xmm3
andps %xmm2, %xmm4
orps %xmm4, %xmm3
orps %xmm3, %xmm0
movq %xmm0, (%ecx)
addl $8, %ecx
.L47:
testl $1, %edx
je .L46
DO_ONE_LAST_PIXEL()
.L46:
popl %ebx
popl %esi
ret
.size _generic_read_RGBA_span_BGRA8888_REV_SSE2, .-_generic_read_RGBA_span_BGRA8888_REV_SSE2
/* 64-bit constants for the RGB565 routine, given as low (_L) and high (_H) 32-bit
* halves; the routine pushes both halves and loads them with one movq. Viewed as four
* 16-bit words (lowest first):
* MASK_565: 0xf800, 0x07e0, 0x001f, 0x0000 - one color field of the pixel per word
* ALPHA: 0x0000, 0x0000, 0x0000, 0x00ff - alpha value ORed into the top word
* PRESCALE/SCALE are the per-word multipliers applied before and after the optional
* right shift by SCALE_ADJUST.
*/
#define MASK_565_L 0x07e0f800
#define MASK_565_H 0x0000001f
/* Setting SCALE_ADJUST to 5 gives a perfect match with the
* classic C implementation in Mesa. Setting SCALE_ADJUST
* to 0 is slightly faster but at a small cost to accuracy.
*/
#define SCALE_ADJUST 5
#if SCALE_ADJUST == 5
#define PRESCALE_L 0x00100001
#define PRESCALE_H 0x00000200
#define SCALE_L 0x40C620E8
#define SCALE_H 0x0000839d
#elif SCALE_ADJUST == 0
#define PRESCALE_L 0x00200001
#define PRESCALE_H 0x00000800
#define SCALE_L 0x01040108
#define SCALE_H 0x00000108
#else
#error SCALE_ADJUST must either be 5 or 0.
#endif
#define ALPHA_L 0x00000000
#define ALPHA_H 0x00ff0000
/**
* MMX optimized version of the RGB565 to RGBA copy routine.
*
* Stack arguments, in order: source pointer, destination pointer, number of
* pixels to copy. Every 16-bit source pixel produces one 32-bit output pixel.
*
* NOTE(review): pshufw and pmulhuw are presumably not part of the original MMX
* instruction set (they arrived with SSE / extended MMX) - confirm that callers
* only select this routine on CPUs that support them.
*/
.text
.globl _generic_read_RGBA_span_RGB565_MMX
#ifndef USE_DRICORE
.hidden _generic_read_RGBA_span_RGB565_MMX
#endif
.type _generic_read_RGBA_span_RGB565_MMX, @function
_generic_read_RGBA_span_RGB565_MMX:
#ifdef USE_INNER_EMMS
emms
#endif
movl 4(%esp), %eax /* source pointer */
movl 8(%esp), %edx /* destination pointer */
movl 12(%esp), %ecx /* number of pixels to copy */
/* Build the four 64-bit constants on the stack (high half pushed first) and load
* them: %mm5 = field masks, %mm6 = prescale, %mm7 = scale, %mm3 = alpha.
* The addl releases all 32 bytes again.
*/
pushl $MASK_565_H
pushl $MASK_565_L
movq (%esp), %mm5
pushl $PRESCALE_H
pushl $PRESCALE_L
movq (%esp), %mm6
pushl $SCALE_H
pushl $SCALE_L
movq (%esp), %mm7
pushl $ALPHA_H
pushl $ALPHA_L
movq (%esp), %mm3
addl $32,%esp
/* %ecx = number of 4-pixel groups. The jmp does not alter the flags, so the jne at
* .L02 first tests the zero flag left by sarl and afterwards the one left by subl.
*/
sarl $2, %ecx
jl .L01 /* Bail early if the count is negative. */
jmp .L02
.L03:
/* Fetch 4 RGB565 pixels into %mm4. Distribute the first and
* second pixels into the four words of %mm0 and %mm2.
*/
movq (%eax), %mm4
addl $8, %eax
pshufw $0x00, %mm4, %mm0
pshufw $0x55, %mm4, %mm2
/* Mask the pixels so that each word of each register contains only
* one color component.
*/
pand %mm5, %mm0
pand %mm5, %mm2
/* Adjust the component values so that they are as small as possible,
* but large enough so that we can multiply them by an unsigned 16-bit
* number and get a value as large as 0x00ff0000.
*/
pmullw %mm6, %mm0
pmullw %mm6, %mm2
#if SCALE_ADJUST > 0
psrlw $SCALE_ADJUST, %mm0
psrlw $SCALE_ADJUST, %mm2
#endif
/* Scale the input component values to be on the range
* [0, 0x00ff0000]. This is the real magic of the whole routine.
*/
pmulhuw %mm7, %mm0
pmulhuw %mm7, %mm2
/* Always set the alpha value to 0xff.
*/
por %mm3, %mm0
por %mm3, %mm2
/* Pack the 16-bit values to 8-bit values and store the converted
* pixel data.
*/
packuswb %mm2, %mm0
movq %mm0, (%edx)
addl $8, %edx
/* Same sequence for the third and fourth pixels of the group. */
pshufw $0xaa, %mm4, %mm0
pshufw $0xff, %mm4, %mm2
pand %mm5, %mm0
pand %mm5, %mm2
pmullw %mm6, %mm0
pmullw %mm6, %mm2
#if SCALE_ADJUST > 0
psrlw $SCALE_ADJUST, %mm0
psrlw $SCALE_ADJUST, %mm2
#endif
pmulhuw %mm7, %mm0
pmulhuw %mm7, %mm2
por %mm3, %mm0
por %mm3, %mm2
packuswb %mm2, %mm0
movq %mm0, (%edx)
addl $8, %edx
subl $1, %ecx
.L02:
jne .L03
/* At this point there can be at most 3 pixels left to process. If
* there is either 2 or 3 left, process 2.
*/
/* Reload the original pixel count; only its low two bits matter from here on. */
movl 12(%esp), %ecx
testl $0x02, %ecx
je .L04
movd (%eax), %mm4
addl $4, %eax
pshufw $0x00, %mm4, %mm0
pshufw $0x55, %mm4, %mm2
pand %mm5, %mm0
pand %mm5, %mm2
pmullw %mm6, %mm0
pmullw %mm6, %mm2
#if SCALE_ADJUST > 0
psrlw $SCALE_ADJUST, %mm0
psrlw $SCALE_ADJUST, %mm2
#endif
pmulhuw %mm7, %mm0
pmulhuw %mm7, %mm2
por %mm3, %mm0
por %mm3, %mm2
packuswb %mm2, %mm0
movq %mm0, (%edx)
addl $8, %edx
.L04:
/* At this point there can be at most 1 pixel left to process.
* Process it if needed.
*/
testl $0x01, %ecx
je .L01
/* Load exactly one 16-bit pixel and store exactly one 32-bit result. */
movzwl (%eax), %ecx
movd %ecx, %mm4
pshufw $0x00, %mm4, %mm0
pand %mm5, %mm0
pmullw %mm6, %mm0
#if SCALE_ADJUST > 0
psrlw $SCALE_ADJUST, %mm0
#endif
pmulhuw %mm7, %mm0
por %mm3, %mm0
packuswb %mm0, %mm0
movd %mm0, (%edx)
.L01:
#ifdef USE_INNER_EMMS
emms
#endif
ret
#endif /* !defined(__DJGPP__) && !defined(__MINGW32__) && !defined(__APPLE__) */
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -0,0 +1,56 @@
/*
* (C) Copyright IBM Corporation 2004
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file read_rgba_span_x86.h
*
* \author Ian Romanick <idr@us.ibm.com>
*/
#ifndef READ_RGBA_SPAN_X86_H
#define READ_RGBA_SPAN_X86_H
#if defined(USE_SSE_ASM) || defined(USE_MMX_ASM)
#include "x86/common_x86_asm.h"
#endif
/* Prototypes for the assembly routines in read_rgba_span_x86.S. All of them take
 * (source pointer, destination pointer, number of pixels to copy).
 */
#if defined(USE_SSE_ASM)
/* SSE2 variant; note it is guarded by USE_SSE_ASM like the SSE variant below. */
extern void _generic_read_RGBA_span_BGRA8888_REV_SSE2( const unsigned char *,
unsigned char *, unsigned );
#endif
#if defined(USE_SSE_ASM)
extern void _generic_read_RGBA_span_BGRA8888_REV_SSE( const unsigned char *,
unsigned char *, unsigned );
#endif
#if defined(USE_MMX_ASM)
extern void _generic_read_RGBA_span_BGRA8888_REV_MMX( const unsigned char *,
unsigned char *, unsigned );
extern void _generic_read_RGBA_span_RGB565_MMX( const unsigned char *,
unsigned char *, unsigned );
#endif
#endif /* READ_RGBA_SPAN_X86_H */

1203
src/arch/x86/rtasm/x86sse.c Normal file

File diff suppressed because it is too large Load Diff

256
src/arch/x86/rtasm/x86sse.h Normal file
View File

@ -0,0 +1,256 @@
#ifndef _X86SSE_H_
#define _X86SSE_H_
#if defined(__i386__) || defined(__386__)
/* It is up to the caller to ensure that instructions issued are
* suitable for the host cpu. There are no checks made in this module
* for mmx/sse/sse2 support on the cpu.
*/
/* Operand descriptor: a register, or a memory reference through a register.
 * The four bit-fields add up to 32 bits.
 */
struct x86_reg {
unsigned file:3; /* presumably an enum x86_reg_file value - confirm in x86sse.c */
unsigned idx:3; /* presumably an enum x86_reg_name value - confirm in x86sse.c */
unsigned mod:2; /* mod_REG if this is just a register */
int disp:24; /* only +/- 23bits of offset - should be enough... */
};
/* State of one function being generated; every routine declared in this header
 * takes a pointer to it as its first argument. The field meanings noted below are
 * inferred from the names only - confirm against x86sse.c.
 */
struct x86_function {
unsigned size; /* presumably the size of the code buffer */
unsigned char *store; /* presumably the start of the code buffer */
unsigned char *csr; /* presumably the current write position in the buffer */
unsigned stack_offset;
int need_emms;
const char *fn;
};
/* Register files an operand can belong to.
 */
enum x86_reg_file {
file_REG32,
file_MMX,
file_XMM,
file_x87
};
/* Values for mod field of modr/m byte
*/
enum x86_reg_mod {
mod_INDIRECT,
mod_DISP8,
mod_DISP32,
mod_REG
};
/* Register indices (values 0..7 in declaration order).
 */
enum x86_reg_name {
reg_AX,
reg_CX,
reg_DX,
reg_BX,
reg_SP,
reg_BP,
reg_SI,
reg_DI
};
/* Condition codes accepted by x86_jcc() and x86_jcc_forward().
 */
enum x86_cc {
cc_O, /* overflow */
cc_NO, /* not overflow */
cc_NAE, /* not above or equal / carry */
cc_AE, /* above or equal / not carry */
cc_E, /* equal / zero */
cc_NE /* not equal / not zero */
};
/* Comparison predicates; presumably the values passed as the cc argument of
 * sse_cmpps() - confirm in x86sse.c.
 */
enum sse_cc {
cc_Equal,
cc_LessThan,
cc_LessThanEqual,
cc_Unordered,
cc_NotEqual,
cc_NotLessThan,
cc_NotLessThanEqual,
cc_Ordered
};
/* Zero / not-zero aliases for the equal / not-equal condition codes.
 */
#define cc_Z cc_E
#define cc_NZ cc_NE
/* Begin/end/retrieve function creation:
*/
void x86_init_func( struct x86_function *p );
int x86_init_func_size( struct x86_function *p, unsigned code_size );
void x86_release_func( struct x86_function *p );
/* Returns a pointer of type void (*)(void) for the function described by p. */
void (*x86_get_func( struct x86_function *p ))( void );
/* Create and manipulate registers and regmem values:
*
* All four return a struct x86_reg by value; nothing here takes a struct x86_function.
*/
struct x86_reg x86_make_reg( enum x86_reg_file file,
enum x86_reg_name idx );
struct x86_reg x86_make_disp( struct x86_reg reg,
int disp );
struct x86_reg x86_deref( struct x86_reg reg );
struct x86_reg x86_get_base_reg( struct x86_reg reg );
/* Labels, jumps and fixup:
*
* Labels are plain unsigned char pointers. The *_forward() variants return a pointer
* that is later handed to x86_fixup_fwd_jump(), presumably once the target position
* is known - confirm in x86sse.c.
*/
unsigned char *x86_get_label( struct x86_function *p );
void x86_jcc( struct x86_function *p,
enum x86_cc cc,
unsigned char *label );
unsigned char *x86_jcc_forward( struct x86_function *p,
enum x86_cc cc );
unsigned char *x86_jmp_forward( struct x86_function *p);
unsigned char *x86_call_forward( struct x86_function *p);
void x86_fixup_fwd_jump( struct x86_function *p,
unsigned char *fixup );
void x86_jmp( struct x86_function *p, unsigned char *label );
/* void x86_call( struct x86_function *p, void (*label)() ); */
void x86_call( struct x86_function *p, struct x86_reg reg);
/* michal:
* Temporary. As I need immediate operands, and don't want to mess with the codegen,
* I load the immediate into a general purpose register and use it.
*/
void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm );
/* Macros for sse_shufps() and sse2_pshufd():
*
* SHUF(x,y,z,w) packs four 2-bit selectors into one byte: x in bits 0-1, y in bits 2-3,
* z in bits 4-5 and w in bits 6-7.
* SHUF_NOOP is the identity selection, SHUF(0,1,2,3) (0xE4).
* GET_SHUF(swz, idx) extracts selector number idx (0..3) from a packed value.
*/
#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
#define SHUF_NOOP SHUF(0,1,2,3)
#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
void mmx_emms( struct x86_function *p );
void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src,
unsigned char cc );
void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
unsigned char shuf );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
/* Prototypes named after general-purpose x86 instructions.  Every one
 * takes the x86_function `p` first, followed by zero, one (reg / src) or
 * two (dst, src) register operands.
 * NOTE(review): presumably each call appends the named instruction to `p`;
 * the definitions are not part of this header -- confirm there.
 */
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_dec( struct x86_function *p, struct x86_reg reg );
void x86_inc( struct x86_function *p, struct x86_reg reg );
void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
/* Single explicit operand only; no destination register is passed. */
void x86_mul( struct x86_function *p, struct x86_reg src );
void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_pop( struct x86_function *p, struct x86_reg reg );
void x86_push( struct x86_function *p, struct x86_reg reg );
void x86_ret( struct x86_function *p );
void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_sahf( struct x86_function *p );
/* Prototypes named after x87 floating-point instructions.  Every one takes
 * the x86_function `p` first; the remaining parameters are the register
 * operands (`dst`, `arg`), if any.
 * NOTE(review): presumably each call appends the named instruction to `p`;
 * the definitions are not part of this header -- confirm there.
 */
void x87_f2xm1( struct x86_function *p );
void x87_fabs( struct x86_function *p );
void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_faddp( struct x86_function *p, struct x86_reg dst );
void x87_fchs( struct x86_function *p );
void x87_fclex( struct x86_function *p );
void x87_fcom( struct x86_function *p, struct x86_reg dst );
void x87_fcomp( struct x86_function *p, struct x86_reg dst );
void x87_fcos( struct x86_function *p );
void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fdivp( struct x86_function *p, struct x86_reg dst );
void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fdivrp( struct x86_function *p, struct x86_reg dst );
void x87_fild( struct x86_function *p, struct x86_reg arg );
void x87_fist( struct x86_function *p, struct x86_reg dst );
void x87_fistp( struct x86_function *p, struct x86_reg dst );
void x87_fld( struct x86_function *p, struct x86_reg arg );
void x87_fld1( struct x86_function *p );
void x87_fldcw( struct x86_function *p, struct x86_reg arg );
void x87_fldl2e( struct x86_function *p );
void x87_fldln2( struct x86_function *p );
void x87_fldz( struct x86_function *p );
void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fmulp( struct x86_function *p, struct x86_reg dst );
void x87_fnclex( struct x86_function *p );
void x87_fprndint( struct x86_function *p );
void x87_fscale( struct x86_function *p );
void x87_fsin( struct x86_function *p );
void x87_fsincos( struct x86_function *p );
void x87_fsqrt( struct x86_function *p );
void x87_fst( struct x86_function *p, struct x86_reg dst );
void x87_fstp( struct x86_function *p, struct x86_reg dst );
void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubp( struct x86_function *p, struct x86_reg dst );
void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg );
void x87_fsubrp( struct x86_function *p, struct x86_reg dst );
void x87_fxch( struct x86_function *p, struct x86_reg dst );
void x87_fxtract( struct x86_function *p );
void x87_fyl2x( struct x86_function *p );
void x87_fyl2xp1( struct x86_function *p );
void x87_fwait( struct x86_function *p );
void x87_fnstsw( struct x86_function *p, struct x86_reg dst );
void x87_fucompp( struct x86_function *p );
void x87_fucomp( struct x86_function *p, struct x86_reg arg );
void x87_fucom( struct x86_function *p, struct x86_reg arg );
/* Retrieve a reference to one of the function arguments, taking into
 * account any push/pop activity.  Note: explicit manipulation of ESP by
 * other instructions is not tracked.
 *
 * p   - the function being generated
 * arg - which argument to reference
 */
struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg );
#endif
#endif

123
src/arch/x86/sse.c Normal file
View File

@ -0,0 +1,123 @@
/*
* Mesa 3-D graphics library
* Version: 6.0
*
* Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* PentiumIII-SIMD (SSE) optimizations contributed by
* Andre Werthmann <wertmann@cs.uni-potsdam.de>
*/
#include "main/glheader.h"
#include "main/context.h"
#include "math/m_xform.h"
#include "tnl/t_context.h"
#include "sse.h"
#include "x86_xform.h"
#ifdef DEBUG_MATH
#include "math/m_debug.h"
#endif
#ifdef USE_SSE_ASM
/* Declarations of the assembly entry points; visible only when the build
 * defines USE_SSE_ASM.
 * NOTE(review): DECLARE_XFORM_GROUP / DECLARE_NORM_GROUP are defined
 * elsewhere (presumably x86_xform.h); they appear to declare a whole family
 * of routines for the given prefix -- confirm.
 */
DECLARE_XFORM_GROUP( sse, 2 )
DECLARE_XFORM_GROUP( sse, 3 )
#if 1
/* Hand-written subset: exactly the routines that
 * _mesa_init_sse_transform_asm() installs for 4-component points and for
 * normals, instead of the full groups in the #else branch.
 */
/* Some functions are not written in SSE-assembly, because the fpu ones are faster */
extern void _ASMAPI _mesa_sse_transform_normals_no_rot( NORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_rescale_normals( NORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_rescale_normals_no_rot( NORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_points4_general( XFORM_ARGS );
extern void _ASMAPI _mesa_sse_transform_points4_3d( XFORM_ARGS );
/* XXX this function segfaults, see below */
extern void _ASMAPI _mesa_sse_transform_points4_identity( XFORM_ARGS );
/* XXX this one works, see below */
extern void _ASMAPI _mesa_x86_transform_points4_identity( XFORM_ARGS );
#else
DECLARE_NORM_GROUP( sse )
#endif
/* The three routines below are declared here but are not referenced by
 * _mesa_init_sse_transform_asm().
 */
extern void _ASMAPI
_mesa_v16_sse_general_xform( GLfloat *first_vert,
const GLfloat *m,
const GLfloat *src,
GLuint src_stride,
GLuint count );
extern void _ASMAPI
_mesa_sse_project_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride );
extern void _ASMAPI
_mesa_sse_project_clipped_vertices( GLfloat *first,
GLfloat *last,
const GLfloat *m,
GLuint stride,
const GLubyte *clipmask );
#endif
/**
 * Hook the SSE assembly routines into the transform and normal dispatch
 * tables.
 *
 * Compiles to an empty function unless USE_SSE_ASM is defined.  When
 * DEBUG_MATH is also defined, the transform and normal-transform self
 * tests are run after the tables have been filled in.
 */
void _mesa_init_sse_transform_asm( void )
{
#ifdef USE_SSE_ASM
   /* 2- and 3-component points: install the complete SSE groups. */
   ASSIGN_XFORM_GROUP( sse, 2 );
   ASSIGN_XFORM_GROUP( sse, 3 );

#if 1
   /* TODO: Finish these off. */

   /* 4-component points: only some matrix types have an SSE routine. */
   _mesa_transform_tab[4][MATRIX_GENERAL] = _mesa_sse_transform_points4_general;
   _mesa_transform_tab[4][MATRIX_3D] = _mesa_sse_transform_points4_3d;

   /* XXX NOTE: _mesa_sse_transform_points4_identity segfaults with the
    * conformance tests, so the plain x86 version is installed instead.
    */
   _mesa_transform_tab[4][MATRIX_IDENTITY] = _mesa_x86_transform_points4_identity;

   /* Normal transforms that have an SSE implementation. */
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT] = _mesa_sse_transform_normals_no_rot;
   _mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] = _mesa_sse_transform_rescale_normals;
   _mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] = _mesa_sse_transform_rescale_normals_no_rot;
#else
   ASSIGN_XFORM_GROUP( sse, 4 );
   ASSIGN_NORM_GROUP( sse );
#endif

#ifdef DEBUG_MATH
   _math_test_all_transform_functions( "SSE" );
   _math_test_all_normal_transform_functions( "SSE" );
#endif
#endif /* USE_SSE_ASM */
}

36
src/arch/x86/sse.h Normal file
View File

@ -0,0 +1,36 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* PentiumIII-SIMD (SSE) optimizations contributed by
* Andre Werthmann <wertmann@cs.uni-potsdam.de>
*/
#ifndef __SSE_H__
#define __SSE_H__
/* Install the SSE assembly routines into the transform and normal
 * dispatch tables.  Defined in sse.c; the body is empty unless the build
 * defines USE_SSE_ASM.
 */
void _mesa_init_sse_transform_asm( void );
#endif

261
src/arch/x86/sse_normal.S Normal file
View File

@ -0,0 +1,261 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/** TODO:
* - insert PREFETCH instructions to avoid cache misses
* - some more optimizations are possible...
* - for 40-50% more performance in the SSE functions, the
* data (trans-matrix, src_vert, dst_vert) needs to be 16-byte aligned
*/
#ifdef USE_SSE_ASM
#include "assyntax.h"
#include "matypes.h"
#include "norm_args.h"
SEG_TEXT
/* Operand for 32-bit word i (byte offset i * 4) relative to EDX, ESI and
 * EDI.  The routines in this file load EDX with the matrix's MATRIX_INV
 * field, ESI with the current source element and EDI with the current
 * destination element.
 */
#define M(i) REGOFF(i * 4, EDX)
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
/* Stride field of the source vector; only valid while ESI still points at
 * the vector struct (i.e. before ESI is reloaded with V4F_START).
 * NOTE(review): the byte offset 12 is hard-coded here, whereas the
 * sse_xform*.S sources read the stride through the V4F_STRIDE symbol --
 * confirm the two agree.
 */
#define STRIDE REGOFF(12, ESI)
/*
 * _mesa_sse_transform_rescale_normals_no_rot
 *
 * For every source element (ux, uy, uz) writes
 *   D(0) = ux * (m0  * scale)
 *   D(1) = uy * (m5  * scale)
 *   D(2) = uz * (m10 * scale)
 * where m is the array found through the matrix argument's MATRIX_INV
 * field and scale is ARG_SCALE.  The destination advances 16 bytes per
 * element, the source by its stride.
 *
 * Register use: ESI = source, EDI = destination, EDX = m,
 * EAX = source stride, ECX = end-of-destination pointer (loop bound),
 * XMM1 = m5*scale | m0*scale, XMM0 = m10*scale, XMM2 = scratch.
 *
 * If the source count is zero the routine returns without writing the
 * destination count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_sse_transform_rescale_normals_no_rot)
HIDDEN(_mesa_sse_transform_rescale_normals_no_rot)
GLNAME(_mesa_sse_transform_rescale_normals_no_rot):
/* 8 bytes = the two 4-byte registers pushed below.
 * NOTE(review): presumably consumed by the ARG_* macros from norm_args.h
 * to locate the stack arguments -- confirm. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */
MOV_L ( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L ( ECX, ECX )
JZ( LLBL(K_G3TRNNRR_finish) ) /* count was zero; go to finish */
MOV_L ( STRIDE, EAX ) /* stride */
MOV_L ( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest-count */
IMUL_L( CONST(16), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
ALIGNTEXT32
/* Loop-invariant setup: fold the scale into the three matrix terms. */
MOVSS ( M(0), XMM1 ) /* m0 */
MOVSS ( M(5), XMM2 ) /* m5 */
UNPCKLPS( XMM2, XMM1 ) /* m5 | m0 */
MOVSS ( ARG_SCALE, XMM0 ) /* scale */
SHUFPS ( CONST(0x0), XMM0, XMM0 ) /* scale | scale */
MULPS ( XMM0, XMM1 ) /* m5*scale | m0*scale */
MULSS ( M(10), XMM0 ) /* m10*scale */
ALIGNTEXT32
LLBL(K_G3TRNNRR_top):
MOVLPS ( S(0), XMM2 ) /* uy | ux */
MULPS ( XMM1, XMM2 ) /* uy*m5*scale | ux*m0*scale */
MOVLPS ( XMM2, D(0) ) /* ->D(1) | D(0) */
MOVSS ( S(2), XMM2 ) /* uz */
MULSS ( XMM0, XMM2 ) /* uz*m10*scale */
MOVSS ( XMM2, D(2) ) /* ->D(2) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_G3TRNNRR_skip):
ADD_L ( CONST(16), EDI ) /* next dest element */
ADD_L ( EAX, ESI ) /* next source element (+stride) */
CMP_L ( ECX, EDI ) /* reached the end pointer? */
JNE ( LLBL(K_G3TRNNRR_top) )
LLBL(K_G3TRNNRR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_rescale_normals
 *
 * For every source element (ux, uy, uz) writes
 *   D(0) = ux*(m0*scale) + uy*(m1*scale) + uz*(m2*scale)
 *   D(1) = ux*(m4*scale) + uy*(m5*scale) + uz*(m6*scale)
 *   D(2) = uz*(m10*scale) + uy*(m9*scale) + ux*(m8*scale)
 * where m is the array found through the matrix argument's MATRIX_INV
 * field and scale is ARG_SCALE.  The destination advances 16 bytes per
 * element, the source by its stride.
 *
 * Register use: ESI = source, EDI = destination, EDX = m,
 * EAX = source stride, ECX = end-of-destination pointer (loop bound),
 * XMM0/XMM1/XMM2 = scaled (m4|m0), (m5|m1), (m6|m2),
 * XMM6 = m8*scale, XMM7 = m9*scale, XMM3-XMM5 = per-element scratch.
 *
 * If the source count is zero the routine returns without writing the
 * destination count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_sse_transform_rescale_normals)
HIDDEN(_mesa_sse_transform_rescale_normals)
GLNAME(_mesa_sse_transform_rescale_normals):
/* 8 bytes = the two 4-byte registers pushed below.
 * NOTE(review): presumably consumed by the ARG_* macros from norm_args.h
 * to locate the stack arguments -- confirm. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */
MOV_L ( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L ( ECX, ECX )
JZ( LLBL(K_G3TRNR_finish) ) /* count was zero; go to finish */
MOV_L ( STRIDE, EAX ) /* stride */
MOV_L ( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest-count */
IMUL_L( CONST(16), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
ALIGNTEXT32
/* Loop-invariant setup: pre-multiply the matrix terms by the scale.
 * XMM4 holds the scale only during this setup; the loop reuses it as
 * scratch. */
MOVSS ( M(0), XMM0 ) /* m0 */
MOVSS ( M(4), XMM1 ) /* m4 */
UNPCKLPS( XMM1, XMM0 ) /* m4 | m0 */
MOVSS ( ARG_SCALE, XMM4 ) /* scale */
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* scale | scale */
MULPS ( XMM4, XMM0 ) /* m4*scale | m0*scale */
MOVSS ( M(1), XMM1 ) /* m1 */
MOVSS ( M(5), XMM2 ) /* m5 */
UNPCKLPS( XMM2, XMM1 ) /* m5 | m1 */
MULPS ( XMM4, XMM1 ) /* m5*scale | m1*scale */
MOVSS ( M(2), XMM2 ) /* m2 */
MOVSS ( M(6), XMM3 ) /* m6 */
UNPCKLPS( XMM3, XMM2 ) /* m6 | m2 */
MULPS ( XMM4, XMM2 ) /* m6*scale | m2*scale */
MOVSS ( M(8), XMM6 ) /* m8 */
MULSS ( ARG_SCALE, XMM6 ) /* m8*scale */
MOVSS ( M(9), XMM7 ) /* m9 */
MULSS ( ARG_SCALE, XMM7 ) /* m9*scale */
ALIGNTEXT32
LLBL(K_G3TRNR_top):
/* First two output components, computed as a pair in the low lanes. */
MOVSS ( S(0), XMM3 ) /* ux */
SHUFPS ( CONST(0x0), XMM3, XMM3 ) /* ux | ux */
MULPS ( XMM0, XMM3 ) /* ux*m4*scale | ux*m0*scale */
MOVSS ( S(1), XMM4 ) /* uy */
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* uy | uy */
MULPS ( XMM1, XMM4 ) /* uy*m5*scale | uy*m1*scale */
MOVSS ( S(2), XMM5 ) /* uz */
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* uz | uz */
MULPS ( XMM2, XMM5 ) /* uz*m6*scale | uz*m2*scale */
ADDPS ( XMM4, XMM3 )
ADDPS ( XMM5, XMM3 )
MOVLPS ( XMM3, D(0) ) /* ->D(1) | D(0) */
/* Third output component.  m10*scale is rebuilt from memory on every
 * iteration rather than being kept in a register. */
MOVSS ( M(10), XMM3 ) /* m10 */
MULSS ( ARG_SCALE, XMM3 ) /* m10*scale */
MULSS ( S(2), XMM3 ) /* m10*scale*uz */
MOVSS ( S(1), XMM4 ) /* uy */
MULSS ( XMM7, XMM4 ) /* uy*m9*scale */
MOVSS ( S(0), XMM5 ) /* ux */
MULSS ( XMM6, XMM5 ) /* ux*m8*scale */
ADDSS ( XMM4, XMM3 )
ADDSS ( XMM5, XMM3 )
MOVSS ( XMM3, D(2) ) /* ->D(2) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_G3TRNR_skip):
ADD_L ( CONST(16), EDI ) /* next dest element */
ADD_L ( EAX, ESI ) /* next source element (+stride) */
CMP_L ( ECX, EDI ) /* reached the end pointer? */
JNE ( LLBL(K_G3TRNR_top) )
LLBL(K_G3TRNR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_normals_no_rot
 *
 * For every source element (ux, uy, uz) writes
 *   D(0) = ux * m0
 *   D(1) = uy * m5
 *   D(2) = uz * m10
 * where m is the array found through the matrix argument's MATRIX_INV
 * field.  No scale factor is applied.  The destination advances 16 bytes
 * per element, the source by its stride.
 *
 * Register use: ESI = source, EDI = destination, EDX = m,
 * EAX = source stride, ECX = end-of-destination pointer (loop bound),
 * XMM0 = m5 | m0, XMM1 = m10, XMM2 = scratch.
 *
 * If the source count is zero the routine returns without writing the
 * destination count.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_sse_transform_normals_no_rot)
HIDDEN(_mesa_sse_transform_normals_no_rot)
GLNAME(_mesa_sse_transform_normals_no_rot):
/* 8 bytes = the two 4-byte registers pushed below.
 * NOTE(review): presumably consumed by the ARG_* macros from norm_args.h
 * to locate the stack arguments -- confirm. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
MOV_L ( ARG_IN, ESI ) /* ptr to source GLvector3f */
MOV_L ( ARG_DEST, EDI ) /* ptr to dest GLvector3f */
MOV_L ( ARG_MAT, EDX ) /* ptr to matrix */
MOV_L ( REGOFF(MATRIX_INV, EDX), EDX) /* matrix->inv */
MOV_L ( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L ( ECX, ECX )
JZ( LLBL(K_G3TNNRR_finish) ) /* count was zero; go to finish */
MOV_L ( STRIDE, EAX ) /* stride */
MOV_L ( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest-count */
IMUL_L( CONST(16), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
ALIGNTEXT32
/* Loop-invariant setup: keep the three matrix terms in registers. */
MOVSS( M(0), XMM0 ) /* m0 */
MOVSS( M(5), XMM1 ) /* m5 */
UNPCKLPS( XMM1, XMM0 ) /* m5 | m0 */
MOVSS( M(10), XMM1 ) /* m10 */
ALIGNTEXT32
LLBL(K_G3TNNRR_top):
MOVLPS( S(0), XMM2 ) /* uy | ux */
MULPS( XMM0, XMM2 ) /* uy*m5 | ux*m0 */
MOVLPS( XMM2, D(0) ) /* ->D(1) | D(0) */
MOVSS( S(2), XMM2 ) /* uz */
MULSS( XMM1, XMM2 ) /* uz*m10 */
MOVSS( XMM2, D(2) ) /* ->D(2) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_G3TNNRR_skip):
ADD_L ( CONST(16), EDI ) /* next dest element */
ADD_L ( EAX, ESI ) /* next source element (+stride) */
CMP_L ( ECX, EDI ) /* reached the end pointer? */
JNE ( LLBL(K_G3TNNRR_top) )
LLBL(K_G3TNNRR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
#endif
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

446
src/arch/x86/sse_xform1.S Normal file
View File

@ -0,0 +1,446 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/** TODO:
* - insert PREFETCH instructions to avoid cache misses
* - some more optimizations are possible...
* - for 40-50% more performance in the SSE functions, the
* data (trans-matrix, src_vert, dst_vert) needs to be 16-byte aligned
*/
#ifdef USE_SSE_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/* Operand for 32-bit word i (byte offset i * 4) relative to ESI, EDI and
 * EDX.  The routines in this file load ESI with the current source
 * element, EDI with the current destination element and EDX with the
 * matrix argument.
 */
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
#define M(i) REGOFF(i * 4, EDX)
/*
 * _mesa_sse_transform_points1_general
 *
 * Reads only the first component (ox) of every source element and writes
 *   D(j) = ox * m[j] + m[12 + j],   j = 0..3
 * The destination is marked as 4-component (VEC_SIZE_4 flag, size = 4)
 * and receives the source count.  The destination advances 16 bytes per
 * element, the source by its stride.
 *
 * M(0) and M(12) are loaded with MOVAPS, which requires 16-byte-aligned
 * memory operands; the result is stored with the unaligned MOVUPS.
 *
 * If the source count is zero the routine returns before touching the
 * destination vector.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_general)
HIDDEN( _mesa_sse_transform_points1_general )
GLNAME( _mesa_sse_transform_points1_general ):
/* 8 bytes = the two 4-byte registers pushed below; the explicit "+8" in
 * the two argument loads is the same adjustment. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
CMP_L( CONST(0), ECX ) /* count == 0 ? */
JE( LLBL(K_GTP1GR_finish) ) /* yes -> nothing to do. */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
ALIGNTEXT32
/* Loop-invariant matrix terms. */
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */
MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */
ALIGNTEXT32
LLBL(K_GTP1GR_top):
MOVSS( S(0), XMM2 ) /* ox */
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */
MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
ADDPS( XMM1, XMM2 ) /* + | + | + | + */
MOVUPS( XMM2, D(0) ) /* ->D(3) | D(2) | D(1) | D(0) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_GTP1GR_skip):
ADD_L ( CONST(16), EDI ) /* next dest element */
ADD_L ( EAX, ESI ) /* next source element (+stride) */
CMP_L ( ECX, EDI ) /* reached the end pointer? */
JNE ( LLBL(K_GTP1GR_top) )
LLBL(K_GTP1GR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points1_identity
 *
 * Copies the first 32-bit component of every source element to the
 * destination unchanged (through EDX; no SSE register is used and the
 * matrix argument is never read).  The destination is marked as
 * 1-component (VEC_SIZE_1 flag, size = 1) and receives the source count.
 * The destination advances 16 bytes per element, the source by its stride.
 *
 * If source and destination start at the same address the copy loop is
 * skipped; the destination flags/count/size have already been updated by
 * then.  If the source count is zero the routine returns before touching
 * the destination vector.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_identity)
HIDDEN(_mesa_sse_transform_points1_identity)
GLNAME( _mesa_sse_transform_points1_identity ):
/* 8 bytes = the two 4-byte registers pushed below; the explicit "+8" in
 * the two argument loads is the same adjustment. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP1IR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(1), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
CMP_L( ESI, EDI ) /* same start address? */
JE( LLBL(K_GTP1IR_finish) ) /* yes -> nothing to copy */
ALIGNTEXT32
LLBL(K_GTP1IR_top):
MOV_L( S(0), EDX ) /* ox */
MOV_L( EDX, D(0) ) /* ->D(0) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_GTP1IR_skip):
ADD_L ( CONST(16), EDI ) /* next dest element */
ADD_L ( EAX, ESI ) /* next source element (+stride) */
CMP_L ( ECX, EDI ) /* reached the end pointer? */
JNE ( LLBL(K_GTP1IR_top) )
LLBL(K_GTP1IR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points1_3d_no_rot
 *
 * Reads only the first component (ox) of every source element and writes
 *   D(0) = ox * m0 + m12
 *   D(1) = m13
 *   D(2) = m14
 * The destination is marked as 3-component (VEC_SIZE_3 flag, size = 3)
 * and receives the source count.  The destination advances 16 bytes per
 * element, the source by its stride.
 *
 * If the source count is zero the routine returns before touching the
 * destination vector.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_3d_no_rot)
HIDDEN(_mesa_sse_transform_points1_3d_no_rot)
GLNAME(_mesa_sse_transform_points1_3d_no_rot):
/* 8 bytes = the two 4-byte registers pushed below; the explicit "+8" in
 * the two argument loads is the same adjustment. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP13DNRR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
ALIGNTEXT32
/* Loop-invariant matrix terms. */
MOVSS( M(0), XMM0 ) /* m0 */
MOVSS( M(12), XMM1 ) /* m12 */
MOVSS( M(13), XMM2 ) /* m13 */
MOVSS( M(14), XMM3 ) /* m14 */
ALIGNTEXT32
LLBL(K_GTP13DNRR_top):
MOVSS( S(0), XMM4 ) /* ox */
MULSS( XMM0, XMM4 ) /* ox*m0 */
ADDSS( XMM1, XMM4 ) /* ox*m0+m12 */
MOVSS( XMM4, D(0) ) /* ->D(0) */
MOVSS( XMM2, D(1) ) /* m13->D(1) */
MOVSS( XMM3, D(2) ) /* m14->D(2) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_GTP13DNRR_skip):
ADD_L ( CONST(16), EDI ) /* next dest element */
ADD_L ( EAX, ESI ) /* next source element (+stride) */
CMP_L ( ECX, EDI ) /* reached the end pointer? */
JNE ( LLBL(K_GTP13DNRR_top) )
LLBL(K_GTP13DNRR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points1_perspective
 *
 * Reads only the first component (ox) of every source element and writes
 *   D(0) = ox * m0
 *   D(1) = 0
 *   D(2) = m14
 *   D(3) = 0
 * The destination is marked as 4-component (VEC_SIZE_4 flag, size = 4)
 * and receives the source count.  The destination advances 16 bytes per
 * element, the source by its stride.
 *
 * If the source count is zero the routine returns before touching the
 * destination vector.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_perspective)
HIDDEN(_mesa_sse_transform_points1_perspective)
GLNAME(_mesa_sse_transform_points1_perspective):
/* 8 bytes = the two 4-byte registers pushed below; the explicit "+8" in
 * the two argument loads is the same adjustment. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP13PR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
ALIGNTEXT32
/* Loop-invariant values: a zero register and the two matrix terms. */
XORPS( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */
MOVSS( M(0), XMM1 ) /* m0 */
MOVSS( M(14), XMM2 ) /* m14 */
ALIGNTEXT32
LLBL(K_GTP13PR_top):
MOVSS( S(0), XMM3 ) /* ox */
MULSS( XMM1, XMM3 ) /* ox*m0 */
MOVSS( XMM3, D(0) ) /* ox*m0->D(0) */
MOVSS( XMM2, D(2) ) /* m14->D(2) */
MOVSS( XMM0, D(1) ) /* 0->D(1) */
MOVSS( XMM0, D(3) ) /* 0->D(3) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_GTP13PR_skip):
ADD_L( CONST(16), EDI ) /* next dest element */
ADD_L( EAX, ESI ) /* next source element (+stride) */
CMP_L( ECX, EDI ) /* reached the end pointer? */
JNE( LLBL(K_GTP13PR_top) )
LLBL(K_GTP13PR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points1_2d
 *
 * Reads only the first component (ox) of every source element and writes
 *   D(0) = ox * m0 + m12
 *   D(1) = ox * m1 + m13
 * The destination is marked as 2-component (VEC_SIZE_2 flag, size = 2)
 * and receives the source count.  The destination advances 16 bytes per
 * element, the source by its stride.
 *
 * Only the low two lanes of XMM0/XMM1 are loaded (MOVLPS) and only the low
 * two lanes of the result are stored; the upper lanes are don't-care.
 *
 * If the source count is zero the routine returns before touching the
 * destination vector.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d)
HIDDEN(_mesa_sse_transform_points1_2d)
GLNAME(_mesa_sse_transform_points1_2d):
/* 8 bytes = the two 4-byte registers pushed below; the explicit "+8" in
 * the two argument loads is the same adjustment. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP13P2DR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
ALIGNTEXT32
/* Loop-invariant matrix terms (low two lanes only). */
MOVLPS( M(0), XMM0 ) /* m1 | m0 */
MOVLPS( M(12), XMM1 ) /* m13 | m12 */
ALIGNTEXT32
LLBL(K_GTP13P2DR_top):
MOVSS( S(0), XMM2 ) /* ox */
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */
MULPS( XMM0, XMM2 ) /* - | - | ox*m1 | ox*m0 */
ADDPS( XMM1, XMM2 ) /* - | - | ox*m1+m13 | ox*m0+m12 */
MOVLPS( XMM2, D(0) ) /* ->D(1) | D(0) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_GTP13P2DR_skip):
ADD_L ( CONST(16), EDI ) /* next dest element */
ADD_L ( EAX, ESI ) /* next source element (+stride) */
CMP_L ( ECX, EDI ) /* reached the end pointer? */
JNE ( LLBL(K_GTP13P2DR_top) )
LLBL(K_GTP13P2DR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points1_2d_no_rot
 *
 * Reads only the first component (ox) of every source element and writes
 *   D(0) = ox * m0 + m12
 *   D(1) = m13
 * The destination is marked as 2-component (VEC_SIZE_2 flag, size = 2)
 * and receives the source count.  The destination advances 16 bytes per
 * element, the source by its stride.
 *
 * If the source count is zero the routine returns before touching the
 * destination vector.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_2d_no_rot)
HIDDEN(_mesa_sse_transform_points1_2d_no_rot)
GLNAME(_mesa_sse_transform_points1_2d_no_rot):
/* 8 bytes = the two 4-byte registers pushed below; the explicit "+8" in
 * the two argument loads is the same adjustment. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP13P2DNRR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
ALIGNTEXT32
/* Loop-invariant matrix terms. */
MOVSS( M(0), XMM0 ) /* m0 */
MOVSS( M(12), XMM1 ) /* m12 */
MOVSS( M(13), XMM2 ) /* m13 */
ALIGNTEXT32
LLBL(K_GTP13P2DNRR_top):
MOVSS( S(0), XMM3 ) /* ox */
MULSS( XMM0, XMM3 ) /* ox*m0 */
ADDSS( XMM1, XMM3 ) /* ox*m0+m12 */
MOVSS( XMM3, D(0) ) /* ->D(0) */
MOVSS( XMM2, D(1) ) /* m13->D(1) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_GTP13P2DNRR_skip):
ADD_L( CONST(16), EDI ) /* next dest element */
ADD_L( EAX, ESI ) /* next source element (+stride) */
CMP_L( ECX, EDI ) /* reached the end pointer? */
JNE( LLBL(K_GTP13P2DNRR_top) )
LLBL(K_GTP13P2DNRR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points1_3d
 *
 * Reads only the first component (ox) of every source element and writes
 *   D(j) = ox * m[j] + m[12 + j],   j = 0..2
 * (the fourth lane is computed but never stored).  The destination is
 * marked as 3-component (VEC_SIZE_3 flag, size = 3) and receives the
 * source count.  The destination advances 16 bytes per element, the
 * source by its stride.
 *
 * M(0) and M(12) are loaded with MOVAPS, which requires 16-byte-aligned
 * memory operands.
 *
 * If the source count is zero the routine returns before touching the
 * destination vector.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points1_3d)
HIDDEN(_mesa_sse_transform_points1_3d)
GLNAME(_mesa_sse_transform_points1_3d):
/* 8 bytes = the two 4-byte registers pushed below; the explicit "+8" in
 * the two argument loads is the same adjustment. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP13P3DR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
ALIGNTEXT32
/* Loop-invariant matrix terms. */
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */
MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */
ALIGNTEXT32
LLBL(K_GTP13P3DR_top):
MOVSS( S(0), XMM2 ) /* ox */
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */
MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
ADDPS( XMM1, XMM2 ) /* +m15 | +m14 | +m13 | +m12 */
MOVLPS( XMM2, D(0) ) /* - | - | ->D(1)| ->D(0)*/
/* Bring the third lane down to lane 0 so MOVSS can store it. */
UNPCKHPS( XMM2, XMM2 ) /* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */
MOVSS( XMM2, D(2) ) /* ox*m2+m14->D(2) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_GTP13P3DR_skip):
ADD_L( CONST(16), EDI ) /* next dest element */
ADD_L( EAX, ESI ) /* next source element (+stride) */
CMP_L( ECX, EDI ) /* reached the end pointer? */
JNE( LLBL(K_GTP13P3DR_top) )
LLBL(K_GTP13P3DR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
#endif
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

466
src/arch/x86/sse_xform2.S Normal file
View File

@ -0,0 +1,466 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/** TODO:
* - insert PREFETCH instructions to avoid cache misses
* - some more optimizations are possible...
* - for 40-50% more performance in the SSE functions, the
* data (trans-matrix, src_vert, dst_vert) needs to be 16-byte aligned
*/
#ifdef USE_SSE_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/* Operand for 32-bit word i (byte offset i * 4) relative to ESI, EDI and
 * EDX.  The routines in this file load ESI with the current source
 * element, EDI with the current destination element and EDX with the
 * matrix argument.
 */
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
#define M(i) REGOFF(i * 4, EDX)
/*
 * _mesa_sse_transform_points2_general
 *
 * Reads the first two components (ox, oy) of every source element and
 * writes
 *   D(j) = ox * m[j] + oy * m[4 + j] + m[12 + j],   j = 0..3
 * The destination is marked as 4-component (VEC_SIZE_4 flag, size = 4)
 * and receives the source count.  The destination advances 16 bytes per
 * element, the source by its stride.
 *
 * Both the matrix loads and the result store use MOVAPS, which requires
 * 16-byte-aligned memory operands; the destination elements must
 * therefore be 16-byte aligned as well.
 *
 * If the source count is zero the routine returns before touching the
 * destination vector.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_general)
HIDDEN (_mesa_sse_transform_points2_general)
GLNAME( _mesa_sse_transform_points2_general ):
/* 8 bytes = the two 4-byte registers pushed below; the explicit "+8" in
 * the two argument loads is the same adjustment. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX )
JZ( LLBL(K_GTP2GR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
ALIGNTEXT32
/* Loop-invariant matrix terms. */
MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */
MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */
MOVAPS( M(12), XMM2 ) /* m15 | m14 | m13 | m12 */
ALIGNTEXT32
LLBL(K_GTP2GR_top):
MOVSS( S(0), XMM3 ) /* ox */
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox | ox */
MULPS( XMM0, XMM3 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
MOVSS( S(1), XMM4 ) /* oy */
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy | oy */
MULPS( XMM1, XMM4 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
ADDPS( XMM4, XMM3 ) /* ox terms + oy terms */
ADDPS( XMM2, XMM3 ) /* +m15 | +m14 | +m13 | +m12 */
MOVAPS( XMM3, D(0) ) /* ->D(3) | D(2) | D(1) | D(0) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_GTP2GR_skip):
ADD_L ( CONST(16), EDI ) /* next dest element */
ADD_L ( EAX, ESI ) /* next source element (+stride) */
CMP_L ( ECX, EDI ) /* reached the end pointer? */
JNE ( LLBL(K_GTP2GR_top) )
LLBL(K_GTP2GR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_identity
 *
 * Copies the first two 32-bit components of every source element to the
 * destination unchanged (through EDX; no SSE register is used and the
 * matrix argument is never read).  The destination is marked as
 * 2-component (VEC_SIZE_2 flag, size = 2) and receives the source count.
 * The destination advances 16 bytes per element, the source by its stride.
 *
 * If source and destination start at the same address the copy loop is
 * skipped; the destination flags/count/size have already been updated by
 * then.  If the source count is zero the routine returns before touching
 * the destination vector.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_identity)
HIDDEN(_mesa_sse_transform_points2_identity)
GLNAME( _mesa_sse_transform_points2_identity ):
/* 8 bytes = the two 4-byte registers pushed below; the explicit "+8" in
 * the two argument loads is the same adjustment. */
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP2IR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (end pointer) */
CMP_L( ESI, EDI ) /* same start address? */
JE( LLBL(K_GTP2IR_finish) ) /* yes -> nothing to copy */
ALIGNTEXT32
LLBL(K_GTP2IR_top):
MOV_L ( S(0), EDX ) /* ox */
MOV_L ( EDX, D(0) ) /* ->D(0) */
MOV_L ( S(1), EDX ) /* oy */
MOV_L ( EDX, D(1) ) /* ->D(1) */
/* The _skip label is not jumped to anywhere in this routine. */
LLBL(K_GTP2IR_skip):
ADD_L ( CONST(16), EDI ) /* next dest element */
ADD_L ( EAX, ESI ) /* next source element (+stride) */
CMP_L ( ECX, EDI ) /* reached the end pointer? */
JNE ( LLBL(K_GTP2IR_top) )
LLBL(K_GTP2IR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_3d_no_rot
 *
 * For each source vertex (ox, oy) writes three components:
 *     D(0) = ox * m0 + m12
 *     D(1) = oy * m5 + m13
 *     D(2) = m14
 * Dest is tagged VEC_SIZE_3 / size 3 and receives the source count.
 * Returns without touching dest when the source count is zero.
 *
 * Only MOVSS/MOVLPS memory accesses are used, so no 16-byte alignment is
 * required by the instructions themselves.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d_no_rot)
HIDDEN(_mesa_sse_transform_points2_3d_no_rot)
GLNAME(_mesa_sse_transform_points2_3d_no_rot):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP23DNRR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
/* MOVLPS in the loop only replaces the low half of XMM0, so zero the rest once */
XORPS( XMM0, XMM0 ) /* clean the working register */
ALIGNTEXT32
/* Loop-invariant constants: XMM1 = scale pair, XMM2 = translation pair, XMM3 = m14 */
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */
MOVSS ( M(14), XMM3 ) /* - | - | - | m14 */
ALIGNTEXT32
LLBL(K_GTP23DNRR_top):
MOVLPS ( S(0), XMM0 ) /* - | - | oy | ox */
MULPS ( XMM1, XMM0 ) /* - | - | oy*m5 | ox*m0 */
ADDPS ( XMM2, XMM0 ) /* - | - | +m13 | +m12 */
MOVLPS ( XMM0, D(0) ) /* -> D(1) | -> D(0) */
MOVSS ( XMM3, D(2) ) /* -> D(2) (constant m14 for every vertex) */
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTP23DNRR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP23DNRR_top) )
LLBL(K_GTP23DNRR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_perspective
 *
 * For each source vertex (ox, oy) writes four components:
 *     D(0) = ox * m0
 *     D(1) = oy * m5
 *     D(2) = m14
 *     D(3) = 0
 * Dest is tagged VEC_SIZE_4 / size 4 and receives the source count.
 * Returns without touching dest when the source count is zero.
 *
 * NOTE(review): XMM4 is loaded with MOVLPS only, so its upper two lanes
 * hold whatever the caller left there when MULPS runs.  Those lanes are
 * never stored, so results are unaffected.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_perspective)
HIDDEN(_mesa_sse_transform_points2_perspective)
GLNAME(_mesa_sse_transform_points2_perspective):
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP23PR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
ALIGNTEXT32
/* Loop-invariant constants: XMM1 = scale pair, XMM3 = m14, XMM0 = zero */
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
MOVSS ( M(14), XMM3 ) /* m14 */
XORPS ( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */
ALIGNTEXT32
LLBL(K_GTP23PR_top):
MOVLPS( S(0), XMM4 ) /* oy | ox */
MULPS( XMM1, XMM4 ) /* oy*m5 | ox*m0 */
MOVLPS( XMM4, D(0) ) /* ->D(1) | ->D(0) */
MOVSS( XMM3, D(2) ) /* ->D(2) (constant m14) */
MOVSS( XMM0, D(3) ) /* ->D(3) (constant 0) */
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTP23PR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(K_GTP23PR_top) )
LLBL(K_GTP23PR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_2d
 *
 * For each source vertex (ox, oy) writes two components:
 *     D(0) = ox * m0 + oy * m4 + m12
 *     D(1) = ox * m1 + oy * m5 + m13
 * Dest is tagged VEC_SIZE_2 / size 2 and receives the source count.
 * Returns without touching dest when the source count is zero.
 *
 * NOTE(review): XMM0-XMM2 are filled with MOVLPS, so only their low two
 * lanes are defined; the upper lanes take part in MULPS/ADDPS but are
 * never stored.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d)
HIDDEN(_mesa_sse_transform_points2_2d)
GLNAME(_mesa_sse_transform_points2_2d):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP23P2DR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
ALIGNTEXT32
/* Low halves of three matrix rows, kept resident for the loop */
MOVLPS( M(0), XMM0 ) /* m1 | m0 */
MOVLPS( M(4), XMM1 ) /* m5 | m4 */
MOVLPS( M(12), XMM2 ) /* m13 | m12 */
ALIGNTEXT32
LLBL(K_GTP23P2DR_top):
MOVSS( S(0), XMM3 ) /* ox */
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox */
MULPS( XMM0, XMM3 ) /* ox*m1 | ox*m0 */
MOVSS( S(1), XMM4 ) /* oy */
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy */
MULPS( XMM1, XMM4 ) /* oy*m5 | oy*m4 */
/* sum the two products, then add the translation pair */
ADDPS( XMM4, XMM3 )
ADDPS( XMM2, XMM3 )
MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTP23P2DR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP23P2DR_top) )
LLBL(K_GTP23P2DR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_2d_no_rot
 *
 * For each source vertex (ox, oy) writes two components:
 *     D(0) = ox * m0 + m12
 *     D(1) = oy * m5 + m13
 * Dest is tagged VEC_SIZE_2 / size 2 and receives the source count.
 * Returns without touching dest when the source count is zero.
 *
 * NOTE(review): XMM0 is loaded with MOVLPS only, so its upper two lanes
 * are whatever the caller left there; they are never stored.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_2d_no_rot)
HIDDEN(_mesa_sse_transform_points2_2d_no_rot)
GLNAME(_mesa_sse_transform_points2_2d_no_rot):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP23P2DNRR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
ALIGNTEXT32
/* Loop-invariant constants: XMM1 = scale pair, XMM2 = translation pair */
MOVSS ( M(0), XMM1 ) /* m0 */
MOVSS ( M(5), XMM2 ) /* m5 */
UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */
MOVLPS ( M(12), XMM2 ) /* m13 | m12 */
ALIGNTEXT32
LLBL(K_GTP23P2DNRR_top):
MOVLPS( S(0), XMM0 ) /* oy | ox */
MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */
ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */
MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTP23P2DNRR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(K_GTP23P2DNRR_top) )
LLBL(K_GTP23P2DNRR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points2_3d
 *
 * For each source vertex (ox, oy) writes three components:
 *     D(k) = ox * m[k] + oy * m[4+k] + m[12+k]      for k = 0, 1, 2
 * Dest is tagged VEC_SIZE_3 / size 3 and receives the source count.
 * Returns without touching dest when the source count is zero.
 *
 * The matrix rows are loaded with MOVAPS, which requires the matrix to be
 * 16-byte aligned.  The fourth lane is computed as well but never stored.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points2_3d)
HIDDEN(_mesa_sse_transform_points2_3d)
GLNAME(_mesa_sse_transform_points2_3d):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP23P3DR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
ALIGNTEXT32
/* Full rows are loaded; the top lane (m3 / m7 / m15) is carried but unused */
MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */
MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */
MOVAPS( M(12), XMM2 ) /* m14 | m13 | m12 */
ALIGNTEXT32
LLBL(K_GTP23P3DR_top):
MOVSS( S(0), XMM3 ) /* ox */
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox */
MULPS( XMM0, XMM3 ) /* ox*m2 | ox*m1 | ox*m0 */
MOVSS( S(1), XMM4 ) /* oy */
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* oy | oy | oy */
MULPS( XMM1, XMM4 ) /* oy*m6 | oy*m5 | oy*m4 */
/* sum the two products, then add the translation row */
ADDPS( XMM4, XMM3 )
ADDPS( XMM2, XMM3 )
MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */
/* bring lane 2 down to lane 0 so MOVSS can store it */
UNPCKHPS( XMM3, XMM3 )
MOVSS( XMM3, D(2) ) /* ->D(2) */
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTP23P3DR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(K_GTP23P3DR_top) )
LLBL(K_GTP23P3DR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
#endif
/* On ELF/Linux, emit an empty .note.GNU-stack section (no flags set) so the
 * linker does not mark the stack executable because of this object. */
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

512
src/arch/x86/sse_xform3.S Normal file
View File

@ -0,0 +1,512 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/** TODO:
* - insert PREFETCH instructions to avoid cache-misses !
* - some more optimizations are possible...
* - for 40-50% more performance in the SSE-functions, the
* data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned !
*/
#ifdef USE_SSE_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/*
 * Operand helpers: element i sits at byte offset i*4 from the base register.
 * S = current source vertex (ESI), D = current dest vertex (EDI),
 * M = transformation matrix (EDX).
 */
#define S(i) REGOFF(i * 4, ESI)
#define D(i) REGOFF(i * 4, EDI)
#define M(i) REGOFF(i * 4, EDX)
/*
 * _mesa_sse_transform_points3_general
 *
 * For each source vertex (ox, oy, oz) computes the four-component result
 *     dest = ox * M[0..3] + oy * M[4..7] + oz * M[8..11] + M[12..15]
 * Dest is tagged VEC_SIZE_4 / size 4 and receives the source count.
 * Returns without touching dest when the source count is zero.
 *
 * The matrix rows are loaded and each result is stored with MOVAPS, which
 * requires 16-byte aligned memory operands.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_general)
HIDDEN(_mesa_sse_transform_points3_general)
GLNAME( _mesa_sse_transform_points3_general ):
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
CMP_L ( CONST(0), ECX ) /* count == 0 ? */
JE ( LLBL(K_GTPGR_finish) ) /* yes -> nothing to do. */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
ALIGNTEXT32
/* Matrix rows stay resident in XMM0-XMM3; lanes are listed high | ... | low */
MOVAPS ( REGOFF(0, EDX), XMM0 ) /* m3 | m2 | m1 | m0 */
MOVAPS ( REGOFF(16, EDX), XMM1 ) /* m7 | m6 | m5 | m4 */
MOVAPS ( REGOFF(32, EDX), XMM2 ) /* m11 | m10 | m9 | m8 */
MOVAPS ( REGOFF(48, EDX), XMM3 ) /* m15 | m14 | m13 | m12 */
ALIGNTEXT32
LLBL(K_GTPGR_top):
MOVSS ( REGOFF(0, ESI), XMM4 ) /* | | | ox */
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */
MOVSS ( REGOFF(4, ESI), XMM5 ) /* | | | oy */
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */
MOVSS ( REGOFF(8, ESI), XMM6 ) /* | | | oz */
SHUFPS ( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */
MULPS ( XMM0, XMM4 ) /* m3*ox | m2*ox | m1*ox | m0*ox */
MULPS ( XMM1, XMM5 ) /* m7*oy | m6*oy | m5*oy | m4*oy */
MULPS ( XMM2, XMM6 ) /* m11*oz | m10*oz | m9*oz | m8*oz */
/* sum the three products, then add the M[12..15] row */
ADDPS ( XMM5, XMM4 )
ADDPS ( XMM6, XMM4 )
ADDPS ( XMM3, XMM4 )
/* store all four result components (aligned store) */
MOVAPS ( XMM4, REGOFF(0, EDI) )
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTPGR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTPGR_top) )
LLBL(K_GTPGR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_identity
 *
 * Copies the first three 32-bit components of every source vertex to dest
 * unchanged.  Dest is tagged VEC_SIZE_3 / size 3 and receives the source
 * count.  The copy loop is skipped when the source count is zero or when
 * the source and dest data pointers are equal (in-place case); in the
 * in-place case the dest flags/count/size have already been updated.
 *
 * The matrix argument is never read.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_identity)
HIDDEN(_mesa_sse_transform_points3_identity)
GLNAME( _mesa_sse_transform_points3_identity ):
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTPIR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
/* source data == dest data: nothing to copy */
CMP_L( ESI, EDI )
JE( LLBL(K_GTPIR_finish) )
ALIGNTEXT32
LLBL(K_GTPIR_top):
/* components 0 and 1 as one 64-bit move, component 2 as a scalar move */
MOVLPS ( S(0), XMM0 )
MOVLPS ( XMM0, D(0) )
MOVSS ( S(2), XMM0 )
MOVSS ( XMM0, D(2) )
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTPIR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTPIR_top) )
LLBL(K_GTPIR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_3d_no_rot
 *
 * For each source vertex (ox, oy, oz) writes three components:
 *     D(0) = ox * m0  + m12
 *     D(1) = oy * m5  + m13
 *     D(2) = oz * m10 + m14
 * Dest is tagged VEC_SIZE_3 / size 3 and receives the source count.
 * Returns without touching dest when the source count is zero.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_3d_no_rot)
HIDDEN(_mesa_sse_transform_points3_3d_no_rot)
GLNAME(_mesa_sse_transform_points3_3d_no_rot):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP3DNRR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
XORPS( XMM0, XMM0 ) /* clean the working register */
ALIGNTEXT32
/* Loop-invariant constants: XMM1 = (m5|m0), XMM2 = (m13|m12), XMM3 = m10, XMM4 = m14 */
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */
MOVSS ( M(10), XMM3 ) /* - | - | - | m10 */
MOVSS ( M(14), XMM4 ) /* - | - | - | m14 */
ALIGNTEXT32
LLBL(K_GTP3DNRR_top):
/* x and y are processed as a pair in the low two lanes */
MOVLPS ( S(0), XMM0 ) /* - | - | s1 | s0 */
MULPS ( XMM1, XMM0 ) /* - | - | s1*m5 | s0*m0 */
ADDPS ( XMM2, XMM0 ) /* - | - | +m13 | +m12 */
MOVLPS ( XMM0, D(0) ) /* -> D(1) | -> D(0) */
/* z is processed as a scalar, reusing XMM0 */
MOVSS ( S(2), XMM0 ) /* sz */
MULSS ( XMM3, XMM0 ) /* sz*m10 */
ADDSS ( XMM4, XMM0 ) /* +m14 */
MOVSS ( XMM0, D(2) ) /* -> D(2) */
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTP3DNRR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP3DNRR_top) )
LLBL(K_GTP3DNRR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_perspective
 *
 * For each source vertex (ox, oy, oz) writes four components:
 *     D(0) = ox * m0 + oz * m8
 *     D(1) = oy * m5 + oz * m9
 *     D(2) = oz * m10 + m14
 *     D(3) = 0 - oz
 * Dest is tagged VEC_SIZE_4 / size 4 and receives the source count.
 * Returns without touching dest when the source count is zero.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_perspective)
HIDDEN(_mesa_sse_transform_points3_perspective)
GLNAME(_mesa_sse_transform_points3_perspective):
#define FRAME_OFFSET 8
PUSH_L ( ESI )
PUSH_L ( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP3PR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
ALIGNTEXT32
/* Loop-invariant constants: XMM1 = (m5|m0), XMM2 = (m9|m8), XMM3 = m10,
 * XMM4 = m14, XMM6 = zero */
MOVSS ( M(0), XMM1 ) /* - | - | - | m0 */
MOVSS ( M(5), XMM2 ) /* - | - | - | m5 */
UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */
MOVLPS ( M(8), XMM2 ) /* - | - | m9 | m8 */
MOVSS ( M(10), XMM3 ) /* m10 */
MOVSS ( M(14), XMM4 ) /* m14 */
XORPS ( XMM6, XMM6 ) /* 0 */
ALIGNTEXT32
LLBL(K_GTP3PR_top):
MOVLPS ( S(0), XMM0 ) /* oy | ox */
MULPS ( XMM1, XMM0 ) /* oy*m5 | ox*m0 */
MOVSS ( S(2), XMM5 ) /* oz */
SHUFPS ( CONST(0x0), XMM5, XMM5 ) /* oz | oz */
MULPS ( XMM2, XMM5 ) /* oz*m9 | oz*m8 */
ADDPS ( XMM5, XMM0 ) /* oy*m5+oz*m9 | ox*m0+oz*m8 */
MOVLPS ( XMM0, D(0) ) /* ->D(1) | ->D(0) */
MOVSS ( S(2), XMM0 ) /* oz */
MULSS ( XMM3, XMM0 ) /* oz*m10 */
ADDSS ( XMM4, XMM0 ) /* +m14 */
MOVSS ( XMM0, D(2) ) /* ->D(2) */
MOVSS ( S(2), XMM0 ) /* oz */
MOVSS ( XMM6, XMM5 ) /* low lane of XMM5 = 0 (upper lanes keep old values) */
SUBPS ( XMM0, XMM5 ) /* low lane = 0 - oz; only this lane is stored */
MOVSS ( XMM5, D(3) ) /* ->D(3) */
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTP3PR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(K_GTP3PR_top) )
LLBL(K_GTP3PR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_2d
 *
 * For each source vertex (ox, oy, oz) writes three components:
 *     D(0) = ox * m0 + oy * m4 + m12
 *     D(1) = ox * m1 + oy * m5 + m13
 *     D(2) = oz                      (copied unchanged)
 * Dest is tagged VEC_SIZE_3 / size 3 and receives the source count.
 * Returns without touching dest when the source count is zero.
 *
 * NOTE(review): XMM0-XMM2 are filled with MOVLPS, so only their low two
 * lanes are defined; the upper lanes take part in MULPS/ADDPS but are
 * never stored.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_2d)
HIDDEN(_mesa_sse_transform_points3_2d)
GLNAME(_mesa_sse_transform_points3_2d):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP3P2DR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
ALIGNTEXT32
/* Low halves of three matrix rows, kept resident for the loop */
MOVLPS( M(0), XMM0 ) /* m1 | m0 */
MOVLPS( M(4), XMM1 ) /* m5 | m4 */
MOVLPS( M(12), XMM2 ) /* m13 | m12 */
ALIGNTEXT32
LLBL(K_GTP3P2DR_top):
MOVSS ( S(0), XMM3 ) /* ox */
SHUFPS ( CONST(0x0), XMM3, XMM3 ) /* ox | ox */
MULPS ( XMM0, XMM3 ) /* ox*m1 | ox*m0 */
MOVSS ( S(1), XMM4 ) /* oy */
SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* oy | oy */
MULPS ( XMM1, XMM4 ) /* oy*m5 | oy*m4 */
/* sum the two products, add the translation pair, store x and y */
ADDPS ( XMM4, XMM3 )
ADDPS ( XMM2, XMM3 )
MOVLPS ( XMM3, D(0) )
/* z passes through untouched */
MOVSS ( S(2), XMM3 )
MOVSS ( XMM3, D(2) )
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTP3P2DR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L ( CONST(16), EDI )
ADD_L ( EAX, ESI )
CMP_L ( ECX, EDI )
JNE ( LLBL(K_GTP3P2DR_top) )
LLBL(K_GTP3P2DR_finish):
POP_L ( EDI )
POP_L ( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_2d_no_rot
 *
 * For each source vertex (ox, oy, oz) writes three components:
 *     D(0) = ox * m0 + m12
 *     D(1) = oy * m5 + m13
 *     D(2) = oz              (copied unchanged)
 * Dest is tagged VEC_SIZE_3 / size 3 and receives the source count.
 * Returns without touching dest when the source count is zero.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_2d_no_rot)
HIDDEN(_mesa_sse_transform_points3_2d_no_rot)
GLNAME(_mesa_sse_transform_points3_2d_no_rot):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP3P2DNRR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
ALIGNTEXT32
/* Loop-invariant constants: XMM1 = scale pair, XMM2 = translation pair */
MOVSS ( M(0), XMM1 ) /* m0 */
MOVSS ( M(5), XMM2 ) /* m5 */
UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */
MOVLPS ( M(12), XMM2 ) /* m13 | m12 */
ALIGNTEXT32
LLBL(K_GTP3P2DNRR_top):
MOVLPS( S(0), XMM0 ) /* oy | ox */
MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */
ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */
MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */
/* z passes through untouched */
MOVSS( S(2), XMM0 )
MOVSS( XMM0, D(2) )
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTP3P2DNRR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(K_GTP3P2DNRR_top) )
LLBL(K_GTP3P2DNRR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_sse_transform_points3_3d
 *
 * For each source vertex (ox, oy, oz) writes three components:
 *     D(k) = ox * m[k] + oy * m[4+k] + oz * m[8+k] + m[12+k]   for k = 0, 1, 2
 * Dest is tagged VEC_SIZE_3 / size 3 and receives the source count.
 * Returns without touching dest when the source count is zero.
 *
 * The matrix rows are loaded with MOVAPS, which requires the matrix to be
 * 16-byte aligned.  The fourth lane is computed as well but never stored.
 */
ALIGNTEXT4
GLOBL GLNAME(_mesa_sse_transform_points3_3d)
HIDDEN(_mesa_sse_transform_points3_3d)
GLNAME(_mesa_sse_transform_points3_3d):
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
/* The +8 on the stack offsets below matches the two 4-byte pushes above. */
MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */
MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP3P3DR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
ALIGNTEXT32
/* Full rows are loaded; the top lane of each is carried but unused */
MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */
MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */
MOVAPS( M(8), XMM2 ) /* m10 | m9 | m8 */
MOVAPS( M(12), XMM3 ) /* m14 | m13 | m12 */
ALIGNTEXT32
LLBL(K_GTP3P3DR_top):
MOVSS( S(0), XMM4 )
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox */
MULPS( XMM0, XMM4 ) /* ox*m2 | ox*m1 | ox*m0 */
MOVSS( S(1), XMM5 )
SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy */
MULPS( XMM1, XMM5 ) /* oy*m6 | oy*m5 | oy*m4 */
MOVSS( S(2), XMM6 )
SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz */
MULPS( XMM2, XMM6 ) /* oz*m10 | oz*m9 | oz*m8 */
ADDPS( XMM5, XMM4 ) /* + | + | + */
ADDPS( XMM6, XMM4 ) /* + | + | + */
ADDPS( XMM3, XMM4 ) /* + | + | + */
MOVLPS( XMM4, D(0) ) /* => D(1) | => D(0) */
/* bring lane 2 down to lane 0 so MOVSS can store it */
UNPCKHPS( XMM4, XMM4 )
MOVSS( XMM4, D(2) )
/* The _skip label is not referenced inside this routine. */
LLBL(K_GTP3P3DR_skip):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(K_GTP3P3DR_top) )
LLBL(K_GTP3P3DR_finish):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
#endif
/* On ELF/Linux, emit an empty .note.GNU-stack section (no flags set) so the
 * linker does not mark the stack executable because of this object. */
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

235
src/arch/x86/sse_xform4.S Normal file
View File

@ -0,0 +1,235 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifdef USE_SSE_ASM
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/*
 * FRAME_OFFSET is defined once for the whole file; every routine below
 * pushes exactly two 4-byte registers (ESI, EDI) before reading its
 * arguments.  NOTE(review): presumably consumed by the ARG_* macros from
 * xform_args.h -- confirm there.
 */
#define FRAME_OFFSET 8
/* Element i at byte offset i*4 from the source (ESI), dest (EDI) or
 * matrix (EDX) base register. */
#define SRC(i) REGOFF(i * 4, ESI)
#define DST(i) REGOFF(i * 4, EDI)
#define MAT(i) REGOFF(i * 4, EDX)
/* Packs four 2-bit selectors into one immediate byte (r0 in the top two
 * bits, r3 in the bottom two).  Not used by the routines in this file. */
#define SELECT(r0, r1, r2, r3) CONST( r0 * 64 + r1 * 16 + r2 * 4 + r3 )
/*
 * _mesa_sse_transform_points4_general
 *
 * For each source vertex (ox, oy, oz, ow) computes the four-component result
 *     dest = ox * M[0..3] + oy * M[4..7] + oz * M[8..11] + ow * M[12..15]
 * Dest is tagged VEC_SIZE_4 / size 4 and receives the source count.
 * Returns without touching dest when the source count is zero.
 *
 * The matrix rows are loaded and each result is stored with MOVAPS, which
 * requires 16-byte aligned memory operands.
 *
 * Registers: ESI = source cursor, EDI = dest cursor, EDX = matrix,
 * EAX = source stride (added to ESI as a byte offset),
 * ECX = remaining vertex count (counted down to zero).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_sse_transform_points4_general )
HIDDEN(_mesa_sse_transform_points4_general)
GLNAME( _mesa_sse_transform_points4_general ):
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX ) /* verify non-zero count */
JE( LLBL( sse_general_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
/* hint the first source vertex into cache before the loop starts */
PREFETCHT0( REGIND(ESI) )
/* Matrix rows stay resident in XMM4-XMM7 for the whole loop. */
MOVAPS( MAT(0), XMM4 ) /* m3 | m2 | m1 | m0 */
MOVAPS( MAT(4), XMM5 ) /* m7 | m6 | m5 | m4 */
MOVAPS( MAT(8), XMM6 ) /* m11 | m10 | m9 | m8 */
MOVAPS( MAT(12), XMM7 ) /* m15 | m14 | m13 | m12 */
ALIGNTEXT16
LLBL( sse_general_loop ):
MOVSS( SRC(0), XMM0 ) /* ox */
SHUFPS( CONST(0x0), XMM0, XMM0 ) /* ox | ox | ox | ox */
MULPS( XMM4, XMM0 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
MOVSS( SRC(1), XMM1 ) /* oy */
SHUFPS( CONST(0x0), XMM1, XMM1 ) /* oy | oy | oy | oy */
MULPS( XMM5, XMM1 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
MOVSS( SRC(2), XMM2 ) /* oz */
SHUFPS( CONST(0x0), XMM2, XMM2 ) /* oz | oz | oz | oz */
MULPS( XMM6, XMM2 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
MOVSS( SRC(3), XMM3 ) /* ow */
SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ow | ow | ow | ow */
MULPS( XMM7, XMM3 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
ADDPS( XMM1, XMM0 ) /* ox*m3+oy*m7 | ... */
ADDPS( XMM2, XMM0 ) /* ox*m3+oy*m7+oz*m11 | ... */
ADDPS( XMM3, XMM0 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
MOVAPS( XMM0, DST(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
/* one vertex done; loop until the count reaches zero */
DEC_L( ECX )
JNZ( LLBL( sse_general_loop ) )
LLBL( sse_general_done ):
POP_L( EDI )
POP_L( ESI )
RET
/*
 * _mesa_sse_transform_points4_3d
 *
 * For each source vertex (ox, oy, oz, ow) writes four components:
 *     D(k) = ox * m[k] + oy * m[4+k] + oz * m[8+k] + ow * m[12+k]  for k = 0..2
 *     D(3) = ow                                    (copied unchanged)
 * Returns without touching dest when the source count is zero.
 *
 * Dest is tagged VEC_SIZE_4 / size 4: the routine stores a real w
 * component (ow), so dest must be declared four components wide for that
 * value to be honoured by consumers of the vector.
 *
 * The matrix rows are loaded and each result is stored with MOVAPS, which
 * requires 16-byte aligned memory operands.
 *
 * Registers: ESI = source cursor, EDI = dest cursor, EDX = matrix,
 * EAX = source stride (added to ESI as a byte offset), ECX = dest end address.
 */
ALIGNTEXT4
GLOBL GLNAME( _mesa_sse_transform_points4_3d )
HIDDEN(_mesa_sse_transform_points4_3d)
GLNAME( _mesa_sse_transform_points4_3d ):
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */
MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */
MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */
MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */
TEST_L( ECX, ECX)
JZ( LLBL(K_GTP43P3DR_finish) ) /* count was zero; go to finish */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags (w is written below) */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */
SHL_L( CONST(4), ECX ) /* count *= 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ADD_L( EDI, ECX ) /* count += dest ptr -> ECX is the loop end address */
/* Matrix rows stay resident in XMM0-XMM3 for the whole loop. */
MOVAPS( MAT(0), XMM0 ) /* m3 | m2 | m1 | m0 */
MOVAPS( MAT(4), XMM1 ) /* m7 | m6 | m5 | m4 */
MOVAPS( MAT(8), XMM2 ) /* m11 | m10 | m9 | m8 */
MOVAPS( MAT(12), XMM3 ) /* m15 | m14 | m13 | m12 */
ALIGNTEXT32
LLBL( K_GTP43P3DR_top ):
MOVSS( SRC(0), XMM4 ) /* ox */
SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */
MULPS( XMM0, XMM4 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */
MOVSS( SRC(1), XMM5 ) /* oy */
SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */
MULPS( XMM1, XMM5 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */
MOVSS( SRC(2), XMM6 ) /* oz */
SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */
MULPS( XMM2, XMM6 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */
MOVSS( SRC(3), XMM7 ) /* ow */
SHUFPS( CONST(0x0), XMM7, XMM7 ) /* ow | ow | ow | ow */
MULPS( XMM3, XMM7 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */
ADDPS( XMM5, XMM4 ) /* ox*m3+oy*m7 | ... */
ADDPS( XMM6, XMM4 ) /* ox*m3+oy*m7+oz*m11 | ... */
ADDPS( XMM7, XMM4 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */
MOVAPS( XMM4, DST(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */
/* overwrite the computed fourth lane with the untouched source w */
MOVSS( SRC(3), XMM4 ) /* ow */
MOVSS( XMM4, DST(3) ) /* ->D(3) */
/* The _skip label is not referenced inside this routine. */
LLBL( K_GTP43P3DR_skip ):
/* dest advances 16 bytes per vertex, source advances by its stride */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(K_GTP43P3DR_top) )
LLBL( K_GTP43P3DR_finish ):
POP_L( EDI )
POP_L( ESI )
RET
/*
 * _mesa_sse_transform_points4_identity
 *
 * Copies all four 32-bit components of every source vertex to dest
 * unchanged.  Dest is tagged VEC_SIZE_4 / size 4 and receives the source
 * count.  Returns without touching dest when the source count is zero.
 *
 * The matrix argument is not needed for a plain copy and is not loaded.
 *
 * Each vertex is moved with MOVAPS, which requires both the source and
 * the dest vertex addresses to be 16-byte aligned.
 *
 * Registers: ESI = source cursor, EDI = dest cursor,
 * EAX = source stride (added to ESI as a byte offset),
 * ECX = remaining vertex count (counted down to zero).
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_sse_transform_points4_identity )
HIDDEN(_mesa_sse_transform_points4_identity)
GLNAME( _mesa_sse_transform_points4_identity ):
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX ) /* verify non-zero count */
JE( LLBL( sse_identity_done ) )
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */
ALIGNTEXT16
LLBL( sse_identity_loop ):
/* non-temporal prefetch hint 32 bytes ahead of the current source vertex */
PREFETCHNTA( REGOFF(32, ESI) )
/* load the whole vertex, advance source by its stride */
MOVAPS( REGIND(ESI), XMM0 )
ADD_L( EAX, ESI )
/* store the whole vertex, advance dest by 16 bytes */
MOVAPS( XMM0, REGIND(EDI) )
ADD_L( CONST(16), EDI )
/* one vertex done; loop until the count reaches zero */
DEC_L( ECX )
JNZ( LLBL( sse_identity_loop ) )
LLBL( sse_identity_done ):
POP_L( EDI )
POP_L( ESI )
RET
#endif
/* On ELF/Linux, emit an empty .note.GNU-stack section (no flags set) so the
 * linker does not mark the stack executable because of this object. */
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

407
src/arch/x86/x86_cliptest.S Normal file
View File

@ -0,0 +1,407 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
* in there will break the build on some platforms.
*/
#include "assyntax.h"
#include "matypes.h"
#include "clip_args.h"
/*
 * Fixed operand shorthands: component n (n = 0..3) at byte offset n*4
 * from ESI (SRCn), EDI (DSTn) and EDX (MATn).
 */
#define SRC0 REGOFF(0, ESI)
#define SRC1 REGOFF(4, ESI)
#define SRC2 REGOFF(8, ESI)
#define SRC3 REGOFF(12, ESI)
#define DST0 REGOFF(0, EDI)
#define DST1 REGOFF(4, EDI)
#define DST2 REGOFF(8, EDI)
#define DST3 REGOFF(12, EDI)
#define MAT0 REGOFF(0, EDX)
#define MAT1 REGOFF(4, EDX)
#define MAT2 REGOFF(8, EDX)
#define MAT3 REGOFF(12, EDX)
/*
* Table for clip test.
*
* bit6 = SRC3 < 0
* bit5 = SRC2 < 0
* bit4 = abs(S(2)) > abs(S(3))
* bit3 = SRC1 < 0
* bit2 = abs(S(1)) > abs(S(3))
* bit1 = SRC0 < 0
* bit0 = abs(S(0)) > abs(S(3))
*/
SEG_DATA
/*
 * 128 one-byte entries (16 rows of 8): one entry for every value of the
 * 7-bit index described above.  The cliptest loops build that index in
 * ECX, fetch the byte at clip_table + ECX and store it to clipmask[].
 */
clip_table:
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
D_BYTE 0x20, 0x21, 0x20, 0x22, 0x24, 0x25, 0x24, 0x26
D_BYTE 0x20, 0x21, 0x20, 0x22, 0x28, 0x29, 0x28, 0x2a
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
D_BYTE 0x10, 0x11, 0x10, 0x12, 0x14, 0x15, 0x14, 0x16
D_BYTE 0x10, 0x11, 0x10, 0x12, 0x18, 0x19, 0x18, 0x1a
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x27, 0x25, 0x27, 0x26
D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x2b, 0x29, 0x2b, 0x2a
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x17, 0x15, 0x17, 0x16
D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x1b, 0x19, 0x1b, 0x1a
SEG_TEXT
/*
* _mesa_x86_cliptest_points4
*
* AL: ormask
* AH: andmask
* EBX: temp0
* ECX: temp1
* EDX: clipmask[]
* ESI: clip[]
* EDI: proj[]
* EBP: temp2
*/
#if defined(__ELF__) && defined(__PIC__) && defined(GNU_ASSEMBLER) && !defined(ELFPIC)
#define ELFPIC
#endif
/*
 * _mesa_x86_cliptest_points4: classify every 4-component source point
 * and write its projected form to the destination vector.
 *
 * For each point the loop
 *   - starts computing 1/SRC3 on the x87 stack,
 *   - builds a 7-bit index from the sign bits and magnitude comparisons
 *     of the four components and fetches a mask byte from clip_table,
 *   - ORs the mask into AL, ANDs it into AH and stores it to clipmask[],
 *   - writes (SRC0/SRC3, SRC1/SRC3, SRC2/SRC3, 1/SRC3) to the destination
 *     when the mask is zero, otherwise (0, 0, 0, 1.0f).
 *
 * AL and AH are seeded from the bytes at ARG_OR and ARG_AND and written
 * back there after the loop.  ARG_DEST is returned in EAX.  When the
 * source count is zero the loop and the mask write-back are both skipped.
 *
 * The ARG_SOURCE and ARG_CLIP stack slots are reused as scratch: after
 * setup they hold the source stride and the end-of-clipmask pointer.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_cliptest_points4 )
HIDDEN(_mesa_x86_cliptest_points4)
GLNAME( _mesa_x86_cliptest_points4 ):
/*
 * FRAME_OFFSET is the number of bytes pushed below: four saved registers,
 * plus the clip_table pointer in the ELFPIC build.
 * NOTE(review): presumably consumed by the ARG_* macros from clip_args.h
 * to locate the caller arguments - confirm against that header.
 */
#ifdef ELFPIC
#define FRAME_OFFSET 20
#else
#define FRAME_OFFSET 16
#endif
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBP )
PUSH_L( EBX )
#ifdef ELFPIC
/* store pointer to clip_table on stack */
/* The CALL pushes its return address; the helper copies it into EBX and
 * returns, so EBX holds the address of the ADD that follows. */
CALL( LLBL(ctp4_get_eip) )
ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )
MOV_L( REGOFF(clip_table@GOT, EBX), EBX )
PUSH_L( EBX )
JMP( LLBL(ctp4_clip_table_ready) )
LLBL(ctp4_get_eip):
/* store eip in ebx */
MOV_L( REGIND(ESP), EBX )
RET
LLBL(ctp4_clip_table_ready):
#endif
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_CLIP, EDX )
MOV_L( ARG_OR, EBX )
MOV_L( ARG_AND, EBP )
/* EAX = source stride, ECX = source count, ESI = first source point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
/* mark the destination as a 4-component vector with the same count */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( EAX, ARG_SOURCE ) /* put stride in ARG_SOURCE */
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDX, ECX )
MOV_L( ECX, ARG_CLIP ) /* put clipmask + count in ARG_CLIP */
/* end pointer == start pointer means count == 0; the MOV_Bs below do not
 * alter the flags, so the JZ still tests this comparison */
CMP_L( ECX, EDX )
MOV_B( REGIND(EBX), AL )
MOV_B( REGIND(EBP), AH )
JZ( LLBL(ctp4_finish) )
ALIGNTEXT16
LLBL(ctp4_top):
FLD1 /* F3 */
FDIV_S( SRC3 ) /* GH: don't care about div-by-zero */
/*
 * Build the table index in ECX using integer operations on the raw float
 * words.  Each ADD r,r shifts the sign bit out into carry and leaves the
 * magnitude bits doubled; each ADC ECX,ECX shifts that carry (or the
 * borrow from a CMP) into the low bit of the index.
 */
MOV_L( SRC3, EBP )
MOV_L( SRC2, EBX )
XOR_L( ECX, ECX )
ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */
ADC_L( ECX, ECX )
ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */
ADC_L( ECX, ECX )
CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */
ADC_L( ECX, ECX )
MOV_L( SRC1, EBX )
ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */
ADC_L( ECX, ECX )
CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */
ADC_L( ECX, ECX )
MOV_L( SRC0, EBX )
ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */
ADC_L( ECX, ECX )
CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */
ADC_L( ECX, ECX )
/* CL = clip_table[index] */
#ifdef ELFPIC
MOV_L( REGIND(ESP), EBP ) /* clip_table */
MOV_B( REGBI(EBP, ECX), CL )
#else
MOV_B( REGOFF(clip_table,ECX), CL )
#endif
OR_B( CL, AL )
AND_B( CL, AH )
/* MOV does not change the flags, so the JZ below still sees the result
 * of this TEST: a zero mask takes the projection path */
TEST_B( CL, CL )
MOV_B( CL, REGIND(EDX) )
JZ( LLBL(ctp4_proj) )
LLBL(ctp4_noproj):
/* non-zero mask: discard 1/SRC3 and store (0, 0, 0, 1.0f);
 * 0x3f800000 is the IEEE-754 single-precision bit pattern of 1.0 */
FSTP( ST(0) ) /* */
MOV_L( CONST(0), DST0 )
MOV_L( CONST(0), DST1 )
MOV_L( CONST(0), DST2 )
MOV_L( CONST(0x3f800000), DST3 )
JMP( LLBL(ctp4_next) )
LLBL(ctp4_proj):
/* zero mask: multiply the first three components by 1/SRC3 (F3) and
 * store them followed by 1/SRC3 itself */
FLD_S( SRC0 ) /* F0 F3 */
FMUL2( ST(1), ST0 )
FLD_S( SRC1 ) /* F1 F0 F3 */
FMUL2( ST(2), ST0 )
FLD_S( SRC2 ) /* F2 F1 F0 F3 */
FMUL2( ST(3), ST0 )
FXCH( ST(2) ) /* F0 F1 F2 F3 */
FSTP_S( DST0 ) /* F1 F2 F3 */
FSTP_S( DST1 ) /* F2 F3 */
FSTP_S( DST2 ) /* F3 */
FSTP_S( DST3 ) /* */
LLBL(ctp4_next):
/* advance clipmask by 1 byte, destination by 16 bytes and source by the
 * stride saved in the ARG_SOURCE slot; stop at the saved end pointer */
INC_L( EDX )
ADD_L( CONST(16), EDI )
ADD_L( ARG_SOURCE, ESI )
CMP_L( EDX, ARG_CLIP )
JNZ( LLBL(ctp4_top) )
/* write the accumulated OR and AND masks back to the caller */
MOV_L( ARG_OR, ECX )
MOV_L( ARG_AND, EDX )
MOV_B( AL, REGIND(ECX) )
MOV_B( AH, REGIND(EDX) )
LLBL(ctp4_finish):
MOV_L( ARG_DEST, EAX )
#ifdef ELFPIC
POP_L( ESI ) /* discard ptr to clip_table */
#endif
POP_L( EBX )
POP_L( EBP )
POP_L( EDI )
POP_L( ESI )
RET
/*
 * _mesa_x86_cliptest_points4_np: same classification as
 * _mesa_x86_cliptest_points4 but without the projection step.
 *
 * For each 4-component source point the loop builds the 7-bit clip_table
 * index, fetches the mask byte, ORs it into AL, ANDs it into AH and stores
 * it to clipmask[].  No destination points are written and the x87 unit
 * is not used.
 *
 * AL and AH are seeded from the bytes at ARG_OR and ARG_AND and written
 * back there after the loop.  ARG_SOURCE is returned in EAX.  When the
 * source count is zero the loop and the mask write-back are both skipped.
 *
 * The ARG_DEST stack slot is reused to hold the source stride, and EDI
 * holds the end-of-clipmask pointer rather than a destination pointer.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_cliptest_points4_np )
HIDDEN(_mesa_x86_cliptest_points4_np)
GLNAME( _mesa_x86_cliptest_points4_np ):
/*
 * FRAME_OFFSET is the number of bytes pushed below: four saved registers,
 * plus the clip_table pointer in the ELFPIC build.
 * NOTE(review): presumably consumed by the ARG_* macros from clip_args.h
 * to locate the caller arguments - confirm against that header.
 */
#ifdef ELFPIC
#define FRAME_OFFSET 20
#else
#define FRAME_OFFSET 16
#endif
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBP )
PUSH_L( EBX )
#ifdef ELFPIC
/* store pointer to clip_table on stack */
/* The CALL pushes its return address; the helper copies it into EBX and
 * returns, so EBX holds the address of the ADD that follows. */
CALL( LLBL(ctp4_np_get_eip) )
ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )
MOV_L( REGOFF(clip_table@GOT, EBX), EBX )
PUSH_L( EBX )
JMP( LLBL(ctp4_np_clip_table_ready) )
LLBL(ctp4_np_get_eip):
/* store eip in ebx */
MOV_L( REGIND(ESP), EBX )
RET
LLBL(ctp4_np_clip_table_ready):
#endif
MOV_L( ARG_SOURCE, ESI )
/* slot */
MOV_L( ARG_CLIP, EDX )
MOV_L( ARG_OR, EBX )
MOV_L( ARG_AND, EBP )
/* EAX = source stride, ECX = source count, ESI = first source point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( EAX, ARG_DEST ) /* put stride in ARG_DEST */
ADD_L( EDX, ECX )
MOV_L( ECX, EDI ) /* put clipmask + count in EDI */
/* end pointer == start pointer means count == 0; the MOV_Bs below do not
 * alter the flags, so the JZ still tests this comparison */
CMP_L( ECX, EDX )
MOV_B( REGIND(EBX), AL )
MOV_B( REGIND(EBP), AH )
JZ( LLBL(ctp4_np_finish) )
ALIGNTEXT16
LLBL(ctp4_np_top):
/*
 * Build the table index in ECX using integer operations on the raw float
 * words.  Each ADD r,r shifts the sign bit out into carry and leaves the
 * magnitude bits doubled; each ADC ECX,ECX shifts that carry (or the
 * borrow from a CMP) into the low bit of the index.
 */
MOV_L( SRC3, EBP )
MOV_L( SRC2, EBX )
XOR_L( ECX, ECX )
ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */
ADC_L( ECX, ECX )
ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */
ADC_L( ECX, ECX )
CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */
ADC_L( ECX, ECX )
MOV_L( SRC1, EBX )
ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */
ADC_L( ECX, ECX )
CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */
ADC_L( ECX, ECX )
MOV_L( SRC0, EBX )
ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */
ADC_L( ECX, ECX )
CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */
ADC_L( ECX, ECX )
/* CL = clip_table[index] */
#ifdef ELFPIC
MOV_L( REGIND(ESP), EBP ) /* clip_table */
MOV_B( REGBI(EBP, ECX), CL )
#else
MOV_B( REGOFF(clip_table,ECX), CL )
#endif
OR_B( CL, AL )
AND_B( CL, AH )
/* NOTE(review): no branch consumes the flags set by this TEST; INC, ADD
 * and CMP below overwrite them before the JNZ.  It looks like a leftover
 * from the projecting variant - confirm before removing. */
TEST_B( CL, CL )
MOV_B( CL, REGIND(EDX) )
INC_L( EDX )
/* slot */
/* advance the source by the stride saved in the ARG_DEST slot and stop
 * when clipmask reaches the end pointer held in EDI */
ADD_L( ARG_DEST, ESI )
CMP_L( EDX, EDI )
JNZ( LLBL(ctp4_np_top) )
/* write the accumulated OR and AND masks back to the caller */
MOV_L( ARG_OR, ECX )
MOV_L( ARG_AND, EDX )
MOV_B( AL, REGIND(ECX) )
MOV_B( AH, REGIND(EDX) )
LLBL(ctp4_np_finish):
MOV_L( ARG_SOURCE, EAX )
#ifdef ELFPIC
POP_L( ESI ) /* discard ptr to clip_table */
#endif
POP_L( EBX )
POP_L( EBP )
POP_L( EDI )
POP_L( ESI )
RET
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

126
src/arch/x86/x86_xform.c Normal file
View File

@ -0,0 +1,126 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Intel x86 assembly code by Josh Vanderhoof
*/
#include "main/glheader.h"
#include "main/context.h"
#include "math/m_xform.h"
#include "x86_xform.h"
#include "common_x86_asm.h"
#ifdef USE_X86_ASM
#ifdef USE_3DNOW_ASM
#include "3dnow.h"
#endif
#ifdef USE_SSE_ASM
#include "sse.h"
#endif
#endif
#ifdef DEBUG_MATH
#include "math/m_debug.h"
#endif
#ifdef USE_X86_ASM
/* Prototypes for the plain-x86 transform routines for 2-, 3- and
 * 4-component source vectors (seven matrix-type variants each, see
 * DECLARE_XFORM_GROUP in x86_xform.h). */
DECLARE_XFORM_GROUP( x86, 2 )
DECLARE_XFORM_GROUP( x86, 3 )
DECLARE_XFORM_GROUP( x86, 4 )
/* Clip test with projection; installed into _mesa_clip_tab[4] below.
 * NOTE(review): the assembly in x86_cliptest.S reads only the first five
 * arguments; viewport_z_clip appears to be ignored - confirm. */
extern GLvector4f * _ASMAPI
_mesa_x86_cliptest_points4( GLvector4f *clip_vec,
GLvector4f *proj_vec,
GLubyte clipMask[],
GLubyte *orMask,
GLubyte *andMask,
GLboolean viewport_z_clip );
/* Clip test without projection; installed into _mesa_clip_np_tab[4]
 * below. */
extern GLvector4f * _ASMAPI
_mesa_x86_cliptest_points4_np( GLvector4f *clip_vec,
GLvector4f *proj_vec,
GLubyte clipMask[],
GLubyte *orMask,
GLubyte *andMask,
GLboolean viewport_z_clip );
/* The two _mesa_v16_* routines are declared here but not referenced
 * anywhere else in this file. */
extern void _ASMAPI
_mesa_v16_x86_cliptest_points4( GLfloat *first_vert,
GLfloat *last_vert,
GLubyte *or_mask,
GLubyte *and_mask,
GLubyte *clip_mask,
GLboolean viewport_z_clip );
extern void _ASMAPI
_mesa_v16_x86_general_xform( GLfloat *dest,
const GLfloat *m,
const GLfloat *src,
GLuint src_stride,
GLuint count );
#endif
#ifdef USE_X86_ASM
/**
 * Install the plain-x86 assembly routines into the math dispatch tables.
 *
 * Fills the 4-component slots of _mesa_clip_tab and _mesa_clip_np_tab and
 * every matrix-type slot of _mesa_transform_tab for source sizes 2, 3
 * and 4.  When DEBUG_MATH is defined, the transform and cliptest
 * self-tests are run afterwards with the label "x86".
 */
static void _mesa_init_x86_transform_asm( void )
{
   /* Clip testing: only the 4-component entries are replaced here. */
   _mesa_clip_np_tab[4] = _mesa_x86_cliptest_points4_np;
   _mesa_clip_tab[4] = _mesa_x86_cliptest_points4;

   /* Point transforms, one group per source vector size. */
   ASSIGN_XFORM_GROUP( x86, 4 );
   ASSIGN_XFORM_GROUP( x86, 3 );
   ASSIGN_XFORM_GROUP( x86, 2 );

#ifdef DEBUG_MATH
   _math_test_all_transform_functions( "x86" );
   _math_test_all_cliptest_functions( "x86" );
#endif
}
#endif
/**
 * Detect the host CPU features and install every x86 assembly routine
 * set that both the build configuration and the CPU support.
 *
 * Feature detection always runs.  The installers are then called in the
 * order plain x86, 3DNow!, SSE, each only when its CPU feature test
 * passes.
 *
 * The 3DNow! and SSE calls are compiled only when USE_3DNOW_ASM and
 * USE_SSE_ASM are defined, matching the guards on the 3dnow.h and sse.h
 * includes at the top of this file.  Without these guards a build that
 * defines USE_X86_ASM alone would call functions that were never
 * declared.
 */
void _mesa_init_all_x86_transform_asm( void )
{
   _mesa_get_x86_features();

#ifdef USE_X86_ASM
   if ( _mesa_x86_cpu_features ) {
      _mesa_init_x86_transform_asm();
   }

#ifdef USE_3DNOW_ASM
   if ( cpu_has_3dnow ) {
      _mesa_init_3dnow_transform_asm();
   }
#endif

#ifdef USE_SSE_ASM
   if ( cpu_has_xmm ) {
      _mesa_init_sse_transform_asm();
   }
#endif
#endif
}

106
src/arch/x86/x86_xform.h Normal file
View File

@ -0,0 +1,106 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Gareth Hughes
*/
#ifndef X86_XFORM_H
#define X86_XFORM_H
/* =============================================================
* Transformation function declarations:
*/
/*
 * Parameter list shared by every transform routine: destination vector,
 * 16-float matrix, source vector.
 */
#define XFORM_ARGS GLvector4f *to_vec, \
const GLfloat m[16], \
const GLvector4f *from_vec
/*
 * Declare the seven transform entry points for one implementation prefix
 * (pfx) and source vector size (sz): general, identity, 3d_no_rot,
 * perspective, 2d, 2d_no_rot and 3d.  The expansion already ends with a
 * semicolon, so the macro is invoked without one.
 */
#define DECLARE_XFORM_GROUP( pfx, sz ) \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_general( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_identity( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d_no_rot( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_perspective( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_2d_no_rot( XFORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_points##sz##_3d( XFORM_ARGS );
/*
 * Store the seven routines declared by DECLARE_XFORM_GROUP( pfx, sz )
 * into _mesa_transform_tab[sz], indexed by matrix type.
 *
 * The expansion is seven separate assignment statements and already ends
 * with a semicolon.  It is therefore not a single statement: do not use
 * it as the unbraced body of an if/else or loop.
 */
#define ASSIGN_XFORM_GROUP( pfx, sz ) \
_mesa_transform_tab[sz][MATRIX_GENERAL] = \
_mesa_##pfx##_transform_points##sz##_general; \
_mesa_transform_tab[sz][MATRIX_IDENTITY] = \
_mesa_##pfx##_transform_points##sz##_identity; \
_mesa_transform_tab[sz][MATRIX_3D_NO_ROT] = \
_mesa_##pfx##_transform_points##sz##_3d_no_rot; \
_mesa_transform_tab[sz][MATRIX_PERSPECTIVE] = \
_mesa_##pfx##_transform_points##sz##_perspective; \
_mesa_transform_tab[sz][MATRIX_2D] = \
_mesa_##pfx##_transform_points##sz##_2d; \
_mesa_transform_tab[sz][MATRIX_2D_NO_ROT] = \
_mesa_##pfx##_transform_points##sz##_2d_no_rot; \
_mesa_transform_tab[sz][MATRIX_3D] = \
_mesa_##pfx##_transform_points##sz##_3d;
/* =============================================================
* Normal transformation function declarations:
*/
/*
 * Parameter list shared by every normal-transformation routine: matrix,
 * scale factor, input normals, optional per-normal lengths and the
 * destination vector.
 * NOTE(review): whether lengths may be NULL is not visible from this
 * header - confirm against the C implementations.
 */
#define NORM_ARGS const GLmatrix *mat, \
GLfloat scale, \
const GLvector4f *in, \
const GLfloat *lengths, \
GLvector4f *dest
/*
 * Declare the eight normal-transformation entry points for one
 * implementation prefix (pfx).  The expansion already ends with a
 * semicolon, so the macro is invoked without one.
 */
#define DECLARE_NORM_GROUP( pfx ) \
extern void _ASMAPI _mesa_##pfx##_rescale_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_normalize_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normals_no_rot( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_rescale_normals_no_rot( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals( NORM_ARGS ); \
extern void _ASMAPI _mesa_##pfx##_transform_normalize_normals_no_rot( NORM_ARGS );
/*
 * Store the eight routines declared by DECLARE_NORM_GROUP( pfx ) into
 * _mesa_normal_tab.  The index is a NORM_* flag or the bitwise OR of a
 * transform flag with NORM_RESCALE or NORM_NORMALIZE.
 *
 * The expansion is eight separate assignment statements and already ends
 * with a semicolon.  It is therefore not a single statement: do not use
 * it as the unbraced body of an if/else or loop.
 */
#define ASSIGN_NORM_GROUP( pfx ) \
_mesa_normal_tab[NORM_RESCALE] = \
_mesa_##pfx##_rescale_normals; \
_mesa_normal_tab[NORM_NORMALIZE] = \
_mesa_##pfx##_normalize_normals; \
_mesa_normal_tab[NORM_TRANSFORM] = \
_mesa_##pfx##_transform_normals; \
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT] = \
_mesa_##pfx##_transform_normals_no_rot; \
_mesa_normal_tab[NORM_TRANSFORM | NORM_RESCALE] = \
_mesa_##pfx##_transform_rescale_normals; \
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE] = \
_mesa_##pfx##_transform_rescale_normals_no_rot; \
_mesa_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE] = \
_mesa_##pfx##_transform_normalize_normals; \
_mesa_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE] = \
_mesa_##pfx##_transform_normalize_normals_no_rot;
#endif

574
src/arch/x86/x86_xform2.S Normal file
View File

@ -0,0 +1,574 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
* in there will break the build on some platforms.
*/
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/*
 * Raw IEEE-754 single-precision bit patterns, for storing float constants
 * with integer moves: 1065353216 is 0x3f800000, i.e. 1.0f.
 */
#define FP_ONE 1065353216
#define FP_ZERO 0
/*
 * Operand shorthands.  SRCn and DSTn name the n-th float of the current
 * source and destination vertex (ESI and EDI); MATn names element n of
 * the 16-float matrix addressed by EDX.
 */
#define SRC0 REGOFF(0, ESI)
#define SRC1 REGOFF(4, ESI)
#define SRC2 REGOFF(8, ESI)
#define SRC3 REGOFF(12, ESI)
#define DST0 REGOFF(0, EDI)
#define DST1 REGOFF(4, EDI)
#define DST2 REGOFF(8, EDI)
#define DST3 REGOFF(12, EDI)
#define MAT0 REGOFF(0, EDX)
#define MAT1 REGOFF(4, EDX)
#define MAT2 REGOFF(8, EDX)
#define MAT3 REGOFF(12, EDX)
#define MAT4 REGOFF(16, EDX)
#define MAT5 REGOFF(20, EDX)
#define MAT6 REGOFF(24, EDX)
#define MAT7 REGOFF(28, EDX)
#define MAT8 REGOFF(32, EDX)
#define MAT9 REGOFF(36, EDX)
#define MAT10 REGOFF(40, EDX)
#define MAT11 REGOFF(44, EDX)
#define MAT12 REGOFF(48, EDX)
#define MAT13 REGOFF(52, EDX)
#define MAT14 REGOFF(56, EDX)
#define MAT15 REGOFF(60, EDX)
/*
 * _mesa_x86_transform_points2_general: transform 2-component points by a
 * general matrix, producing 4-component results.
 *
 * For every source point (x, y) = (SRC0, SRC1) it stores
 *   DSTi = MATi * x + MAT(4+i) * y + MAT(12+i)      for i = 0..3
 *
 * The destination vector gets the source count, size 4 and the
 * VEC_SIZE_4 flag.  Nothing is written when the source count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_general )
HIDDEN(_mesa_x86_transform_points2_general)
GLNAME( _mesa_x86_transform_points2_general ):
/* two registers are pushed below (8 bytes) */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p2_gr_done) )
/* EAX = source stride in bytes, added to ESI once per point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16: one past the last destination vertex */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL(x86_p2_gr_loop):
/* F4..F7 = x * MAT0..MAT3 */
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC0 ) /* F5 F4 */
FMUL_S( MAT1 )
FLD_S( SRC0 ) /* F6 F5 F4 */
FMUL_S( MAT2 )
FLD_S( SRC0 ) /* F7 F6 F5 F4 */
FMUL_S( MAT3 )
/* F0..F3 = y * MAT4..MAT7; this fills all eight x87 registers */
FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT4 )
FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT5 )
FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT6 )
FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT7 )
/* fold each y term into its matching x term: F4+=F0 .. F7+=F3 */
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* add the translation column MAT12..MAT15 */
FXCH( ST(3) ) /* F4 F6 F5 F7 */
FADD_S( MAT12 )
FXCH( ST(2) ) /* F5 F6 F4 F7 */
FADD_S( MAT13 )
FXCH( ST(1) ) /* F6 F5 F4 F7 */
FADD_S( MAT14 )
FXCH( ST(3) ) /* F7 F5 F4 F6 */
FADD_S( MAT15 )
FXCH( ST(2) ) /* F4 F5 F7 F6 */
FSTP_S( DST0 ) /* F5 F7 F6 */
FSTP_S( DST1 ) /* F7 F6 */
FXCH( ST(1) ) /* F6 F7 */
FSTP_S( DST2 ) /* F7 */
FSTP_S( DST3 ) /* */
/* this label is not the target of any jump in this routine */
LLBL(x86_p2_gr_skip):
/* destination vertices are a fixed 16 bytes apart */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p2_gr_loop) )
LLBL(x86_p2_gr_done):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_perspective: transform 2-component points
 * by a perspective-type matrix, producing 4-component results.
 *
 * For every source point (x, y) = (SRC0, SRC1) it stores
 *   DST0 = MAT0 * x
 *   DST1 = MAT5 * y
 *   DST2 = MAT14         (copied as a raw 32-bit word)
 *   DST3 = 0.0f
 *
 * The destination vector gets the source count, size 4 and the
 * VEC_SIZE_4 flag.  Nothing is written when the source count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_perspective )
HIDDEN(_mesa_x86_transform_points2_perspective)
GLNAME( _mesa_x86_transform_points2_perspective ):
/* three registers are pushed below (12 bytes) */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p2_pr_done) )
/* EAX = source stride in bytes, added to ESI once per point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16: one past the last destination vertex */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
/* MAT14 is loop-invariant: keep its raw bits in EBX */
MOV_L( MAT14, EBX )
ALIGNTEXT16
LLBL(x86_p2_pr_loop):
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC1 ) /* F1 F4 */
FMUL_S( MAT5 )
FXCH( ST(1) ) /* F4 F1 */
FSTP_S( DST0 ) /* F1 */
FSTP_S( DST1 ) /* */
MOV_L( EBX, DST2 )
MOV_L( CONST(FP_ZERO), DST3 )
/* this label is not the target of any jump in this routine */
LLBL(x86_p2_pr_skip):
/* destination vertices are a fixed 16 bytes apart */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p2_pr_loop) )
LLBL(x86_p2_pr_done):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_3d: transform 2-component points by a 3D
 * matrix, producing 3-component results.
 *
 * For every source point (x, y) = (SRC0, SRC1) it stores
 *   DSTi = MATi * x + MAT(4+i) * y + MAT(12+i)      for i = 0..2
 * DST3 is not written.
 *
 * The destination vector gets the source count, size 3 and the
 * VEC_SIZE_3 flag.  Nothing is written when the source count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d )
HIDDEN(_mesa_x86_transform_points2_3d)
GLNAME( _mesa_x86_transform_points2_3d ):
/* two registers are pushed below (8 bytes) */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p2_3dr_done) )
/* EAX = source stride in bytes, added to ESI once per point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16: one past the last destination vertex */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL(x86_p2_3dr_loop):
/* F4..F6 = x * MAT0..MAT2 */
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC0 ) /* F5 F4 */
FMUL_S( MAT1 )
FLD_S( SRC0 ) /* F6 F5 F4 */
FMUL_S( MAT2 )
/* F0..F2 = y * MAT4..MAT6 */
FLD_S( SRC1 ) /* F0 F6 F5 F4 */
FMUL_S( MAT4 )
FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT5 )
FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT6 )
/* fold each y term into its matching x term: F4+=F0, F5+=F1, F6+=F2 */
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* add the translation terms MAT12..MAT14 */
FXCH( ST(2) ) /* F4 F5 F6 */
FADD_S( MAT12 )
FXCH( ST(1) ) /* F5 F4 F6 */
FADD_S( MAT13 )
FXCH( ST(2) ) /* F6 F4 F5 */
FADD_S( MAT14 )
FXCH( ST(1) ) /* F4 F6 F5 */
FSTP_S( DST0 ) /* F6 F5 */
FXCH( ST(1) ) /* F5 F6 */
FSTP_S( DST1 ) /* F6 */
FSTP_S( DST2 ) /* */
/* this label is not the target of any jump in this routine */
LLBL(x86_p2_3dr_skip):
/* destination vertices are a fixed 16 bytes apart */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p2_3dr_loop) )
LLBL(x86_p2_3dr_done):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_3d_no_rot: transform 2-component points by
 * a scale-and-translate 3D matrix, producing 3-component results.
 *
 * For every source point (x, y) = (SRC0, SRC1) it stores
 *   DST0 = MAT0 * x + MAT12
 *   DST1 = MAT5 * y + MAT13
 *   DST2 = MAT14         (copied as a raw 32-bit word)
 * DST3 is not written.
 *
 * The destination vector gets the source count, size 3 and the
 * VEC_SIZE_3 flag.  Nothing is written when the source count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_3d_no_rot )
HIDDEN(_mesa_x86_transform_points2_3d_no_rot)
GLNAME( _mesa_x86_transform_points2_3d_no_rot ):
/* three registers are pushed below (12 bytes) */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p2_3dnrr_done) )
/* EAX = source stride in bytes, added to ESI once per point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16: one past the last destination vertex */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
/* MAT14 is loop-invariant: keep its raw bits in EBX */
MOV_L( MAT14, EBX )
ALIGNTEXT16
LLBL(x86_p2_3dnrr_loop):
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC1 ) /* F1 F4 */
FMUL_S( MAT5 )
FXCH( ST(1) ) /* F4 F1 */
FADD_S( MAT12 )
/* F5 starts as MAT13 and then receives the y product F1 */
FLD_S( MAT13 ) /* F5 F4 F1 */
FXCH( ST(2) ) /* F1 F4 F5 */
FADDP( ST0, ST(2) ) /* F4 F5 */
FSTP_S( DST0 ) /* F5 */
FSTP_S( DST1 ) /* */
MOV_L( EBX, DST2 )
/* this label is not the target of any jump in this routine */
LLBL(x86_p2_3dnrr_skip):
/* destination vertices are a fixed 16 bytes apart */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p2_3dnrr_loop) )
LLBL(x86_p2_3dnrr_done):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_2d: transform 2-component points by a 2D
 * matrix, producing 2-component results.
 *
 * For every source point (x, y) = (SRC0, SRC1) it stores
 *   DST0 = MAT0 * x + MAT4 * y + MAT12
 *   DST1 = MAT1 * x + MAT5 * y + MAT13
 * DST2 and DST3 are not written.
 *
 * The destination vector gets the source count, size 2 and the
 * VEC_SIZE_2 flag.  Nothing is written when the source count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_2d )
HIDDEN(_mesa_x86_transform_points2_2d)
GLNAME( _mesa_x86_transform_points2_2d ):
/* two registers are pushed below (8 bytes) */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p2_2dr_done) )
/* EAX = source stride in bytes, added to ESI once per point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16: one past the last destination vertex */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL(x86_p2_2dr_loop):
/* F4, F5 = x * MAT0, MAT1; F0, F1 = y * MAT4, MAT5 */
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC0 ) /* F5 F4 */
FMUL_S( MAT1 )
FLD_S( SRC1 ) /* F0 F5 F4 */
FMUL_S( MAT4 )
FLD_S( SRC1 ) /* F1 F0 F5 F4 */
FMUL_S( MAT5 )
/* F4 += F0, F5 += F1 */
FXCH( ST(1) ) /* F0 F1 F5 F4 */
FADDP( ST0, ST(3) ) /* F1 F5 F4 */
FADDP( ST0, ST(1) ) /* F5 F4 */
/* add the translation terms MAT12 and MAT13 */
FXCH( ST(1) ) /* F4 F5 */
FADD_S( MAT12 )
FXCH( ST(1) ) /* F5 F4 */
FADD_S( MAT13 )
FXCH( ST(1) ) /* F4 F5 */
FSTP_S( DST0 ) /* F5 */
FSTP_S( DST1 ) /* */
/* this label is not the target of any jump in this routine */
LLBL(x86_p2_2dr_skip):
/* destination vertices are a fixed 16 bytes apart */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p2_2dr_loop) )
LLBL(x86_p2_2dr_done):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_2d_no_rot: transform 2-component points by
 * a scale-and-translate 2D matrix, producing 2-component results.
 *
 * For every source point (x, y) = (SRC0, SRC1) it stores
 *   DST0 = MAT0 * x + MAT12
 *   DST1 = MAT5 * y + MAT13
 * DST2 and DST3 are not written.
 *
 * The destination vector gets the source count, size 2 and the
 * VEC_SIZE_2 flag.  Nothing is written when the source count is zero.
 *
 * NOTE(review): this entry point uses ALIGNTEXT4 while the other routines
 * in this file use ALIGNTEXT16 - confirm whether that is intentional.
 */
ALIGNTEXT4
GLOBL GLNAME( _mesa_x86_transform_points2_2d_no_rot )
HIDDEN(_mesa_x86_transform_points2_2d_no_rot)
GLNAME( _mesa_x86_transform_points2_2d_no_rot ):
/* two registers are pushed below (8 bytes) */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p2_2dnrr_done) )
/* EAX = source stride in bytes, added to ESI once per point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16: one past the last destination vertex */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL(x86_p2_2dnrr_loop):
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC1 ) /* F1 F4 */
FMUL_S( MAT5 )
FXCH( ST(1) ) /* F4 F1 */
FADD_S( MAT12 )
/* F5 starts as MAT13 and then receives the y product F1 */
FLD_S( MAT13 ) /* F5 F4 F1 */
FXCH( ST(2) ) /* F1 F4 F5 */
FADDP( ST0, ST(2) ) /* F4 F5 */
FSTP_S( DST0 ) /* F5 */
FSTP_S( DST1 ) /* */
/* this label is not the target of any jump in this routine */
LLBL(x86_p2_2dnrr_skip):
/* destination vertices are a fixed 16 bytes apart */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p2_2dnrr_loop) )
LLBL(x86_p2_2dnrr_done):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points2_identity: copy 2-component points from the
 * source vector to the destination vector unchanged.
 *
 * For every source point the first two 32-bit words (SRC0, SRC1) are
 * copied to DST0 and DST1 with integer moves; DST2 and DST3 are not
 * written.  The matrix contents are never read.
 *
 * The destination vector gets the source count, size 2 and the
 * VEC_SIZE_2 flag.  Nothing is written when the source count is zero.
 * When the source and destination start pointers are equal the copy loop
 * is skipped, but count, size and flags have already been updated.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points2_identity )
HIDDEN(_mesa_x86_transform_points2_identity)
GLNAME( _mesa_x86_transform_points2_identity ):
/* three registers are pushed below (12 bytes) */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
/* the matrix pointer is loaded but EDX is only used as scratch below */
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p2_ir_done) )
/* EAX = source stride in bytes, added to ESI once per point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16: one past the last destination vertex */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
/* source data already sits at the destination: nothing to copy */
CMP_L( ESI, EDI )
JE( LLBL(x86_p2_ir_done) )
ALIGNTEXT16
LLBL(x86_p2_ir_loop):
MOV_L( SRC0, EBX )
MOV_L( SRC1, EDX )
MOV_L( EBX, DST0 )
MOV_L( EDX, DST1 )
/* this label is not the target of any jump in this routine */
LLBL(x86_p2_ir_skip):
/* destination vertices are a fixed 16 bytes apart */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p2_ir_loop) )
LLBL(x86_p2_ir_done):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

644
src/arch/x86/x86_xform3.S Normal file
View File

@ -0,0 +1,644 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
* in there will break the build on some platforms.
*/
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/*
 * Raw IEEE-754 single-precision bit patterns, for storing float constants
 * with integer moves: 1065353216 is 0x3f800000, i.e. 1.0f.
 */
#define FP_ONE 1065353216
#define FP_ZERO 0
/*
 * Operand shorthands.  SRCn and DSTn name the n-th float of the current
 * source and destination vertex (ESI and EDI); MATn names element n of
 * the 16-float matrix addressed by EDX.
 */
#define SRC0 REGOFF(0, ESI)
#define SRC1 REGOFF(4, ESI)
#define SRC2 REGOFF(8, ESI)
#define SRC3 REGOFF(12, ESI)
#define DST0 REGOFF(0, EDI)
#define DST1 REGOFF(4, EDI)
#define DST2 REGOFF(8, EDI)
#define DST3 REGOFF(12, EDI)
#define MAT0 REGOFF(0, EDX)
#define MAT1 REGOFF(4, EDX)
#define MAT2 REGOFF(8, EDX)
#define MAT3 REGOFF(12, EDX)
#define MAT4 REGOFF(16, EDX)
#define MAT5 REGOFF(20, EDX)
#define MAT6 REGOFF(24, EDX)
#define MAT7 REGOFF(28, EDX)
#define MAT8 REGOFF(32, EDX)
#define MAT9 REGOFF(36, EDX)
#define MAT10 REGOFF(40, EDX)
#define MAT11 REGOFF(44, EDX)
#define MAT12 REGOFF(48, EDX)
#define MAT13 REGOFF(52, EDX)
#define MAT14 REGOFF(56, EDX)
#define MAT15 REGOFF(60, EDX)
/*
 * _mesa_x86_transform_points3_general: transform 3-component points by a
 * general matrix, producing 4-component results.
 *
 * For every source point (x, y, z) = (SRC0, SRC1, SRC2) it stores
 *   DSTi = MATi * x + MAT(4+i) * y + MAT(8+i) * z + MAT(12+i)
 * for i = 0..3.
 *
 * The destination vector gets the source count, size 4 and the
 * VEC_SIZE_4 flag.  Nothing is written when the source count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_general )
HIDDEN(_mesa_x86_transform_points3_general)
GLNAME( _mesa_x86_transform_points3_general ):
/* two registers are pushed below (8 bytes) */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p3_gr_done) )
/* EAX = source stride in bytes, added to ESI once per point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16: one past the last destination vertex */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL(x86_p3_gr_loop):
/* F4..F7 = x * MAT0..MAT3 */
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC0 ) /* F5 F4 */
FMUL_S( MAT1 )
FLD_S( SRC0 ) /* F6 F5 F4 */
FMUL_S( MAT2 )
FLD_S( SRC0 ) /* F7 F6 F5 F4 */
FMUL_S( MAT3 )
/* F0..F3 = y * MAT4..MAT7; this fills all eight x87 registers */
FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT4 )
FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT5 )
FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT6 )
FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT7 )
/* fold the y terms into the accumulators: F4+=F0 .. F7+=F3 */
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* F0..F3 = z * MAT8..MAT11, folded in the same way */
FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT8 )
FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT9 )
FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT10 )
FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT11 )
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* add the translation column MAT12..MAT15 */
FXCH( ST(3) ) /* F4 F6 F5 F7 */
FADD_S( MAT12 )
FXCH( ST(2) ) /* F5 F6 F4 F7 */
FADD_S( MAT13 )
FXCH( ST(1) ) /* F6 F5 F4 F7 */
FADD_S( MAT14 )
FXCH( ST(3) ) /* F7 F5 F4 F6 */
FADD_S( MAT15 )
FXCH( ST(2) ) /* F4 F5 F7 F6 */
FSTP_S( DST0 ) /* F5 F7 F6 */
FSTP_S( DST1 ) /* F7 F6 */
FXCH( ST(1) ) /* F6 F7 */
FSTP_S( DST2 ) /* F7 */
FSTP_S( DST3 ) /* */
/* this label is not the target of any jump in this routine */
LLBL(x86_p3_gr_skip):
/* destination vertices are a fixed 16 bytes apart */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p3_gr_loop) )
LLBL(x86_p3_gr_done):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_perspective: transform 3-component points
 * by a perspective-type matrix, producing 4-component results.
 *
 * For every source point (x, y, z) = (SRC0, SRC1, SRC2) it stores
 *   DST0 = MAT0 * x + MAT8 * z
 *   DST1 = MAT5 * y + MAT9 * z
 *   DST2 = MAT10 * z + MAT14
 *   DST3 = -z            (sign bit of the raw word flipped)
 *
 * The destination vector gets the source count, size 4 and the
 * VEC_SIZE_4 flag.  Nothing is written when the source count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
HIDDEN(_mesa_x86_transform_points3_perspective)
GLNAME( _mesa_x86_transform_points3_perspective ):
/* three registers are pushed below (12 bytes) */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p3_pr_done) )
/* EAX = source stride in bytes, added to ESI once per point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16: one past the last destination vertex */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL(x86_p3_pr_loop):
/* F4 = x * MAT0, F5 = y * MAT5 */
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC1 ) /* F5 F4 */
FMUL_S( MAT5 )
/* F0..F2 = z * MAT8..MAT10 */
FLD_S( SRC2 ) /* F0 F5 F4 */
FMUL_S( MAT8 )
FLD_S( SRC2 ) /* F1 F0 F5 F4 */
FMUL_S( MAT9 )
FLD_S( SRC2 ) /* F2 F1 F0 F5 F4 */
FMUL_S( MAT10 )
/* F4 += F0, F5 += F1 */
FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */
FADDP( ST0, ST(4) ) /* F1 F2 F5 F4 */
FADDP( ST0, ST(2) ) /* F2 F5 F4 */
/* F6 starts as MAT14 and then receives the z product F2 */
FLD_S( MAT14 ) /* F6 F2 F5 F4 */
FXCH( ST(1) ) /* F2 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* -2147483648 is 0x80000000, the IEEE-754 sign bit */
MOV_L( SRC2, EBX )
XOR_L( CONST(-2147483648), EBX )/* change sign */
FXCH( ST(2) ) /* F4 F5 F6 */
FSTP_S( DST0 ) /* F5 F6 */
FSTP_S( DST1 ) /* F6 */
FSTP_S( DST2 ) /* */
MOV_L( EBX, DST3 )
/* this label is not the target of any jump in this routine */
LLBL(x86_p3_pr_skip):
/* destination vertices are a fixed 16 bytes apart */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p3_pr_loop) )
LLBL(x86_p3_pr_done):
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_3d: transform 3-component points by a 3D
 * matrix, producing 3-component results.
 *
 * For every source point (x, y, z) = (SRC0, SRC1, SRC2) it stores
 *   DSTi = MATi * x + MAT(4+i) * y + MAT(8+i) * z + MAT(12+i)
 * for i = 0..2.  DST3 is not written.
 *
 * The destination vector gets the source count, size 3 and the
 * VEC_SIZE_3 flag.  Nothing is written when the source count is zero.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d )
HIDDEN(_mesa_x86_transform_points3_3d)
GLNAME( _mesa_x86_transform_points3_3d ):
/* two registers are pushed below (8 bytes) */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p3_3dr_done) )
/* EAX = source stride in bytes, added to ESI once per point */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
/* ECX = dest start + count * 16: one past the last destination vertex */
SHL_L( CONST(4), ECX )
MOV_L( REGOFF(V4F_START, ESI), ESI )
MOV_L( REGOFF(V4F_START, EDI), EDI )
ADD_L( EDI, ECX )
ALIGNTEXT16
LLBL(x86_p3_3dr_loop):
/* F4..F6 = x * MAT0..MAT2 */
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC0 ) /* F5 F4 */
FMUL_S( MAT1 )
FLD_S( SRC0 ) /* F6 F5 F4 */
FMUL_S( MAT2 )
/* F0..F2 = y * MAT4..MAT6 */
FLD_S( SRC1 ) /* F0 F6 F5 F4 */
FMUL_S( MAT4 )
FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT5 )
FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT6 )
/* fold the y terms into the accumulators: F4+=F0, F5+=F1, F6+=F2 */
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* F0..F2 = z * MAT8..MAT10, folded in the same way */
FLD_S( SRC2 ) /* F0 F6 F5 F4 */
FMUL_S( MAT8 )
FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT9 )
FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT10 )
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* add the translation terms MAT12..MAT14 */
FXCH( ST(2) ) /* F4 F5 F6 */
FADD_S( MAT12 )
FXCH( ST(1) ) /* F5 F4 F6 */
FADD_S( MAT13 )
FXCH( ST(2) ) /* F6 F4 F5 */
FADD_S( MAT14 )
FXCH( ST(1) ) /* F4 F6 F5 */
FSTP_S( DST0 ) /* F6 F5 */
FXCH( ST(1) ) /* F5 F6 */
FSTP_S( DST1 ) /* F6 */
FSTP_S( DST2 ) /* */
/* this label is not the target of any jump in this routine */
LLBL(x86_p3_3dr_skip):
/* destination vertices are a fixed 16 bytes apart */
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p3_3dr_loop) )
LLBL(x86_p3_3dr_done):
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_3d_no_rot( dest, matrix, source )
 *
 * x87 loop that writes, for every source point:
 *   DST0 = SRC0*MAT0  + MAT12
 *   DST1 = SRC1*MAT5  + MAT13
 *   DST2 = SRC2*MAT10 + MAT14
 * DST3 is not written.  Before looping it ORs VEC_SIZE_3 into the dest
 * flags, copies the source count into the dest and sets the dest size to 3.
 * A zero count returns without touching the dest.
 *
 * Register use: ESI/EDI = source/dest data cursors, EDX = matrix,
 * EAX = source stride (added to ESI as a byte offset), ECX = dest end
 * address.  The dest cursor always advances by 16.
 * The Fn lists in the trailing comments track the x87 stack, top first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
HIDDEN(_mesa_x86_transform_points3_3d_no_rot)
GLNAME( _mesa_x86_transform_points3_3d_no_rot ):
/* Two registers (8 bytes) are pushed before the ARG_* macros are used. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p3_3dnrr_done) ) /* nothing to do for an empty source */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = per-point source step */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI now points at source data */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI now points at dest data */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (loop end) */
ALIGNTEXT16
LLBL(x86_p3_3dnrr_loop):
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC1 ) /* F1 F4 */
FMUL_S( MAT5 )
FLD_S( SRC2 ) /* F2 F1 F4 */
FMUL_S( MAT10 )
FXCH( ST(2) ) /* F4 F1 F2 */
FADD_S( MAT12 )
/* MAT13 and MAT14 are loaded as F5/F6 and the products added into them. */
FLD_S( MAT13 ) /* F5 F4 F1 F2 */
FXCH( ST(2) ) /* F1 F4 F5 F2 */
FADDP( ST0, ST(2) ) /* F4 F5 F2 */
FLD_S( MAT14 ) /* F6 F4 F5 F2 */
FXCH( ST(3) ) /* F2 F4 F5 F6 */
FADDP( ST0, ST(3) ) /* F4 F5 F6 */
FSTP_S( DST0 ) /* F5 F6 */
FSTP_S( DST1 ) /* F6 */
FSTP_S( DST2 ) /* */
/* The _skip label is not the target of any jump in this routine. */
LLBL(x86_p3_3dnrr_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p3_3dnrr_loop) )
LLBL(x86_p3_3dnrr_done):
/* Restore callee-pushed registers in reverse push order. */
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_2d( dest, matrix, source )
 *
 * x87 loop that writes, for every source point:
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + MAT13
 *   DST2 = SRC2 (copied bit-for-bit through EBX)
 * DST3 is not written.  Before looping it ORs VEC_SIZE_3 into the dest
 * flags, copies the source count into the dest and sets the dest size to 3.
 * A zero count returns without touching the dest.
 *
 * Register use: ESI/EDI = source/dest data cursors, EDX = matrix,
 * EAX = source stride (added to ESI as a byte offset), ECX = dest end
 * address, EBX = integer scratch.  The dest cursor always advances by 16.
 * The Fn lists in the trailing comments track the x87 stack, top first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d )
HIDDEN(_mesa_x86_transform_points3_2d)
GLNAME( _mesa_x86_transform_points3_2d ):
/* Three registers (12 bytes) are pushed before the ARG_* macros are used. */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p3_2dr_done) ) /* nothing to do for an empty source */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = per-point source step */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI now points at source data */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI now points at dest data */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (loop end) */
ALIGNTEXT16
LLBL(x86_p3_2dr_loop):
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC0 ) /* F5 F4 */
FMUL_S( MAT1 )
FLD_S( SRC1 ) /* F0 F5 F4 */
FMUL_S( MAT4 )
FLD_S( SRC1 ) /* F1 F0 F5 F4 */
FMUL_S( MAT5 )
FXCH( ST(1) ) /* F0 F1 F5 F4 */
FADDP( ST0, ST(3) ) /* F1 F5 F4 */
FADDP( ST0, ST(1) ) /* F5 F4 */
FXCH( ST(1) ) /* F4 F5 */
FADD_S( MAT12 )
FXCH( ST(1) ) /* F5 F4 */
FADD_S( MAT13 )
/* SRC2 is read as raw bits before any DST store and passed through. */
MOV_L( SRC2, EBX )
FXCH( ST(1) ) /* F4 F5 */
FSTP_S( DST0 ) /* F5 */
FSTP_S( DST1 ) /* */
MOV_L( EBX, DST2 )
/* The _skip label is not the target of any jump in this routine. */
LLBL(x86_p3_2dr_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p3_2dr_loop) )
LLBL(x86_p3_2dr_done):
/* Restore callee-pushed registers in reverse push order. */
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_2d_no_rot( dest, matrix, source )
 *
 * x87 loop that writes, for every source point:
 *   DST0 = SRC0*MAT0 + MAT12
 *   DST1 = SRC1*MAT5 + MAT13
 *   DST2 = SRC2 (copied bit-for-bit through EBX)
 * DST3 is not written.  Before looping it ORs VEC_SIZE_3 into the dest
 * flags, copies the source count into the dest and sets the dest size to 3.
 * A zero count returns without touching the dest.
 *
 * Register use: ESI/EDI = source/dest data cursors, EDX = matrix,
 * EAX = source stride (added to ESI as a byte offset), ECX = dest end
 * address, EBX = integer scratch.  The dest cursor always advances by 16.
 * The Fn lists in the trailing comments track the x87 stack, top first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
HIDDEN(_mesa_x86_transform_points3_2d_no_rot)
GLNAME( _mesa_x86_transform_points3_2d_no_rot ):
/* Three registers (12 bytes) are pushed before the ARG_* macros are used. */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p3_2dnrr_done) ) /* nothing to do for an empty source */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = per-point source step */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI now points at source data */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI now points at dest data */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (loop end) */
ALIGNTEXT16
LLBL(x86_p3_2dnrr_loop):
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC1 ) /* F1 F4 */
FMUL_S( MAT5 )
FXCH( ST(1) ) /* F4 F1 */
FADD_S( MAT12 )
/* MAT13 is loaded as F5 and the SRC1 product added into it. */
FLD_S( MAT13 ) /* F5 F4 F1 */
FXCH( ST(2) ) /* F1 F4 F5 */
FADDP( ST0, ST(2) ) /* F4 F5 */
/* SRC2 is read as raw bits before any DST store and passed through. */
MOV_L( SRC2, EBX )
FSTP_S( DST0 ) /* F5 */
FSTP_S( DST1 ) /* */
MOV_L( EBX, DST2 )
/* The _skip label is not the target of any jump in this routine. */
LLBL(x86_p3_2dnrr_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p3_2dnrr_loop) )
LLBL(x86_p3_2dnrr_done):
/* Restore callee-pushed registers in reverse push order. */
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points3_identity( dest, matrix, source )
 *
 * Copies SRC0..SRC2 of every source point to DST0..DST2 as raw 32-bit
 * words; DST3 is not written and the matrix contents are never read.
 * Before looping it ORs VEC_SIZE_3 into the dest flags, copies the source
 * count into the dest and sets the dest size to 3.  A zero count returns
 * without touching the dest.  If the source and dest data start pointers
 * are equal the copy loop is skipped (only the start pointers are
 * compared; the dest header has already been updated at that point).
 *
 * Register use: ESI/EDI = source/dest data cursors, EAX = source stride
 * (added to ESI as a byte offset), ECX = dest end address, EBX/EBP/EDX =
 * copy scratch.  EDX is first loaded with the matrix argument and then
 * overwritten inside the loop.  The dest cursor always advances by 16.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points3_identity )
HIDDEN(_mesa_x86_transform_points3_identity)
GLNAME(_mesa_x86_transform_points3_identity ):
/* Four registers (16 bytes) are pushed before the ARG_* macros are used. */
#define FRAME_OFFSET 16
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
PUSH_L( EBP )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p3_ir_done) ) /* nothing to do for an empty source */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = per-point source step */
OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI now points at source data */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI now points at dest data */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (loop end) */
CMP_L( ESI, EDI )
JE( LLBL(x86_p3_ir_done) ) /* same data pointer: skip the copy */
ALIGNTEXT16
LLBL(x86_p3_ir_loop):
/* The integer copy is the active path; the x87 variant is compiled out. */
#if 1
MOV_L( SRC0, EBX )
MOV_L( SRC1, EBP )
MOV_L( SRC2, EDX )
MOV_L( EBX, DST0 )
MOV_L( EBP, DST1 )
MOV_L( EDX, DST2 )
#else
FLD_S( SRC0 )
FLD_S( SRC1 )
FLD_S( SRC2 )
FSTP_S( DST2 )
FSTP_S( DST1 )
FSTP_S( DST0 )
#endif
/* The _skip label is not the target of any jump in this routine. */
LLBL(x86_p3_ir_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p3_ir_loop) )
LLBL(x86_p3_ir_done):
/* Restore callee-pushed registers in reverse push order. */
POP_L( EBP )
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
/*
 * On ELF/Linux builds, emit an empty .note.GNU-stack section (GNU
 * toolchain convention for objects that do not need an executable stack).
 */
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

677
src/arch/x86/x86_xform4.S Normal file
View File

@ -0,0 +1,677 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* NOTE: Avoid using spaces in between '(' ')' and arguments, especially
* with macros like CONST, LLBL that expand to CONCAT(...). Putting spaces
* in there will break the build on some platforms.
*/
#include "assyntax.h"
#include "matypes.h"
#include "xform_args.h"
SEG_TEXT
/*
 * IEEE-754 single-precision bit patterns written as integers:
 * 1065353216 is 0x3F800000 (1.0f) and 0 is +0.0f.  Neither macro is
 * referenced by the routines that follow in this file.
 */
#define FP_ONE 1065353216
#define FP_ZERO 0
/* 32-bit elements 0..3 (byte offsets 0, 4, 8, 12) of the source point at ESI. */
#define SRC0 REGOFF(0, ESI)
#define SRC1 REGOFF(4, ESI)
#define SRC2 REGOFF(8, ESI)
#define SRC3 REGOFF(12, ESI)
/* 32-bit elements 0..3 (byte offsets 0, 4, 8, 12) of the dest point at EDI. */
#define DST0 REGOFF(0, EDI)
#define DST1 REGOFF(4, EDI)
#define DST2 REGOFF(8, EDI)
#define DST3 REGOFF(12, EDI)
/*
 * MATn is the n-th 32-bit element (byte offset 4*n) of the 16-element
 * matrix addressed by EDX.
 */
#define MAT0 REGOFF(0, EDX)
#define MAT1 REGOFF(4, EDX)
#define MAT2 REGOFF(8, EDX)
#define MAT3 REGOFF(12, EDX)
#define MAT4 REGOFF(16, EDX)
#define MAT5 REGOFF(20, EDX)
#define MAT6 REGOFF(24, EDX)
#define MAT7 REGOFF(28, EDX)
#define MAT8 REGOFF(32, EDX)
#define MAT9 REGOFF(36, EDX)
#define MAT10 REGOFF(40, EDX)
#define MAT11 REGOFF(44, EDX)
#define MAT12 REGOFF(48, EDX)
#define MAT13 REGOFF(52, EDX)
#define MAT14 REGOFF(56, EDX)
#define MAT15 REGOFF(60, EDX)
/*
 * _mesa_x86_transform_points4_general( dest, matrix, source )
 *
 * x87 loop that writes, for every source point and for i = 0..3:
 *   DSTi = SRC0*MAT(i) + SRC1*MAT(4+i) + SRC2*MAT(8+i) + SRC3*MAT(12+i)
 * Before looping it ORs VEC_SIZE_4 into the dest flags, copies the source
 * count into the dest and sets the dest size to 4.  A zero count returns
 * without touching the dest.
 *
 * Register use: ESI/EDI = source/dest data cursors, EDX = matrix,
 * EAX = source stride (added to ESI as a byte offset), ECX = dest end
 * address.  The dest cursor always advances by 16.
 * The Fn lists in the trailing comments track the x87 stack, top first;
 * all eight x87 registers are in use at the deepest point.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_general )
HIDDEN(_mesa_x86_transform_points4_general)
GLNAME( _mesa_x86_transform_points4_general ):
/* Two registers (8 bytes) are pushed before the ARG_* macros are used. */
#define FRAME_OFFSET 8
PUSH_L( ESI )
PUSH_L( EDI )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p4_gr_done) ) /* nothing to do for an empty source */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = per-point source step */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI now points at source data */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI now points at dest data */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (loop end) */
ALIGNTEXT16
LLBL(x86_p4_gr_loop):
/* SRC0 products seed the four accumulators F4..F7. */
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC0 ) /* F5 F4 */
FMUL_S( MAT1 )
FLD_S( SRC0 ) /* F6 F5 F4 */
FMUL_S( MAT2 )
FLD_S( SRC0 ) /* F7 F6 F5 F4 */
FMUL_S( MAT3 )
/* SRC1 products are folded into the accumulators. */
FLD_S( SRC1 ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT4 )
FLD_S( SRC1 ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT5 )
FLD_S( SRC1 ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT6 )
FLD_S( SRC1 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT7 )
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* SRC2 products are folded into the accumulators. */
FLD_S( SRC2 ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT8 )
FLD_S( SRC2 ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT9 )
FLD_S( SRC2 ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT10 )
FLD_S( SRC2 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT11 )
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* SRC3 products are folded into the accumulators. */
FLD_S( SRC3 ) /* F0 F7 F6 F5 F4 */
FMUL_S( MAT12 )
FLD_S( SRC3 ) /* F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT13 )
FLD_S( SRC3 ) /* F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT14 )
FLD_S( SRC3 ) /* F3 F2 F1 F0 F7 F6 F5 F4 */
FMUL_S( MAT15 )
FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */
FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F2 F3 F7 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F3 F7 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F7 F6 F5 F4 */
/* Reorder so the sums pop in DST0..DST3 order. */
FXCH( ST(3) ) /* F4 F6 F5 F7 */
FSTP_S( DST0 ) /* F6 F5 F7 */
FXCH( ST(1) ) /* F5 F6 F7 */
FSTP_S( DST1 ) /* F6 F7 */
FSTP_S( DST2 ) /* F7 */
FSTP_S( DST3 ) /* */
/* The _skip label is not the target of any jump in this routine. */
LLBL(x86_p4_gr_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p4_gr_loop) )
LLBL(x86_p4_gr_done):
/* Restore callee-pushed registers in reverse push order. */
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_perspective( dest, matrix, source )
 *
 * x87 loop that writes, for every source point:
 *   DST0 = SRC0*MAT0  + SRC2*MAT8
 *   DST1 = SRC1*MAT5  + SRC2*MAT9
 *   DST2 = SRC2*MAT10 + SRC3*MAT14
 *   DST3 = SRC2 with bit 31 flipped (done as an integer XOR in EBX)
 * Before looping it ORs VEC_SIZE_4 into the dest flags, copies the source
 * count into the dest and sets the dest size to 4.  A zero count returns
 * without touching the dest.
 *
 * Register use: ESI/EDI = source/dest data cursors, EDX = matrix,
 * EAX = source stride (added to ESI as a byte offset), ECX = dest end
 * address, EBX = integer scratch.  The dest cursor always advances by 16.
 * The Fn lists in the trailing comments track the x87 stack, top first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_perspective )
HIDDEN(_mesa_x86_transform_points4_perspective)
GLNAME( _mesa_x86_transform_points4_perspective ):
/* Three registers (12 bytes) are pushed before the ARG_* macros are used. */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p4_pr_done) ) /* nothing to do for an empty source */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = per-point source step */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI now points at source data */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI now points at dest data */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (loop end) */
ALIGNTEXT16
LLBL(x86_p4_pr_loop):
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC1 ) /* F5 F4 */
FMUL_S( MAT5 )
FLD_S( SRC2 ) /* F0 F5 F4 */
FMUL_S( MAT8 )
FLD_S( SRC2 ) /* F1 F0 F5 F4 */
FMUL_S( MAT9 )
FLD_S( SRC2 ) /* F6 F1 F0 F5 F4 */
FMUL_S( MAT10 )
FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */
FADDP( ST0, ST(4) ) /* F1 F6 F5 F4 */
FADDP( ST0, ST(2) ) /* F6 F5 F4 */
FLD_S( SRC3 ) /* F2 F6 F5 F4 */
FMUL_S( MAT14 )
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* Fetch SRC2 as raw bits before any DST store, then flip its top bit. */
MOV_L( SRC2, EBX )
XOR_L( CONST(-2147483648), EBX )/* change sign */
FXCH( ST(2) ) /* F4 F5 F6 */
FSTP_S( DST0 ) /* F5 F6 */
FSTP_S( DST1 ) /* F6 */
FSTP_S( DST2 ) /* */
MOV_L( EBX, DST3 )
/* The _skip label is not the target of any jump in this routine. */
LLBL(x86_p4_pr_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p4_pr_loop) )
LLBL(x86_p4_pr_done):
/* Restore callee-pushed registers in reverse push order. */
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_3d( dest, matrix, source )
 *
 * x87 loop that writes, for every source point:
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + SRC2*MAT8  + SRC3*MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + SRC2*MAT9  + SRC3*MAT13
 *   DST2 = SRC0*MAT2 + SRC1*MAT6 + SRC2*MAT10 + SRC3*MAT14
 *   DST3 = SRC3 (copied bit-for-bit through EBX)
 * Before looping it ORs VEC_SIZE_4 into the dest flags, copies the source
 * count into the dest and sets the dest size to 4.  A zero count returns
 * without touching the dest.
 *
 * Register use: ESI/EDI = source/dest data cursors, EDX = matrix,
 * EAX = source stride (added to ESI as a byte offset), ECX = dest end
 * address, EBX = integer scratch.  The dest cursor always advances by 16.
 * The Fn lists in the trailing comments track the x87 stack, top first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_3d )
HIDDEN(_mesa_x86_transform_points4_3d)
GLNAME( _mesa_x86_transform_points4_3d ):
/* Three registers (12 bytes) are pushed before the ARG_* macros are used. */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p4_3dr_done) ) /* nothing to do for an empty source */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = per-point source step */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI now points at source data */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI now points at dest data */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (loop end) */
ALIGNTEXT16
LLBL(x86_p4_3dr_loop):
/* SRC0 products seed the three accumulators F4, F5, F6. */
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC0 ) /* F5 F4 */
FMUL_S( MAT1 )
FLD_S( SRC0 ) /* F6 F5 F4 */
FMUL_S( MAT2 )
/* SRC1 products are folded into the accumulators. */
FLD_S( SRC1 ) /* F0 F6 F5 F4 */
FMUL_S( MAT4 )
FLD_S( SRC1 ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT5 )
FLD_S( SRC1 ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT6 )
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* SRC2 products are folded into the accumulators. */
FLD_S( SRC2 ) /* F0 F6 F5 F4 */
FMUL_S( MAT8 )
FLD_S( SRC2 ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT9 )
FLD_S( SRC2 ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT10 )
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* SRC3 products are folded into the accumulators. */
FLD_S( SRC3 ) /* F0 F6 F5 F4 */
FMUL_S( MAT12 )
FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT13 )
FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT14 )
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* SRC3 is read as raw bits before any DST store and passed through. */
MOV_L( SRC3, EBX )
FXCH( ST(2) ) /* F4 F5 F6 */
FSTP_S( DST0 ) /* F5 F6 */
FSTP_S( DST1 ) /* F6 */
FSTP_S( DST2 ) /* */
MOV_L( EBX, DST3 )
/* The _skip label is not the target of any jump in this routine. */
LLBL(x86_p4_3dr_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p4_3dr_loop) )
LLBL(x86_p4_3dr_done):
/* Restore callee-pushed registers in reverse push order. */
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_3d_no_rot( dest, matrix, source )
 *
 * x87 loop that writes, for every source point:
 *   DST0 = SRC0*MAT0  + SRC3*MAT12
 *   DST1 = SRC1*MAT5  + SRC3*MAT13
 *   DST2 = SRC2*MAT10 + SRC3*MAT14
 *   DST3 = SRC3 (copied bit-for-bit through EBX)
 * Before looping it ORs VEC_SIZE_4 into the dest flags, copies the source
 * count into the dest and sets the dest size to 4.  A zero count returns
 * without touching the dest.
 *
 * Register use: ESI/EDI = source/dest data cursors, EDX = matrix,
 * EAX = source stride (added to ESI as a byte offset), ECX = dest end
 * address, EBX = integer scratch.  The dest cursor always advances by 16.
 * The Fn lists in the trailing comments track the x87 stack, top first.
 */
ALIGNTEXT16
GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
GLNAME(_mesa_x86_transform_points4_3d_no_rot):
/* Three registers (12 bytes) are pushed before the ARG_* macros are used. */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p4_3dnrr_done) ) /* nothing to do for an empty source */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = per-point source step */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI now points at source data */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI now points at dest data */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (loop end) */
ALIGNTEXT16
LLBL(x86_p4_3dnrr_loop):
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC1 ) /* F5 F4 */
FMUL_S( MAT5 )
FLD_S( SRC2 ) /* F6 F5 F4 */
FMUL_S( MAT10 )
/* SRC3 products are folded into the three accumulators. */
FLD_S( SRC3 ) /* F0 F6 F5 F4 */
FMUL_S( MAT12 )
FLD_S( SRC3 ) /* F1 F0 F6 F5 F4 */
FMUL_S( MAT13 )
FLD_S( SRC3 ) /* F2 F1 F0 F6 F5 F4 */
FMUL_S( MAT14 )
FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */
FADDP( ST0, ST(5) ) /* F1 F2 F6 F5 F4 */
FADDP( ST0, ST(3) ) /* F2 F6 F5 F4 */
FADDP( ST0, ST(1) ) /* F6 F5 F4 */
/* SRC3 is read as raw bits before any DST store and passed through. */
MOV_L( SRC3, EBX )
FXCH( ST(2) ) /* F4 F5 F6 */
FSTP_S( DST0 ) /* F5 F6 */
FSTP_S( DST1 ) /* F6 */
FSTP_S( DST2 ) /* */
MOV_L( EBX, DST3 )
/* The _skip label is not the target of any jump in this routine. */
LLBL(x86_p4_3dnrr_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p4_3dnrr_loop) )
LLBL(x86_p4_3dnrr_done):
/* Restore callee-pushed registers in reverse push order. */
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_2d( dest, matrix, source )
 *
 * x87 loop that writes, for every source point:
 *   DST0 = SRC0*MAT0 + SRC1*MAT4 + SRC3*MAT12
 *   DST1 = SRC0*MAT1 + SRC1*MAT5 + SRC3*MAT13
 *   DST2 = SRC2 (copied bit-for-bit through EBX)
 *   DST3 = SRC3 (copied bit-for-bit through EBP)
 * Before looping it ORs VEC_SIZE_4 into the dest flags, copies the source
 * count into the dest and sets the dest size to 4.  A zero count returns
 * without touching the dest.
 *
 * Register use: ESI/EDI = source/dest data cursors, EDX = matrix,
 * EAX = source stride (added to ESI as a byte offset), ECX = dest end
 * address, EBX/EBP = integer scratch.  The dest cursor always advances
 * by 16.
 * The Fn lists in the trailing comments track the x87 stack, top first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d )
HIDDEN(_mesa_x86_transform_points4_2d)
GLNAME( _mesa_x86_transform_points4_2d ):
/* Four registers (16 bytes) are pushed before the ARG_* macros are used. */
#define FRAME_OFFSET 16
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
PUSH_L( EBP )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p4_2dr_done) ) /* nothing to do for an empty source */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = per-point source step */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI now points at source data */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI now points at dest data */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (loop end) */
ALIGNTEXT16
LLBL(x86_p4_2dr_loop):
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC0 ) /* F5 F4 */
FMUL_S( MAT1 )
FLD_S( SRC1 ) /* F0 F5 F4 */
FMUL_S( MAT4 )
FLD_S( SRC1 ) /* F1 F0 F5 F4 */
FMUL_S( MAT5 )
FXCH( ST(1) ) /* F0 F1 F5 F4 */
FADDP( ST0, ST(3) ) /* F1 F5 F4 */
FADDP( ST0, ST(1) ) /* F5 F4 */
/* SRC3 products are folded into the two accumulators. */
FLD_S( SRC3 ) /* F0 F5 F4 */
FMUL_S( MAT12 )
FLD_S( SRC3 ) /* F1 F0 F5 F4 */
FMUL_S( MAT13 )
FXCH( ST(1) ) /* F0 F1 F5 F4 */
FADDP( ST0, ST(3) ) /* F1 F5 F4 */
FADDP( ST0, ST(1) ) /* F5 F4 */
/* SRC2/SRC3 are read as raw bits before any DST store and passed through. */
MOV_L( SRC2, EBX )
MOV_L( SRC3, EBP )
FXCH( ST(1) ) /* F4 F5 */
FSTP_S( DST0 ) /* F5 */
FSTP_S( DST1 ) /* */
MOV_L( EBX, DST2 )
MOV_L( EBP, DST3 )
/* The _skip label is not the target of any jump in this routine. */
LLBL(x86_p4_2dr_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p4_2dr_loop) )
LLBL(x86_p4_2dr_done):
/* Restore callee-pushed registers in reverse push order. */
POP_L( EBP )
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_2d_no_rot( dest, matrix, source )
 *
 * x87 loop that writes, for every source point:
 *   DST0 = SRC0*MAT0 + SRC3*MAT12
 *   DST1 = SRC1*MAT5 + SRC3*MAT13
 *   DST2 = SRC2 (copied bit-for-bit through EBX)
 *   DST3 = SRC3 (copied bit-for-bit through EBP)
 * Before looping it ORs VEC_SIZE_4 into the dest flags, copies the source
 * count into the dest and sets the dest size to 4.  A zero count returns
 * without touching the dest.
 *
 * Register use: ESI/EDI = source/dest data cursors, EDX = matrix,
 * EAX = source stride (added to ESI as a byte offset), ECX = dest end
 * address, EBX/EBP = integer scratch.  The dest cursor always advances
 * by 16.
 * The Fn lists in the trailing comments track the x87 stack, top first.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_2d_no_rot )
HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
GLNAME( _mesa_x86_transform_points4_2d_no_rot ):
/* Four registers (16 bytes) are pushed before the ARG_* macros are used. */
#define FRAME_OFFSET 16
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
PUSH_L( EBP )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p4_2dnrr_done) ) /* nothing to do for an empty source */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = per-point source step */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI now points at source data */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI now points at dest data */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (loop end) */
ALIGNTEXT16
LLBL(x86_p4_2dnrr_loop):
FLD_S( SRC0 ) /* F4 */
FMUL_S( MAT0 )
FLD_S( SRC1 ) /* F5 F4 */
FMUL_S( MAT5 )
/* SRC3 products are folded into the two accumulators. */
FLD_S( SRC3 ) /* F0 F5 F4 */
FMUL_S( MAT12 )
FLD_S( SRC3 ) /* F1 F0 F5 F4 */
FMUL_S( MAT13 )
FXCH( ST(1) ) /* F0 F1 F5 F4 */
FADDP( ST0, ST(3) ) /* F1 F5 F4 */
FADDP( ST0, ST(1) ) /* F5 F4 */
/* SRC2/SRC3 are read as raw bits before any DST store and passed through. */
MOV_L( SRC2, EBX )
MOV_L( SRC3, EBP )
FXCH( ST(1) ) /* F4 F5 */
FSTP_S( DST0 ) /* F5 */
FSTP_S( DST1 ) /* */
MOV_L( EBX, DST2 )
MOV_L( EBP, DST3 )
/* The _skip label is not the target of any jump in this routine. */
LLBL(x86_p4_2dnrr_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p4_2dnrr_loop) )
LLBL(x86_p4_2dnrr_done):
/* Restore callee-pushed registers in reverse push order. */
POP_L( EBP )
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
#undef FRAME_OFFSET
/*
 * _mesa_x86_transform_points4_identity( dest, matrix, source )
 *
 * Copies SRC0..SRC3 of every source point to DST0..DST3 as raw 32-bit
 * words; the matrix contents are never read.  Before looping it ORs
 * VEC_SIZE_4 into the dest flags, copies the source count into the dest
 * and sets the dest size to 4.  A zero count returns without touching the
 * dest.  If the source and dest data start pointers are equal the copy
 * loop is skipped (only the start pointers are compared; the dest header
 * has already been updated at that point).
 *
 * Register use: ESI/EDI = source/dest data cursors, EAX = source stride
 * (added to ESI as a byte offset), ECX = dest end address, EBX/EDX = copy
 * scratch.  EDX is first loaded with the matrix argument and then
 * overwritten inside the loop.  The dest cursor always advances by 16.
 */
ALIGNTEXT16
GLOBL GLNAME( _mesa_x86_transform_points4_identity )
HIDDEN(_mesa_x86_transform_points4_identity)
GLNAME( _mesa_x86_transform_points4_identity ):
/* Three registers (12 bytes) are pushed before the ARG_* macros are used. */
#define FRAME_OFFSET 12
PUSH_L( ESI )
PUSH_L( EDI )
PUSH_L( EBX )
MOV_L( ARG_SOURCE, ESI )
MOV_L( ARG_DEST, EDI )
MOV_L( ARG_MATRIX, EDX )
MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
TEST_L( ECX, ECX )
JZ( LLBL(x86_p4_ir_done) ) /* nothing to do for an empty source */
MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* EAX = per-point source step */
OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
SHL_L( CONST(4), ECX ) /* ECX = count * 16 */
MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI now points at source data */
MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI now points at dest data */
ADD_L( EDI, ECX ) /* ECX = dest start + count*16 (loop end) */
CMP_L( ESI, EDI )
JE( LLBL(x86_p4_ir_done) ) /* same data pointer: skip the copy */
ALIGNTEXT16
LLBL(x86_p4_ir_loop):
/* Two words at a time: elements 0/1 first, then elements 2/3. */
MOV_L( SRC0, EBX )
MOV_L( SRC1, EDX )
MOV_L( EBX, DST0 )
MOV_L( EDX, DST1 )
MOV_L( SRC2, EBX )
MOV_L( SRC3, EDX )
MOV_L( EBX, DST2 )
MOV_L( EDX, DST3 )
/* The _skip label is not the target of any jump in this routine. */
LLBL(x86_p4_ir_skip):
ADD_L( CONST(16), EDI )
ADD_L( EAX, ESI )
CMP_L( ECX, EDI )
JNE( LLBL(x86_p4_ir_loop) )
LLBL(x86_p4_ir_done):
/* Restore callee-pushed registers in reverse push order. */
POP_L( EBX )
POP_L( EDI )
POP_L( ESI )
RET
/*
 * On ELF/Linux builds, emit an empty .note.GNU-stack section (GNU
 * toolchain convention for objects that do not need an executable stack).
 */
#if defined (__ELF__) && defined (__linux__)
.section .note.GNU-stack,"",%progbits
#endif

51
src/arch/x86/xform_args.h Normal file
View File

@ -0,0 +1,51 @@
/*
* Mesa 3-D graphics library
* Version: 3.5
*
* Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Transform function interface for assembly code. Simply define
* FRAME_OFFSET to the number of bytes pushed onto the stack before
* using the ARG_* argument macros.
*
* Gareth Hughes
*/
#ifndef __XFORM_ARGS_H__
#define __XFORM_ARGS_H__
/*
 * Offsets for transform_func arguments
 *
 * typedef void (*transform_func)( GLvector4f *to_vec,
 *                                 const GLfloat m[16],
 *                                 const GLvector4f *from_vec );
 *
 * The OFFSET_* values are byte offsets from ESP as it stands before the
 * routine pushes anything, and follow the parameter order above (dest,
 * matrix, source).  Offset 0 is not named here; presumably it holds the
 * return address on a 32-bit cdecl stack -- confirm against the target ABI.
 */
#define OFFSET_DEST 4
#define OFFSET_MATRIX 8
#define OFFSET_SOURCE 12
/*
 * ESP-relative operands for each argument once FRAME_OFFSET bytes have
 * been pushed.  FRAME_OFFSET is not defined in this header: the including
 * file must #define it to match its own pushes before expanding any ARG_*
 * macro.
 */
#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP)
#define ARG_MATRIX REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP)
#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP)
#endif