FP/VP assembler project

Technical discussion on the newly released and hard to find PS3.

Moderators: cheriff, emoon

Post Reply
cnlohr
Posts: 24
Joined: Sun Feb 05, 2006 1:57 pm

FP/VP assembler project

Post by cnlohr »

OK, it looks like the next imperative step is to write the assembler for the shading programs. Since I'm new at this, it's a critical step, I've become much busier, and it would take me much longer than the other devs, I'll rescind my request to do this project.

So far it appears there are two really important parts of the headers that are necessary. 1) The definitions and 2) the bit-packed structures. Nouveau has both, but I can only seem to find #1 right now, I'll post #2 as soon as I find it.

*EDIT* NOTE: This list is incomplete for all hardware. It was only the basic stuff out of nouveau_shader.h, we should include nv20_shader.h nv30_shader.h, etc. info as well. Differences are clear in the Vertex shading part. This mostly just deals with the fragment shading part.

So, without further ado, below is the general header for defining the many commands and the different command classes (jump, inst, etc.).

The header:

Code: Select all

#ifndef _SRECOMP_H
#define _SRECOMP_H

//This file contains tons of structures and stuff that
//has been ripped from Nouveau.

/* Forward declaration: the per-command structs below hold pointers to
 * struct _nvs_command_set, which is only defined at the end of this header. */
struct _nvs_command_set;

/* Register-file selector; this is the type of nvsRegister.file.
 * Values are consecutive starting at 0. */
typedef enum {
   NVS_FILE_NONE    = 0,
   NVS_FILE_TEMP    = 1,
   NVS_FILE_ATTRIB  = 2,
   NVS_FILE_CONST   = 3,
   NVS_FILE_RESULT  = 4,
   NVS_FILE_ADDRESS = 5,
   NVS_FILE_UNKNOWN = 6
} nvsRegFile;

/* Swizzle component selector: X, Y, Z, W numbered 0..3. */
typedef enum {
   NVS_SWZ_X = 0,   /* anchor; the remaining components count up from here */
   NVS_SWZ_Y,
   NVS_SWZ_Z,
   NVS_SWZ_W
} nvsSwzComp;

/*
 * One register operand. nvsInstruction uses this type for both its
 * destination and its sources.
 * NOTE(review): the per-field notes below are inferred from the member
 * names and types only -- confirm against the code that fills them in.
 */
typedef struct {
   /* which register file, and the register number within it */
   nvsRegFile	file;
   unsigned int		index;

   /* presumably: indexed != 0 means the operand is addressed relative to
    * component addr_comp of address register addr_reg -- TODO confirm */
   unsigned int		indexed;
   unsigned int		addr_reg;
   nvsSwzComp		addr_comp;

   /* one component selector per swizzle slot */
   nvsSwzComp		swizzle[4];
   /* presumably boolean operand modifiers (negate / absolute value) */
   int			negate;
   int			abs;
} nvsRegister;

/*
 * Opcode identifiers. NVS_OP_UNKNOWN is 0 and every following enumerator
 * takes the next consecutive value, so the order of this list is
 * significant: the opcode table in the accompanying .c file is indexed by
 * these values through designated initializers.
 */
typedef enum {
   NVS_OP_UNKNOWN = 0,
   NVS_OP_NOP,
   NVS_OP_ABS, NVS_OP_ADD, NVS_OP_ARA, NVS_OP_ARL, NVS_OP_ARR,
   NVS_OP_BRA, NVS_OP_BRK,
   NVS_OP_CAL, NVS_OP_CMP, NVS_OP_COS,
   NVS_OP_DDX, NVS_OP_DDY, NVS_OP_DIV, NVS_OP_DP2, NVS_OP_DP2A, NVS_OP_DP3,
   NVS_OP_DP4, NVS_OP_DPH, NVS_OP_DST,
   NVS_OP_EX2, NVS_OP_EXP,
   NVS_OP_FLR, NVS_OP_FRC,
   NVS_OP_IF,
   NVS_OP_KIL,
   NVS_OP_LG2, NVS_OP_LIT, NVS_OP_LOG, NVS_OP_LOOP, NVS_OP_LRP,
   NVS_OP_MAD, NVS_OP_MAX, NVS_OP_MIN, NVS_OP_MOV, NVS_OP_MUL,
   NVS_OP_NRM,
   NVS_OP_PK2H, NVS_OP_PK2US, NVS_OP_PK4B, NVS_OP_PK4UB, NVS_OP_POW,
   NVS_OP_POPA, NVS_OP_PUSHA,
   NVS_OP_RCC, NVS_OP_RCP, NVS_OP_REP, NVS_OP_RET, NVS_OP_RFL, NVS_OP_RSQ,
   NVS_OP_SCS, NVS_OP_SEQ, NVS_OP_SFL, NVS_OP_SGE, NVS_OP_SGT, NVS_OP_SIN,
   NVS_OP_SLE, NVS_OP_SLT, NVS_OP_SNE, NVS_OP_SSG, NVS_OP_STR, NVS_OP_SUB,
   NVS_OP_SWZ,
   NVS_OP_TEX, NVS_OP_TXB, NVS_OP_TXD, NVS_OP_TXL, NVS_OP_TXP,
   NVS_OP_UP2H, NVS_OP_UP2US, NVS_OP_UP4B, NVS_OP_UP4UB,
   NVS_OP_X2D, NVS_OP_XPD,
   NVS_OP_EMUL
} nvsOpcode;

/* Precision selector. The values are consecutive from 0 and are used as
 * indices into the precision-suffix string table in the .c file. */
typedef enum {
   NVS_PREC_FLOAT32 = 0,
   NVS_PREC_FLOAT16 = 1,
   NVS_PREC_FIXED12 = 2,
   NVS_PREC_UNKNOWN = 3
} nvsPrecision;

/*
 * Fixed-function register identifiers. All values are explicit except
 * NVS_FR_UNKNOWN, which takes the value after NVS_FR_FACING (31).
 * Values 6 and 7 are not assigned to any enumerator. The values are used
 * as indices into the fixed-register name table in the .c file, so they
 * must not be renumbered.
 */
typedef enum {
   NVS_FR_POSITION	= 0,
   NVS_FR_WEIGHT	= 1,
   NVS_FR_NORMAL	= 2,
   NVS_FR_COL0		= 3,
   NVS_FR_COL1		= 4,
   NVS_FR_FOGCOORD	= 5,
   NVS_FR_TEXCOORD0	= 8,
   NVS_FR_TEXCOORD1	= 9,
   NVS_FR_TEXCOORD2	= 10,
   NVS_FR_TEXCOORD3	= 11,
   NVS_FR_TEXCOORD4	= 12,
   NVS_FR_TEXCOORD5	= 13,
   NVS_FR_TEXCOORD6	= 14,
   NVS_FR_TEXCOORD7	= 15,
   NVS_FR_BFC0		= 16,
   NVS_FR_BFC1		= 17,
   NVS_FR_POINTSZ	= 18,
   NVS_FR_FRAGDATA0	= 19,
   NVS_FR_FRAGDATA1	= 20,
   NVS_FR_FRAGDATA2	= 21,
   NVS_FR_FRAGDATA3	= 22,
   NVS_FR_CLIP0		= 23,
   NVS_FR_CLIP1		= 24,
   NVS_FR_CLIP2		= 25,
   NVS_FR_CLIP3		= 26,
   NVS_FR_CLIP4		= 27,
   NVS_FR_CLIP5		= 28,
   NVS_FR_CLIP6		= 29,
   NVS_FR_FACING	= 30,
   NVS_FR_UNKNOWN
} nvsFixedReg;

/*
 * Condition codes, numbered consecutively from 0.
 * With this numbering the values 1..7 compose as bits of LT (1), EQ (2)
 * and GT (4): LE == LT|EQ, NE == LT|GT, GE == EQ|GT, TR == LT|EQ|GT.
 */
typedef enum {
   NVS_COND_FL      = 0,
   NVS_COND_LT      = 1,
   NVS_COND_EQ      = 2,
   NVS_COND_LE      = 3,
   NVS_COND_GT      = 4,
   NVS_COND_NE      = 5,
   NVS_COND_GE      = 6,
   NVS_COND_TR      = 7,
   NVS_COND_UN      = 8,
   NVS_COND_UNKNOWN = 9
} nvsCond;


/*
 * Texture target selector; this is the type of nvsInstruction.tex_target.
 *
 * NVS_TEX_TARGET_UNKNOWN takes the value after RECT, like the trailing
 * UNKNOWN member of the other enums in this header. It must not be pinned
 * to 0: that would make it indistinguishable from NVS_TEX_TARGET_1D and
 * would make a switch naming both a compile error (duplicate case value).
 * NOTE(review): a zero-filled nvsInstruction therefore reads as 1D, not
 * UNKNOWN -- set tex_target explicitly where "unknown" is meant.
 */
typedef enum {
   NVS_TEX_TARGET_1D,
   NVS_TEX_TARGET_2D,
   NVS_TEX_TARGET_3D,
   NVS_TEX_TARGET_CUBE,
   NVS_TEX_TARGET_RECT,
   NVS_TEX_TARGET_UNKNOWN
} nvsTexTarget;

/*
 * Scale selector; this is the type of nvsInstruction.dest_scale.
 * All values are explicit; 4 is not assigned to any enumerator.
 * NOTE(review): the names suggest multiply (2X/4X/8X) and divide (INV_*)
 * factors -- confirm against the hardware documentation.
 */
typedef enum {
	NVS_SCALE_1X	 = 0,
	NVS_SCALE_2X	 = 1,
	NVS_SCALE_4X	 = 2,
	NVS_SCALE_8X	 = 3,
	NVS_SCALE_INV_2X = 5,
	NVS_SCALE_INV_4X = 6,
	NVS_SCALE_INV_8X = 7,
} nvsScale;

/*
 * Arith/TEX instructions.
 * Payload stored in nvsCommandSet.data.I.
 * NOTE(review): the per-field notes are inferred from names and types
 * only -- confirm against the assembler/emitter once it exists.
 */
typedef struct nvs_instruction {
   /* presumably a back-pointer to the command set embedding this record */
   struct _nvs_command_set * header;

   nvsOpcode	op;
   unsigned int saturate;

   /* destination operand, its write mask and output scale */
   nvsRegister	dest;
   unsigned int	mask;
   nvsScale	dest_scale;

   /* up to three source operands */
   nvsRegister	src[3];

   /* texture unit / target; presumably only meaningful for TEX-class ops */
   unsigned int tex_unit;
   nvsTexTarget tex_target;

   /* condition-code test and update controls */
   nvsCond	cond;
   nvsSwzComp	cond_swizzle[4];
   int		cond_reg;
   int		cond_test;
   int		cond_update;
} nvsInstruction;


/*
 * BRA, CAL, IF.
 * Payload stored in nvsCommandSet.data.B.
 * NOTE(review): target_* / else_* look like the first and last command of
 * the taken branch and of the else branch -- confirm.
 */
typedef struct nvs_branch {
	/* presumably a back-pointer to the command set embedding this record */
	struct _nvs_command_set *  header;

	nvsOpcode	op;

	nvsCond		cond;
	nvsSwzComp	cond_swizzle[4];
	int		cond_test;

	struct _nvs_command_set *target_head;
	struct _nvs_command_set *target_tail;
	struct _nvs_command_set *else_head;
	struct _nvs_command_set *else_tail;
} nvsBranch;

/*
 * LOOP+ENDLOOP.
 * Payload stored in nvsCommandSet.data.L.
 * NOTE(review): count/initial/increment look like the loop trip count,
 * start value and step; insn_head/insn_tail like the first and last
 * command of the loop body -- confirm.
 */
typedef struct {
	/* presumably a back-pointer to the command set embedding this record */
	struct _nvs_command_set *  header;

	int                count;
	int                initial;
	int                increment;

	struct _nvs_command_set *insn_head;
	struct _nvs_command_set *insn_tail;
} nvsLoop;

/*
 * label+following instructions.
 * Payload stored in nvsCommandSet.data.S.
 * NOTE(review): ownership of the label string is not visible here --
 * confirm who allocates and frees it.
 */
typedef struct nvs_subroutine {
	/* presumably a back-pointer to the command set embedding this record */
	struct _nvs_command_set *  header;

	char *             label;
	struct _nvs_command_set *insn_head;
	struct _nvs_command_set *insn_tail;
} nvsSubroutine;


/*
 * One node of the assembled program: link pointers, a type tag and a
 * union holding one of the four payload structs declared above.
 * NOTE(review): presumably `type` says which member of `data` is valid
 * (NVS_INSTRUCTION -> I, NVS_BRANCH -> B, NVS_LOOP -> L,
 * NVS_SUBROUTINE -> S) and prev/next chain siblings under `parent` --
 * confirm against the code that builds the list.
 */
typedef struct _nvs_command_set
{
   struct _nvs_command_set * parent;
   struct _nvs_command_set * prev;
   struct _nvs_command_set * next;
   enum {
      NVS_INSTRUCTION,
      NVS_BRANCH,
      NVS_LOOP,
      NVS_SUBROUTINE
   } type;
   union {
      nvsInstruction I;
      nvsBranch B;
      nvsLoop L;
      nvsSubroutine S;
   } data;
} nvsCommandSet;



//Now we get to the good stuff
//Compile an arbfp10 program
//  shader:  the program text to assemble.
//  returns: a pointer to an nvsCommandSet describing the program.
//NOTE(review): the implementation accompanying this header is still a stub,
//so the failure convention and who frees the result are not yet defined.
nvsCommandSet * AssembleShader( const char * shader );




#endif

/*
 * Copyright (C) 2006 Ben Skeggs
 * Copyright (C) 2007 Charles Lohr (mostly reformatting, no new content)
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/*
 * Authors:
 *   Ben Skeggs <[email protected]>
 */
The .c file.

Code: Select all

#include "srecomp.h"


/*
 * Static description of one opcode:
 *   name   - assembler mnemonic (left null for opcodes with no table entry)
 *   numsrc - number of source operands
 *   flags  - bitwise OR of the NODS, BRANCH_xx, COUNT_xx and TI_UNIT bits
 */
struct _opcode_info
{
   const char *name;
   int numsrc;
   int flags;
};

/*
 * CHECK_RANGE(idx, arr): bounds-checked lookup in the string table _<arr>.
 *
 * Yields _<arr>[idx] when idx is inside the table, otherwise the literal
 * "<arr>_OOB". A negative idx is converted to a large unsigned value by
 * the comparison against sizeof and therefore also yields "<arr>_OOB".
 * Tables built with designated initializers may contain gaps; a gap yields
 * a null pointer, so callers must not pass the result straight to "%s".
 *
 * The whole expansion is parenthesised so the macro can be used inside a
 * larger expression, and the element count is derived from the table's own
 * element size. idx is evaluated more than once: pass no side effects.
 */
#define CHECK_RANGE(idx, arr) \
	(((idx) < sizeof(_##arr) / sizeof(_##arr[0])) \
		? _##arr[(idx)] : #arr "_OOB")

/*
 * Flag bits for _opcode_info.flags. Every single-bit flag has its own bit;
 * in particular BRANCH_RE and COUNT_INC must not share bit 4, otherwise an
 * opcode flagged with COUNT_ALL would also test positive for BRANCH_RE.
 * NOTE(review): BRANCH_ALL deliberately keeps its existing definition and
 * does not include BRANCH_RE -- confirm whether that is intended.
 */
#define NODS       (1<<0)
#define BRANCH_TR  (1<<1)
#define BRANCH_EL  (1<<2)
#define BRANCH_EN  (1<<3)
#define BRANCH_RE  (1<<4)
#define BRANCH_ALL (BRANCH_TR|BRANCH_EL|BRANCH_EN)
#define COUNT_INC  (1<<5)
#define COUNT_IND  (1<<6)
#define COUNT_NUM  (1<<7)
#define COUNT_ALL  (COUNT_INC|COUNT_IND|COUNT_NUM)
#define TI_UNIT    (1<<8)

static struct _opcode_info ops&#91;&#93; = &#123;
   &#91;NVS_OP_ABS&#93; = &#123;"ABS", 1, 0&#125;,
   &#91;NVS_OP_ADD&#93; = &#123;"ADD", 2, 0&#125;,
   &#91;NVS_OP_ARA&#93; = &#123;"ARA", 1, 0&#125;,
   &#91;NVS_OP_ARL&#93; = &#123;"ARL", 1, 0&#125;,
   &#91;NVS_OP_ARR&#93; = &#123;"ARR", 1, 0&#125;,
   &#91;NVS_OP_BRA&#93; = &#123;"BRA", 0, NODS | BRANCH_TR&#125;,
   &#91;NVS_OP_BRK&#93; = &#123;"BRK", 0, NODS&#125;,
   &#91;NVS_OP_CAL&#93; = &#123;"CAL", 0, NODS | BRANCH_TR&#125;,
   &#91;NVS_OP_CMP&#93; = &#123;"CMP", 2, 0&#125;,
   &#91;NVS_OP_COS&#93; = &#123;"COS", 1, 0&#125;,
   &#91;NVS_OP_DIV&#93; = &#123;"DIV", 2, 0&#125;,
   &#91;NVS_OP_DDX&#93; = &#123;"DDX", 1, 0&#125;,
   &#91;NVS_OP_DDY&#93; = &#123;"DDY", 1, 0&#125;,
   &#91;NVS_OP_DP2&#93; = &#123;"DP2", 2, 0&#125;,
   &#91;NVS_OP_DP2A&#93; = &#123;"DP2A", 3, 0&#125;,
   &#91;NVS_OP_DP3&#93; = &#123;"DP3", 2, 0&#125;,
   &#91;NVS_OP_DP4&#93; = &#123;"DP4", 2, 0&#125;,
   &#91;NVS_OP_DPH&#93; = &#123;"DPH", 2, 0&#125;,
   &#91;NVS_OP_DST&#93; = &#123;"DST", 2, 0&#125;,
   &#91;NVS_OP_EX2&#93; = &#123;"EX2", 1, 0&#125;,
   &#91;NVS_OP_EXP&#93; = &#123;"EXP", 1, 0&#125;,
   &#91;NVS_OP_FLR&#93; = &#123;"FLR", 1, 0&#125;,
   &#91;NVS_OP_FRC&#93; = &#123;"FRC", 1, 0&#125;,
   &#91;NVS_OP_IF&#93; = &#123;"IF", 0, NODS | BRANCH_EL | BRANCH_EN&#125;,
   &#91;NVS_OP_KIL&#93; = &#123;"KIL", 1, 0&#125;,
   &#91;NVS_OP_LG2&#93; = &#123;"LG2", 1, 0&#125;,
   &#91;NVS_OP_LIT&#93; = &#123;"LIT", 1, 0&#125;,
   &#91;NVS_OP_LOG&#93; = &#123;"LOG", 1, 0&#125;,
   &#91;NVS_OP_LOOP&#93; = &#123;"LOOP", 0, NODS | COUNT_ALL | BRANCH_EN&#125;,
   &#91;NVS_OP_LRP&#93; = &#123;"LRP", 3, 0&#125;,
   &#91;NVS_OP_MAD&#93; = &#123;"MAD", 3, 0&#125;,
   &#91;NVS_OP_MAX&#93; = &#123;"MAX", 2, 0&#125;,
   &#91;NVS_OP_MIN&#93; = &#123;"MIN", 2, 0&#125;,
   &#91;NVS_OP_MOV&#93; = &#123;"MOV", 1, 0&#125;,
   &#91;NVS_OP_MUL&#93; = &#123;"MUL", 2, 0&#125;,
   &#91;NVS_OP_NRM&#93; = &#123;"NRM", 1, 0&#125;,
   &#91;NVS_OP_PK2H&#93; = &#123;"PK2H", 1, 0&#125;,
   &#91;NVS_OP_PK2US&#93; = &#123;"PK2US", 1, 0&#125;,
   &#91;NVS_OP_PK4B&#93; = &#123;"PK4B", 1, 0&#125;,
   &#91;NVS_OP_PK4UB&#93; = &#123;"PK4UB", 1, 0&#125;,
   &#91;NVS_OP_POW&#93; = &#123;"POW", 2, 0&#125;,
   &#91;NVS_OP_POPA&#93; = &#123;"POPA", 0, 0&#125;,
   &#91;NVS_OP_PUSHA&#93; = &#123;"PUSHA", 1, NODS&#125;,
   &#91;NVS_OP_RCC&#93; = &#123;"RCC", 1, 0&#125;,
   &#91;NVS_OP_RCP&#93; = &#123;"RCP", 1, 0&#125;,
   &#91;NVS_OP_REP&#93; = &#123;"REP", 0, NODS | BRANCH_EN | COUNT_NUM&#125;,
   &#91;NVS_OP_RET&#93; = &#123;"RET", 0, NODS&#125;,
   &#91;NVS_OP_RFL&#93; = &#123;"RFL", 1, 0&#125;,
   &#91;NVS_OP_RSQ&#93; = &#123;"RSQ", 1, 0&#125;,
   &#91;NVS_OP_SCS&#93; = &#123;"SCS", 1, 0&#125;,
   &#91;NVS_OP_SEQ&#93; = &#123;"SEQ", 2, 0&#125;,
   &#91;NVS_OP_SFL&#93; = &#123;"SFL", 2, 0&#125;,
   &#91;NVS_OP_SGE&#93; = &#123;"SGE", 2, 0&#125;,
   &#91;NVS_OP_SGT&#93; = &#123;"SGT", 2, 0&#125;,
   &#91;NVS_OP_SIN&#93; = &#123;"SIN", 1, 0&#125;,
   &#91;NVS_OP_SLE&#93; = &#123;"SLE", 2, 0&#125;,
   &#91;NVS_OP_SLT&#93; = &#123;"SLT", 2, 0&#125;,
   &#91;NVS_OP_SNE&#93; = &#123;"SNE", 2, 0&#125;,
   &#91;NVS_OP_SSG&#93; = &#123;"SSG", 1, 0&#125;,
   &#91;NVS_OP_STR&#93; = &#123;"STR", 2, 0&#125;,
   &#91;NVS_OP_SUB&#93; = &#123;"SUB", 2, 0&#125;,
   &#91;NVS_OP_TEX&#93; = &#123;"TEX", 1, TI_UNIT&#125;,
   &#91;NVS_OP_TXB&#93; = &#123;"TXB", 1, TI_UNIT&#125;,
   &#91;NVS_OP_TXD&#93; = &#123;"TXD", 3, TI_UNIT&#125;,
   &#91;NVS_OP_TXL&#93; = &#123;"TXL", 1, TI_UNIT&#125;,
   &#91;NVS_OP_TXP&#93; = &#123;"TXP", 1, TI_UNIT&#125;,
   &#91;NVS_OP_UP2H&#93; = &#123;"UP2H", 1, 0&#125;,
   &#91;NVS_OP_UP2US&#93; = &#123;"UP2US", 1, 0&#125;,
   &#91;NVS_OP_UP4B&#93; = &#123;"UP4B", 1, 0&#125;,
   &#91;NVS_OP_UP4UB&#93; = &#123;"UP4UB", 1, 0&#125;,
   &#91;NVS_OP_X2D&#93; = &#123;"X2D", 3, 0&#125;,
   &#91;NVS_OP_XPD&#93; = &#123;"XPD", 2, 0&#125;,
   &#91;NVS_OP_NOP&#93; = &#123;"NOP", 0, NODS&#125;,
&#125;;

static struct _opcode_info *
_get_op_info&#40;int op&#41;
&#123;
   if &#40;op >= &#40;sizeof&#40;ops&#41; / sizeof&#40;struct _opcode_info&#41;&#41;&#41;
      return 0;
   if &#40;ops&#91;op&#93;.name == 0&#41;
      return 0;
   return &ops&#91;op&#93;;
&#125;

static const char *_SFR_STRING&#91;&#93; = &#123;
   &#91;NVS_FR_POSITION&#93; = "position",
   &#91;NVS_FR_WEIGHT&#93; = "weight",
   &#91;NVS_FR_NORMAL&#93; = "normal",
   &#91;NVS_FR_COL0&#93; = "color",
   &#91;NVS_FR_COL1&#93; = "color.secondary",
   &#91;NVS_FR_BFC0&#93; = "bfc",
   &#91;NVS_FR_BFC1&#93; = "bfc.secondary",
   &#91;NVS_FR_FOGCOORD&#93; = "fogcoord",
   &#91;NVS_FR_POINTSZ&#93; = "pointsize",
   &#91;NVS_FR_TEXCOORD0&#93; = "texcoord&#91;0&#93;",
   &#91;NVS_FR_TEXCOORD1&#93; = "texcoord&#91;1&#93;",
   &#91;NVS_FR_TEXCOORD2&#93; = "texcoord&#91;2&#93;",
   &#91;NVS_FR_TEXCOORD3&#93; = "texcoord&#91;3&#93;",
   &#91;NVS_FR_TEXCOORD4&#93; = "texcoord&#91;4&#93;",
   &#91;NVS_FR_TEXCOORD5&#93; = "texcoord&#91;5&#93;",
   &#91;NVS_FR_TEXCOORD6&#93; = "texcoord&#91;6&#93;",
   &#91;NVS_FR_TEXCOORD7&#93; = "texcoord&#91;7&#93;",
   &#91;NVS_FR_FRAGDATA0&#93; = "data&#91;0&#93;",
   &#91;NVS_FR_FRAGDATA1&#93; = "data&#91;1&#93;",
   &#91;NVS_FR_FRAGDATA2&#93; = "data&#91;2&#93;",
   &#91;NVS_FR_FRAGDATA3&#93; = "data&#91;3&#93;",
   &#91;NVS_FR_CLIP0&#93; = "clip_plane&#91;0&#93;",
   &#91;NVS_FR_CLIP1&#93; = "clip_plane&#91;1&#93;",
   &#91;NVS_FR_CLIP2&#93; = "clip_plane&#91;2&#93;",
   &#91;NVS_FR_CLIP3&#93; = "clip_plane&#91;3&#93;",
   &#91;NVS_FR_CLIP4&#93; = "clip_plane&#91;4&#93;",
   &#91;NVS_FR_CLIP5&#93; = "clip_plane&#91;5&#93;",
   &#91;NVS_FR_CLIP6&#93; = "clip_plane&#91;6&#93;",
   &#91;NVS_FR_FACING&#93; = "facing",
&#125;;

#define SFR_STRING&#40;idx&#41; CHECK_RANGE&#40;&#40;idx&#41;, SFR_STRING&#41;

static const char *_SWZ_STRING&#91;&#93; = &#123;
   &#91;NVS_SWZ_X&#93; = "x",
   &#91;NVS_SWZ_Y&#93; = "y",
   &#91;NVS_SWZ_Z&#93; = "z",
   &#91;NVS_SWZ_W&#93; = "w"
&#125;;

#define SWZ_STRING&#40;idx&#41; CHECK_RANGE&#40;&#40;idx&#41;, SWZ_STRING&#41;

static const char *_NVS_PREC_STRING&#91;&#93; = &#123;
   &#91;NVS_PREC_FLOAT32&#93; = "R",
   &#91;NVS_PREC_FLOAT16&#93; = "H",
   &#91;NVS_PREC_FIXED12&#93; = "X",
   &#91;NVS_PREC_UNKNOWN&#93; = "?"
&#125;;

#define NVS_PREC_STRING&#40;idx&#41; CHECK_RANGE&#40;&#40;idx&#41;, NVS_PREC_STRING&#41;

static const char *_NVS_COND_STRING&#91;&#93; = &#123;
   &#91;NVS_COND_FL&#93; = "FL",
   &#91;NVS_COND_LT&#93; = "LT",
   &#91;NVS_COND_EQ&#93; = "EQ",
   &#91;NVS_COND_LE&#93; = "LE",
   &#91;NVS_COND_GT&#93; = "GT",
   &#91;NVS_COND_NE&#93; = "NE",
   &#91;NVS_COND_GE&#93; = "GE",
   &#91;NVS_COND_TR&#93; = "TR",
   &#91;NVS_COND_UNKNOWN&#93; = "??"
&#125;;

/*
 * Assemble a shader program from its source text.
 *
 * Not implemented yet: this stub ignores its argument and always returns a
 * null pointer, so callers get a well-defined "nothing assembled" result
 * instead of the indeterminate value produced by falling off the end of a
 * non-void function.
 */
nvsCommandSet * AssembleShader( const char * shader )
{
   (void)shader;   /* unused until the assembler is written */
   return 0;
}



/*
 * Copyright (C) 2006 Ben Skeggs
 * Copyright (C) 2007 Charles Lohr (mostly just reformatting)
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/*
 * Authors:
 *   Ben Skeggs <[email protected]>
 */


EDIT: I guess I must be going crazy. I can't seem to find the bitwise structure. I'm starting to think I may have gotten confused when I was reading the mesa stuff and may be thinking of the brw_structs for the intel cards.

Additionally, the code that this was ripped from was http://gitweb.freedesktop.org/?p=mesa/m ... ri/nouveau
And it contains all the goodies for the end opcodes for the cards (take a look at nv40_shader.h)

Sorry, all I've been able to do so far is research.
IronPeter
Posts: 207
Joined: Mon Aug 06, 2007 12:46 am
Contact:

Post by IronPeter »

ps2devman
Posts: 259
Joined: Mon Oct 09, 2006 3:56 pm

VP40 assembler grammar study

Post by ps2devman »

- Get cgc.exe from NVidia SDK 9.5 (free)

- Create vs_texture.cg (theoretical source of nv_shader.h content) :

Code: Select all


struct myVertexInput{
	float4 position : POSITION;
	float4 texcoord0 : TEXCOORD0;
	float4 texcoord1 : TEXCOORD1;
};

struct myVertexOutput{
	float4 pos : POSITION;
	float4 tex0 : TEXCOORD0;
	float4 tex1 : TEXCOORD1;
};

myVertexOutput main(myVertexInput I)
{
	myVertexOutput result;
	
	result.pos = I.position;
	result.tex0 = I.texcoord0;
	result.tex1 = I.texcoord1;

	return result;
}

- Cgc.exe -profile vp40 -o vs_texture.vsh vs_texture.cg

- You obtain vs_texture.vsh :

!!ARBvp1.0
OPTION NV_vertex_program3;
# cgc version 1.3.0001, build date Jan 7 2005 14:01:35
# command line args: -profile vp40
# source file: vs_texture.cg
#vendor NVIDIA Corporation
#version 1.0.02
#profile vp40
#program main
#var float4 I.position : $vin.ATTR0 : ATTR0 : 0 : 1
#var float4 I.texcoord0 : $vin.ATTR8 : ATTR8 : 0 : 1
#var float4 I.texcoord1 : $vin.ATTR9 : ATTR9 : 0 : 1
#var float4 main.pos : $vout.HPOS : HPOS : -1 : 1
#var float4 main.tex0 : $vout.TEX0 : TEX0 : -1 : 1
#var float4 main.tex1 : $vout.TEX1 : TEX1 : -1 : 1
PARAM c[1] = { program.local[0] };
TEMP CC;
BB1:
MOV result.position, vertex.attrib[0];
MOV result.texcoord[0], vertex.attrib[8];
MOV result.texcoord[1], vertex.attrib[9];
END
# 3 instructions, 0 R-regs

- You can notice the line "OPTION NV_vertex_program3;"

- A quick search on internet gives 3 official grammar description files :

http://www.nvidia.com/dev_content/nvope ... rogram.txt
http://www.nvidia.com/dev_content/nvope ... ogram2.txt
http://www.nvidia.com/dev_content/nvope ... ogram3.txt
ps2devman
Posts: 259
Joined: Mon Oct 09, 2006 3:56 pm

FP30 assembler study

Post by ps2devman »

- You write ps_texture.cg (theoretical source of nv_shader.h content):

Code: Select all



struct myVertexOutput {
	float2 texture_coords : TEXCOORD0;
};

float4 main(myVertexOutput I, uniform sampler2D colorMap):COLOR
{
	return tex2D(colorMap, I.texture_coords.xy);
}


- Cgc.exe -profile fp30 -o ps_texture.psh ps_texture.cg
(possible option for fp30: -profileopts NumInstructionSlots=<val>)

or
- Cgc.exe -profile fp30unlimited -o ps_texture.psh ps_texture.cg
(no option)

- You obtain ps_texture.psh (1st case) :

Code: Select all


!!FP1.0
# cgc version 1.3.0001, build date Jan  7 2005 14:01:35
# command line args: -profile fp30
# source file: ps_texture.cg
#vendor NVIDIA Corporation
#version 1.0.02
#profile fp30
#program main
#semantic main.colorMap
#var float2 I.texture_coords : $vin.TEX0 : TEX0 : 0 : 1
#var sampler2D colorMap :  : texunit 0 : 1 : 1
#var float4 main : $vout.COL : COL : -1 : 1
TEX   o[COLR], f[TEX0], TEX0, 2D;
END
# 1 instructions, 0 R-regs, 0 H-regs

- Specs

http://www.nvidia.com/dev_content/nvope ... rogram.txt
http://www.nvidia.com/dev_content/nvope ... ogram2.txt
IronPeter
Posts: 207
Joined: Mon Aug 06, 2007 12:46 am
Contact:

Post by IronPeter »

Hi, I committed a basic vertex shader compiler into libps3rsx. It is able to compile this test example:

Code: Select all

!!VP1.0
DP4 o[HPOS].x,  v[OPOS], c[0];
DP4 o[HPOS].y,  v[OPOS], c[1];
DP4 o[HPOS].z,  v[OPOS], c[2];
DP4 o[HPOS].w,  v[OPOS], c[3];
MOV o[TEX0],    v[TEX0];
END
It took just a few hours with yacc. It is a relatively complete nv_vertex_program compiler (only the address register stuff is missing). It would be nice if somebody could extend it into an nv_vertex_program2 compiler.

I want to make offline tool for shader compilation. And runtime code for loading under libps3rsx. In day or two.
ps2devman
Posts: 259
Joined: Mon Oct 09, 2006 3:56 pm

Post by ps2devman »

Great! You rule!
IronPeter
Posts: 207
Joined: Mon Aug 06, 2007 12:46 am
Contact:

Post by IronPeter »

yacc rulzz.

I coded loading shaders from memory. Shader compiler generates binary shader on the hard disk, I'm loading this microcode.

Code: Select all

int load_vertex_shader&#40;  uint32_t *fifo &#41;
&#123;
	int fd, size;
	void *file = map_file&#40; "../../data/mvp.vertex", &fd, &size &#41;;

	if&#40; size && file &#41;
	&#123;
		vertex_shader_desc_t *desc = file;
		int res = set_vertex_shader&#40; desc, &#40;uint32_t *&#41;&#40; &#40;uint8 *&#41;file + sizeof&#40; *desc &#41; &#41;, fifo, Nv3D &#41;;
		unmap_file&#40; file, fd, size &#41;;
		return res;
	&#125;

	return 0;
&#125;
IronPeter
Posts: 207
Joined: Mon Aug 06, 2007 12:46 am
Contact:

Post by IronPeter »

You can play with vertex shader compiler, loading of broken microcode does not hang up GPU. I think, there are bugs in compiler.

If somebody want he can write fragment shader compiler. It is easy with yacc. I'll wait for few days.
IronPeter
Posts: 207
Joined: Mon Aug 06, 2007 12:46 am
Contact:

Post by IronPeter »

Hi, does somebody want to implement fragment shader assembler? By analogy with vertex one. cnlohr :)?

If noreply in 12 hours - I'll implement it :).
ps2devman
Posts: 259
Joined: Mon Oct 09, 2006 3:56 pm

Post by ps2devman »

I'm lacking free time (end of fiscal year, insane work to do, even on weekends). Today I've just managed to keep some free time for trying to apply the 1080i patch to marcus other_os demo 1.1. I'm trying that at the moment.

Hehe... You are too fast for us, you know...
User avatar
mc
Posts: 211
Joined: Wed Jan 12, 2005 7:32 am
Location: Linköping

Post by mc »

ps2devman: Triple buffering in 1080i will use more DDR-ram, so you'll
need to increase "BB" to at least 24. This also means you'll need to map
more DDR-ram to the CPU for texture upload to work.
Flying at a high speed
Having the courage
Getting over crisis
I rescue the people
ps2devman
Posts: 259
Joined: Mon Oct 09, 2006 3:56 pm

Post by ps2devman »

Thx for the hint. That will help me.
Hanging in HD mode hurts since you have to reinitialize bios and play with cables each time to see something again in LD... But it's ok, I'm over motivated (but I don't have much time to succeed, I cross fingers) !

EDIT: Strange, the reset didn't make my PS3 lose the HD settings... Maybe it depends on how bad it hangs...
ps2devman
Posts: 259
Joined: Mon Oct 09, 2006 3:56 pm

Post by ps2devman »

I failed. I applied same change to av.c and proto.h as usual (see post "otheros demo 1080i patch").

In demo.c I've tried BB=2 (I was probably wrong to assume it was the number of buffers like in otheros demo 1.0), but I've tried also BB=24 like you said with DDR_SIZE=48Mb in fb.c, but it hangs right at startup.

I'm not good enough (yet) to debug this... I will retry in January.
User avatar
mc
Posts: 211
Joined: Wed Jan 12, 2005 7:32 am
Location: Linköping

Post by mc »

Yes, BB is not number of buffers, but number of megabytes up in DDR
ram where stuff like textures and z-buffer is placed. (This comes from
IronPeter's samples.) Trying to map as much as 48M might overflow the
MMU htab. The easiest way to get it working is probably to leave BB and
DDR_SIZE as they are, and instead modify the code to put the framebuffers
_after_ the Z buffer. Since the CPU doesn't need to access the framebuffers
(all rendering is done by the RSX), they can be after the end of the
MMU mapped region without any problems. Just change the computation
of "offset" in the main loop to add 64MB or something to the offset. Then

there should be plenty of space for large framebuffers.

(Sorry about the off-topicness of these posts...)
Flying at a high speed
Having the courage
Getting over crisis
I rescue the people
IronPeter
Posts: 207
Joined: Mon Aug 06, 2007 12:46 am
Contact:

Post by IronPeter »

hi, I implemented fp assembler. Buggy as hell. I need few days to clean up.
User avatar
mc
Posts: 211
Joined: Wed Jan 12, 2005 7:32 am
Location: Linköping

Post by mc »

Sweet. Now all I need to do is learn fp language, and I'll modify my demo
to compute the fractals on the fly using fragment programs. :-)
(Any links to a good reference and/or tutorial?)
Flying at a high speed
Having the courage
Getting over crisis
I rescue the people
IronPeter
Posts: 207
Joined: Mon Aug 06, 2007 12:46 am
Contact:

Post by IronPeter »

nice idea, good testcase.

it is mandelbrot shader from Humus http://www.humus.ca/index.php?page=3D&start=48

http://www.everfall.com/paste/id.php?09yzovsck729

It is written in arbfp, not nvfp asm. Probably I'll be able to support both semantics, probably only nvfp one.

Good testcase.
IronPeter
Posts: 207
Joined: Mon Aug 06, 2007 12:46 am
Contact:

fp asm

Post by IronPeter »

I committed it.

It is able to compile

!!FP1.0

TEXX H0, f[TEX0], TEX0, 2D;

MULX H0, H0, {0.3, 0.6, 1.0, 1.0};

END

Feel free to post bugs :).
ArtVandelae
Posts: 3
Joined: Thu Nov 08, 2007 11:39 pm

Post by ArtVandelae »

Nice work. I've been able to successfully compile several shaders by using your assembler after using the Nvidia Cg compiler to go from GLSL to asm code. There seem to be a few issues when compiling more complex shaders, however.

In particular, the assembler errors when the code wants to use the "HC" temporary register as in the line:

MULXC HC.x, H0, H0.w;

It also seems to error when the code attempts to use a condition code such as the following:

MOVR R3.xy(EQ.x), {-0.0009765625, 0.0009765625};

The entire shader is listed below. For the record, it is a fragment program implementation of the 2xSaI filter hard-coded for reading from a 1024x1024 texture. The original GLSL code was written by Guest(r) and can be found here. It was compiled from GLSL by using Cg with the following command line:

cgc.exe -oglsl -profile fp30 -entry main -o 2xSaI.fp 2xSaI.glsl

Code: Select all

!!FP1.0
MULR  R0.xy, f[TEX0], {1024}.x;
FRCR  R0.zw, R0.xyxy;
FLRR  R0.xy, R0;
MULR  R3.zw, R0.xyxy, {0.0009765625}.x;
MOVR  R3.xy, {0.0009765625, -0.0009765625};
SLTR  H0.zw, R0, {0.5}.x;
SGER  H0.xy, R0.zwzw, {0.5}.x;
MULXC HC.x, H0, H0.w;
MULX  H0.x, H0, H0.y;
MOVR  R3.xy(EQ.x), {-0.0009765625, 0.0009765625};
MADR  R0.xy, R3, {0.5}.x, R3.zwzw;
ADDR  R4.xy, R0, {-0.00048828125}.x;
ADDR  R2.xy, R4, -R3;
ADDR  R4.zw, R2.xyxy, {0.0009765625}.x;
TEX   R1, R4.zwzw, TEX0, 2D;
ADDR  R0.xy, R3.zwzw, -R3;
TEX   R0, R0, TEX0, 2D;
DP3R  R1.w, R1, {65536, 256, 1};
DP3R  R5.z, R0, {65536, 256, 1};
ADDR  R5.xy, R3.zwzw, {0.0009765625}.x;
ADDR  R0.xy, R5, -R3;
TEX   R0, R0, TEX0, 2D;
DP3R  R2.w, R0, {65536, 256, 1};
ADDR  R0.x, R1.w, -R2.w;
ADDR  R2.z, R1.w, -R5;
SLTR  H1.y, |R0.x|, -|R2.z|;
SGTR  H1.x, |R0|, -|R2.z|;
TEX   R0, R3.zwzw, TEX0, 2D;
DP3R  R0.w, R0, {65536, 256, 1};
SEQR  H1.w, R0, R1;
MOVR  o[COLR].xyz, R0;
ADDR  R2.z, H1.x, -H1.y;
ADDR  R7.xy, R4, {0.0009765625}.x;
MULX  H0.y, H0.z, H0.w;
ADDR  R5.w, R0, -R5.z;
ADDR  R2.w, R0, -R2;
SLTR  H1.y, |R2.w|, -|R5.w|;
SGTR  H1.x, |R2.w|, -|R5.w|;
ADDR  R2.w, H1.x, -H1.y;
ADDR  R6.x, R2.z, -R2.w;
TEX   R2, R2, TEX0, 2D;
DP3R  R6.y, R2, {65536, 256, 1};
TEX   R2, R4, TEX0, 2D;
DP3R  R5.w, R2, {65536, 256, 1};
ADDR  R6.z, R1.w, -R6.y;
ADDR  R2.x, R1.w, -R5.w;
SLTR  H1.y, |R2.x|, -|R6.z|;
SGTR  H1.x, |R2|, -|R6.z|;
ADDR  R2.z, R0.w, -R6.y;
ADDR  R2.x, H1, -H1.y;
ADDR  R2.y, R0.w, -R5.w;
SLTR  H1.y, |R2|, -|R2.z|;
SGTR  H1.x, |R2.y|, -|R2.z|;
ADDR  R2.y, H1.x, -H1;
ADDR  R2.x, R2.y, -R2;
ADDR  R6.x, R2, -R6;
ADDR  R2.xy, R5, R3;
TEX   R2, R2, TEX0, 2D;
DP3R  R6.y, R2, {65536, 256, 1};
ADDR  R2.xy, R3.zwzw, R3;
TEX   R2, R2, TEX0, 2D;
DP3R  R6.w, R2, {65536, 256, 1};
SEQR  H0.z, R0.w, R6.w;
ADDR  R6.z, R1.w, -R6.y;
ADDR  R2.x, R1.w, -R6.w;
SLTR  H1.y, |R2.x|, -|R6.z|;
SGTR  H1.x, |R2|, -|R6.z|;
ADDR  R2.z, R0.w, -R6.y;
ADDR  R2.x, H1, -H1.y;
ADDR  R2.y, R0.w, -R6.w;
SLTR  H1.y, |R2|, -|R2.z|;
SGTR  H1.x, |R2.y|, -|R2.z|;
ADDR  R2.y, H1.x, -H1;
ADDR  R2.x, R2, -R2.y;
ADDR  R6.x, R6, -R2;
ADDR  R2.xy, R4.zwzw, {0.0009765625}.x;
TEX   R2, R2, TEX0, 2D;
DP3R  R4.z, R2, {65536, 256, 1};
ADDR  R2.xy, R7, {0.0009765625}.x;
TEX   R2, R2, TEX0, 2D;
DP3R  R7.z, R2, {65536, 256, 1};
ADDR  R4.w, R1, -R4.z;
ADDR  R2.x, R1.w, -R7.z;
SLTR  H1.y, |R4.w|, -|R2.x|;
SGTR  H1.x, |R4.w|, -|R2|;
ADDR  R2.y, R0.w, -R4.z;
TEX   R4, R5, TEX0, 2D;
ADDR  R2.x, H1, -H1.y;
ADDR  R2.z, R0.w, -R7;
SLTR  H1.y, |R2|, -|R2.z|;
SGTR  H1.x, |R2.y|, -|R2.z|;
ADDR  R2.y, H1.x, -H1;
ADDR  R6.y, R2, -R2.x;
TEX   R2, R7, TEX0, 2D;
DP3R  R2.w, R2, {65536, 256, 1};
SGTR  H1.x, R6, -R6.y;
SNER  H2.y, R1.w, R2.w;
DP3R  R4.w, R4, {65536, 256, 1};
SEQX  H1.y, H1.x, {0}.x;
SEQR  H1.x, R0.w, R4.w;
SEQX  H2.x, H2.y, {0};
MULX  H2.z, H1.x, H2.x;
SEQX  H1.z, H1.w, {0}.x;
MULX  H2.w, H2.z, H1.z;
SLTR  H1.z, R6.x, -R6.y;
MULX  H1.y, H2.w, H1;
MULXC HC.x, H1.y, H1.z;
ADDR  R6.xyz, R0, R1;
ADDR  R6.xyz, R2, R6;
ADDR  R4.xyz, R4, R6;
MOVR  o[COLR].xyz(NE.x), R1;
SEQX  H1.z, H1, {0}.x;
MULXC HC.x, H1.y, H1.z;
MULR  R4.xyz, R4, {0.25}.x;
SEQX  H1.y, H1.x, {0}.x;
SEQR  H1.z, R1.w, R2.w;
MULX  H2.x, H1.y, H1.z;
MOVR  o[COLR].xyz(NE.x), R4;
MOVXC RC.x, H2;
MOVR  o[COLR].xyz(NE.x), R1;
SEQX  H1.z, H1, {0}.x;
MULX  H1.y, H1, H1.z;
MOVXC RC.x, H1.y;
MOVR  o[COLR].xyz(NE.x), R4;
SEQX  H0.x, H0, {0};
MULXC HC.x, H0, H0.y;
ADDR  R1.xy, R3, R7;
TEX   R1, R1, TEX0, 2D;
MOVR  R4.xyz, R0;
MOVR  o[COLR].xyz(NE.x), R0;
SNER  H0.w, R5, R2;
MULX  H0.z, H1.w, H0;
MULX  H0.z, H0, H0.w;
DP3R  R1.x, R1, {65536, 256, 1};
SEQR  H0.w, R2, R1.x;
MULX  H0.z, H0, H0.w;
ADDR  R1.xyz, R0, R2;
MULR  R6.xyz, R1, {0.5}.x;
ADDR  R1.xy, R3.zwzw, {-0.0009765625}.x;
TEX   R1, R1, TEX0, 2D;
SEQR  H1.z, R2.w, R7;
SEQR  H0.w, R0, R5;
MADX_SAT H0.w, H0, H1.z, H0.z;
SEQX  H0.w, H0, {0}.x;
MULX  H1.x, H1, H2.y;
MULXC HC.x, H1, H0.w;
MOVR  R4.xyz(NE.x), R6;
MULXC HC.x, H1.w, H2.z;
MOVR  R4.xyz(NE.x), R0;
MOVXC RC.x, H2.w;
MOVR  R4.xyz(NE.x), R6;
SEQR  H1.x, R2.w, R4.w;
SEQR  H0.w, R2, R5;
MULX  H0.w, H0, H1.x;
SNER  H1.x, R0.w, R6.w;
MULX  H0.w, H0, H1.x;
DP3R  R1.x, R1, {65536, 256, 1};
SEQR  H1.x, R0.w, R1;
MULX  H0.w, H0, H1.x;
SEQR  H1.x, R2.w, R6.w;
SEQR  H1.z, R0.w, R5;
MADX_SAT H1.x, H1, H1.z, H0.w;
MULXC HC.x, H2, H1;
MOVR  R4.xyz(NE.x), R2;
SEQX  H1.x, H1, {0};
MULXC HC.x, H2, H1;
MOVR  R4.xyz(NE.x), R6;
MULXC HC.x, H1.y, H0.z;
SEQX  H0.z, H0, {0}.x;
MOVR  R4.xyz(NE.x), R0;
MULX  H0.z, H1.y, H0;
MULXC HC.x, H0.z, H0.w;
MOVR  R4.xyz(NE.x), R2;
SEQX  H0.w, H0, {0}.x;
MULXC HC.x, H0.z, H0.w;
MOVR  R4.xyz(NE.x), R6;
SEQX  H0.y, H0, {0}.x;
MULXC HC.x, H0, H0.y;
MOVR  o[COLR].xyz(NE.x), R4;
END
IronPeter
Posts: 207
Joined: Mon Aug 06, 2007 12:46 am
Contact:

Post by IronPeter »

Ok, thanks for your test. Conditional ops are supported, it is probably small bug in semantic.

The situation with HC and RC is worse. I do not want to use a "real" register. Does the write mask affect the conditional mask? Is it possible to use a "real" register like H0 or R0 with a zero write mask?

Also there are problems with RSQ operation. It must be implemented as exp ( 0.5 * lg x ).

The main problem now is fp constants. The code for constants setting is not yet written :).
hermes
Posts: 25
Joined: Tue Mar 30, 2004 5:22 am
Location: Spain

Post by hermes »

Hi.
I have done some modification to the IronPeter shader compilers.
If you give it an output file name ending in .h, it works differently: the output is a C header containing an array of data and some defines whose names are based on the file name.

for example:

shader.vp

Code: Select all

!!VP1.0
MOV o[HPOS], v[OPOS];
MOV o[COL0], v[OCOL0];
MOV o[TEX0],    v[TEX0];
END
./vp myvp.h < shader.vp
(note vp program is a.out renamed from /toolshaderv)

The output:
myvp.h

Code: Select all

// Vertex Program - Output by Hermes / www.elotrolado.net

#define myvp_vp_in 0x109

#define myvp_vp_out 0x4001

#define myvp_vp_len  12

unsigned int myvp_vp&#91;myvp_vp_len&#93;=&#123;
    0x401f9c6c, 0x40000d, 0x8106c083, 0x6041ff80,
    0x401f9c6c, 0x40030d, 0x8106c083, 0x6041ff84,
    0x401f9c6c, 0x40080d, 0x8106c083, 0x6041ff9d&#125;;

// End

In the same way the fragment program output:

shader_tex_color.fp

Code: Select all

!!FP1.0

TEX   H0, f&#91;TEX0&#93;, TEX0, 2D;
MULT H0,H0,f&#91;COL0&#93;;
END
./fp fp_tex_color.h < shader_tex_color.fp
(note fp program is a.out renamed from /toolshaderf)

The Output:
fp_tex_color.h

Code: Select all

// Fragment Program - Output by Hermes / www.elotrolado.net

#define fp_tex_color_fp_regs 2

#define fp_tex_color_fp_len  8

unsigned int fp_tex_color_fp&#91;fp_tex_color_fp_len&#93;=&#123;
    0x17009e80, 0x1c9dc801, 0x0, 0x0,
    0x2003e81, 0x1c9dc900, 0x1c901, 0x0&#125;;

// End
You can download the modified source and the binary from this link:

http://mods.elotrolado.net/~hermes/ps3/ ... ers.tar.gz

My little example code

Well, as you can see, i am using a light component to modulate the texture color.

I have done two fragment programs to draw triangles with and without texture. If you want to draw triangles without textures, you can use this Fragment Program:

shader_color.fp

Code: Select all

!!FP1.0
MOV H0,f&#91;COL0&#93;;
END
./fp fp_color.h < shader_color.fp

The Output:
fp_color.h

Code: Select all

// Fragment Program - Output by Hermes / www.elotrolado.net

#define fp_color_fp_regs 2

#define fp_color_fp_len  4

unsigned int fp_color_fp&#91;fp_color_fp_len&#93;=&#123;
    0x1003e81, 0x1c9dc801, 0x0, 0x0&#125;;
// End

To load the vertex and fragment programs, you can use these modified functions:

Code: Select all

// Writes the FIFO commands that upload a vertex program and make it current.
//
//   fifo        start of the command buffer region to fill
//   vp_in_reg   first word written to NV40TCL_VP_ATTRIB_EN
//   vp_out_reg  second word written after NV40TCL_VP_ATTRIB_EN
//   shader      program words, consumed four per loop step
//   shader_size number of 32-bit words in shader
//
// Returns ptr - fifo. This is presumably the number of words emitted, assuming
// the BEGIN_RING and OUT_RING macros, which are not shown here, advance ptr.
int NV40_LoadVtxProg2&#40; uint32_t *fifo, uint32_t vp_in_reg, uint32_t vp_out_reg, uint32_t *shader, int shader_size&#41;
&#123;
  volatile uint32_t *ptr = fifo;
  uint32_t i;

  // Select upload slot 0 before sending the program words.
  BEGIN_RING&#40;Nv3D, NV40TCL_VP_UPLOAD_FROM_ID, 1&#41;;
  OUT_RING  &#40; 0 &#41;;
  // NOTE review: each step reads words i through i + 3, so shader_size is
  // expected to be a multiple of 4; otherwise the last step reads past the array.
  // NOTE review: i is unsigned while shader_size is int, so a negative size
  // would be compared as a very large unsigned value.
  for &#40;i=0; i<shader_size; i+=4&#41;
  &#123;
    // Every group of four words is sent to the same UPLOAD_INST method;
    // presumably the hardware advances the slot on its own - TODO confirm.
    BEGIN_RING&#40;Nv3D, NV40TCL_VP_UPLOAD_INST&#40;0&#41;, 4&#41;;
    OUT_RING  &#40;shader&#91;i + 0&#93;&#41;;
    OUT_RING  &#40;shader&#91;i + 1&#93;&#41;;
    OUT_RING  &#40;shader&#91;i + 2&#93;&#41;;
    OUT_RING  &#40;shader&#91;i + 3&#93;&#41;;

  &#125;

  // Start execution from slot 0, matching the upload slot chosen above.
  BEGIN_RING&#40;Nv3D, NV40TCL_VP_START_FROM_ID, 1&#41;;
  OUT_RING  &#40;0&#41;;

  // Two consecutive words: the input mask, then the output mask.
  BEGIN_RING&#40;Nv3D, NV40TCL_VP_ATTRIB_EN, 2&#41;;
  OUT_RING  &#40;vp_in_reg&#41;;
  OUT_RING  &#40;vp_out_reg&#41;;

  // Method 0x1478 has no symbolic name here; its purpose is not visible
  // in this code - TODO document.
  BEGIN_RING&#40; Nv3D, 0x1478, 1 &#41;;
  OUT_RING  &#40;0&#41;;

  return ptr - fifo;
&#125;

// Copies a fragment program into fbmem on first use and emits the FIFO
// commands that select it.
//
//   fifo         start of the command buffer region to fill
//   fbmem        word-addressed memory that receives the program
//   shader_hw_id in/out handle; zero means not uploaded yet. On return it
//                holds the byte offset used for the program. May be NULL.
//   shader       program words
//   shader_size  number of 32-bit words in shader
//   shader_regs  value shifted into the temp count field of FP_CONTROL
//
// Returns ptr - fifo. This is presumably the number of words emitted, assuming
// the BEGIN_RING and OUT_RING macros, which are not shown here, advance ptr.
//
// NOTE review: with a NULL shader_hw_id the handle is always zero, so the
// program is copied into a fresh slot on every call.
int NV40_LoadFragProg2&#40; uint32_t *fifo, uint32_t *fbmem, uint32_t *shader_hw_id, uint32_t *shader, int shader_size, int shader_regs&#41;
&#123;
  uint32_t i;
  // fp_offset is defined outside this function; dividing by 4 turns it
  // into an index into the uint32_t array fbmem.
  uint32_t offset = fp_offset / 4;
  uint32_t *ptr = fifo;
  // Running word offset of the next free slot; persists across calls.
  static int next_hw_id_offset = 0;
  unsigned int hw_id=0;

if&#40;shader_hw_id&#41; hw_id=*shader_hw_id;

  // A zero handle means the program has not been placed in fbmem yet.
  if &#40;!hw_id&#41;
  &#123;

    // Each word goes through endian_fp, a helper defined elsewhere,
    // before it is stored.
    for&#40; i = 0; i < shader_size; ++i &#41;
    &#123;
      fbmem&#91; offset + next_hw_id_offset + i&#93; = endian_fp&#40; shader&#91;i&#93; &#41;;
    &#125;


    // The handle is the word index of the slot times 4.
    hw_id  = offset;
    hw_id += next_hw_id_offset;
    hw_id *= 4;

    // Advance past this program and round the running offset up to the
    // next multiple of 64 words.
    next_hw_id_offset += shader_size;
    next_hw_id_offset = &#40;next_hw_id_offset + 63&#41; & ~63;
  &#125;
if&#40;shader_hw_id&#41; *shader_hw_id=hw_id;

  //printf&#40; "frag prog 0x%x \n", shader->hw_id &#41;;
  // Point the fragment unit at the program, with the DMA0 flag set.
  BEGIN_RING&#40;Nv3D, NV40TCL_FP_ADDRESS, 1&#41;;
  OUT_RING  &#40;hw_id | NV40TCL_FP_ADDRESS_DMA0&#41;;
  BEGIN_RING&#40;Nv3D, NV40TCL_FP_CONTROL, 1&#41;;
  OUT_RING  &#40; &#40; shader_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT &#41; &#41;;

  return ptr - fifo;
&#125;
To use it:

Code: Select all

#include "myvp.h"
#include "fp_color.h"
#include "fp_tex_color.h"

uint32_t fp_color=0; //  to 0 the first time
uint32_t fp_tex_color=0; //  to 0 the first time
.....
.....

int bind3d&#40;....&#41;
&#123;
....
....

ptr += NV40_LoadVtxProg2&#40; ptr, myvp_vp_in,myvp_vp_out,myvp_vp, myvp_vp_len &#41;;

  ptr += NV40_LoadFragProg2&#40; ptr, fbmem, &fp_color, fp_color_fp, fp_color_fp_len, fp_color_fp_regs&#41;;

  ptr += NV40_LoadFragProg2&#40; ptr, fbmem, &fp_tex_color, fp_tex_color_fp, fp_tex_color_fp_len, fp_tex_color_fp_regs&#41;;
With this code i load the Vertex Program and the two Fragment Programs the first time.

On later calls I can use the variables fp_color and fp_tex_color to select which FP to use.

To build the vertices you can use these macros:

Code: Select all

// Helper macros that emit one vertex through the ring macros. They expand to
// bare statements, so they rely on whatever BEGIN_RING and OUT_RINGf need
// being in scope at the call site - presumably a local named ptr, TODO confirm.

// Emits four floats r, g, b, a to vertex attribute 3.
#define CV_COLOR&#40;r,g,b,a&#41; BEGIN_RING&#40;Nv3D, NV40TCL_VTX_ATTR_4F_X&#40;3&#41;, 4&#41;;\
			  OUT_RINGf &#40;&#40;r&#41;&#41;; OUT_RINGf &#40;&#40;g&#41;&#41;;\
	                  OUT_RINGf &#40;&#40;b&#41;&#41;; OUT_RINGf &#40;&#40;a&#41;&#41;;\

// Emits sx, sy, sz and a fixed fourth component of 1.0f to vertex attribute 0.
#define CV_VERT&#40;sx,sy,sz&#41; BEGIN_RING&#40;Nv3D, NV40TCL_VTX_ATTR_4F_X&#40;0&#41;, 4&#41;;\
			  OUT_RINGf &#40;&#40;sx&#41;&#41;; OUT_RINGf &#40;&#40;sy&#41;&#41;;\
	                  OUT_RINGf &#40;&#40;sz&#41;&#41;; OUT_RINGf &#40;&#40;1.0f&#41;&#41;;\

// Emits two floats tx, ty to vertex attribute 8.
#define CV_TEXT&#40;tx,ty&#41; BEGIN_RING&#40;Nv3D, NV40TCL_VTX_ATTR_2F_X&#40;8&#41;, 2&#41;;\
			  OUT_RINGf &#40;&#40;tx&#41;&#41;; OUT_RINGf &#40;&#40;ty&#41;&#41;;\



// Emits one complete vertex: color first, then position, then texture
// coordinates. Wrapped in do-while so it behaves as a single statement.
#define CV_OUT1&#40;r,g,b,a, sx,sy, sz, tx, ty&#41; do &#123;                                                     \
	CV_COLOR&#40;r,g,b,a&#41;\
	CV_VERT&#40;sx,sy,sz&#41;\
	CV_TEXT&#40;tx,ty&#41;\
&#125; while&#40;0&#41;
And here is example code that draws triangles with and without textures, using NV40TCL_SHADE_MODEL_SMOOTH (gouraud) and vertex color:

Code: Select all

// Emits one frame of demo geometry: a batch of 12 triangles drawn with the
// fp_tex_color fragment program, then a batch of 12 drawn with fp_color.
//
//   fifo   start of the command buffer region to fill
//   fbmem  passed through to NV40_LoadFragProg2
//
// Reads X, width and height, which are defined outside this function.
// Returns ptr - fifo. This is presumably the number of words emitted, assuming
// the ring macros, which are not shown here, advance ptr.
int NV40_EmitGeometry&#40; uint32_t *fifo, uint32_t *fbmem &#41;
&#123;
 volatile uint32_t *ptr = fifo;
 uint32_t i;
 // Animation state kept across calls: A is the current scale numerator and
 // B is the per-call step, flipped at the bottom of this function.
 static float A=0.0f,B=1.0f; 
 float Y=&#40;float&#41; &#40;height/2&#41;;


 // TEXTURED
 ptr += NV40_LoadFragProg2&#40; ptr, fbmem, &fp_tex_color, fp_tex_color_fp, fp_tex_color_fp_len, fp_tex_color_fp_regs&#41;;

  // Method 0x1718 is written three times with zero; its purpose is not
  // visible in this code - TODO document.
  BEGIN_RING&#40;Nv3D, 0x1718, 1&#41;;
  OUT_RING  &#40;0&#41;;
  BEGIN_RING&#40;Nv3D, 0x1718, 1&#41;;
  OUT_RING  &#40;0&#41;;
  BEGIN_RING&#40;Nv3D, 0x1718, 1&#41;;
  OUT_RING  &#40;0&#41;;

  
  BEGIN_RING&#40;Nv3D, NV40TCL_BEGIN_END, 1&#41;;
  OUT_RING  &#40;NV40TCL_BEGIN_END_TRIANGLES&#41;;

  // NOTE review: despite its name this holds atan of 1, a quarter of pi.
  // Divided by 1.5 below, the step per triangle is one sixth of pi, so the
  // 12 triangles span one full turn.
  float pi = atan&#40; 1.0f &#41; * 1.0f;

  for&#40; i = 0; i < 12; ++i &#41;
  &#123;
   // Rotation for triangle i; the X term shifts the angle as X changes.
   float si = sin&#40; -&#40;X/256.0f&#41;+i * pi / 1.5f &#41;;
    float co = cos&#40;-&#40;X/256.0f&#41;+ i * pi / 1.5f &#41;;
 // Fold the animated scale A / 128 into the rotation terms.
 si=si*A/128.0f;
 co=co*A/128.0f;
    float x1 = 200.0f, y1 = 80.0f;
    float x2 = -200.0f, y2 = 10.0f;
    float x3 = -200.0f, y3 = 150.0f;

    // Rotated and scaled corners, centred on width - X horizontally and Y vertically.
    CV_OUT1&#40;1.0f,1.0f,1.0f,1.0f, width-X + x1 * co + y1 * si, Y - x1 * si + y1 * co, 1.0f, 0.5f, 0.0f  &#41;;
    CV_OUT1&#40;1.0f,0.0f,1.0f,1.0f, width-X + x2 * co + y2 * si, Y - x2 * si + y2 * co, 0.5f, 0.0f, 1.0f  &#41;;
    CV_OUT1&#40;0.0f,1.0f,0.0f,0.0f, width-X + x3 * co + y3 * si, Y - x3 * si + y3 * co, 0.5f, 1.0f, 1.0f  &#41;;

 &#125;
  BEGIN_RING&#40;Nv3D, NV40TCL_BEGIN_END, 1&#41;;
 OUT_RING  &#40;NV40TCL_BEGIN_END_STOP&#41;;

// NONTEXTURED
ptr += NV40_LoadFragProg2&#40; ptr, fbmem, &fp_color, fp_color_fp, fp_color_fp_len, fp_color_fp_regs&#41;;


 BEGIN_RING&#40;Nv3D, NV40TCL_BEGIN_END, 1&#41;;
  OUT_RING  &#40;NV40TCL_BEGIN_END_TRIANGLES&#41;;

  for&#40; i = 0; i < 12; ++i &#41;
  &#123;
    // Same construction as the first batch, but the X term has the opposite
    // sign and the centre is X instead of width - X.
    float si = sin&#40; &#40;X/256.0f&#41;+i * pi / 1.5f &#41;;
    float co = cos&#40;&#40;X/256.0f&#41;+ i * pi / 1.5f &#41;;
 si=si*A/128.0f;
 co=co*A/128.0f;
    float x1 = 200.0f, y1 = 80.0f;
    float x2 = -200.0f, y2 = 10.0f;
    float x3 = -200.0f, y3 = 150.0f;

    // Odd triangles use color 1,1,0,1 and even ones 1,0,0,1.
    if&#40;i & 1&#41;
	&#123;
	 CV_OUT1&#40;1.0f,1.0f,0.0f,1.0f, X + x1 * co + y1 * si, Y - x1 * si + y1 * co, 0.5f, 0.5f, 0.0f  &#41;;
   	 CV_OUT1&#40;1.0f,1.0f,0.0f,1.0f, X + x2 * co + y2 * si, Y - x2 * si + y2 * co, 0.0f, 0.0f, 1.0f  &#41;;
   	 CV_OUT1&#40;1.0f,1.0f,0.0f,1.0f, X + x3 * co + y3 * si, Y - x3 * si + y3 * co, 0.0f, 1.0f, 1.0f  &#41;;
	&#125;
else
&#123;
    CV_OUT1&#40;1.0f,0.0f,0.0f,1.0f, X + x1 * co + y1 * si, Y - x1 * si + y1 * co, 0.5f, 0.5f, 0.0f  &#41;;
    CV_OUT1&#40;1.0f,0.0f,0.0f,1.0f, X + x2 * co + y2 * si, Y - x2 * si + y2 * co, 0.0f, 0.0f, 1.0f  &#41;;
    CV_OUT1&#40;1.0f,0.0f,0.0f,1.0f, X + x3 * co + y3 * si, Y - x3 * si + y3 * co, 0.0f, 1.0f, 1.0f  &#41;;
&#125;
  &#125;
 
  // Step the scale and reverse direction at the ends, so A moves back and
  // forth between roughly 0 and 256 over successive calls.
  A+=B;

if&#40;A>256.0f&#41; B=-1.0f;
if&#40;A<=0.0f&#41; B=1.0f;

 

  BEGIN_RING&#40;Nv3D, NV40TCL_BEGIN_END, 1&#41;;
  OUT_RING  &#40;NV40TCL_BEGIN_END_STOP&#41;;

  return ptr - fifo;
&#125;
I hope you understand my example code... it is the same example from my Yellow Dog Linux 5.01 3D patch released on the PS3 Linux forum, but modified to use the new shaders and the vertex color.

Maybe I will release all the source code later, when it works using the /dev/fb0, /dev/fb1 and /dev/ps3gpu_* methods.

I am thinking of porting my PS2 game "Guitar Fun" (a Guitar Hero clone) to the PS3 under the GPL license, and I think it can be done using these little shaders :) (it is not easy to do, but it is possible). Many thanks to IronPeter and the others for your work.

Sorry for my bad english ;)
IronPeter
Posts: 207
Joined: Mon Aug 06, 2007 12:46 am
Contact:

Post by IronPeter »

You can look at fragment.h and vertex.h files. These files contain clean refactored interface to shader stuff. look simple_dxt for example of usage.

If you want to generate .h and .c files with shaders - please, keep common ( with fragment.h and vertex.h ) interface for shader setup.

If you extend the compiler with .h and .cpp output - please, keep it as an alternative cmd line option.

I can commit your work in repo in that case.
hermes
Posts: 25
Joined: Tue Mar 30, 2004 5:22 am
Location: Spain

Post by hermes »

IronPeter wrote: If you extend the compiler with .h and .cpp output - please, keep it as an alternative cmd line option.

I can commit your work in repo in that case.
The .h output is already an alternative command line option: it is used only if the output file name ends in .h. If you use, for example, ./vp myshader.bin < shader, it uses your output method.


As for the other questions, I must look at the new changes in detail ;)
Post Reply