[ALLEGREX / CPU +FPU +VFPU] isa

Discuss the development of new homebrew software, tools and libraries.

Moderators: cheriff, TyRaNiD

Post Reply
hlide
Posts: 739
Joined: Sun Sep 10, 2006 2:31 am

[ALLEGREX / CPU +FPU +VFPU] isa

Post by hlide »

ALLEGREX / CPU instruction set

status : mostly done

Code: Select all

field
{
    // Register selectors used by the instruction patterns below.
    rs:5; rt:5; rd:5;

    shamt:5; // immediate shift amount (sll/srl/sra/rotr)

    // Immediate operands. The 16-bit one is named imm16 because that is the
    // name every pattern in the mips/allegrex groups refers to.
    imm3:3; imm16:16; imm26:26;

    code:20; // syscall/break code

    lsb:5; msb:5; // ins/ext bit positions

    func:5; // cache function, specific to Allegrex

    c0dr:5; c0cr:5; // COP0 register selectors (mfc0/mtc0 and cfc0/ctc0)
}

group mips
{
    // SPECIAL
    
    nop(00000000000000000000000000000000)
    {
        cycles="1"
        operation=
        "
            1: no operation
        "
    }

    // Shift GPR[rt] left by the immediate amount shamt and store into GPR[rd].
    sll(00000000000:rt:rd:shamt:000000)
    {
        cycles="1"
        operation=
        "
            1: GPR[rd] = u32(GPR[rt]) << shamt
        "
    }

    srl&#40;00000000000&#58;rt&#58;rd&#58;shamt&#58;000010&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = u32&#40;GPR&#91;rt&#93;&#41; >> shamt
        "
    &#125;

    sra&#40;00000000000&#58;rt&#58;rd&#58;shamt&#58;000011&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = s32&#40;GPR&#91;rt&#93;&#41; >> shamt
        "
    &#125;

    sllv&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000000100&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = u32&#40;GPR&#91;rt&#93;&#41; << u32&#40;GPR&#91;rs&#93;&31&#41;
        "
    &#125;

    srlv&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000000110&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = u32&#40;GPR&#91;rt&#93;&#41; >> u32&#40;GPR&#91;rs&#93;&31&#41;
        "
    &#125;

    srav&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000000111&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = s32&#40;GPR&#91;rt&#93;&#41; >> u32&#40;GPR&#91;rs&#93;&31&#41;
        "
    &#125;

    // Jump to the address held in GPR[rs]; the instruction at PC+4 runs first.
    jr(000000:rs:000000000000000001000)
    {
        cycles="2"
        operation=
        "
            1: target = GPR[rs]
               execute instruction at PC+4
            2: PC = target
        "
        // Marked like j/jal, which describe the same PC+4 execution.
        delayslot="1"
    }

    // Jump to GPR[rs] and store the return address (PC+8) into GPR[rd].
    // Pattern is 6 + 5 + 5 + 5 + 11 = 32 bits.
    jalr(000000:rs:00000:rd:00000001001)
    {
        cycles="2"
        operation=
        "
            1: GPR[rd] = PC+8
               target = GPR[rs]
               execute instruction at PC+4
            2: PC = target
        "
        // Marked like j/jal, which describe the same PC+4 execution.
        delayslot="1"
    }

    mfhi&#40;0000000000000000&#58;rd&#58;00000010000&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = HI
        "
    &#125;

    mthi&#40;000000&#58;rs&#58;000000000000000010001&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; HI = GPR&#91;rs&#93;
        "
    &#125;

    mflo&#40;0000000000000000&#58;rd&#58;00000010010&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = LO
        "
    &#125;

    mtlo&#40;000000&#58;rs&#58;000000000000000010011&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; LO = GPR&#91;rs&#93;
        "
    &#125;

    // Signed 32x32 -> 64 multiply of GPR[rs] by GPR[rt]; result goes to HI:LO.
    mult(000000:rs:rt:0000000000011000)
    {
        cycles="5"
        operation=
        "
            1: result:64 = s64(GPR[rs]) * s64(GPR[rt])
               LO = result[31..0]
               HI = result[63..32]
        "
    }

    // Unsigned 32x32 -> 64 multiply of GPR[rs] by GPR[rt]; result goes to HI:LO.
    multu(000000:rs:rt:0000000000011001)
    {
        cycles="5"
        operation=
        "
            1: result:64 = u64(GPR[rs]) * u64(GPR[rt])
               LO = result[31..0]
               HI = result[63..32]
        "
    }

    // Signed divide of GPR[rs] by GPR[rt]: quotient to LO, remainder to HI.
    div(000000:rs:rt:0000000000011010)
    {
        cycles="36"
        operation=
        "
            1: LO = s32(GPR[rs]) / s32(GPR[rt])
               HI = s32(GPR[rs]) % s32(GPR[rt])
        "
    }

    // Unsigned divide of GPR[rs] by GPR[rt]: quotient to LO, remainder to HI.
    divu(000000:rs:rt:0000000000011011)
    {
        cycles="36"
        operation=
        "
            1: LO = u32(GPR[rs]) / u32(GPR[rt])
               HI = u32(GPR[rs]) % u32(GPR[rt])
        "
    }

    // Signed add with overflow trap. Each operand is widened to 33 bits by
    // replicating bit 31 into bit 32; overflow shows up as bit 32 != bit 31.
    add(000000:rs:rt:rd:00000100000)
    {
        cycles="1"
        operation=
        "
            1: result:33 = (((GPR[rs][31]) << 32) | GPR[rs]) + (((GPR[rt][31]) << 32) | GPR[rt])
               if (result[32] == result[31])
                 GPR[rd] = result[31..0]
               else
                 raise integer overflow exception
        "
    }

    addu&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000100001&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = GPR&#91;rs&#93; + GPR&#91;rt&#93;
        "
    &#125;

    // Signed subtract with overflow trap. Each operand is widened to 33 bits by
    // replicating bit 31 into bit 32; overflow shows up as bit 32 != bit 31.
    sub(000000:rs:rt:rd:00000100010)
    {
        cycles="1"
        operation=
        "
            1: result:33 = (((GPR[rs][31]) << 32) | GPR[rs]) - (((GPR[rt][31]) << 32) | GPR[rt])
               if (result[32] == result[31])
                 GPR[rd] = result[31..0]
               else
                 raise integer overflow exception
        "
    }

    subu&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000100011&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = GPR&#91;rs&#93; - GPR&#91;rt&#93;
        "
    &#125;

    and&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000100100&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = GPR&#91;rs&#93; & GPR&#91;rt&#93;
        "
    &#125;

    or&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000100101&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = GPR&#91;rs&#93; | GPR&#91;rt&#93;
        "
    &#125;

    xor&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000100110&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = GPR&#91;rs&#93; ^ GPR&#91;rt&#93;
        "
    &#125;

    nor&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000100111&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = ~&#40;GPR&#91;rs&#93; | GPR&#91;rt&#93;&#41;
        "
    &#125;

    slt&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000101010&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = s32&#40;GPR&#91;rs&#93;&#41; < s32&#40;GPR&#91;rt&#93;&#41;
        "
    &#125;

    // Unsigned counterpart of slt: GPR[rd] receives the result of the
    // unsigned comparison GPR[rs] < GPR[rt].
    sltu(000000:rs:rt:rd:00000101011)
    {
        cycles="1"
        operation=
        "
            1: GPR[rd] = u32(GPR[rs]) < u32(GPR[rt])
        "
    }

    // REGIMM
    
    bltz&#40;000001&#58;rs&#58;00000&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;s32&#40;GPR&#91;rs&#93;&#41; < 0&#41;
               execute instruction at PC+4
            2&#58; if &#40;ct&#41; 
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    bgez&#40;000001&#58;rs&#58;00001&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;s32&#40;GPR&#91;rs&#93;&#41; >= 0&#41;
               execute instruction at PC+4
            2&#58; if &#40;ct&#41; 
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    bltzl&#40;000001&#58;rs&#58;00010&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;s32&#40;GPR&#91;rs&#93;&#41; < 0&#41;
               if &#40;ct&#41;
                 execute instruction at PC+4
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    bgezl&#40;000001&#58;rs&#58;00011&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;s32&#40;GPR&#91;rs&#93;&#41; >= 0&#41;
               if &#40;ct&#41;
                 execute instruction at PC+4
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    bltzal&#40;000001&#58;rs&#58;10000&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;s32&#40;GPR&#91;rs&#93;&#41; < 0&#41;
               execute instruction at PC+4
               if &#40;ct&#41;
                 GPR&#40;31&#41; = PC+8
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    bgezal&#40;000001&#58;rs&#58;10001&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;s32&#40;GPR&#91;rs&#93;&#41; >= 0&#41;
               execute instruction at PC+4
               if &#40;ct&#41;
                 GPR&#40;31&#41; = PC+8
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    bltzall&#40;000001&#58;rs&#58;10010&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;s32&#40;GPR&#91;rs&#93;&#41; < 0&#41;
               if &#40;ct&#41;
                 execute instruction at PC+4
               if &#40;ct&#41;
                 GPR&#40;31&#41; = PC+8
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    bgezall&#40;000001&#58;rs&#58;10011&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;s32&#40;GPR&#91;rs&#93;&#41; >= 0&#41;
               if &#40;ct&#41;
                 execute instruction at PC+4
               if &#40;ct&#41;
                 GPR&#40;31&#41; = PC+8
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    // OPCODE #1
    
    j&#40;000010&#58;imm26&#41;
    &#123;
        cycles="2"
        operation=
        "
            1&#58; execute instruction at PC+4
            2&#58; PC = PC&#91;31..28&#93; | &#40;u32&#40;imm26&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    jal&#40;000011&#58;imm26&#41;
    &#123;
        cycles="2"
        operation=
        "
            1&#58; GPR&#40;31&#41; = PC+8
               execute instruction at PC+4
            2&#58; PC = PC&#91;31..28&#93; | &#40;u32&#40;imm26&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    beq&#40;000100&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;GPR&#91;rs&#93; == GPR&#91;rt&#93;&#41;
               execute instruction at PC+4
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    bne&#40;000101&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;GPR&#91;rs&#93; <> GPR&#91;rt&#93;&#41;
               execute instruction at PC+4
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    blez&#40;000110&#58;rs&#58;00000&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;s32&#40;GPR&#91;rs&#93;&#41; <= 0&#41;
               execute instruction at PC+4
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    bgtz&#40;000111&#58;rs&#58;00000&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;s32&#40;GPR&#91;rs&#93;&#41; > 0&#41;
               execute instruction at PC+4
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    // Signed add of a sign-extended 16-bit immediate with overflow trap.
    // GPR[rs] is widened to 33 bits by replicating bit 31 into bit 32.
    addi(001000:rs:rt:imm16)
    {
        cycles="1"
        operation=
        "
            1: result:33 = (((GPR[rs][31]) << 32) | GPR[rs]) + s32(imm16)
               if (result[32] == result[31])
                 GPR[rt] = result[31..0]
               else
                 raise integer overflow exception
        "
    }

    addiu&#40;001001&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rt&#93; = GPR&#91;rs&#93; + s32&#40;imm16&#41;
        "
    &#125;

    slti&#40;001010&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rt&#93; = s32&#40;GPR&#91;rs&#93;&#41; < s32&#40;imm16&#41;
        "
    &#125;

    sltiu&#40;001011&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rt&#93; = u32&#40;GPR&#91;rs&#93;&#41; < u32&#40;s32&#40;imm16&#41;&#41;
        "
    &#125;

    andi&#40;001100&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rt&#93; = s32&#40;GPR&#91;rs&#93;&#41; & u32&#40;imm16&#41;
        "
    &#125;

    ori&#40;001101&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rt&#93; = s32&#40;GPR&#91;rs&#93;&#41; | u32&#40;imm16&#41;
        "
    &#125;

    xori&#40;001110&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rt&#93; = s32&#40;GPR&#91;rs&#93;&#41; ^ u32&#40;imm16&#41;
        "
    &#125;

    // Load the 16-bit immediate into the upper half of GPR[rt]; the lower half
    // becomes zero. The pattern has no rs field, so no source register is read.
    lui(00111100000:rt:imm16)
    {
        cycles="1"
        operation=
        "
            1: GPR[rt] = u32(imm16) << 16
        "
    }
    
    // COP0

    mfc0&#40;01000000000&#58;rt&#58;c0dr&#58;00000000000&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; GPR&#91;rt&#93; = C0DR&#40;c0dr&#41;
        "
    &#125;

    cfc0&#40;01000000010&#58;rt&#58;c0cr&#58;00000000000&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; GPR&#91;rt&#93; = C0CR&#40;c0cr&#41;
        "
    &#125;

    mtc0&#40;01000000100&#58;rt&#58;c0dr&#58;00000000000&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; C0DR&#40;c0dr&#41; = GPR&#91;rt&#93;
        "
    &#125;

    // Copy GPR[rt] into the COP0 control register selected by c0cr.
    // Opcode is COP0 (010000), matching mfc0/cfc0/mtc0; 010001 would be ctc1.
    ctc0(01000000110:rt:c0cr:00000000000)
    {
        cycles="?"
        operation=
        "
            1: C0CR(c0cr) = GPR[rt]
        "
    }

    // Return from exception: resume at ErrorEPC when ERL is set, otherwise at
    // EPC (clearing EXL). Pattern is 0x42000018, i.e. COP0 with the CO bit set.
    eret(01000010000000000000000000011000)
    {
        cycles="?"
        operation=
        "
            1: if (ERL == 1)
                 PC = ErrorEPC
               else
                 PC = EPC
               if (ERL == 0)
                 EXL = 0
               LLBit = 0
        "
    }
    
    // OPCODE #2

    beql&#40;010100&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;GPR&#91;rs&#93; == GPR&#91;rt&#93;&#41;
               if &#40;ct&#41;
                 execute instruction at PC+4
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    bnel&#40;010101&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;GPR&#91;rs&#93; <> GPR&#91;rt&#93;&#41;
               if &#40;ct&#41;
                 execute instruction at PC+4
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    // Branch-likely on GPR[rs] <= 0: the instruction at PC+4 is executed only
    // when the condition holds, then the branch is taken in step 2.
    blezl(010110:rs:00000:imm16)
    {
        cycles="3"
        operation=
        "
            1: ct = (s32(GPR[rs]) <= 0)
               if (ct)
                 execute instruction at PC+4
            2: if (ct)
                 PC = PC + (s16(imm16) << 2)
        "
        delayslot="1"
    }

    bgtzl&#40;010111&#58;rs&#58;00000&#58;imm16&#41;
    &#123;
        cycles="3"
        operation=
        "
            1&#58; ct = &#40;s32&#40;GPR&#91;rs&#93;&#41; > 0&#41;
               if &#40;ct&#41;
                 execute instruction at PC+4
            2&#58; if &#40;ct&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"
    &#125;

    lb&#40;100000&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + s32&#40;imm16&#41;
               GPR&#91;rt&#93; = s32&#40;MemoryRead8&#40;address&#41;&#41;
        "
    &#125;

    lh&#40;100001&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + s32&#40;imm16&#41;
               if &#40;address & 1&#41;
                 raise address error exception
               else
                 GPR&#91;rt&#93; = s32&#40;MemoryRead16&#40;address&#41;&#41;
        "
    &#125;

    lwl&#40;100010&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
    &#125;

    lw&#40;100011&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + s32&#40;imm16&#41;
               if &#40;address & 3&#41;
                 raise address error exception
               else
                 GPR&#91;rt&#93; = MemoryRead32&#40;address&#41;
        "
    &#125;

    lbu&#40;100100&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + s32&#40;imm16&#41;
               GPR&#91;rt&#93; = u32&#40;MemoryRead8&#40;address&#41;&#41;
        "
    &#125;

    lhu&#40;100101&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + s32&#40;imm16&#41;
               if &#40;address & 1&#41;
                 raise address error exception
               else
                 GPR&#91;rt&#93; = u32&#40;MemoryRead16&#40;address&#41;&#41;
        "
    &#125;

    lwr&#40;100110&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
    &#125;

    sb&#40;101000&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + s32&#40;imm16&#41;
               MemoryWrite8&#40;address, GPR&#91;rt&#93;&#91;7..0&#93;&#41;
        "
    &#125;

    sh&#40;101001&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + s32&#40;imm16&#41;
               if &#40;address & 1&#41;
                 raise address error exception
               else
                 MemoryWrite16&#40;address, GPR&#91;rt&#93;&#91;15..0&#93;&#41;
        "
    &#125;

    swl&#40;101010&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
    &#125;

    sw&#40;101011&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + s32&#40;imm16&#41;
               if &#40;address & 3&#41;
                 raise address error exception
               else
                 MemoryWrite32&#40;address, GPR&#91;rt&#93;&#41;
        "
    &#125;

    swr&#40;101110&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
    &#125;

    ll&#40;110000&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + s32&#40;imm16&#41;
               if &#40;address & 3&#41;
                 raise address error exception
               else
                 GPR&#91;rt&#93; = MemoryRead32&#40;address&#41;
               LLBit = 1
        "
    &#125;

    sc&#40;111000&#58;rs&#58;rt&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + s32&#40;imm16&#41;
               if &#40;address & 3&#41;
                 raise address error exception
               else if &#40;LLBit == 1&#41;
                 MemoryWrite32&#40;address, GPR&#91;rt&#93;&#41;
               GPR&#91;rt&#93; = u32&#40;LLBit&#41;
        "
    &#125;
&#125;

group allegrex
&#123;
    // SPECIAL
    
    rotr&#40;00000000001&#58;rt&#58;rd&#58;shamt&#58;000010&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = &#40;u32&#40;GPR&#91;rt&#93;&#41; >> shamt&#41; | &#40;GPR&#91;rt&#93; << &#40;32 - shamt&#41;&#41; 
        "
    &#125;

    rotrv&#40;000000&#58;rs&#58;rt&#58;rd&#58;00001000110&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; s = GPR&#91;rs&#93; & 31
               GPR&#91;rd&#93; = &#40;u32&#40;GPR&#91;rt&#93;&#41; >> s&#41; | &#40;GPR&#91;rt&#93; << &#40;32 - s&#41;&#41; 
        "
    &#125;

    movz&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000001010&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; if &#40;GPR&#91;rt&#93; == 0&#41;
                 GPR&#91;rd&#93; = GPR&#91;rs&#93; 
        "
    &#125;
    
    movn&#40;000000&#58;rs&#58;rt&#58;rd&#58;00000001011&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; if &#40;GPR&#91;rt&#93; <> 0&#41;
                 GPR&#91;rd&#93; = GPR&#91;rs&#93; 
        "
    &#125;
    
    syscall&#40;000000&#58;code&#58;001100&#41;
    &#123;
        cycles="?"
    &#125;

    // Breakpoint exception. Function code is 001101; 001100 is syscall.
    break(000000:code:001101)
    {
        cycles="?"
    }
    
    sync&#40;00000000000000000000000000001111&#41;
    &#123;
        cycles="?"
    &#125;

    clz&#40;000000&#58;rs&#58;00000&#58;rd&#58;00000010110&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; count = 32
               i = 31
               loop
                 if &#40;GPR&#91;rs&#93;&#91;i&#93; == 1&#41;
                   count = 31 - i
               while &#40;count == 32 and i-- <> 0&#41;
               GPR&#91;rd&#93; = count;
        "
    &#125;
    
    clo&#40;000000&#58;rs&#58;00000&#58;rd&#58;00000010111&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; count = 32
               i = 31
               loop
                 if &#40;GPR&#91;rs&#93;&#91;i&#93; == 0&#41;
                   count = 31 - i
               while &#40;count == 32 and i-- <> 0&#41;
               GPR&#91;rd&#93; = count;
        "
    &#125;
    
    // Signed multiply-accumulate: HI:LO += GPR[rs] * GPR[rt].
    // HI is widened before the shift so its bits are not lost.
    madd(000000:rs:rt:0000000000011100)
    {
        cycles="5"
        operation=
        "
            1: result:64 = u64(LO) + (s64(HI) << 32) + s64(GPR[rs]) * s64(GPR[rt])
               LO = result[31..0]
               HI = result[63..32]
        "
    }
    
    // Unsigned multiply-accumulate: HI:LO += GPR[rs] * GPR[rt].
    // HI is widened before the shift so its bits are not lost.
    maddu(000000:rs:rt:0000000000011101)
    {
        cycles="5"
        operation=
        "
            1: result:64 = u64(LO) + (u64(HI) << 32) + u64(GPR[rs]) * u64(GPR[rt])
               LO = result[31..0]
               HI = result[63..32]
        "
    }
    
    // Signed maximum of GPR[rs] and GPR[rt].
    max(000000:rs:rt:rd:00000101100)
    {
        cycles="1"
        operation=
        "
            1: GPR[rd] = (s32(GPR[rs]) < s32(GPR[rt])) ? GPR[rt] : GPR[rs];
        "
    }
    
    // Signed minimum of GPR[rs] and GPR[rt].
    min(000000:rs:rt:rd:00000101101)
    {
        cycles="1"
        operation=
        "
            1: GPR[rd] = (s32(GPR[rs]) < s32(GPR[rt])) ? GPR[rs] : GPR[rt];
        "
    }
    
    // Signed multiply-subtract: HI:LO -= GPR[rs] * GPR[rt].
    // Trailing literal is 16 bits wide so the pattern totals 32 bits, like madd.
    msub(000000:rs:rt:0000000000101110)
    {
        cycles="5"
        operation=
        "
            1: result:64 = u64(LO) + (s64(HI) << 32) - s64(GPR[rs]) * s64(GPR[rt])
               LO = result[31..0]
               HI = result[63..32]
        "
    }
    
    // Unsigned multiply-subtract: HI:LO -= GPR[rs] * GPR[rt].
    // Trailing literal is 16 bits wide so the pattern totals 32 bits, like maddu.
    msubu(000000:rs:rt:0000000000101111)
    {
        cycles="5"
        operation=
        "
            1: result:64 = u64(LO) + (u64(HI) << 32) - u64(GPR[rs]) * u64(GPR[rt])
               LO = result[31..0]
               HI = result[63..32]
        "
    }
    
    // OPCODE #1
    
    halt&#40;01110000000000000000000000000000&#41;
    &#123;
        cycles="?"
    &#125;

    // SPECIAL3
    
    ext&#40;011111&#58;rs&#58;rt&#58;&#40;msb-lsb&#41;&#58;lsb&#58;000000&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rt&#93; = GPR&#91;rs&#93;&#91;msb..lsb&#93;;
        "		
    &#125;

    ins&#40;011111&#58;rs&#58;rt&#58;msb&#58;lsb&#58;000100&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rt&#93;&#91;msb..lsb&#93; = GPR&#91;rs&#93;&#91;msb-lsb..0&#93;;
        "		
    &#125;

    wsbh&#40;01111100000&#58;rt&#58;rd&#58;00010100000&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93;&#91; 7.. 0&#93; = GPR&#91;rt&#93;&#91;15.. 8&#93;;
               GPR&#91;rd&#93;&#91;15.. 8&#93; = GPR&#91;rt&#93;&#91; 7.. 0&#93;;
               GPR&#91;rd&#93;&#91;23..16&#93; = GPR&#91;rt&#93;&#91;31..24&#93;;
               GPR&#91;rd&#93;&#91;31..24&#93; = GPR&#91;rt&#93;&#91;23..16&#93;;
        "		
    &#125;

    // Reverse the four bytes of GPR[rt] into GPR[rd] (byte 0 <-> byte 3,
    // byte 1 <-> byte 2).
    wsbw(01111100000:rt:rd:00011100000)
    {
        cycles="1"
        operation=
        "
            1: GPR[rd][ 7.. 0] = GPR[rt][31..24];
               GPR[rd][15.. 8] = GPR[rt][23..16];
               GPR[rd][23..16] = GPR[rt][15.. 8];
               GPR[rd][31..24] = GPR[rt][ 7.. 0];
        "
    }

    // Sign-extend the low byte of GPR[rt] into GPR[rd].
    // Low six bits are 100000, as for the sibling bitrev/seh entries.
    seb(01111100000:rt:rd:10000100000)
    {
        cycles="1"
        operation=
        "
            1: GPR[rd] = s32(GPR[rt][7..0]);
        "
    }

    bitrev&#40;01111100000&#58;rt&#58;rd&#58;10100100000&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; for each i in &#91;31..0&#93;
                 GPR&#91;rd&#93;&#91;i&#93; = GPR&#91;rt&#93;&#91;31-i&#93;;
        "		
    &#125;

    seh&#40;01111100000&#58;rt&#58;rd&#58;11000100000&#41;
    &#123;
        cycles="1"
        operation=
        "
            1&#58; GPR&#91;rd&#93; = s32&#40;GPR&#91;rt&#93;&#91;15..0&#93;&#41;;
        "		
    &#125;

    // OPCODE #2
    
    cache&#40;101111&#58;rs&#58;func&#58;imm16&#41;
    &#123;
        cycles="?"
    &#125;
&#125;
Last edited by hlide on Mon Jun 16, 2008 12:00 am, edited 6 times in total.
hlide
Posts: 739
Joined: Sun Sep 10, 2006 2:31 am

Post by hlide »

FPU instruction set

status : WIP

Code: Select all

field
{
	// General-purpose register selectors.
	rs:5; rt:5; rd:5;

	// Immediate operands; bc1*/lwc1/swc1 below refer to the 16-bit one as imm16.
	imm3:3; imm16:16; imm26:26;

	cc:3; // condition-code selector used by c.<cond>.s and bc1*

	// Comparison predicate of c.<cond>.s, with its 4-bit encoding.
	cond:4 = f(0000)  |un(0001)  |eq(0010) |ueq(0011)|
	         olt(0100)|ult(0101) |ole(0110)|ule(0111)|
	         sf(1000) |ngle(1001)|seq(1010)|ngl(1011)|
	         lt(1100) |nge(1101) |le(1110) |ngt(1111);

	// Floating-point register selectors.
	fs:5; ft:5; fd:5;
}

group fpu
&#123;
	// cycles = "pitch/latency/repeat rate"
	
	add.s&#40;01000110000&#58;ft&#58;fs&#58;fd&#58;000000&#41;
	&#123;
		cycles="1/4/3"
	&#125;

	sub.s&#40;01000110000&#58;ft&#58;fs&#58;fd&#58;000001&#41;
	&#123;
		cycles="1/4/3"
	&#125;

	mul.s&#40;01000110000&#58;ft&#58;fs&#58;fd&#58;000010&#41;
	&#123;
		cycles="1/7/6"
	&#125;

	div.s&#40;01000110000&#58;ft&#58;fs&#58;fd&#58;000011&#41;
	&#123;
		cycles="28/28/27"
	&#125;

	sqrt.s&#40;01000110000-----&#58;fs&#58;fd&#58;000100&#41;
	&#123;
		cycles="28/28/27"
	&#125;

	abs.s&#40;01000110000-----&#58;fs&#58;fd&#58;000101&#41;
	&#123;
		cycles="1/2/1"
	&#125;

	mov.s&#40;01000110000-----&#58;fs&#58;fd&#58;000110&#41;
	&#123;
		cycles="1/1/1"
	&#125;

	neg.s&#40;01000110000-----&#58;fs&#58;fd&#58;000111&#41;
	&#123;
		cycles="1/2/1"
	&#125;

	round.w.s&#40;01000110000-----&#58;fs&#58;fd&#58;001100&#41;
	&#123;
		cycles="1/4/3"
	&#125;

	trunc.w.s&#40;01000110000-----&#58;fs&#58;fd&#58;001101&#41;
	&#123;
		cycles="1/4/3"
	&#125;

	ceil.w.s&#40;01000110000-----&#58;fs&#58;fd&#58;001110&#41;
	&#123;
		cycles="1/4/3"
	&#125;

	floor.w.s&#40;01000110000-----&#58;fs&#58;fd&#58;001111&#41;
	&#123;
		cycles="1/4/3"
	&#125;

	cvt.s.w&#40;01000110100-----&#58;fs&#58;fd&#58;100000&#41;
	&#123;
		cycles="1/6/5"
	&#125;

	cvt.w.s&#40;01000110000-----&#58;fs&#58;fd&#58;100100&#41;
	&#123;
		cycles="1/4/3"
	&#125;

	c.<cond>.s&#40;01000110000&#58;ft&#58;fs&#58;cc&#58;--11&#58;cond&#41;
	&#123;
		cycles="1/3/2"
	&#125;

	mfc1&#40;01000100000&#58;rt&#58;fs&#58;-----------&#41;
	&#123;
		cycles="1/?/?"
	&#125;

	cfc1&#40;01000100010&#58;rt&#58;fs&#58;-----------&#41;
	&#123;
		cycles="1/?/?"
	&#125;

	mtc1&#40;01000100100&#58;rt&#58;fs&#58;-----------&#41;
	&#123;
		cycles="1/?/?"
	&#125;

	ctc1&#40;01000100110&#58;rt&#58;fs&#58;-----------&#41;
	&#123;
		cycles="1/?/?"
	&#125;

	bc1f&#40;01000101000&#58;cc&#58;00&#58;imm16&#41;
	&#123;
		cycles="1/?/?"
	&#125;

	bc1t&#40;01000101000&#58;cc&#58;01&#58;imm16&#41;
	&#123;
		cycles="1/?/?"
	&#125;

	// Bits 17..16 are 10 here (likely=1, true=0); 00 is already taken by bc1f.
	bc1fl(01000101000:cc:10:imm16)
	{
		cycles="1/?/?"
	}

	// Bits 17..16 are 11 here (likely=1, true=1); 01 is already taken by bc1t.
	bc1tl(01000101000:cc:11:imm16)
	{
		cycles="1/?/?"
	}

	lwc1&#40;110001&#58;rs&#58;rt&#58;imm16&#41;
	&#123;
		cycles="1/?/?"
	&#125;

	swc1&#40;111001&#58;rs&#58;rt&#58;imm16&#41;
	&#123;
		cycles="1/?/?"
	&#125;
&#125;
Last edited by hlide on Sun Jun 15, 2008 6:59 am, edited 3 times in total.
hlide
Posts: 739
Joined: Sun Sep 10, 2006 2:31 am

Post by hlide »

VFPU instruction set

PART 1/2

status : WIP

Code: Select all

field
{
    rs:5; rt:5;

    imm3:3; imm7:7; imm:14; imm16:16;

    // Sub-fields of the three vector register operands (vs, vt, vd).
    // Each operand has its own m/c/r/x/o components.
    vs_m:3; vs_c:2; vs_r:2; vs_x:1; vs_o:1;
    vt_m:3; vt_c:2; vt_r:2; vt_x:1; vt_o:1;
    vd_m:3; vd_c:2; vd_r:2; vd_x:1; vd_o:1;

    // Per-lane (w, z, y, x) prefix bits: negate, constant, absolute value,
    // swizzle selector, write mask and saturation mode.
    negw:1; negz:1; negy:1; negx:1;
    cstw:1; cstz:1; csty:1; cstx:1;
    absw:1; absz:1; absy:1; absx:1;
    swzw:2; swzz:2; swzy:2; swzx:2;
    mskw:1; mskz:1; msky:1; mskx:1;
    satw:2; satz:2; saty:2; satx:2;
}

macro
&#123;
    // Sets the write-mask bit of lane i, stored at bit i+8 of VFPU_PFXD,
    // and marks the destination prefix as taken.
    Mask&#40;i&#58;2&#41;
    &#123;
        VFPU_PFXD&#91;i+8&#93; = 1
        set VPFXD as taken
    &#125;

    // Returns 1 when lane i may be written, i.e. when its mask bit in
    // VFPU_PFXD, bit i+8 as set by Mask, is clear.
    NotMasked&#58;1&#40;i&#58;2&#41;
    &#123;
        return &#40;VFPU_PFXD >> 8&#41;&#91;i&#93; == 0
    &#125;

    // Computes one source lane after prefixing.
    //   swz  two-bit selector, either a source lane or a constant index
    //   abs  absolute-value bit, or constant bank selector when cst is set
    //   cst  1 to substitute a constant for the source lane
    //   neg  1 to negate the final value
    //   x, y, z, w  raw source lanes
    Transform&#58;32&#40;swz&#58;2, abs&#58;1, cst&#58;1, neg&#58;1, x&#58;32, y&#58;32, z&#58;32, w&#58;32&#41;
    &#123;
      if &#40;cst == 1&#41;
        // constant table, abs = 0 gives 0, 1, 2, 1/2 and abs = 1 gives 3, 1/3, 1/4, 1/6
        when swz is
          0 &#58;
            value = abs ? 3.0f &#58; 0.0f
          1 &#58;
            value = abs ? 1.0f/3.0f &#58; 1.0f
          2 &#58;
            value = abs ? 1.0f/4.0f &#58; 2.0f
          3 &#58;
            value = abs ? 1.0f/6.0f &#58; 0.5f
      else
        // swizzle, pick one of the four source lanes
        when swz is
          0 &#58;
            value' = x
          1 &#58;
            value' = y
          2 &#58;
            value' = z
          3 &#58;
            value' = w

        if &#40;abs == 1&#41;
          value = value' < 0.0f ? -value' &#58; value'
        else
          value = value'

      // negation applies to constants and swizzled lanes alike
      return &#40;neg == 1&#41; ? -value &#58; value
    &#125;

    // Applies the S source prefix to lane i. The per-lane controls are read
    // from VFPU_PFXS, swizzle at bits 2*i+1..2*i, abs at bit 8+i, constant
    // at bit 12+i and negate at bit 16+i, then handed to Transform.
    PrefixS&#58;32&#40;i&#58;2, x&#58;32&#91;, y&#58;32 &#91;, z&#58;32 &#91;, w&#58;32&#93;&#93;&#93;&#41;
    &#123;
      swz = &#40;VFPU_PFXS >> 2*i&#41;&#91;1..0&#93;
      abs = &#40;VFPU_PFXS >> 8&#41;&#91;i&#93;
      cst = &#40;VFPU_PFXS >> 12&#41;&#91;i&#93;
      neg = &#40;VFPU_PFXS >> 16&#41;&#91;i&#93;
      
      return Transform&#40;swz, abs, cst, neg, x, y, z, w&#41;
    &#125;	

    // Applies the T source prefix to lane i. Same bit layout as the S prefix
    // but read from VFPU_PFXT. This is the macro LoadRegisterT calls.
    PrefixT&#58;32&#40;i&#58;2, x&#58;32&#91;, y&#58;32 &#91;, z&#58;32 &#91;, w&#58;32&#93;&#93;&#93;&#41;
    &#123;
      swz = &#40;VFPU_PFXT >> 2*i&#41;&#91;1..0&#93;
      abs = &#40;VFPU_PFXT >> 8&#41;&#91;i&#93;
      cst = &#40;VFPU_PFXT >> 12&#41;&#91;i&#93;
      neg = &#40;VFPU_PFXT >> 16&#41;&#91;i&#93;

      return Transform&#40;swz, abs, cst, neg, x, y, z, w&#41;
    &#125;

    // Applies the destination saturation mode of lane i, a two-bit value at
    // bits 2*i+1..2*i of VFPU_PFXD. Mode 0 passes the value through, mode 1
    // clamps to 0.0 .. 1.0, mode 3 clamps to -1.0 .. 1.0.
    // NOTE review, mode 2 has no case here - presumably reserved, confirm.
    PrefixD&#58;32&#40;i&#58;2, value&#58;32&#41;
    &#123;
        when &#40;VFPU_PFXD >> 2*i&#41;&#91;1..0&#93; is
          0 &#58;
            return value
          1 &#58;
            return &#40;value < 0.0f&#41; ? 0.0f &#58; &#40;&#40;value > 1.0f ? 1.0f &#58; value&#41;&#41;
          3 &#58;
            return &#40;value < -1.0f&#41; ? -1.0f &#58; &#40;&#40;value > 1.0f ? 1.0f &#58; value&#41;&#41;
    &#125;
    
    // Stores n result lanes into the destination register.
    //   n     lane count, 1 to 4
    //   r     lane index, single form only
    //   ofs   starting lane offset, pair and triple forms only
    //   xch   1 swaps the two VFPR lane indices
    //   m, c  remaining VFPR indices
    // When VPFXD is taken every lane is filtered by its write mask and by
    // PrefixD, and the prefix is released afterwards. Otherwise lanes are
    // stored unchanged.
    SaveRegisterD&#40;n&#58;2, &#40;r&#58;2 | &#91;ofs&#58;1,&#93; xch&#58;1&#41;, m&#58;3, c&#58;2, x&#58;32&#91;, y&#58;32&#91;, z&#58;32&#91;, w&#58;32&#93;&#93;&#93;&#41;
    &#123;
        when n is
          1 &#58;
            if &#40;VPFXD is taken&#41;
              if NotMasked&#40;0&#41; VFPR&#91;m&#93;&#91;c&#93;&#91;r&#93; = PrefixD&#40;0, x&#41;
              set VPFXD as free
            else
              VFPR&#91;m&#93;&#91;c&#93;&#91;r&#93; = x

          2 &#58;
            if &#40;VPFXD is taken&#41;
              if &#40;xch&#41;
                if NotMasked&#40;0&#41; VFPR&#91;m&#93;&#91;0+ofs<<1&#93;&#91;c&#93; = PrefixD&#40;0, x&#41;
                if NotMasked&#40;1&#41; VFPR&#91;m&#93;&#91;1+ofs<<1&#93;&#91;c&#93; = PrefixD&#40;1, y&#41;
              else
                if NotMasked&#40;0&#41; VFPR&#91;m&#93;&#91;c&#93;&#91;0+ofs<<1&#93; = PrefixD&#40;0, x&#41;
                if NotMasked&#40;1&#41; VFPR&#91;m&#93;&#91;c&#93;&#91;1+ofs<<1&#93; = PrefixD&#40;1, y&#41;
              set VPFXD as free
            else
              if &#40;xch&#41;
                VFPR&#91;m&#93;&#91;0+ofs<<1&#93;&#91;c&#93; = x
                VFPR&#91;m&#93;&#91;1+ofs<<1&#93;&#91;c&#93; = y
              else
                VFPR&#91;m&#93;&#91;c&#93;&#91;0+ofs<<1&#93; = x
                VFPR&#91;m&#93;&#91;c&#93;&#91;1+ofs<<1&#93; = y

          3 &#58;
            if &#40;VPFXD is taken&#41;
              if &#40;xch&#41;
                if NotMasked&#40;0&#41; VFPR&#91;m&#93;&#91;0+ofs&#93;&#91;c&#93; = PrefixD&#40;0, x&#41;
                if NotMasked&#40;1&#41; VFPR&#91;m&#93;&#91;1+ofs&#93;&#91;c&#93; = PrefixD&#40;1, y&#41;
                if NotMasked&#40;2&#41; VFPR&#91;m&#93;&#91;2+ofs&#93;&#91;c&#93; = PrefixD&#40;2, z&#41;
              else
                if NotMasked&#40;0&#41; VFPR&#91;m&#93;&#91;c&#93;&#91;0+ofs&#93; = PrefixD&#40;0, x&#41;
                if NotMasked&#40;1&#41; VFPR&#91;m&#93;&#91;c&#93;&#91;1+ofs&#93; = PrefixD&#40;1, y&#41;
                if NotMasked&#40;2&#41; VFPR&#91;m&#93;&#91;c&#93;&#91;2+ofs&#93; = PrefixD&#40;2, z&#41;
              set VPFXD as free
            else
              // same lane addressing as the prefixed branch, using ofs
              if &#40;xch&#41;
                VFPR&#91;m&#93;&#91;0+ofs&#93;&#91;c&#93; = x
                VFPR&#91;m&#93;&#91;1+ofs&#93;&#91;c&#93; = y
                VFPR&#91;m&#93;&#91;2+ofs&#93;&#91;c&#93; = z
              else
                VFPR&#91;m&#93;&#91;c&#93;&#91;0+ofs&#93; = x
                VFPR&#91;m&#93;&#91;c&#93;&#91;1+ofs&#93; = y
                VFPR&#91;m&#93;&#91;c&#93;&#91;2+ofs&#93; = z

          4 &#58;
            if &#40;VPFXD is taken&#41;
              if &#40;xch&#41;
                if NotMasked&#40;0&#41; VFPR&#91;m&#93;&#91;0&#93;&#91;c&#93; = PrefixD&#40;0, x&#41;
                if NotMasked&#40;1&#41; VFPR&#91;m&#93;&#91;1&#93;&#91;c&#93; = PrefixD&#40;1, y&#41;
                if NotMasked&#40;2&#41; VFPR&#91;m&#93;&#91;2&#93;&#91;c&#93; = PrefixD&#40;2, z&#41;
                if NotMasked&#40;3&#41; VFPR&#91;m&#93;&#91;3&#93;&#91;c&#93; = PrefixD&#40;3, w&#41;
              else
                if NotMasked&#40;0&#41; VFPR&#91;m&#93;&#91;c&#93;&#91;0&#93; = PrefixD&#40;0, x&#41;
                if NotMasked&#40;1&#41; VFPR&#91;m&#93;&#91;c&#93;&#91;1&#93; = PrefixD&#40;1, y&#41;
                if NotMasked&#40;2&#41; VFPR&#91;m&#93;&#91;c&#93;&#91;2&#93; = PrefixD&#40;2, z&#41;
                if NotMasked&#40;3&#41; VFPR&#91;m&#93;&#91;c&#93;&#91;3&#93; = PrefixD&#40;3, w&#41;
              // released on both the xch and the plain path
              set VPFXD as free
            else
              if &#40;xch&#41;
                VFPR&#91;m&#93;&#91;0&#93;&#91;c&#93; = x
                VFPR&#91;m&#93;&#91;1&#93;&#91;c&#93; = y
                VFPR&#91;m&#93;&#91;2&#93;&#91;c&#93; = z
                VFPR&#91;m&#93;&#91;3&#93;&#91;c&#93; = w
              else
                VFPR&#91;m&#93;&#91;c&#93;&#91;0&#93; = x
                VFPR&#91;m&#93;&#91;c&#93;&#91;1&#93; = y
                VFPR&#91;m&#93;&#91;c&#93;&#91;2&#93; = z
                VFPR&#91;m&#93;&#91;c&#93;&#91;3&#93; = w
    &#125;

    // Reads n lanes of the S source register into x..w.
    //   n     lane count, 1 to 4
    //   r     lane index, single form only
    //   ofs   starting lane offset, pair and triple forms only
    //   xch   1 swaps the two VFPR lane indices
    //   m, c  remaining VFPR indices
    // When VPFXS is taken each output lane is produced by PrefixS from the
    // raw lanes and the prefix is released. Otherwise lanes are copied as is.
    LoadRegisterS&#40;n&#58;2, &#40;r&#58;2 | &#91;ofs&#58;1,&#93; xch&#58;1&#41;, m&#58;3, c&#58;2, out x&#58;32&#91;, out y&#58;32&#91;, out z&#58;32&#91;, out w&#58;32&#93;&#93;&#93;&#41;
    &#123;
        when n is
          1 &#58;
            x' = VFPR&#91;m&#93;&#91;c&#93;&#91;r&#93;

            if &#40;VPFXS is taken&#41;
              x = PrefixS&#40;0, x'&#41;
              set VPFXS as free
            else
              x = x'

          2 &#58;
            if &#40;xch&#41;
              x' = VFPR&#91;m&#93;&#91;0+ofs<<1&#93;&#91;c&#93;
              y' = VFPR&#91;m&#93;&#91;1+ofs<<1&#93;&#91;c&#93;
            else
              x' = VFPR&#91;m&#93;&#91;c&#93;&#91;0+ofs<<1&#93;
              y' = VFPR&#91;m&#93;&#91;c&#93;&#91;1+ofs<<1&#93;

            if &#40;VPFXS is taken&#41;
              x = PrefixS&#40;0, x', y'&#41;
              y = PrefixS&#40;1, x', y'&#41;
              set VPFXS as free
            else
              x = x'
              y = y'

          3 &#58;
            if &#40;xch&#41;
              x' = VFPR&#91;m&#93;&#91;0+ofs&#93;&#91;c&#93;
              y' = VFPR&#91;m&#93;&#91;1+ofs&#93;&#91;c&#93;
              z' = VFPR&#91;m&#93;&#91;2+ofs&#93;&#91;c&#93;
            else
              x' = VFPR&#91;m&#93;&#91;c&#93;&#91;0+ofs&#93;
              y' = VFPR&#91;m&#93;&#91;c&#93;&#91;1+ofs&#93;
              z' = VFPR&#91;m&#93;&#91;c&#93;&#91;2+ofs&#93;

            if &#40;VPFXS is taken&#41;
              x = PrefixS&#40;0, x', y', z'&#41;
              y = PrefixS&#40;1, x', y', z'&#41;
              z = PrefixS&#40;2, x', y', z'&#41;
              set VPFXS as free
            else
              x = x'
              y = y'
              z = z'

          4 &#58;
            // indexed through the m and c parameters like the other cases
            if &#40;xch&#41;
              x' = VFPR&#91;m&#93;&#91;0&#93;&#91;c&#93;
              y' = VFPR&#91;m&#93;&#91;1&#93;&#91;c&#93;
              z' = VFPR&#91;m&#93;&#91;2&#93;&#91;c&#93;
              w' = VFPR&#91;m&#93;&#91;3&#93;&#91;c&#93;
            else
              x' = VFPR&#91;m&#93;&#91;c&#93;&#91;0&#93;
              y' = VFPR&#91;m&#93;&#91;c&#93;&#91;1&#93;
              z' = VFPR&#91;m&#93;&#91;c&#93;&#91;2&#93;
              w' = VFPR&#91;m&#93;&#91;c&#93;&#91;3&#93;

            if &#40;VPFXS is taken&#41;
              x = PrefixS&#40;0, x', y', z', w'&#41;
              y = PrefixS&#40;1, x', y', z', w'&#41;
              z = PrefixS&#40;2, x', y', z', w'&#41;
              w = PrefixS&#40;3, x', y', z', w'&#41;
              set VPFXS as free
            else
              x = x'
              y = y'
              z = z'
              w = w'
    &#125;

    // Reads n lanes of the T source register into x..w.
    //   n     lane count, 1 to 4
    //   r     lane index, single form only
    //   ofs   starting lane offset, pair and triple forms only
    //   xch   1 swaps the two VFPR lane indices
    //   m, c  remaining VFPR indices
    // When VPFXT is taken each output lane is produced by PrefixT from the
    // raw lanes and the prefix is released. Otherwise lanes are copied as is.
    LoadRegisterT&#40;n&#58;2, &#40;r&#58;2 | &#91;ofs&#58;1,&#93; xch&#58;1&#41;, m&#58;3, c&#58;2, out x&#58;32&#91;, out y&#58;32&#91;, out z&#58;32&#91;, out w&#58;32&#93;&#93;&#93;&#41;
    &#123;
        when n is
          1 &#58;
            x' = VFPR&#91;m&#93;&#91;c&#93;&#91;r&#93;

            if &#40;VPFXT is taken&#41;
              x = PrefixT&#40;0, x'&#41;
              set VPFXT as free
            else
              x = x'

          2 &#58;
            if &#40;xch&#41;
              x' = VFPR&#91;m&#93;&#91;0+ofs<<1&#93;&#91;c&#93;
              y' = VFPR&#91;m&#93;&#91;1+ofs<<1&#93;&#91;c&#93;
            else
              x' = VFPR&#91;m&#93;&#91;c&#93;&#91;0+ofs<<1&#93;
              y' = VFPR&#91;m&#93;&#91;c&#93;&#91;1+ofs<<1&#93;

            if &#40;VPFXT is taken&#41;
              x = PrefixT&#40;0, x', y'&#41;
              y = PrefixT&#40;1, x', y'&#41;
              set VPFXT as free
            else
              x = x'
              y = y'

          3 &#58;
            if &#40;xch&#41;
              x' = VFPR&#91;m&#93;&#91;0+ofs&#93;&#91;c&#93;
              y' = VFPR&#91;m&#93;&#91;1+ofs&#93;&#91;c&#93;
              z' = VFPR&#91;m&#93;&#91;2+ofs&#93;&#91;c&#93;
            else
              x' = VFPR&#91;m&#93;&#91;c&#93;&#91;0+ofs&#93;
              y' = VFPR&#91;m&#93;&#91;c&#93;&#91;1+ofs&#93;
              z' = VFPR&#91;m&#93;&#91;c&#93;&#91;2+ofs&#93;

            if &#40;VPFXT is taken&#41;
              x = PrefixT&#40;0, x', y', z'&#41;
              y = PrefixT&#40;1, x', y', z'&#41;
              z = PrefixT&#40;2, x', y', z'&#41;
              set VPFXT as free
            else
              x = x'
              y = y'
              z = z'

          4 &#58;
            // indexed through the m and c parameters, not the vs_ fields of the other operand
            if &#40;xch&#41;
              x' = VFPR&#91;m&#93;&#91;0&#93;&#91;c&#93;
              y' = VFPR&#91;m&#93;&#91;1&#93;&#91;c&#93;
              z' = VFPR&#91;m&#93;&#91;2&#93;&#91;c&#93;
              w' = VFPR&#91;m&#93;&#91;3&#93;&#91;c&#93;
            else
              x' = VFPR&#91;m&#93;&#91;c&#93;&#91;0&#93;
              y' = VFPR&#91;m&#93;&#91;c&#93;&#91;1&#93;
              z' = VFPR&#91;m&#93;&#91;c&#93;&#91;2&#93;
              w' = VFPR&#91;m&#93;&#91;c&#93;&#91;3&#93;

            if &#40;VPFXT is taken&#41;
              x = PrefixT&#40;0, x', y', z', w'&#41;
              y = PrefixT&#40;1, x', y', z', w'&#41;
              z = PrefixT&#40;2, x', y', z', w'&#41;
              w = PrefixT&#40;3, x', y', z', w'&#41;
              set VPFXT as free
            else
              x = x'
              y = y'
              z = z'
              w = w'
    &#125;
&#125;

shortcut
&#123;
    // Single-lane constant generators. op0 selects the constant, which is
    // then stored through SaveRegisterD so the destination prefix applies.
    v<op0>.s&#40;110100&#58;op0&#58;0&#58;0000000&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        when <op0> is
          zero&#40;0000000110&#41; &#58;
            x = 0.0f
          one&#40;0000000111&#41; &#58;
            x = 1.0f
        
        SaveRegisterD&#40;1, vd_r, vd_m, vd_c, x&#41;
    &#125;

    // Pair constant generators. idt places the single 1.0f in the lane
    // chosen by bit 0 of vd_c. zero and one fill both lanes.
    v<op0>.p&#40;110100&#58;op0&#58;0&#58;0000000&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        when <op0> is
          idt&#40;0000000011&#41; &#58;
            x = vd_c&#91;0&#93; ? 0.0f &#58; 1.0f
            y = vd_c&#91;0&#93; ? 1.0f &#58; 0.0f
          zero&#40;0000000110&#41; &#58;
            x = 0.0f
            y = 0.0f
          one&#40;0000000111&#41; &#58;
            x = 1.0f
            y = 1.0f
        
        SaveRegisterD&#40;2, vd_o, vd_x, vd_m, vd_c, x, y&#41;
    &#125;

    // Triple constant generators. zero fills the three lanes with 0.0f,
    // one fills them with 1.0f. All three lanes go through SaveRegisterD.
    v<op0>.t&#40;110100&#58;op0&#58;1&#58;0000000&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        when <op0> is
          zero&#40;0000000110&#41; &#58;
            x = 0.0f
            y = 0.0f
            z = 0.0f
          one&#40;0000000111&#41; &#58;
            x = 1.0f
            y = 1.0f
            z = 1.0f

        SaveRegisterD&#40;3, vd_o, vd_x, vd_m, vd_c, x, y, z&#41;
    &#125;

    // Quad constant generators. idt places the single 1.0f in the lane whose
    // index equals vd_c. zero and one fill all four lanes. All four lanes go
    // through SaveRegisterD.
    v<op0>.q&#40;110100&#58;op0&#58;1&#58;0000000&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        when <op0> is
          idt&#40;0000000011&#41; &#58;
            x = vd_c&#91;1..0&#93; == 0 ? 1.0f &#58; 0.0f
            y = vd_c&#91;1..0&#93; == 1 ? 1.0f &#58; 0.0f
            z = vd_c&#91;1..0&#93; == 2 ? 1.0f &#58; 0.0f
            w = vd_c&#91;1..0&#93; == 3 ? 1.0f &#58; 0.0f
          zero&#40;0000000110&#41; &#58;
            x = 0.0f
            y = 0.0f
            z = 0.0f
            w = 0.0f
          one&#40;0000000111&#41; &#58;
            x = 1.0f
            y = 1.0f
            z = 1.0f
            w = 1.0f

        SaveRegisterD&#40;4, vd_x, vd_m, vd_c, x, y, z, w&#41;
    &#125;

    v<op1>.s&#40;110100&#58;op1&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;1, vs_r, vs_m, vs_c, x1&#41; 
        
        when <op1> is
          abs&#40;0000000001&#41; &#58;
            x = &#40;x1 < 0.0f&#41; ? -x1 &#58; x1

          sin&#40;0000010010&#41; &#58;
            x = sin&#40;PI * x1 / 2.0f&#41; 

          cos&#40;0000010011&#41; &#58;
            x = cos&#40;PI * x1 / 2.0f&#41; 

          asin&#40;0000010111&#41; &#58;
            x = 2.0f * asin&#40;x1&#41; / PI

        SaveRegisterD&#40;1, vd_r, vd_m, vd_c, x&#41;
    &#125;

    v<op1>.s2p&#40;110100&#58;op1&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;1, vs_r, vs_m, vs_c, x1&#41;        
        
        when <op1> is

          us2i&#40;0000111010&#41; &#58;
            x = x1&#91;15.. 0&#93; << 15
            y = x1&#91;31..16&#93; << 15

          s2i&#40;0000111011&#41; &#58;
            x = x1&#91;15.. 0&#93; << 16
            y = x1&#91;31..16&#93; << 16
                                
        SaveRegisterD&#40;2, vd_o, vd_x, vd_m, vd_c, x, y&#41;
    &#125;

    v<op1>.s2q&#40;110100&#58;op1&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;1, vs_r, vs_m, vs_c, x1&#41;        
        
        when <op1> is

          uc2i&#40;0000111000&#41; &#58;
            x = x1&#91; 7.. 0&#93; << 23 | x1&#91; 7.. 0&#93; << 15 | x1&#91; 7.. 0&#93; << 7 | x1&#91; 7.. 1&#93;
            y = x1&#91;15.. 8&#93; << 23 | x1&#91;15.. 8&#93; << 15 | x1&#91;15.. 8&#93; << 7 | x1&#91;15.. 9&#93;
            z = x1&#91;23..16&#93; << 23 | x1&#91;23..16&#93; << 15 | x1&#91;23..16&#93; << 7 | x1&#91;23..17&#93; 
            w = x1&#91;31..24&#93; << 23 | x1&#91;31..24&#93; << 15 | x1&#91;31..24&#93; << 7 | x1&#91;31..25&#93; 

          c2i&#40;0000111001&#41; &#58;
            x = x1&#91; 7.. 0&#93; << 24
            y = x1&#91;15.. 8&#93; << 24
            z = x1&#91;23..16&#93; << 24
            w = x1&#91;31..24&#93; << 24
                                
        SaveRegisterD&#40;4, vd_x, vd_m, vd_c, x, y, z, w&#41;
    &#125;

    v<op1>.p&#40;110100&#58;op1&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;2, vs_o, vs_x, vs_m, vs_c, x1, y1&#41;        
        
        when <op1> is
          abs&#40;0000000001&#41; &#58;
            x = &#40;x1 < 0.0f&#41; ? -x1 &#58; x1
            y = &#40;y1 < 0.0f&#41; ? -y1 &#58; y1
          sin&#40;0000010010&#41; &#58;
            x = sin&#40;PI * x1 / 2.0f&#41; 
            y = sin&#40;PI * y1 / 2.0f&#41;
          cos&#40;0000010011&#41; &#58;
            x = cos&#40;PI * x1 / 2.0f&#41; 
            y = cos&#40;PI * y1 / 2.0f&#41;
          asin&#40;0000010111&#41; &#58;
            x = 2.0f * asin&#40;x1&#41; / PI
            y = 2.0f * asin&#40;y1&#41; / PI
          bfy1&#40;0001000010&#41; &#58;
            x = x1 + y1
            y = x1 - y1
        
        SaveRegisterD&#40;2, vd_o, vd_x, vd_m, vd_c, x, y&#41;
    &#125;

    v<op1>.p2s&#40;110100&#58;op1&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;2, vs_o, vs_x, vs_m, vs_c, x1, y1&#41;        
        
        when <op1> is
          fad&#40;0001000110&#41; &#58;
            x = x1 + y1

          avg&#40;0001000111&#41; &#58;
            x = &#40;x1 + y1&#41; / 2.0f
        
        SaveRegisterD&#40;1, vd_r, vd_m, vd_c, x&#41;
    &#125;

    v<op1>.p2q&#40;110100&#58;op1&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;2, vs_o, vs_x, vs_m, vs_c, x1, y1&#41;        
        
        when <op1> is
          us2i&#40;0000111010&#41; &#58;
            x = x1&#91;15.. 0&#93; << 15
            y = x1&#91;31..16&#93; << 15	                        
            z = y1&#91;15.. 0&#93; << 15
            w = y1&#91;31..16&#93; << 15	                        

          s2i&#40;0000111011&#41; &#58;
            x = x1&#91;15.. 0&#93; << 16
            y = x1&#91;31..16&#93; << 16	                        
            z = y1&#91;15.. 0&#93; << 16
            w = y1&#91;31..16&#93; << 16	                        
        
        SaveRegisterD&#40;4, vd_x, vd_m, vd_c, x, y, z, w&#41;
    &#125;

    // Triple unary operations, lane by lane. The pattern is 32 bits wide,
    // with size bits 1 before the vs field and 0 before the vd field,
    // following the layout of the .p and .q forms.
    v<op1>.t&#40;110100&#58;op1&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;3, vs_o, vs_x, vs_m, vs_c, x1, y1, z1&#41;

        when <op1> is
          abs&#40;0000000001&#41; &#58;
            x = &#40;x1 < 0.0f&#41; ? -x1 &#58; x1
            y = &#40;y1 < 0.0f&#41; ? -y1 &#58; y1
            z = &#40;z1 < 0.0f&#41; ? -z1 &#58; z1

          // trigonometric arguments are scaled by PI / 2
          sin&#40;0000010010&#41; &#58;
            x = sin&#40;PI * x1 / 2.0f&#41;
            y = sin&#40;PI * y1 / 2.0f&#41;
            z = sin&#40;PI * z1 / 2.0f&#41;

          cos&#40;0000010011&#41; &#58;
            x = cos&#40;PI * x1 / 2.0f&#41;
            y = cos&#40;PI * y1 / 2.0f&#41;
            z = cos&#40;PI * z1 / 2.0f&#41;

          asin&#40;0000010111&#41; &#58;
            x = 2.0f * asin&#40;x1&#41; / PI
            y = 2.0f * asin&#40;y1&#41; / PI
            z = 2.0f * asin&#40;z1&#41; / PI

        SaveRegisterD&#40;3, vd_o, vd_x, vd_m, vd_c, x, y, z&#41;
    &#125;

    // Triple to single reductions. fad sums the three lanes, avg divides
    // that sum by 3. The pattern is 32 bits wide, with size bits 1 before
    // the vs field and 0 before the vd field.
    v<op1>.t2s&#40;110100&#58;op1&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;3, vs_o, vs_x, vs_m, vs_c, x1, y1, z1&#41;

        when <op1> is
          fad&#40;0001000110&#41; &#58;
            x = x1 + y1 + z1

          avg&#40;0001000111&#41; &#58;
            x = &#40;x1 + y1 + z1&#41; / 3.0f

        SaveRegisterD&#40;1, vd_r, vd_m, vd_c, x&#41;
    &#125;

    v<op1>.q&#40;110100&#58;op1&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;4, vs_x, vs_m, vs_c, x1, y1, z1, w1&#41;        
           
        when <1, op1> is
          abs&#40;0000000001&#41; &#58;
            x = &#40;x1 < 0.0f&#41; ? -x1 &#58; x1
            y = &#40;y1 < 0.0f&#41; ? -y1 &#58; y1
            z = &#40;z1 < 0.0f&#41; ? -z1 &#58; z1
            w = &#40;w1 < 0.0f&#41; ? -w1 &#58; w1

          sin&#40;0000010010&#41; &#58;
            x = sin&#40;PI * x1 / 2.0f&#41; 
            y = sin&#40;PI * y1 / 2.0f&#41;
            z = sin&#40;PI * z1 / 2.0f&#41;
            w = sin&#40;PI * w1 / 2.0f&#41;

          cos&#40;0000010011&#41; &#58;
            x = cos&#40;PI * x1 / 2.0f&#41; 
            y = cos&#40;PI * y1 / 2.0f&#41;
            z = cos&#40;PI * z1 / 2.0f&#41;
            w = cos&#40;PI * w1 / 2.0f&#41;

          bfy1&#40;0001000010&#41; &#58;
            x = x1 + y1
            y = x1 - y1
            z = z1 + w1
            w = z1 - w1

          bfy2&#40;0001000011&#41; &#58;
            x = x1 + z1
            y = y1 + w1
            z = x1 - z1
            w = y1 - w1
        
        SaveRegisterD&#40;4, vd_x, vd_m, vd_c, x, y, z, w&#41;
    &#125;

    v<op1>.q2s&#40;110100&#58;op1&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;4, vs_x, vs_m, vs_c, x1, y1, z1, w1&#41;        
           
        when <1, op1> is
          fad&#40;0001000110&#41; &#58;
            x = x1 + y1 + z1 + w1

          avg&#40;0001000111&#41; &#58;
            x = &#40;x1 + y1 + z1 + w1&#41; / 4.0f
        
        SaveRegisterD&#40;1, vd_r, vd_m, vd_c, x&#41;
    &#125;

    v<op2>.s&#40;0110&#58;op2&#58;vt_r&#58;vt_m&#58;vt_c&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;1, vs_r, vs_m, vs_c, x1&#41;        
        
        LoadRegisterT&#40;1, vt_r, vt_m, vt_c, x2&#41;        
               
        when <op2> is
          add&#40;00000&#41; &#58;
            x = x1 + x2

          sub&#40;00001&#41; &#58;
            x = x1 - x2

          div&#40;00111&#41;&#58;
            x = x1 / x2

          mul&#40;01000&#41;&#58;
            x = x1 * x2

          min&#40;11010&#41; &#58;
            x = &#40;x1 < x2&#41; ? x1 &#58; x2

          max&#40;11011&#41; &#58;
            x = &#40;x1 > x2&#41; ? x1 &#58; x2
          
        SaveRegisterD&#40;1, vd_r, vd_m, vd_c, x&#41;
    &#125;

    // Pair binary operations, lane by lane. S lanes load into x1, y1 and
    // T lanes into x2, y2, so neither operand overwrites the other.
    v<op2>.p&#40;0110&#58;op2&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;2, vs_o, vs_x, vs_m, vs_c, x1, y1&#41;

        LoadRegisterT&#40;2, vt_o, vt_x, vt_m, vt_c, x2, y2&#41;

        when <op2> is
          add&#40;00000&#41; &#58;
            x = x1 + x2
            y = y1 + y2

          sub&#40;00001&#41; &#58;
            x = x1 - x2
            y = y1 - y2

          div&#40;00111&#41;&#58;
            x = x1 / x2
            y = y1 / y2

          mul&#40;01000&#41;&#58;
            x = x1 * x2
            y = y1 * y2

          min&#40;11010&#41; &#58;
            x = &#40;x1 < x2&#41; ? x1 &#58; x2
            y = &#40;y1 < y2&#41; ? y1 &#58; y2

          max&#40;11011&#41; &#58;
            x = &#40;x1 > x2&#41; ? x1 &#58; x2
            y = &#40;y1 > y2&#41; ? y1 &#58; y2

        SaveRegisterD&#40;2, vd_o, vd_x, vd_m, vd_c, x, y&#41;
    &#125;

    // Triple binary operations, lane by lane. S lanes load into x1, y1, z1
    // and T lanes into x2, y2, z2. max keeps the greater value in every lane.
    v<op2>.t&#40;0110&#58;op2&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;3, vs_o, vs_x, vs_m, vs_c, x1, y1, z1&#41;

        LoadRegisterT&#40;3, vt_o, vt_x, vt_m, vt_c, x2, y2, z2&#41;

        when <op2> is
          add&#40;00000&#41; &#58;
            x = x1 + x2
            y = y1 + y2
            z = z1 + z2

          sub&#40;00001&#41; &#58;
            x = x1 - x2
            y = y1 - y2
            z = z1 - z2

          div&#40;00111&#41;&#58;
            x = x1 / x2
            y = y1 / y2
            z = z1 / z2

          mul&#40;01000&#41;&#58;
            x = x1 * x2
            y = y1 * y2
            z = z1 * z2

          min&#40;11010&#41; &#58;
            x = &#40;x1 < x2&#41; ? x1 &#58; x2
            y = &#40;y1 < y2&#41; ? y1 &#58; y2
            z = &#40;z1 < z2&#41; ? z1 &#58; z2

          max&#40;11011&#41; &#58;
            x = &#40;x1 > x2&#41; ? x1 &#58; x2
            y = &#40;y1 > y2&#41; ? y1 &#58; y2
            z = &#40;z1 > z2&#41; ? z1 &#58; z2

        SaveRegisterD&#40;3, vd_o, vd_x, vd_m, vd_c, x, y, z&#41;
    &#125;

    // Quad binary operations, lane by lane. S lanes load into x1..w1 and
    // T lanes into x2..w2. max keeps the greater value in every lane.
    v<op2>.q&#40;0110&#58;op2&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        LoadRegisterS&#40;4, vs_x, vs_m, vs_c, x1, y1, z1, w1&#41;

        LoadRegisterT&#40;4, vt_x, vt_m, vt_c, x2, y2, z2, w2&#41;

        when <op2> is
          add&#40;00000&#41; &#58;
            x = x1 + x2
            y = y1 + y2
            z = z1 + z2
            w = w1 + w2

          sub&#40;00001&#41; &#58;
            x = x1 - x2
            y = y1 - y2
            z = z1 - z2
            w = w1 - w2

          div&#40;00111&#41;&#58;
            x = x1 / x2
            y = y1 / y2
            z = z1 / z2
            w = w1 / w2

          mul&#40;01000&#41;&#58;
            x = x1 * x2
            y = y1 * y2
            z = z1 * z2
            w = w1 * w2

          min&#40;11010&#41; &#58;
            x = &#40;x1 < x2&#41; ? x1 &#58; x2
            y = &#40;y1 < y2&#41; ? y1 &#58; y2
            z = &#40;z1 < z2&#41; ? z1 &#58; z2
            w = &#40;w1 < w2&#41; ? w1 &#58; w2

          max&#40;11011&#41; &#58;
            x = &#40;x1 > x2&#41; ? x1 &#58; x2
            y = &#40;y1 > y2&#41; ? y1 &#58; y2
            z = &#40;z1 > z2&#41; ? z1 &#58; z2
            w = &#40;w1 > w2&#41; ? w1 &#58; w2

        SaveRegisterD&#40;4, vd_x, vd_m, vd_c, x, y, z, w&#41;
    &#125;
&#125;
Last edited by hlide on Mon Jun 16, 2008 12:57 am, edited 9 times in total.
hlide
Posts: 739
Joined: Sun Sep 10, 2006 2:31 am

Post by hlide »

VFPU instruction set

PART 2/2

status : WIP

Code: Select all

group vfpu
&#123;
    mfv&#40;01001000011&#58;rt&#58;000000000&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="6/0"
        operation=
        "
            1&#58; GPR&#91;rt&#93; = VFPR&#91;vd_m&#93;&#91;vd_c&#93;&#91;vd_r&#93; 
        "
    &#125;

    mfvc&#40;01001000011&#58;rt&#58;000000001&#58;imm7&#41;
    &#123;
        cycles="6/0"
        operation=
        "
            1&#58; GPR&#91;rt&#93; = VFCR&#91;imm7&#93; 
        "
    &#125;

    mtv&#40;01001000111&#58;rt&#58;000000000&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"
        operation=
        "
            1&#58; VFPR&#91;vd_m&#93;&#91;vd_c&#93;&#91;vd_r&#93; = GPR&#91;rt&#93; 
        "
    &#125;

    mtvc&#40;01001000111&#58;rt&#58;000000001&#58;imm7&#41;
    &#123;
        cycles="1/3"
        operation=
        "
            1&#58; VFCR&#91;imm7&#93; = GPR&#91;rt&#93; 
        "
    &#125;

    bvf&#40;01001001000&#58;imm3&#58;00&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; c = VFPU_CC&#91;imm3&#93; == 0
               execute instruction at PC+4
            2&#58; if &#40;c&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"         
    &#125;

    bvfl&#40;01001001000&#58;imm3&#58;10&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; c = VFPU_CC&#91;imm3&#93; == 0
               if &#40;c&#41;
                 execute instruction at PC+4
            2&#58; if &#40;c&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"         
    &#125;

    bvt&#40;01001001000&#58;imm3&#58;01&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; c = VFPU_CC&#91;imm3&#93; == 1
               execute instruction at PC+4
            2&#58; if &#40;c&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"         
    &#125;

    bvtl&#40;01001001000&#58;imm3&#58;11&#58;imm16&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; c = VFPU_CC&#91;imm3&#93; == 1
               if &#40;c&#41;
                 execute instruction at PC+4
            2&#58; if &#40;c&#41;
                 PC = PC + &#40;s16&#40;imm16&#41; << 2&#41;
        "
        delayslot="1"         
    &#125;

    vadd.s&#40;011000000&#58;vt_r&#58;vt_m&#58;vt_c&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.s add
    &#125;

    vadd.p&#40;011000000&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.p add
    &#125;

    vadd.t&#40;011000000&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.t add
    &#125;

    vadd.q&#40;011000&#58;000&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.q add
    &#125;

    vsub.s&#40;011000001&#58;vt_r&#58;vt_m&#58;vt_c&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.s sub
    &#125;

    vsub.p&#40;011000001&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.p sub
    &#125;

    vsub.t&#40;011000001&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.t sub
    &#125;

    vsub.q&#40;011000001&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.q sub
    &#125;

    vdiv.s&#40;011000111&#58;vt_r&#58;vt_m&#58;vt_c&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="14/17"

        prefixes="taken,taken,taken"
        
        see v<op2>.s div
    &#125;

    vdiv.p&#40;011000111&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="28/31"
        
        prefixes="prohibed,prohibed,prohibed"

        see v<op2>.p div
    &#125;

    vdiv.t&#40;011000111&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="42/45"

        prefixes="prohibed,prohibed,prohibed"

        see v<op2>.t div
    &#125;

    // Quad divide. Uses the same opcode bits 011000111 as the other vdiv
    // forms, so the pattern no longer collides with vsub.q.
    vdiv.q&#40;011000111&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="56/59"

        prefixes="prohibed,prohibed,prohibed"

        see v<op2>.q div
    &#125;

    vmul.s&#40;011001000&#58;vt_r&#58;vt_m&#58;vt_c&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.s mul
    &#125;

    vmul.p&#40;011001000&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.p mul
    &#125;

    vmul.t&#40;011001000&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.t mul
    &#125;

    vmul.q&#40;011001000&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="taken,taken,taken"
        
        see v<op2>.q mul
    &#125;

    // vmin.s - single variant of vmin. Both size bits are 0, as in every other
    // .s encoding of this group, so the pattern spans exactly 32 bits
    // assuming the usual 2, 3 and 2 bit widths of the r, m and c fields.
    vmin.s&#40;011011010&#58;vt_r&#58;vt_m&#58;vt_c&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,taken,taken"

        see v<op2>.s min
    &#125;

    vmin.p&#40;011011010&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,taken,taken"
        
        see v<op2>.p min
    &#125;

    vmin.t&#40;011011010&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,taken,taken"
        
        see v<op2>.t min
    &#125;

    vmin.q&#40;011011010&#58;0&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,taken,taken"
        
        see v<op2>.q min
    &#125;

    // vmax.s - single variant of vmax. Both size bits are 0, as in every other
    // .s encoding of this group, so the pattern spans exactly 32 bits
    // assuming the usual 2, 3 and 2 bit widths of the r, m and c fields.
    vmax.s&#40;011011011&#58;vt_r&#58;vt_m&#58;vt_c&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,taken,taken"

        see v<op2>.s max
    &#125;

    // vmax.p - pair variant of vmax. It shares the 011011011 major opcode with
    // vmax.s. The 011011010 pattern is vmin and would make both decode alike.
    vmax.p&#40;011011011&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,taken,taken"

        see v<op2>.p max
    &#125;

    // vmax.t - triple variant of vmax. It shares the 011011011 major opcode with
    // vmax.s. The 011011010 pattern is vmin and would make both decode alike.
    vmax.t&#40;011011011&#58;vt_o&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,taken,taken"

        see v<op2>.t max
    &#125;

    // vmax.q - quad variant of vmax. It shares the 011011011 major opcode with
    // vmax.s. Quad registers are written with a fixed leading 0 before the x bit.
    vmax.q&#40;011011011&#58;0&#58;vt_x&#58;vt_m&#58;vt_c&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,taken,taken"

        see v<op2>.q max
    &#125;

    vabs.s&#40;1101000000000001&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,ignored,taken"
        
        see v<op1>.s abs
    &#125;

    vabs.p&#40;1101000000000001&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,ignored,taken"
        
        see v<op1>.p abs
    &#125;

    vabs.t&#40;1101000000000001&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,ignored,taken"
        
        see v<op1>.t abs
    &#125;

    vabs.q&#40;1101000000000001&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="taken,ignored,taken"
        
        see v<op1>.q abs
    &#125;

    vsin.s&#40;1101000000010010&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/7"

        prefixes="taken,ignored,taken"
        
        see v<op1>.s sin
    &#125;

    vsin.p&#40;1101000000010010&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="2/8"

        prefixes="prohibed,ignored,prohibed"
        
        see v<op1>.p sin
    &#125;

    vsin.t&#40;1101000000010010&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="3/9"

        prefixes="prohibed,ignored,prohibed"
        
        see v<op1>.t sin
    &#125;

    vsin.q&#40;1101000000010010&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="4/10"

        prefixes="prohibed,ignored,prohibed"
        
        see v<op1>.q sin
    &#125;

    vcos.s&#40;1101000000010011&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/7"

        prefixes="taken,ignored,taken"
        
        see v<op1>.s cos
    &#125;

    vcos.p&#40;1101000000010011&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="2/8"

        prefixes="prohibed,ignored,prohibed"
        
        see v<op1>.p cos
    &#125;

    vcos.t&#40;1101000000010011&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="3/9"

        prefixes="prohibed,ignored,prohibed"
        
        see v<op1>.t cos
    &#125;

    // vcos.q - quad variant of vcos. The source register is written as a fixed 0
    // followed by vs_x, vs_m and vs_c, exactly like vsin.q, so that the pattern
    // spans 32 bits instead of 31.
    vcos.q&#40;1101000000010011&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="4/10"

        prefixes="prohibed,ignored,prohibed"

        see v<op1>.q cos
    &#125;

    vidt.p&#40;1101000000000011&#58;0&#58;0000000&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="ignored,ignored,taken"
        
        see v<op0>.p idt
    &#125;

    vidt.q&#40;1101000000000011&#58;1&#58;0000000&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="ignored,ignored,taken"
        
        see v<op0>.q idt
    &#125;

    vzero.s&#40;1101000000000110&#58;0&#58;0000000&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="ignored,ignored,taken"
        
        see v<op0>.s zero
    &#125;

    vzero.p&#40;1101000000000110&#58;0&#58;0000000&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="ignored,ignored,taken"
        
        see v<op0>.p zero
    &#125;

    // vzero.t - triple variant of vzero. It shares the 1101000000000110 opcode
    // with vzero.s, vzero.p and vzero.q. The pattern ending in 0111 is vone.t.
    vzero.t&#40;1101000000000110&#58;1&#58;0000000&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="ignored,ignored,taken"

        see v<op0>.t zero
    &#125;

    vzero.q&#40;1101000000000110&#58;1&#58;0000000&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="ignored,ignored,taken"
        
        see v<op0>.q zero
    &#125;

    vone.s&#40;1101000000000111&#58;0&#58;0000000&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="ignored,ignored,taken"
        
        see v<op0>.s one
    &#125;

    vone.p&#40;1101000000000111&#58;0&#58;0000000&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="ignored,ignored,taken"
        
        see v<op0>.p one
    &#125;

    vone.t&#40;1101000000000111&#58;1&#58;0000000&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="ignored,ignored,taken"
        
        see v<op0>.t one
    &#125;

    vone.q&#40;1101000000000111&#58;1&#58;0000000&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="ignored,ignored,taken"
        
        see v<op0>.q one
    &#125;

    vasin.s&#40;1101000000010111&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_r&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/7"

        prefixes="taken,ignored,taken"
        
        see v<op1>.s asin
    &#125;


    vasin.p&#40;1101000000010111&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="2/8"

        prefixes="prohibed,ignored,prohibed"
        
        see v<op1>.p asin
    &#125;


    vasin.t&#40;1101000000010111&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="3/9"

        prefixes="prohibed,ignored,prohibed"
        
        see v<op1>.t asin
    &#125;


    // vasin.q - quad variant of vasin.
    // NOTE review - cycles follow the 1/7, 2/8, 3/9, 4/10 progression used by
    // vsin and vcos for the s, p, t and q sizes. The former 4/9 value looked
    // like a typo. Confirm against hardware timing.
    vasin.q&#40;1101000000010111&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="4/10"

        prefixes="prohibed,ignored,prohibed"

        see v<op1>.q asin
    &#125;

    vuc2i.s&#40;1101000000111000&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="prohibed,ignored,maskonly"
        
        see v<op1>.s2q uc2i
    &#125;

    vc2i.s&#40;1101000000111001&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="prohibed,ignored,maskonly"
        
        see v<op1>.s2q c2i
    &#125;

    vus2i.s&#40;1101000000111010&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="prohibed,ignored,maskonly"
        
        see v<op1>.s2p us2i
    &#125;

    vus2i.p&#40;1101000000111010&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="prohibed,ignored,maskonly"
        
        see v<op1>.p2q us2i
    &#125;

    // vs2i.s - single to pair conversion, see s2p below. The destination is a
    // pair register, so it carries the full vd_o, vd_x, vd_m, vd_c fields just
    // like vus2i.s. A fixed 0 in place of vd_o is only used for quad destinations.
    vs2i.s&#40;1101000000111011&#58;0&#58;vs_r&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="prohibed,ignored,maskonly"

        see v<op1>.s2p s2i
    &#125;

    vs2i.p&#40;1101000000111011&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/3"

        prefixes="prohibed,ignored,maskonly"
        
        see v<op1>.p2q s2i
    &#125;

    vfad.p&#40;1101000001000110&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/7"

        prefixes="taken,ignored,taken"
        
        see v<op1>.p2s fad
    &#125;

    vfad.t&#40;1101000001000110&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/7"

        prefixes="taken,ignored,taken"
        
        see v<op1>.t2s fad
    &#125;

    vfad.q&#40;1101000001000110&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/7"

        prefixes="taken,ignored,taken"
        
        see v<op1>.q2s fad
    &#125;

    vavg.p&#40;1101000001000111&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/7"

        prefixes="taken,ignored,taken"
        
        see v<op1>.p2s avg
    &#125;

    vavg.t&#40;1101000001000111&#58;1&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;0&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/7"

        prefixes="taken,ignored,taken"
        
        see v<op1>.t2s avg
    &#125;

    vavg.q&#40;1101000001000111&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/7"

        prefixes="taken,ignored,taken"
        
        see v<op1>.q2s avg
    &#125;

    vbfy1.p&#40;1101000001000010&#58;0&#58;vs_o&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;vd_o&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="prohibed,ignored,prohibed"
        
        see v<op1>.p bfy1
    &#125;

    vbfy1.q&#40;1101000001000010&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="prohibed,ignored,prohibed"
        
        see v<op1>.q bfy1
    &#125;

    vbfy2.q&#40;1101000001000011&#58;1&#58;0&#58;vs_x&#58;vs_m&#58;vs_c&#58;1&#58;0&#58;vd_x&#58;vd_m&#58;vd_c&#41;
    &#123;
        cycles="1/5"

        prefixes="prohibed,ignored,prohibed"
        
        see v<op1>.q bfy2
    &#125;

    // vpfxs - loads the source prefix register VFPU_PFXS from a 20 bit immediate
    // holding the swizzle, abs, constant and negate selectors of x, y, z and w.
    // Bits 23 to 20 are unused and encoded as 0000 so that the pattern spans
    // 32 bits, assuming 2 bit swz fields and 1 bit abs, cst and neg fields as
    // the bit ranges in the operation imply.
    vpfxs&#40;110111&#58;00&#58;0000&#58;negw&#58;negz&#58;negy&#58;negx&#58;cstw&#58;cstz&#58;csty&#58;cstx&#58;absw&#58;absz&#58;absy&#58;absx&#58;swzw&#58;swzz&#58;swzy&#58;swzx&#41;
    &#123;
        cycles="1/0"

        prefixes="overridden,ignored,ignored"

        operation=
        "
            1&#58; VFPU_PFXS&#91;1..0&#93; = swzx
               VFPU_PFXS&#91;3..2&#93; = swzy
               VFPU_PFXS&#91;5..4&#93; = swzz
               VFPU_PFXS&#91;7..6&#93; = swzw

               VFPU_PFXS&#91; 8&#93; = absx
               VFPU_PFXS&#91; 9&#93; = absy
               VFPU_PFXS&#91;10&#93; = absz
               VFPU_PFXS&#91;11&#93; = absw

               VFPU_PFXS&#91;12&#93; = cstx
               VFPU_PFXS&#91;13&#93; = csty
               VFPU_PFXS&#91;14&#93; = cstz
               VFPU_PFXS&#91;15&#93; = cstw

               VFPU_PFXS&#91;16&#93; = negx
               VFPU_PFXS&#91;17&#93; = negy
               VFPU_PFXS&#91;18&#93; = negz
               VFPU_PFXS&#91;19&#93; = negw

               set VPFXS as taken
        "
    &#125;

    // vpfxt - loads the target prefix register VFPU_PFXT from a 20 bit immediate
    // holding the swizzle, abs, constant and negate selectors of x, y, z and w.
    // Bits 23 to 20 are unused and encoded as 0000 so that the pattern spans
    // 32 bits, assuming 2 bit swz fields and 1 bit abs, cst and neg fields as
    // the bit ranges in the operation imply.
    vpfxt&#40;110111&#58;01&#58;0000&#58;negw&#58;negz&#58;negy&#58;negx&#58;cstw&#58;cstz&#58;csty&#58;cstx&#58;absw&#58;absz&#58;absy&#58;absx&#58;swzw&#58;swzz&#58;swzy&#58;swzx&#41;
    &#123;
        cycles="1/0"

        prefixes="ignored,overridden,ignored"

        operation=
        "
            1&#58; VFPU_PFXT&#91;1..0&#93; = swzx
               VFPU_PFXT&#91;3..2&#93; = swzy
               VFPU_PFXT&#91;5..4&#93; = swzz
               VFPU_PFXT&#91;7..6&#93; = swzw

               VFPU_PFXT&#91; 8&#93; = absx
               VFPU_PFXT&#91; 9&#93; = absy
               VFPU_PFXT&#91;10&#93; = absz
               VFPU_PFXT&#91;11&#93; = absw

               VFPU_PFXT&#91;12&#93; = cstx
               VFPU_PFXT&#91;13&#93; = csty
               VFPU_PFXT&#91;14&#93; = cstz
               VFPU_PFXT&#91;15&#93; = cstw

               VFPU_PFXT&#91;16&#93; = negx
               VFPU_PFXT&#91;17&#93; = negy
               VFPU_PFXT&#91;18&#93; = negz
               VFPU_PFXT&#91;19&#93; = negw

               set VPFXT as taken
        "
    &#125;

    // vpfxd - loads the destination prefix register VFPU_PFXD from a 12 bit
    // immediate holding a 2 bit saturation mode and a 1 bit write mask for each
    // of x, y, z and w. Every field is stored into VFPU_PFXD, the mask bits
    // included. Bits 23 to 12 are unused and encoded as twelve zeros so that
    // the pattern spans 32 bits, assuming 2 bit sat and 1 bit msk fields as the
    // bit ranges in the operation imply.
    vpfxd&#40;110111&#58;10&#58;000000000000&#58;mskw&#58;mskz&#58;msky&#58;mskx&#58;satw&#58;satz&#58;saty&#58;satx&#41;
    &#123;
        cycles="1/0"

        prefixes="ignored,ignored,overridden"

        operation=
        "
            1&#58; VFPU_PFXD&#91;1..0&#93; = satx
               VFPU_PFXD&#91;3..2&#93; = saty
               VFPU_PFXD&#91;5..4&#93; = satz
               VFPU_PFXD&#91;7..6&#93; = satw

               VFPU_PFXD&#91; 8&#93; = mskx
               VFPU_PFXD&#91; 9&#93; = msky
               VFPU_PFXD&#91;10&#93; = mskz
               VFPU_PFXD&#91;11&#93; = mskw

               set VPFXD as taken
        "
    &#125;

    // lv.s - loads one 32 bit word from GPR rs plus the sign extended imm14
    // shifted left by 2 into a single VFPU register.
    // NOTE review - the word alignment check mirrors the one already present
    // in sv.s. Confirm that the load raises the same exception on hardware.
    lv.s&#40;110010&#58;rs&#58;vt_m&#58;vt_c&#58;imm14&#58;vt_r&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + &#40;s32&#40;imm14&#41;<<2&#41;
               if &#40;address & 3&#41;
                 raise address error exception
               else
                 VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;vt_r&#93; = MemoryRead32&#40;address&#41;
        "
    &#125;

    // lv.q - loads a 16 byte aligned quadword into four VFPU registers.
    // The vt_x bit of the encoding selects the orientation - when set the four
    // words go to VFPR m, 0..3, c and otherwise to VFPR m, c, 0..3.
    // The load happens for both orientations, vt_x only picks the layout.
    lv.q&#40;110110&#58;rs&#58;vt_m&#58;vt_c&#58;imm14&#58;0&#58;vt_x&#41;
    &#123;
        cycles="?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + &#40;s32&#40;imm14&#41;<<2&#41;
               if &#40;address & 15&#41;
                 raise address error exception
               else
                 qword = MemoryRead128&#40;address&#41;
                 if &#40;vt_x&#41;
                   VFPR&#91;vt_m&#93;&#91;0&#93;&#91;vt_c&#93; = qword&#91; 31..  0&#93;
                   VFPR&#91;vt_m&#93;&#91;1&#93;&#91;vt_c&#93; = qword&#91; 63.. 32&#93;
                   VFPR&#91;vt_m&#93;&#91;2&#93;&#91;vt_c&#93; = qword&#91; 95.. 64&#93;
                   VFPR&#91;vt_m&#93;&#91;3&#93;&#91;vt_c&#93; = qword&#91;127.. 96&#93;
                 else
                   VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;0&#93; = qword&#91; 31..  0&#93;
                   VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;1&#93; = qword&#91; 63.. 32&#93;
                   VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;2&#93; = qword&#91; 95.. 64&#93;
                   VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;3&#93; = qword&#91;127.. 96&#93;
        "
    &#125;

    // sv.s - stores one single VFPU register as a 32 bit word at GPR rs plus the
    // sign extended imm14 shifted left by 2. The major opcode is 111010, the
    // store counterpart of lv.s which owns 110010.
    sv.s&#40;111010&#58;rs&#58;vt_m&#58;vt_c&#58;imm14&#58;vt_r&#41;
    &#123;
        cycles="7/?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + &#40;s32&#40;imm14&#41;<<2&#41;
               if &#40;address & 3&#41;
                 raise address error exception
               else
                 MemoryWrite32&#40;address, VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;vt_r&#93;&#41;
        "
    &#125;

    // svl.q - partial quadword store. Reads the enclosing 16 byte aligned
    // quadword, overwrites its low words with the high words of the register
    // quad depending on bits 3..2 of the address, then writes it back.
    // The vt_x bit only selects the register orientation, the store itself is
    // performed for both values.
    // NOTE review - the final MemoryWrite128 passes wb, which is not a field of
    // this encoding since bit 1 is the fixed 0 that selects svl, and it targets
    // the unaligned address although data was read from the aligned one.
    // Confirm the intended semantics.
    svl.q&#40;111101&#58;rs&#58;vt_m&#58;vt_c&#58;imm14&#58;0&#58;vt_x&#41;
    &#123;
        cycles="cached&#58;7/?,uncached&#58;10/?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + &#40;s32&#40;imm14&#41;<<2&#41;
               if &#40;address & 3&#41;
                 raise address error exception
               else
                 data = MemoryRead128&#40;address&#91;31..4&#93;<<4&#41;
                 if &#40;vt_x&#41;
                   qword&#91; 31..  0&#93; = VFPR&#91;vt_m&#93;&#91;0&#93;&#91;vt_c&#93;
                   qword&#91; 63.. 32&#93; = VFPR&#91;vt_m&#93;&#91;1&#93;&#91;vt_c&#93;
                   qword&#91; 95.. 64&#93; = VFPR&#91;vt_m&#93;&#91;2&#93;&#91;vt_c&#93;
                   qword&#91;127.. 96&#93; = VFPR&#91;vt_m&#93;&#91;3&#93;&#91;vt_c&#93;
                 else
                   qword&#91; 31..  0&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;0&#93;
                   qword&#91; 63.. 32&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;1&#93;
                   qword&#91; 95.. 64&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;2&#93;
                   qword&#91;127.. 96&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;3&#93;
                 when &#40;address&#91;3..2&#93;&#41; is
                   0 &#58;
                     data&#91; 31.. 0&#93; = qword&#91;127..96&#93;
                   1 &#58;
                     data&#91; 31.. 0&#93; = qword&#91; 95..64&#93;
                     data&#91; 63..32&#93; = qword&#91;127..96&#93;
                   2 &#58;
                     data&#91; 31.. 0&#93; = qword&#91; 63..32&#93;
                     data&#91; 63..32&#93; = qword&#91; 95..64&#93;
                     data&#91; 95..64&#93; = qword&#91;127..96&#93;
                   3 &#58;
                     data&#91; 31.. 0&#93; = qword&#91; 31.. 0&#93;
                     data&#91; 63..32&#93; = qword&#91; 63..32&#93;
                     data&#91; 95..64&#93; = qword&#91; 95..64&#93;
                     data&#91;127..96&#93; = qword&#91;127..96&#93;
                 MemoryWrite128&#40;address, data, wb&#41;
        "
    &#125;

    // svr.q - partial quadword store, counterpart of svl.q. Reads the enclosing
    // 16 byte aligned quadword, overwrites its low words with words of the
    // register quad selected by bits 3..2 of the address, then writes it back.
    // The vt_x bit only selects the register orientation, the store itself is
    // performed for both values.
    // NOTE review - the final MemoryWrite128 passes wb, which is not a field of
    // this encoding since bit 1 is the fixed 1 that selects svr, and it targets
    // the unaligned address although data was read from the aligned one.
    // Confirm the intended semantics.
    svr.q&#40;111101&#58;rs&#58;vt_m&#58;vt_c&#58;imm14&#58;1&#58;vt_x&#41;
    &#123;
        cycles="cached&#58;7/?,uncached&#58;10/?"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + &#40;s32&#40;imm14&#41;<<2&#41;
               if &#40;address & 3&#41;
                 raise address error exception
               else
                 data = MemoryRead128&#40;address&#91;31..4&#93;<<4&#41;
                 if &#40;vt_x&#41;
                   qword&#91; 31..  0&#93; = VFPR&#91;vt_m&#93;&#91;0&#93;&#91;vt_c&#93;
                   qword&#91; 63.. 32&#93; = VFPR&#91;vt_m&#93;&#91;1&#93;&#91;vt_c&#93;
                   qword&#91; 95.. 64&#93; = VFPR&#91;vt_m&#93;&#91;2&#93;&#91;vt_c&#93;
                   qword&#91;127.. 96&#93; = VFPR&#91;vt_m&#93;&#91;3&#93;&#91;vt_c&#93;
                 else
                   qword&#91; 31..  0&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;0&#93;
                   qword&#91; 63.. 32&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;1&#93;
                   qword&#91; 95.. 64&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;2&#93;
                   qword&#91;127.. 96&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;3&#93;
                 when &#40;address&#91;3..2&#93;&#41; is
                   0 &#58;
                     data&#91; 31.. 0&#93; = qword&#91; 31.. 0&#93;
                     data&#91; 63..32&#93; = qword&#91; 63..32&#93;
                     data&#91; 95..64&#93; = qword&#91; 95..64&#93;
                     data&#91;127..96&#93; = qword&#91;127..96&#93;
                   1 &#58;
                     data&#91; 31.. 0&#93; = qword&#91; 63..32&#93;
                     data&#91; 63..32&#93; = qword&#91; 95..64&#93;
                     data&#91; 95..64&#93; = qword&#91;127..96&#93;
                   2 &#58;
                     data&#91; 31.. 0&#93; = qword&#91; 95..64&#93;
                     data&#91; 63..32&#93; = qword&#91;127..96&#93;
                   3 &#58;
                     data&#91; 31.. 0&#93; = qword&#91;127..96&#93;
                 MemoryWrite128&#40;address, data, wb&#41;
        "
    &#125;

    // sv.q - stores four VFPU registers as one 16 byte aligned quadword.
    // The vt_x bit selects the register orientation - when set the words come
    // from VFPR m, 0..3, c and otherwise from VFPR m, c, 0..3. The store is
    // performed for both orientations. The wb encoding bit is forwarded to
    // MemoryWrite128 as its third argument.
    sv.q&#40;111110&#58;rs&#58;vt_m&#58;vt_c&#58;imm14&#58;wb&#58;vt_x&#41;
    &#123;
        cycles="cached&#58;7/?,uncached&#58;10/?,uncached-wb&#58;1/0"
        operation=
        "
            1&#58; address = GPR&#91;rs&#93; + &#40;s32&#40;imm14&#41;<<2&#41;
               if &#40;address & 15&#41;
                 raise address error exception
               else
                 if &#40;vt_x&#41;
                   qword&#91; 31..  0&#93; = VFPR&#91;vt_m&#93;&#91;0&#93;&#91;vt_c&#93;
                   qword&#91; 63.. 32&#93; = VFPR&#91;vt_m&#93;&#91;1&#93;&#91;vt_c&#93;
                   qword&#91; 95.. 64&#93; = VFPR&#91;vt_m&#93;&#91;2&#93;&#91;vt_c&#93;
                   qword&#91;127.. 96&#93; = VFPR&#91;vt_m&#93;&#91;3&#93;&#91;vt_c&#93;
                 else
                   qword&#91; 31..  0&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;0&#93;
                   qword&#91; 63.. 32&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;1&#93;
                   qword&#91; 95.. 64&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;2&#93;
                   qword&#91;127.. 96&#93; = VFPR&#91;vt_m&#93;&#91;vt_c&#93;&#91;3&#93;
                 MemoryWrite128&#40;address, qword, wb&#41;
        "
    &#125;

    vcmovf.s&#40;&#41;
    &#123;
    &#125;

    vcmovf.p&#40;&#41;
    &#123;
    &#125;

    vcmovf.t&#40;&#41;
    &#123;
    &#125;

    vcmovf.q&#40;&#41;
    &#123;
    &#125;

    // vcmovt.s - placeholder, encoding and operation still to be written.
    // vcmovf.s already has its own stub earlier in this group.
    vcmovt.s&#40;&#41;
    &#123;
    &#125;

    // vcmovt.p - placeholder, encoding and operation still to be written.
    // vcmovf.p already has its own stub earlier in this group.
    vcmovt.p&#40;&#41;
    &#123;
    &#125;

    vcmovt.t&#40;&#41;
    &#123;
    &#125;

    vcmovt.q&#40;&#41;
    &#123;
    &#125;

    vcmp.s&#40;&#41;
    &#123;
    &#125;

    vcmp.p&#40;&#41;
    &#123;
    &#125;

    vcmp.t&#40;&#41;
    &#123;
    &#125;

    vcmp.q&#40;&#41;
    &#123;
    &#125;

    vcrs.t&#40;&#41;
    &#123;
    &#125;

    vdet.p&#40;&#41;
    &#123;
    &#125;

    // NOTE review - vdiv.s, vdiv.p, vdiv.t and vdiv.q are already fully defined
    // earlier in this group. This empty stub and the three that follow look
    // redundant. Confirm and drop them.
    vdiv.s&#40;&#41;
    &#123;
    &#125;

    vdiv.p&#40;&#41;
    &#123;
    &#125;

    vdiv.t&#40;&#41;
    &#123;
    &#125;

    vdiv.q&#40;&#41;
    &#123;
    &#125;

    vdot.s&#40;&#41;
    &#123;
    &#125;

    vdot.p&#40;&#41;
    &#123;
    &#125;

    vdot.t&#40;&#41;
    &#123;
    &#125;

    vdot.q&#40;&#41;
    &#123;
    &#125;

    vexp2.s&#40;&#41;
    &#123;
    &#125;

    vexp2.p&#40;&#41;
    &#123;
    &#125;

    vexp2.t&#40;&#41;
    &#123;
    &#125;

    vexp2.q&#40;&#41;
    &#123;
    &#125;

    vf2h.p&#40;&#41;
    &#123;
    &#125;

    vf2h.q&#40;&#41;
    &#123;
    &#125;

    vf2id.s&#40;&#41;
    &#123;
    &#125;

    vf2id.p&#40;&#41;
    &#123;
    &#125;

    vf2id.t&#40;&#41;
    &#123;
    &#125;

    vf2id.q&#40;&#41;
    &#123;
    &#125;

    vf2in.s&#40;&#41;
    &#123;
    &#125;

    vf2in.p&#40;&#41;
    &#123;
    &#125;

    vf2in.t&#40;&#41;
    &#123;
    &#125;

    vf2in.q&#40;&#41;
    &#123;
    &#125;

    // NOTE review - vf2id.s, vf2id.p, vf2id.t and vf2id.q stubs already appear
    // before the vf2in entries. This second set of four looks like a leftover
    // duplicate. Confirm and drop it.
    vf2id.s&#40;&#41;
    &#123;
    &#125;

    vf2id.p&#40;&#41;
    &#123;
    &#125;

    vf2id.t&#40;&#41;
    &#123;
    &#125;

    vf2id.q&#40;&#41;
    &#123;
    &#125;

    vf2iu.s&#40;&#41;
    &#123;
    &#125;

    vf2iu.p&#40;&#41;
    &#123;
    &#125;

    vf2iu.t&#40;&#41;
    &#123;
    &#125;

    vf2iu.q&#40;&#41;
    &#123;
    &#125;

    vf2iz.s&#40;&#41;
    &#123;
    &#125;

    vf2iz.p&#40;&#41;
    &#123;
    &#125;

    vf2iz.t&#40;&#41;
    &#123;
    &#125;

    vf2iz.q&#40;&#41;
    &#123;
    &#125;

    vfim.s&#40;&#41;
    &#123;
    &#125;

    vflush&#40;&#41;
    &#123;
    &#125;

    vh2f.s&#40;&#41;
    &#123;
    &#125;

    vh2f.p&#40;&#41;
    &#123;
    &#125;

    vhdp.p&#40;&#41;
    &#123;
    &#125;

    vhdp.t&#40;&#41;
    &#123;
    &#125;

    vhdp.q&#40;&#41;
    &#123;
    &#125;

    vhtfm2.p&#40;&#41;
    &#123;
    &#125;

    vhtfm3.t&#40;&#41;
    &#123;
    &#125;

    vhtfm4.q&#40;&#41;
    &#123;
    &#125;

    vi2c.q&#40;&#41;
    &#123;
    &#125;

    vi2f.s&#40;&#41;
    &#123;
    &#125;

    vi2f.p&#40;&#41;
    &#123;
    &#125;

    vi2f.t&#40;&#41;
    &#123;
    &#125;

    vi2f.q&#40;&#41;
    &#123;
    &#125;

    vi2s.p&#40;&#41;
    &#123;
    &#125;

    vi2s.q&#40;&#41;
    &#123;
    &#125;

    vi2uc.q&#40;&#41;
    &#123;
    &#125;

    vi2us.p&#40;&#41;
    &#123;
    &#125;

    vi2us.q&#40;&#41;
    &#123;
    &#125;

    viim.s&#40;&#41;
    &#123;
    &#125;

    vlgb.s&#40;&#41;
    &#123;
    &#125;

    vlog2.s&#40;&#41;
    &#123;
    &#125;

    vlog2.p&#40;&#41;
    &#123;
    &#125;

    vlog2.t&#40;&#41;
    &#123;
    &#125;

    vlog2.q&#40;&#41;
    &#123;
    &#125;

    vmfvc&#40;&#41;
    &#123;
    &#125;

    vmidt.p&#40;&#41;
    &#123;
    &#125;

    vmidt.t&#40;&#41;
    &#123;
    &#125;

    vmidt.q&#40;&#41;
    &#123;
    &#125;

    vmmov.p&#40;&#41;
    &#123;
    &#125;

    vmmov.t&#40;&#41;
    &#123;
    &#125;

    vmmov.q&#40;&#41;
    &#123;
    &#125;

    vmmul.p&#40;&#41;
    &#123;
    &#125;

    vmmul.t&#40;&#41;
    &#123;
    &#125;

    vmmul.q&#40;&#41;
    &#123;
    &#125;

    vmone.p&#40;&#41;
    &#123;
    &#125;

    vmone.t&#40;&#41;
    &#123;
    &#125;

    vmone.q&#40;&#41;
    &#123;
    &#125;

    vmov.s&#40;&#41;
    &#123;
    &#125;

    vmov.p&#40;&#41;
    &#123;
    &#125;

    vmov.t&#40;&#41;
    &#123;
    &#125;

    vmov.q&#40;&#41;
    &#123;
    &#125;

    vmscl.p&#40;&#41;
    &#123;
    &#125;

    vmscl.t&#40;&#41;
    &#123;
    &#125;

    vmscl.q&#40;&#41;
    &#123;
    &#125;

    vmtvc&#40;&#41;
    &#123;
    &#125;

    vmzero.p&#40;&#41;
    &#123;
    &#125;

    vmzero.t&#40;&#41;
    &#123;
    &#125;

    vmzero.q&#40;&#41;
    &#123;
    &#125;

    vneg.s&#40;&#41;
    &#123;
    &#125;

    vneg.p&#40;&#41;
    &#123;
    &#125;

    vneg.t&#40;&#41;
    &#123;
    &#125;

    vneg.q&#40;&#41;
    &#123;
    &#125;

    vnop&#40;&#41;
    &#123;
    &#125;

    vnrcp.s&#40;&#41;
    &#123;
    &#125;

    vnrcp.p&#40;&#41;
    &#123;
    &#125;

    vnrcp.t&#40;&#41;
    &#123;
    &#125;

    vnrcp.q&#40;&#41;
    &#123;
    &#125;

    vnsin.s&#40;&#41;
    &#123;
    &#125;

    vnsin.p&#40;&#41;
    &#123;
    &#125;

    vnsin.t&#40;&#41;
    &#123;
    &#125;

    vnsin.q&#40;&#41;
    &#123;
    &#125;

    vocp.s&#40;&#41;
    &#123;
    &#125;

    vocp.p&#40;&#41;
    &#123;
    &#125;

    vocp.t&#40;&#41;
    &#123;
    &#125;

    vocp.q&#40;&#41;
    &#123;
    &#125;


    vrcp.s&#40;&#41;
    &#123;
    &#125;

    vrcp.p&#40;&#41;
    &#123;
    &#125;

    vrcp.t&#40;&#41;
    &#123;
    &#125;

    vrcp.q&#40;&#41;
    &#123;
    &#125;

    vrexp2.s&#40;&#41;
    &#123;
    &#125;

    vrexp2.p&#40;&#41;
    &#123;
    &#125;

    vrexp2.t&#40;&#41;
    &#123;
    &#125;

    vrexp2.q&#40;&#41;
    &#123;
    &#125;

    vrndf1.s&#40;&#41;
    &#123;
    &#125;

    vrndf1.p&#40;&#41;
    &#123;
    &#125;

    vrndf1.t&#40;&#41;
    &#123;
    &#125;

    vrndf1.q&#40;&#41;
    &#123;
    &#125;

    vrndf2.s&#40;&#41;
    &#123;
    &#125;

    vrndf2.p&#40;&#41;
    &#123;
    &#125;

    vrndf2.t&#40;&#41;
    &#123;
    &#125;

    vrndf2.q&#40;&#41;
    &#123;
    &#125;

    vrndi.s&#40;&#41;
    &#123;
    &#125;

    vrndi.p&#40;&#41;
    &#123;
    &#125;

    vrndi.t&#40;&#41;
    &#123;
    &#125;

    vrndi.q&#40;&#41;
    &#123;
    &#125;

    vrnds.s&#40;&#41;
    &#123;
    &#125;

    vrot.p&#40;&#41;
    &#123;
    &#125;

    vrot.t&#40;&#41;
    &#123;
    &#125;

    vrot.q&#40;&#41;
    &#123;
    &#125;

    vrsq.s&#40;&#41;
    &#123;
    &#125;

    vrsq.p&#40;&#41;
    &#123;
    &#125;

    vrsq.t&#40;&#41;
    &#123;
    &#125;

    vrsq.q&#40;&#41;
    &#123;
    &#125;

    vsat0.s&#40;&#41;
    &#123;
    &#125;

    vsat0.p&#40;&#41;
    &#123;
    &#125;

    vsat0.t&#40;&#41;
    &#123;
    &#125;

    vsat0.q&#40;&#41;
    &#123;
    &#125;

    vsat1.s&#40;&#41;
    &#123;
    &#125;

    vsat1.p&#40;&#41;
    &#123;
    &#125;

    vsat1.t&#40;&#41;
    &#123;
    &#125;

    vsat1.q&#40;&#41;
    &#123;
    &#125;

    vsbn.s&#40;&#41;
    &#123;
    &#125;

    vsbz.s&#40;&#41;
    &#123;
    &#125;

    vscl.s&#40;&#41;
    &#123;
    &#125;

    vscl.p&#40;&#41;
    &#123;
    &#125;

    vscl.t&#40;&#41;
    &#123;
    &#125;

    vscl.q&#40;&#41;
    &#123;
    &#125;

    vscmp.s&#40;&#41;
    &#123;
    &#125;

    vscmp.p&#40;&#41;
    &#123;
    &#125;

    vscmp.t&#40;&#41;
    &#123;
    &#125;

    vscmp.q&#40;&#41;
    &#123;
    &#125;

    vsge.s&#40;&#41;
    &#123;
    &#125;

    vsge.p&#40;&#41;
    &#123;
    &#125;

    vsge.t&#40;&#41;
    &#123;
    &#125;

    vsge.q&#40;&#41;
    &#123;
    &#125;

    vsgn.s&#40;&#41;
    &#123;
    &#125;

    vsgn.p&#40;&#41;
    &#123;
    &#125;

    vsgn.t&#40;&#41;
    &#123;
    &#125;

    vsgn.q&#40;&#41;
    &#123;
    &#125;

    vslt.s&#40;&#41;
    &#123;
    &#125;

    vslt.p&#40;&#41;
    &#123;
    &#125;

    vslt.t&#40;&#41;
    &#123;
    &#125;

    vslt.q&#40;&#41;
    &#123;
    &#125;

    vsocp.s&#40;&#41;
    &#123;
    &#125;

    vsocp.p&#40;&#41;
    &#123;
    &#125;

    vsqrt.s&#40;&#41;
    &#123;
    &#125;

    vsqrt.p&#40;&#41;
    &#123;
    &#125;

    vsqrt.t&#40;&#41;
    &#123;
    &#125;

    vsqrt.q&#40;&#41;
    &#123;
    &#125;

    vsrt1.q&#40;&#41;
    &#123;
    &#125;

    vsrt2.q&#40;&#41;
    &#123;
    &#125;

    vsrt3.q&#40;&#41;
    &#123;
    &#125;

    vsrt4.q&#40;&#41;
    &#123;
    &#125;

    vsync&#40;&#41;
    &#123;
    &#125;

    vt4444.q&#40;&#41;
    &#123;
    &#125;

    vt5551.q&#40;&#41;
    &#123;
    &#125;

    // vt5650.q - placeholder, encoding and operation still to be written.
    // The mnemonic refers to the 5-6-5-0 colour format, next to vt4444.q and
    // vt5551.q.
    vt5650.q&#40;&#41;
    &#123;
    &#125;

    vtfm2.p&#40;&#41;
    &#123;
    &#125;

    vtfm3.t&#40;&#41;
    &#123;
    &#125;

    vtfm4.q&#40;&#41;
    &#123;
    &#125;

    vwbn.s&#40;&#41;
    &#123;
    &#125;

&#125;
Last edited by hlide on Mon Jun 16, 2008 1:08 am, edited 3 times in total.
crazyc
Posts: 408
Joined: Fri Jun 17, 2005 10:13 am

Post by crazyc »

hlide wrote:for those who want to make a PSP emulator using either an interpreter or a dynarec and want to know how you feel when trying to implement VFPU, I have only one word to my mind : HELL !
Well, it's no worse then x86 modrm.
J.F.
Posts: 2906
Joined: Sun Feb 22, 2004 11:41 am

Post by J.F. »

This is a really awesome thread. I appreciate the effort you're putting into this, hlide. I'm kind of an assembly language nut, having done the majority of my early commercial work in 100% assembly. The more info there is like this, the better. :)
hlide
Posts: 739
Joined: Sun Sep 10, 2006 2:31 am

Post by hlide »

@CrazyC : maybe, but doing the same operation in plain C code is absolutely crazy to write and very slow. I don't even dare to think about how to exploit the vector SSE instructions on x86 to emulate the VFPU. I'm pretty sure PSP emulator authors panicked when they considered VFPU emulation.

For a dynarec, the situation may be better to optimize but I don't think we could easily issue vectorial SSE instructions even this way.

NOTE:
I'm using a new approach for the VFPU: factoring out common operations as much as possible.
hlide
Posts: 739
Joined: Sun Sep 10, 2006 2:31 am

Post by hlide »

yes, MrMr[iCE], still on it. :)

About the buggy LVL.Q/LVR.Q see :
http://forums.ps2dev.org/viewtopic.php?t=10608
J.F.
Posts: 2906
Joined: Sun Feb 22, 2004 11:41 am

Post by J.F. »

Just thought I'd post a correction I just verified. For ins and ext, the constants are start_bit:count, not msb:lsb. So if you wanted to extract bits 16 through 23, you'd use "ext v0, a0, 16, 8".

Further info: a count of 0 is the same as 32, also, bits don't wrap around. For example, "ext v0, a0, 4, 0" extracts bits 4 through 35, where anything above 31 is just 0. "ext v0, a0, 0, 0" would theoretically just be the same as moving the long, but the assembler won't compile that. N:0 and 0:N are fine, it's just 0:0 that won't compile... but as I said, that's just a move, so it doesn't matter.
Last edited by J.F. on Mon Jul 28, 2008 9:16 am, edited 1 time in total.
hlide
Posts: 739
Joined: Sun Sep 10, 2006 2:31 am

Post by hlide »

J.F. wrote:Just thought I'd post a correction I just verified. For ins and ext, the constants are start_bit:count, not msb:lsb. So if you wanted to extract bits 16 through 23, you'd use "ext v0, a0, 16, 8".
lsb = least significant bit = start position bit
msb = most significant bit = last position bit = start position bit+count-1

Code: Select all

ext(011111:rs:rt:(msb-lsb):lsb:000000) <=> ext(011111:rs:rt:(count-1):start_bit:000000)

ins(011111:rs:rt:msb:lsb:000100) <=> ins(011111:rs:rt:(start_bit+count-1):start_bit:000100)
So it should be okay. Just because we write them in ASM as:

INS Rt, Rs, Pos, Count
EXT Rt, Rs, Pos, Count

does not mean that Pos and Count are encoded as-is in the instruction word. What you see above is the encoding bitmap of the INS/EXT instructions, from bit 31 down to bit 0.

And don't ask me why the MIPS32R2 authors chose to encode it this way; the reasoning is probably something like this:

EXT Rt, Rs, Pos, Len <=> Rt = zext((Rs>>Pos)&((1<<Len)-1)) <=> zext((Rs>>lsb)&((1<<(msb-lsb+1))-1))

INS Rt, Rs, Pos, Len <=> Rt[Pos+Len-1:Pos] = Rs <=> Rt[msb:lsb] = Rs

Just some speculation, of course :)
J.F.
Posts: 2906
Joined: Sun Feb 22, 2004 11:41 am

Post by J.F. »

hlide wrote:
J.F. wrote:Just thought I'd post a correction I just verified. For ins and ext, the constants are start_bit:count, not msb:lsb. So if you wanted to extract bits 16 through 23, you'd use "ext v0, a0, 16, 8".
lsb = least significant bit = start position bit
msb = most significant bit = last position bit = start position bit+count-1

Code: Select all

ext(011111:rs:rt:(msb-lsb):lsb:000000) <=> ext(011111:rs:rt:(count-1):start_bit:000000)

ins(011111:rs:rt:msb:lsb:000100) <=> ins(011111:rs:rt:(start_bit+count-1):start_bit:000100)
So it should be okay. This is not because we write them in ASM :

INS Rt, Rs, Pos, Count
EXT Rt, Rs, Pos, Count

that Pos and Count would be encoded the same way. What you see is the encoding bitmap of INS/EXT instructions from bit 31 to bit 0.
Okay, so it's the assembler syntax versus the encoded value.
shepherd
Posts: 2
Joined: Tue Sep 02, 2008 5:24 pm

Post by shepherd »

Good job, TKS!
hlide
Posts: 739
Joined: Sun Sep 10, 2006 2:31 am

Post by hlide »

what does TKS mean ?
User avatar
Wally
Posts: 663
Joined: Mon Sep 26, 2005 11:25 am

Post by Wally »

hlide wrote:what does TKS mean ?
Thanks
amorphophallus
Posts: 2
Joined: Fri May 29, 2009 1:33 am

VFPU spec

Post by amorphophallus »

Just noob's question.

I've read the Vector Processor paragraph of the document "The Naked PSP", which contains the sentence "128 32bits registers".
Does this mean 128 individual 32-bit registers?
---
amorph
hlide
Posts: 739
Joined: Sun Sep 10, 2006 2:31 am

Re: VFPU spec

Post by hlide »

amorphophallus wrote:Just noob's question.

Ive read document The Naked PSP paragraph of Vector Processor, sentence "128 32bits registers".
Dose this mean 128 pieces of 32bits registers?
VFPU has 8 banks of 16 registers ==> 128 registers. It does mean you can use up to 128 float scalar registers
amorphophallus
Posts: 2
Joined: Fri May 29, 2009 1:33 am

Post by amorphophallus »

thx :)
Post Reply