I've finally been able to set up and use a second independent context. I was able to perform the 'upper VRAM workaround' from this second context, even though the first context (set up by ps3fb) has restricted upper VRAM access through DMA (by means of the lv1_gpu_memory_allocate(ps3fb_videomemory.size,...) call).
The contexts are truly independent, including:
- object bindings: since lv1_gpu_context_attribute:fb_setup fails with LV1_BUSY, we have to bind objects by hand in the newly created context. For this we can use the exact same commands FB_SETUP puts in the FIFO (http://www.everfall.com/paste/id.php?ew29498z816w) when creating the first context.
- iomapping: the lv1_gpu_context_iomap call has to be done again to allow the GPU to access XDR. The location of the mapping in GPU space (GPU_IOIF) can be the same or different from the value used by ps3fb (0x0d000000)
- FIFO control and location: the FIFO control registers initially read as zero. They can be written to with the address of the second context FIFO. In my test I used the 64 kB just before the ps3fb FIFO (i.e. 128 kB from the end of the XDR ps3fb_videomemory region). So I put 0x0e1e0000 in the registers (0x10000 less than the value I read in the ps3fb context). We still have to figure out how this value is obtained from the address of ps3fb_videomemory, so that we can locate the FIFO anywhere we want.
This means interesting things:
- We don't need the FIFO workaround anymore! But the 'upper VRAM' one is still needed and can be executed from second context.
- We should finally be able to provide one (or several) independent kernel modules for all our GPU work (3D, Xorg, VRAM mtd). I'll look into this tomorrow and try to provide this module.
- We should be able to have both 3D and accelerated Xorg working at the same time.
In the meantime, this is the code snippet that does RAMHT -> lower VRAM copy from second context, for those interested:
Code: Select all
// Build one FIFO command header word: `size` (the number of data words that
// follow the header) is placed at bit 18, `subch` (the subchannel) at bit 13,
// and `tag` (used in fifo_program as the method offset, e.g. 0x180) occupies
// the low bits.
#define OP(subch, tag, size) \
	((tag) | ((subch) << 13) | ((size) << 18))
// Handles used by the program. The values are the ones the first context
// binds; the descriptions are taken from how each handle is used here.
#define FIFO_DMA_NOTIFIER     0x66604200 // DMA notifier to reports + 0x1000
#define FIFO_DMA_VIDEO_RAM    0xfeed0000 // DMA video RAM instance
#define FIFO_DMA_SYSTEM_RAM   0xfeed0001 // DMA system RAM instance
#define FIFO_OBJ_M2MF_TO_VID  0x31337303 // Memory to Memory instance
#define FIFO_OBJ_M2MF_TO_SYS  0x3137c0de // Memory to Memory instance
#define FIFO_OBJ_SURFACE_2D   0x313371c3 // 2D ContextSurface instance
#define FIFO_OBJ_SURFACE_SWZ  0x31337a73 // Swizzled Surface instance
#define FIFO_OBJ_IMAGE_CPU    0x31337808 // Image from CPU instance
#define FIFO_OBJ_SCALED_IMAGE 0x3137af00 // Scaled Image instance

// Command stream for the second context. Part one binds an object to each of
// subchannels 1..6 and attaches their DMA instances; part two programs a
// 1024x512 A8R8G8B8 scaled-image copy from video RAM offset 254MB to the
// 2D surface at video RAM offset 8MB, then issues notify and wait.
// Each OP(subch, method, n) header is followed by exactly n data words.
u32 fifo_program[] = {
	// ---- init ----

	// subchannel 1: memory to memory, system RAM -> video RAM
	OP(1, 0x000, 1), FIFO_OBJ_M2MF_TO_VID,
	OP(1, 0x180, 3),
		FIFO_DMA_NOTIFIER,   // notifier
		FIFO_DMA_SYSTEM_RAM, // DMA source
		FIFO_DMA_VIDEO_RAM,  // DMA dest

	// subchannel 2: memory to memory, video RAM -> system RAM
	OP(2, 0x000, 1), FIFO_OBJ_M2MF_TO_SYS,
	OP(2, 0x180, 3),
		FIFO_DMA_NOTIFIER,   // notifier
		FIFO_DMA_VIDEO_RAM,  // DMA source
		FIFO_DMA_SYSTEM_RAM, // DMA dest

	// subchannel 3: 2D context surface, video RAM on both sides
	OP(3, 0x000, 1), FIFO_OBJ_SURFACE_2D,
	OP(3, 0x180, 3),
		FIFO_DMA_NOTIFIER,   // notifier
		FIFO_DMA_VIDEO_RAM,  // DMA source
		FIFO_DMA_VIDEO_RAM,  // DMA dest

	// subchannel 4: swizzled surface
	OP(4, 0x000, 1), FIFO_OBJ_SURFACE_SWZ,
	OP(4, 0x180, 2),
		FIFO_DMA_NOTIFIER,   // notifier
		FIFO_DMA_VIDEO_RAM,  // DMA source

	// subchannel 5: image from CPU
	OP(5, 0x000, 1), FIFO_OBJ_IMAGE_CPU,
	OP(5, 0x180, 8),
		FIFO_DMA_NOTIFIER,   // notifier
		0x00000000,          // colorkey
		0x00000000,          // clip rectangle
		0x00000000,          // pattern
		0x00000000,          // ROP
		0x00000000,          // beta1
		0x00000000,          // beta4
		FIFO_OBJ_SURFACE_2D, // surface
	OP(5, 0x2fc, 2),
		0x00000003,          // operation srccopy
		0x00000004,          // color format A8R8G8B8

	// subchannel 6: scaled image
	OP(6, 0x000, 1), FIFO_OBJ_SCALED_IMAGE,
	OP(6, 0x180, 1), FIFO_DMA_NOTIFIER,

	// ---- blit DDR->DDR ----

	OP(6, 0x184, 1), FIFO_DMA_VIDEO_RAM,  // DMA image from video memory
	OP(6, 0x198, 1), FIFO_OBJ_SURFACE_2D, // surface

	// destination surface, programmed through subchannel 3
	OP(3, 0x300, 1), 0x0000000a,          // surface format A8R8G8B8
	OP(3, 0x30c, 1), (8 << 20),           // surface offset: video RAM + 8MB
	OP(3, 0x304, 1), 0x10001000,          // surface pitch 4096

	OP(6, 0x2fc, 9),
		0x00000001,          // color conversion truncate
		0x00000003,          // color format A8R8G8B8
		0x00000003,          // operation srccopy
		0x00000000,          // clip point (0,0)
		0x02000400,          // clip size (1024,512)
		0x00000000,          // out point (0,0)
		0x02000400,          // out size (1024,512)
		0x00100000,          // du/dx 1.0
		0x00100000,          // dv/dy 1.0

	OP(6, 0x400, 4),
		0x02000400,          // size (1024x512)
		0x00021000,          // pitch 4096, origin corner, no filtering
		(254 << 20),         // source address: video RAM + 254MB
		0x00000000,          // point (0,0)

	OP(6, 0x104, 1), 0,                   // notify
	OP(6, 0x100, 1), 0,                   // wait
};
[...]
// Test driver for the second context: load fifo_program into a FIFO buffer,
// point the FIFO control registers at it, kick it, and log the registers,
// the notifier words and two destination pixels so the result can be checked.
u32 *notify = ps3gpu.reports;
u32 *fb = ps3gpu.vram;
// Place the FIFO buffer 2*GPU_CMD_BUF_SIZE before the end of the
// ps3fb_videomemory.size region that starts at ps3gpu.xdr.
// NOTE(review): this offset is in bytes only if ps3gpu.xdr is a byte-sized
// pointer type; its declaration is not visible here - confirm.
ps3gpu.fifo = ps3gpu.xdr + ps3fb_videomemory.size - 2*GPU_CMD_BUF_SIZE;
// Clear the whole command buffer, then copy the program to its start.
memset(ps3gpu.fifo, 0, GPU_CMD_BUF_SIZE);
memcpy(ps3gpu.fifo, fifo_program, sizeof(fifo_program));
// Preset the four 32-bit words at byte offset 0x1000 of the reports area
// (the program's notifier is "reports + 0x1000") to all-ones, presumably so
// that a later write by the GPU shows up as a change in the final dump.
notify[0x1000 / 4 + 0] = 0xffffffff;
notify[0x1000 / 4 + 1] = 0xffffffff;
notify[0x1000 / 4 + 2] = 0xffffffff;
notify[0x1000 / 4 + 3] = 0xffffffff;
// Write marker values into VRAM at byte offset 8MB + 0x190000. The blit's
// destination surface starts at VRAM + 8MB, so these words should be
// overwritten if the copy actually runs.
fb[8*1024*1024/4 + 0x190000 / 4 + 0] = 0xdeadbeef;
fb[8*1024*1024/4 + 0x190000 / 4 + 1] = 0xdeadbeef;
// Log the register base pointer and the initial contents of the three
// FIFO control registers used below.
printk("fifo regs = %p\n", ps3gpu.fifo_regs);
printk("fifo regs[0x10] = %08x\n", ps3gpu.fifo_regs[0x10]);
printk("fifo regs[0x11] = %08x\n", ps3gpu.fifo_regs[0x11]);
printk("fifo regs[0x15] = %08x\n", ps3gpu.fifo_regs[0x15]);
msleep(100);
// Point all three registers at the start of the FIFO buffer. 0x0e1e0000 is
// the hard-coded GPU-side address of that buffer (0x10000 below the value
// the ps3fb context uses); how to derive it from the ps3fb_videomemory
// address is still an open question.
// NOTE(review): going by the logged output after this snippet, register 0x10
// behaves like a put pointer and 0x11/0x15 follow it once commands have been
// consumed - confirm the register roles.
ps3gpu.fifo_regs[0x11] = 0x0e1e0000;
ps3gpu.fifo_regs[0x15] = 0x0e1e0000;
ps3gpu.fifo_regs[0x10] = 0x0e1e0000;
printk("fifo regs[0x10] = %08x\n", ps3gpu.fifo_regs[0x10]);
printk("fifo regs[0x11] = %08x\n", ps3gpu.fifo_regs[0x11]);
printk("fifo regs[0x15] = %08x\n", ps3gpu.fifo_regs[0x15]);
msleep(100);
// Kick the FIFO: advance register 0x10 to just past the end of the program.
ps3gpu.fifo_regs[0x10] = 0x0e1e0000 + sizeof(fifo_program);
printk("fifo regs[0x10] = %08x\n", ps3gpu.fifo_regs[0x10]);
printk("fifo regs[0x11] = %08x\n", ps3gpu.fifo_regs[0x11]);
printk("fifo regs[0x15] = %08x\n", ps3gpu.fifo_regs[0x15]);
// Give the GPU time to process the commands, then dump the final state.
msleep(100);
printk("fifo regs[0x10] = %08x\n", ps3gpu.fifo_regs[0x10]);
printk("fifo regs[0x11] = %08x\n", ps3gpu.fifo_regs[0x11]);
printk("fifo regs[0x15] = %08x\n", ps3gpu.fifo_regs[0x15]);
// Notifier words: still 0xffffffff means nothing was written to them.
printk("notify = %08x/%08x/%08x/%08x\n",
notify[0x1000 / 4 + 0],
notify[0x1000 / 4 + 1],
notify[0x1000 / 4 + 2],
notify[0x1000 / 4 + 3]);
// Marker words: still 0xdeadbeef means the destination was not overwritten.
printk("%08x %08x\n",
fb[8*1024*1024/4 + 0x190000 / 4 + 0],
fb[8*1024*1024/4 + 0x190000 / 4 + 1]);
Code: Select all
fifo regs = d0000800907c0000
fifo regs[0x10] = 00000000
fifo regs[0x11] = 00000000
fifo regs[0x15] = 00000000
fifo regs[0x10] = 0e1e0000
fifo regs[0x11] = 0e1e0000
fifo regs[0x15] = 0e1e0000
fifo regs[0x10] = 0e1e0118
fifo regs[0x11] = 0e1e0000
fifo regs[0x15] = 0e1e0000
fifo regs[0x10] = 0e1e0118
fifo regs[0x11] = 0e1e0118
fifo regs[0x15] = 0e1e0118
notify = 0000002c/56a025a0/00000000/00000000
00000000 00501000