/* fuc microcode util functions for gf100 PGRAPH
*
* Copyright 2011 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Ben Skeggs
*/
#ifdef INCLUDE_CODE
// queue_put - add request to queue
//
// In : $r13 queue pointer
// $r14 command
// $r15 data
//
queue_put:
// make sure we have space..
ld b32 $r8 D[$r13 + 0x0] // GET
ld b32 $r9 D[$r13 + 0x4] // PUT
xor $r8 8
cmpu b32 $r8 $r9
bra ne #queue_put_next
mov $r15 E_CMD_OVERFLOW
call(error)
ret
// store cmd/data on queue
queue_put_next:
and $r8 $r9 7
shl b32 $r8 3
add b32 $r8 $r13
add b32 $r8 8
st b32 D[$r8 + 0x0] $r14
st b32 D[$r8 + 0x4] $r15
// update PUT
add b32 $r9 1
and $r9 0xf
st b32 D[$r13 + 0x4] $r9
ret
// queue_get - fetch request from queue
//
// In : $r13 queue pointer
//
// Out: $p1 clear on success (data available)
// $r14 command
// $r15 data
//
queue_get:
bset $flags $p1
ld b32 $r8 D[$r13 + 0x0] // GET
ld b32 $r9 D[$r13 + 0x4] // PUT
cmpu b32 $r8 $r9
bra e #queue_get_done
// fetch first cmd/data pair
and $r9 $r8 7
shl b32 $r9 3
add b32 $r9 $r13
add b32 $r9 8
ld b32 $r14 D[$r9 + 0x0]
ld b32 $r15 D[$r9 + 0x4]
// update GET
add b32 $r8 1
and $r8 0xf
st b32 D[$r13 + 0x0] $r8
bclr $flags $p1
queue_get_done:
ret
// nv_rd32 - read 32-bit value from nv register
//
// In : $r14 register
// Out: $r15 value
//
nv_rd32:
mov b32 $r12 $r14
bset $r12 31 // MMIO_CTRL_PENDING
nv_iowr(NV_PGRAPH_FECS_MMIO_CTRL, 0, $r12)
nv_rd32_wait:
nv_iord($r12, NV_PGRAPH_FECS_MMIO_CTRL, 0)
xbit $r12 $r12 31
bra ne #nv_rd32_wait
mov $r10 6 // DONE_MMIO_RD
call(wait_doneo)
nv_iord($r15, NV_PGRAPH_FECS_MMIO_RDVAL, 0)
ret
// nv_wr32 - write 32-bit value to nv register
//
// In : $r14 register
// $r15 value
//
nv_wr32:
nv_iowr(NV_PGRAPH_FECS_MMIO_WRVAL, 0, $r15)
mov b32 $r12 $r14
bset $r12 31 // MMIO_CTRL_PENDING
bset $r12 30 // MMIO_CTRL_WRITE
nv_iowr(NV_PGRAPH_FECS_MMIO_CTRL, 0, $r12)
nv_wr32_wait:
nv_iord($r12, NV_PGRAPH_FECS_MMIO_CTRL, 0)
xbit $r12 $r12 31
bra ne #nv_wr32_wait
ret
// wait_donez - wait on FUC_DONE bit to become clear
//
// In : $r10 bit to wait on
//
wait_donez:
trace_set(T_WAIT);
nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(6), 0, $r10)
wait_donez_ne:
nv_iord($r8, NV_PGRAPH_FECS_SIGNAL, 0)
xbit $r8 $r8 $r10
bra ne #wait_donez_ne
trace_clr(T_WAIT)
ret
// wait_doneo - wait on FUC_DONE bit to become set
//
// In : $r10 bit to wait on
//
wait_doneo:
trace_set(T_WAIT);
nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(6), 0, $r10)
wait_doneo_e:
nv_iord($r8, NV_PGRAPH_FECS_SIGNAL, 0)
xbit $r8 $r8 $r10
bra e #wait_doneo_e
trace_clr(T_WAIT)
ret
// mmctx_size - determine size of a mmio list transfer
//
// In : $r14 mmio list head
// $r15 mmio list tail
// Out: $r15 transfer size (in bytes)
//
mmctx_size:
clear b32 $r9
nv_mmctx_size_loop:
ld b32 $r8 D[$r14]
shr b32 $r8 26
add b32 $r8 1
shl b32 $r8 2
add b32 $r9 $r8
add b32 $r14 4
cmpu b32 $r14 $r15
bra ne #nv_mmctx_size_loop
mov b32 $r15 $r9
ret
// mmctx_xfer - execute a list of mmio transfers
//
// In : $r10 flags
// bit 0: direction (0 = save, 1 = load)
// bit 1: set if first transfer
// bit 2: set if last transfer
// $r11 base
// $r12 mmio list head
// $r13 mmio list tail
// $r14 multi_stride
// $r15 multi_mask
//
mmctx_xfer:
trace_set(T_MMCTX)
clear b32 $r9
or $r11 $r11
bra e #mmctx_base_disabled
nv_iowr(NV_PGRAPH_FECS_MMCTX_BASE, 0, $r11)
bset $r9 0 // BASE_EN
mmctx_base_disabled:
or $r14 $r14
bra e #mmctx_multi_disabled
nv_iowr(NV_PGRAPH_FECS_MMCTX_MULTI_STRIDE, 0, $r14)
nv_iowr(NV_PGRAPH_FECS_MMCTX_MULTI_MASK, 0, $r15)
bset $r9 1 // MULTI_EN
mmctx_multi_disabled:
xbit $r11 $r10 0
shl b32 $r11 16 // DIR
bset $r11 12 // QLIMIT = 0x10
xbit $r14 $r10 1
shl b32 $r14 17
or $r11 $r14 // START_TRIGGER
nv_iowr(NV_PGRAPH_FECS_MMCTX_CTRL, 0, $r11)
// loop over the mmio list, and send requests to the hw
mmctx_exec_loop:
// wait for space in mmctx queue
mmctx_wait_free:
nv_iord($r14, NV_PGRAPH_FECS_MMCTX_CTRL, 0)
and $r14 0x1f
bra e #mmctx_wait_free
// queue up an entry
ld b32 $r14 D[$r12]
or $r14 $r9
nv_iowr(NV_PGRAPH_FECS_MMCTX_QUEUE, 0, $r14)
add b32 $r12 4
cmpu b32 $r12 $r13
bra ne #mmctx_exec_loop
xbit $r11 $r10 2
bra ne #mmctx_stop
// wait for queue to empty
mmctx_fini_wait:
nv_iord($r11, NV_PGRAPH_FECS_MMCTX_CTRL, 0)
and $r11 0x1f
cmpu b32 $r11 0x10
bra ne #mmctx_fini_wait
mov $r10 5 // DONE_MMCTX
call(wait_donez)
bra #mmctx_done
mmctx_stop:
xbit $r11 $r10 0
shl b32 $r11 16 // DIR
bset $r11 12 // QLIMIT = 0x10
bset $r11 18 // STOP_TRIGGER
nv_iowr(NV_PGRAPH_FECS_MMCTX_CTRL, 0, $r11)
mmctx_stop_wait:
// wait for STOP_TRIGGER to clear
nv_iord($r11, NV_PGRAPH_FECS_MMCTX_CTRL, 0)
xbit $r11 $r11 18
bra ne #mmctx_stop_wait
mmctx_done:
trace_clr(T_MMCTX)
ret
// Wait for DONE_STRAND
//
strand_wait:
push $r10
mov $r10 2
call(wait_donez)
pop $r10
ret
// unknown - call before issuing strand commands
//
strand_pre:
mov $r9 NV_PGRAPH_FECS_STRAND_CMD_ENABLE
nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r9)
call(strand_wait)
ret
// unknown - call after issuing strand commands
//
strand_post:
mov $r9 NV_PGRAPH_FECS_STRAND_CMD_DISABLE
nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r9)
call(strand_wait)
ret
// Selects strand set?!
//
// In: $r14 id
//
strand_set:
mov $r12 0xf
nv_iowr(NV_PGRAPH_FECS_STRAND_FILTER, 0x3f, $r12)
mov $r12 NV_PGRAPH_FECS_STRAND_CMD_DEACTIVATE_FILTER
nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r12)
nv_iowr(NV_PGRAPH_FECS_STRAND_FILTER, 0x3f, $r14)
mov $r12 NV_PGRAPH_FECS_STRAND_CMD_ACTIVATE_FILTER
nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r12)
call(strand_wait)
ret
// Initialise strand context data
//
// In : $r15 context base
// Out: $r15 context size (in bytes)
//
// Strandset(?) 3 hardcoded currently
//
strand_ctx_init:
trace_set(T_STRINIT)
call(strand_pre)
mov $r14 3
call(strand_set)
clear b32 $r12
nv_iowr(NV_PGRAPH_FECS_STRAND_SELECT, 0x3f, $r12)
mov $r12 NV_PGRAPH_FECS_STRAND_CMD_SEEK
nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r12)
call(strand_wait)
sub b32 $r12 $r0 1
nv_iowr(NV_PGRAPH_FECS_STRAND_DATA, 0x3f, $r12)
mov $r12 NV_PGRAPH_FECS_STRAND_CMD_GET_INFO
nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r12)
call(strand_wait)
call(strand_post)
// read the size of each strand, poke the context offset of
// each into STRAND_{SAVE,LOAD}_SWBASE now, no need to worry
// about it later then.
nv_mkio($r8, NV_PGRAPH_FECS_STRAND_SAVE_SWBASE, 0x00)
nv_iord($r9, NV_PGRAPH_FECS_STRANDS_CNT, 0x00)
shr b32 $r14 $r15 8
ctx_init_strand_loop:
iowr I[$r8 + 0x000] $r14 // STRAND_SAVE_SWBASE
iowr I[$r8 + 0x100] $r14 // STRAND_LOAD_SWBASE
iord $r10 I[$r8 + 0x200] // STRAND_SIZE
shr b32 $r10 6
add b32 $r10 1
add b32 $r14 $r10
add b32 $r8 4
sub b32 $r9 1
bra ne #ctx_init_strand_loop
shl b32 $r14 8
sub b32 $r15 $r14 $r15
trace_clr(T_STRINIT)
ret
#endif