/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Hardware interface of the NX-GZIP compression accelerator
*
* Copyright (C) IBM Corporation, 2020
*
* Author: Bulent Abali <[email protected]>
*
*/
#ifndef _NXU_H
#define _NXU_H
#include <stdint.h>
#include <endian.h>
#include "nx.h"
/* deflate */
/* Number of literal/length (LL) and distance (D) symbols; their sum
* sizes the out_lzcount[] symbol-count array in the CPB output.
*/
#define LLSZ 286
#define DSZ 30
/* nx */
/* Quadwords (16 bytes each) reserved for a dynamic huffman table */
#define DHTSZ 18
/* The same DHT area expressed in bytes: 18 quadwords * 16 bytes */
#define DHT_MAXSZ 288
/* NOTE(review): not referenced in this header; presumably the maximum
* number of direct ddes in one indirect dde list -- confirm with users.
*/
#define MAX_DDE_COUNT 256
/* util */
/* NXPRT(X): X is compiled in only when NXDBG is defined; otherwise the
* macro expands to nothing and X is never evaluated.
*/
#ifdef NXDBG
#define NXPRT(X) X
#else
#define NXPRT(X)
#endif
/* NX_CLK(X): X is compiled in only when NXTIMER is defined.
* With NXTIMER, nx_get_time() and nx_get_freq() map to the glibc
* timebase helpers declared in <sys/platform/ppc.h>. Without it both
* expand to the constant (-1), so their results carry no timing
* information.
*/
#ifdef NXTIMER
#include <sys/platform/ppc.h>
#define NX_CLK(X) X
#define nx_get_time() __ppc_get_timebase()
#define nx_get_freq() __ppc_get_timebase_freq()
#else
#define NX_CLK(X)
#define nx_get_time() (-1)
#define nx_get_freq() (-1)
#endif
/* NOTE(review): not used in this header; presumably a retry limit for
* faults seen while (re)submitting a job -- confirm with callers.
*/
#define NX_MAX_FAULTS 500
/*
* Definitions of acronyms used here. See
* P9 NX Gzip Accelerator User's Manual for details:
* https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
*
* adler/crc: 32 bit checksums appended to stream tail
* ce: completion extension
* cpb: coprocessor parameter block (metadata)
* crb: coprocessor request block (command)
* csb: coprocessor status block (status)
* dht: dynamic huffman table
* dde: data descriptor element (address, length)
* ddl: list of ddes
* dh/fh: dynamic and fixed huffman types
* fc: coprocessor function code
* histlen: history/dictionary length
* history: sliding window of up to 32KB of data
* lzcount: Deflate LZ symbol counts
* rembytecnt: remaining byte count
* sfbt: source final block type; last block's type during decomp
* spbc: source processed byte count
* subc: source unprocessed bit count
* tebc: target ending bit count; valid bits in the last byte
* tpbc: target processed byte count
* vas: virtual accelerator switch; the user mode interface
*/
/* One 16-byte quadword, viewable either as four 32-bit words or as two
* 64-bit doublewords. Aligned to 16 bytes.
*/
union nx_qw_t {
uint32_t word[4];
uint64_t dword[2];
} __aligned(16);
/*
* Note: NX registers with fewer than 32 bits are declared by
* convention as uint32_t variables in unions. If *_offset and *_mask
* are defined for a variable, then use get_ put_ macros to
* conveniently access the register fields for endian conversions.
*/
struct nx_dde_t {
/* Data Descriptor Element, Section 6.4
*
* 16 bytes: one 32-bit word carrying dde_count, the 32-bit byte
* count and the 64-bit address.
*/
union {
uint32_t dde_count;
/* dde_count is the 8-bit field in bits 16:23 of this word
* (see dde_count_mask/dde_count_offset); access it with
* getnn()/putnn() rather than reading the word directly.
*
* When dde_count == 0 ddead is a pointer to a data buffer;
* ddebc is the buffer length in bytes.
* When dde_count > 0 dde is an indirect dde; ddead is a
* pointer to a contiguous list of direct ddes; ddebc is the
* total length of all data pointed to by the list of direct
* ddes. Note that only one level of indirection is permitted.
* See Section 6.4 of the user manual for additional details.
*/
};
uint32_t ddebc; /* dde byte count */
uint64_t ddead; /* dde address */
} __aligned(16);
struct nx_csb_t {
/* Coprocessor Status Block, Section 6.6
*
* 16 bytes. Every member of the anonymous union below aliases the
* same first 32-bit word; each name selects one bit field of that
* word through its csb_*_mask/csb_*_offset pair, so read them with
* getnn()/getpnn(). Bit 0 is the most significant bit.
*/
union {
uint32_t csb_v;
/* bit 0. Valid bit. v must be set to 0 by the program
* before submitting the coprocessor command.
* Software can poll for the v bit
*/
uint32_t csb_f;
/* bit 6. 16B CSB size. Written to 0 by DMA when it writes the CPB */
uint32_t csb_cs;
/* bits 8:15. cs completion sequence; unused */
uint32_t csb_cc;
/* bits 16:23. cc completion code; cc != 0 exception occurred */
uint32_t csb_ce;
/* bits 24:31. ce completion extension */
};
uint32_t tpbc;
/* target processed byte count TPBC */
uint64_t fsaddr;
/* Section 6.12.1 CSB NonZero error summary. FSA Failing storage
* address. Address where error occurred. When available, written
* to A field of CSB
*/
} __aligned(16);
struct nx_ccb_t {
/* Coprocessor Completion Block, Section 6.7
*
* 16 bytes: three reserved words followed by one word that is
* either reserved or holds cm, depending on crb.c.
*/
uint32_t reserved[3];
union {
/* When crb.c==0 (no ccb defined) it is reserved;
* When crb.c==1 (ccb defined) it is cm
*/
uint32_t ccb_cm;
/* 3-bit field in bits 29:31 (ccb_cm_mask/ccb_cm_offset).
* Signal interrupt of crb.c==1 and cm==1
*/
uint32_t word;
/* generic access to the 32bit word */
};
} __aligned(16);
struct vas_stamped_crb_t {
/*
* CRB operand of the paste coprocessor instruction is stamped
* in quadword 4 with the information shown here as it is written
* into the receive FIFO of the coprocessor.
*
* Four 32-bit words (16 bytes). Members sharing an anonymous union
* alias the same word and are distinguished by their mask/offset
* pairs (bit 0 is the most significant bit).
*/
union {
uint32_t vas_buf_num;
/* bits 0:5. Verification only vas buffer number which
* correlates to the low order bits of the atag in the paste
* command
*/
uint32_t send_wc_id;
/* bits 16:31. Pointer to Send Window Context that provides for
* NX address translation information, such as MSR and LPCR
* bits, job completion interrupt RA, PSWID, and job utilization
* counter.
*/
};
union {
uint32_t recv_wc_id;
/* bits 16:31. Pointer to Receive Window Context. NX uses this
* to return credits to a Receive FIFO as entries are dequeued.
*/
};
uint32_t reserved2;
union {
uint32_t vas_invalid;
/* bit 31. Invalid bit. If this bit is 1 the CRB is discarded by
* NX upon fetching from the receive FIFO. If this bit is 0
* the CRB is processed normally. The bit is stamped to 0
* by VAS and may be written to 1 by hypervisor while
* the CRB is in the receive FIFO (in memory).
*/
};
};
struct nx_stamped_fault_crb_t {
/*
* A CRB that has a translation fault is stamped by NX in quadword 4
* and pasted to the Fault Send Window in VAS.
*
* 16 bytes: a 64-bit word followed by two 32-bit words.
*/
uint64_t fsa;
/* NOTE(review): presumably the failing storage address (cf. the
* FSA description on the CSB fsaddr member) -- confirm in the manual.
*/
union {
uint32_t nxsf_t;
/* 1-bit field at bit 23 (nxsf_t_mask/nxsf_t_offset) */
uint32_t nxsf_fs;
/* 8-bit field in bits 24:31 (nxsf_fs_mask/nxsf_fs_offset) */
};
uint32_t pswid;
};
/* The two 16-byte stamp formats overlaid on the same quadword: the one
* written by VAS and the one written by NX for a faulting CRB.
*/
union stamped_crb_t {
struct vas_stamped_crb_t vas;
struct nx_stamped_fault_crb_t nx;
};
struct nx_gzip_cpb_t {
/*
* Coprocessor Parameter Block In/Out are used to pass metadata
* to/from accelerator. Tables 6.5 and 6.6 of the user manual.
*
* Layout is counted in 16-byte quadwords (qw): the input part is
* qw[0:23] and the output part starts at qw[24]. Members placed in
* the same anonymous union alias the same storage; the 32-bit ones
* are bit fields selected by their <name>_mask/<name>_offset pair,
* to be accessed with getnn()/putnn(). The bit ranges in the
* comments are relative to the start of the containing quadword.
*/
/* CPBInput */
struct {
union {
union nx_qw_t qw0;
struct {
uint32_t in_adler; /* bits 0:31 */
uint32_t in_crc; /* bits 32:63 */
union {
uint32_t in_histlen; /* bits 64:75 */
uint32_t in_subc; /* bits 93:95 */
};
union {
/* bits 108:111 */
uint32_t in_sfbt;
/* bits 112:127 */
uint32_t in_rembytecnt;
/* bits 116:127 */
uint32_t in_dhtlen;
};
};
};
/* Input dynamic huffman table: DHTSZ quadwords, also addressable
* as DHT_MAXSZ individual bytes.
*/
union {
union nx_qw_t in_dht[DHTSZ]; /* qw[1:18] */
char in_dht_char[DHT_MAXSZ]; /* byte access */
};
union nx_qw_t reserved[5]; /* qw[19:23] */
};
/* CPBOutput; declared volatile, presumably because the accelerator
* updates it outside the compiler's view.
*/
volatile struct {
union {
union nx_qw_t qw24;
struct {
uint32_t out_adler; /* bits 0:31 qw[24] */
uint32_t out_crc; /* bits 32:63 qw[24] */
union {
/* bits 77:79 qw[24] */
uint32_t out_tebc;
/* bits 80:95 qw[24] */
uint32_t out_subc;
};
union {
/* bits 108:111 qw[24] */
uint32_t out_sfbt;
/* bits 112:127 qw[24] */
uint32_t out_rembytecnt;
/* bits 116:127 qw[24] */
uint32_t out_dhtlen;
};
};
};
/* qw[25:103] is interpreted differently per operation; all the
* members below start at qw[25] unless noted otherwise.
*/
union {
union nx_qw_t qw25[79]; /* qw[25:103] */
/* qw[25] compress no lzcounts or wrap */
uint32_t out_spbc_comp_wrap;
uint32_t out_spbc_wrap; /* qw[25] wrap */
/* qw[25] compress no lzcounts */
uint32_t out_spbc_comp;
/* 286 LL and 30 D symbol counts */
uint32_t out_lzcount[LLSZ+DSZ];
struct {
union nx_qw_t out_dht[DHTSZ]; /* qw[25:42] */
/* qw[43] decompress */
uint32_t out_spbc_decomp;
};
};
/* qw[104] compress with lzcounts */
uint32_t out_spbc_comp_with_count;
};
} __aligned(128);
/*
* Coprocessor Request Block for the gzip engine, aligned to 128 bytes.
* The byte[...] comments give each member's offset from the start of
* the block. Fields are big endian; use the get/put access macros.
*/
struct nx_gzip_crb_t {
union { /* byte[0:3] */
uint32_t gzip_fc; /* bits[24-31] */
};
uint32_t reserved1; /* byte[4:7] */
union {
uint64_t csb_address; /* byte[8:15] */
struct {
uint32_t reserved2;
/* crb_c (bit 28) and crb_at (bit 30) sit in the low-order
* word of csb_address, inside the four bits that
* csb_address_mask clears.
*/
union {
uint32_t crb_c;
/* c==0 no ccb defined */
uint32_t crb_at;
/* at==0 address type is ignored;
* all addrs effective assumed.
*/
};
};
};
struct nx_dde_t source_dde; /* byte[16:31] */
struct nx_dde_t target_dde; /* byte[32:47] */
volatile struct nx_ccb_t ccb; /* byte[48:63] */
volatile union {
/* byte[64:239] shift csb by 128 bytes out of the crb; csb was
* in crb earlier; JReilly says csb written with partial inject
*/
union nx_qw_t reserved64[11];
union stamped_crb_t stamp; /* byte[64:79] */
};
/* Status block; follows the 11 reserved quadwords above */
volatile struct nx_csb_t csb;
} __aligned(128);
/*
* A request block followed by its parameter block, aligned to 2048
* bytes. This is the object type taken by nxu_submit_job().
*/
struct nx_gzip_crb_cpb_t {
struct nx_gzip_crb_t crb;
struct nx_gzip_cpb_t cpb;
} __aligned(2048);
/*
* Bit numbering follows the NX hardware convention: bit 0 is the most
* significant (left-most) bit. For the register fields defined below,
* <field>_offset is the position of the field's right-most bit and
* <field>_mask is size_mask(width), width being the field size in bits.
*
* size_mask(x) evaluates to an unsigned int whose x low-order bits are
* set. x must be smaller than the bit width of unsigned int.
*/
#define size_mask(x) (~(~0U << (x)))
/*
* Offsets and Widths within the containing 32 bits of the various NX
* gzip hardware registers. Use the getnn/putnn macros to access
* these regs
*
* Every field NAME has a NAME_mask / NAME_offset pair. NAME_offset is
* the position (bit 0 = msb) of the field's right-most bit and
* NAME_mask is size_mask(width). getnn()/putnn() build these two names
* by token pasting, so NAME must match the struct member exactly.
*/
/* DDE: dde_count is bits 16:23 */
#define dde_count_mask size_mask(8)
#define dde_count_offset 23
/* CSB */
#define csb_v_mask size_mask(1)
#define csb_v_offset 0
#define csb_f_mask size_mask(1)
#define csb_f_offset 6
#define csb_cs_mask size_mask(8)
#define csb_cs_offset 15
#define csb_cc_mask size_mask(8)
#define csb_cc_offset 23
#define csb_ce_mask size_mask(8)
#define csb_ce_offset 31
/* CCB */
#define ccb_cm_mask size_mask(3)
#define ccb_cm_offset 31
/* VAS stamped CRB fields */
#define vas_buf_num_mask size_mask(6)
#define vas_buf_num_offset 5
#define send_wc_id_mask size_mask(16)
#define send_wc_id_offset 31
#define recv_wc_id_mask size_mask(16)
#define recv_wc_id_offset 31
#define vas_invalid_mask size_mask(1)
#define vas_invalid_offset 31
/* NX stamped fault CRB fields */
#define nxsf_t_mask size_mask(1)
#define nxsf_t_offset 23
#define nxsf_fs_mask size_mask(8)
#define nxsf_fs_offset 31
/* CPB input */
#define in_histlen_mask size_mask(12)
#define in_histlen_offset 11
#define in_dhtlen_mask size_mask(12)
#define in_dhtlen_offset 31
#define in_subc_mask size_mask(3)
#define in_subc_offset 31
#define in_sfbt_mask size_mask(4)
#define in_sfbt_offset 15
#define in_rembytecnt_mask size_mask(16)
#define in_rembytecnt_offset 31
/* CPB output */
#define out_tebc_mask size_mask(3)
#define out_tebc_offset 15
#define out_subc_mask size_mask(16)
#define out_subc_offset 31
#define out_sfbt_mask size_mask(4)
#define out_sfbt_offset 15
#define out_rembytecnt_mask size_mask(16)
#define out_rembytecnt_offset 31
#define out_dhtlen_mask size_mask(12)
#define out_dhtlen_offset 31
/* CRB */
#define gzip_fc_mask size_mask(8)
#define gzip_fc_offset 31
#define crb_c_mask size_mask(1)
#define crb_c_offset 28
#define crb_at_mask size_mask(1)
#define crb_at_offset 30
/* Unlike the pairs above this is a plain AND-mask for the 64-bit
* csb_address value, not a size_mask()/offset pair; it has no
* matching csb_address_offset, so it cannot be used with getnn/putnn.
*/
#define csb_address_mask ~(15UL) /* mask off bottom 4b */
/*
* Access macros for the registers. Do not access registers directly
* because of the endian conversion. P9 processor may run either as
* Little or Big endian. However the NX coprocessor regs are always
* big endian.
* Use the 32 and 64b macros to access respective
* register sizes.
* Use nn forms for the register fields shorter than 32 bits.
*
* Conventions:
* - ST is a struct/union object for the plain forms (getnn, put32, ...)
*   and a pointer to one for the "p" forms (getpnn, putp32, ...).
*   ST is always parenthesized in the expansion, so any expression is
*   accepted, e.g. getnn(*ptr, REG) or put32(array[i], REG, x).
* - REG is the member name. The nn forms additionally require the
*   REG_mask and REG_offset macros to exist (token pasting).
* - The get forms return host-endian values; the put forms take
*   host-endian values and store them big endian.
* - ST may be evaluated more than once; do not pass expressions with
*   side effects.
*/

/* Extract field REG from its containing 32-bit word */
#define getnn(ST, REG) \
	((be32toh((ST).REG) >> (31 - REG##_offset)) & REG##_mask)
#define getpnn(ST, REG) \
	((be32toh((ST)->REG) >> (31 - REG##_offset)) & REG##_mask)

/* Read a whole 32-bit or 64-bit register */
#define get32(ST, REG)	(be32toh((ST).REG))
#define getp32(ST, REG)	(be32toh((ST)->REG))
#define get64(ST, REG)	(be64toh((ST).REG))
#define getp64(ST, REG)	(be64toh((ST)->REG))

/* get 32bits less the REG field, i.e. the word with field REG zeroed */
#define unget32(ST, REG) \
	(get32(ST, REG) & ~((REG##_mask) << (31 - REG##_offset)))
#define ungetp32(ST, REG) \
	(getp32(ST, REG) & ~((REG##_mask) << (31 - REG##_offset)))

/* Zero a whole object; clear_regs and clear_struct are synonyms */
#define clear_regs(ST)	memset((void *)(&(ST)), 0, sizeof(ST))
#define clear_struct(ST)	memset((void *)(&(ST)), 0, sizeof(ST))

/* Zero all three members of a dde (object and pointer forms) */
#define clear_dde(ST) \
	do { \
		(ST).dde_count = (ST).ddebc = 0; \
		(ST).ddead = 0; \
	} while (0)
#define clearp_dde(ST) \
	do { \
		(ST)->dde_count = (ST)->ddebc = 0; \
		(ST)->ddead = 0; \
	} while (0)

/*
* Store X into field REG, preserving the other bits of the word.
* X is truncated to the field width by REG_mask.
*/
#define putnn(ST, REG, X) \
	((ST).REG = htobe32(unget32(ST, REG) | \
			    (((X) & REG##_mask) << (31 - REG##_offset))))
#define putpnn(ST, REG, X) \
	((ST)->REG = htobe32(ungetp32(ST, REG) | \
			     (((X) & REG##_mask) << (31 - REG##_offset))))

/* Write a whole 32-bit or 64-bit register */
#define put32(ST, REG, X)	((ST).REG = htobe32(X))
#define putp32(ST, REG, X)	((ST)->REG = htobe32(X))
#define put64(ST, REG, X)	((ST).REG = htobe64(X))
#define putp64(ST, REG, X)	((ST)->REG = htobe64(X))
/*
* Completion extension ce(0) ce(1) ce(2). Bits ce(3-7)
* unused. Section 6.6 Figure 6.7.
*
* get_csb_ce() returns the whole 8-bit ce field; get_csb_ce_ms3b()
* returns its three most significant bits, right-justified, so that
* ce(0) == CSB_CE_PARTIAL, ce(1) == CSB_CE_TERMINATE and
* ce(2) == CSB_CE_TPBC_VALID. The csb_ce_*() predicates below test
* those three flag values in X (presumably the result of
* get_csb_ce_ms3b()) and each evaluates to 0 or 1.
*/
#define get_csb_ce(ST) ((uint32_t)getnn(ST, csb_ce))
#define get_csb_ce_ms3b(ST) (get_csb_ce(ST) >> 5)
#define put_csb_ce_ms3b(ST, X) putnn(ST, csb_ce, ((uint32_t)(X) << 5))
#define CSB_CE_PARTIAL 0x4
#define CSB_CE_TERMINATE 0x2
#define CSB_CE_TPBC_VALID 0x1
/* termination, output buffers may be modified, SPBC/TPBC invalid Fig.6-7 */
#define csb_ce_termination(X) (!!((X) & CSB_CE_TERMINATE))
/* if not terminated then check full or partial completion */
#define csb_ce_check_completion(X) (!csb_ce_termination(X))
#define csb_ce_partial_completion(X) (!!((X) & CSB_CE_PARTIAL))
#define csb_ce_full_completion(X) (!csb_ce_partial_completion(X))
/* TPBC indicates successfully stored data count */
#define csb_ce_tpbc_valid(X) (!!((X) & CSB_CE_TPBC_VALID))
/* most error CEs have CE(0)=0 and CE(1)=1 */
#define csb_ce_default_err(X) csb_ce_termination(X)
/* some CC=3 are partially completed, Table 6-8 */
#define csb_ce_cc3_partial(X) csb_ce_partial_completion(X)
/* Compression: when TPBC>SPBC then CC=64 Table 6-8; target didn't
* compress smaller than source.
*
* True when neither the partial nor the terminate flag is set. The
* inner parentheses are required: == binds tighter than &, so without
* them the expression would be (X) & (mask == 0), which is always 0.
*/
#define csb_ce_cc64(X) \
	((((X) & (CSB_CE_PARTIAL | CSB_CE_TERMINATE))) == 0)
/* Decompress SFBT combinations Tables 5-3, 6-4, 6-6
*
* Values for the 4-bit sfbt fields (in_sfbt / out_sfbt) of the CPB.
* NOTE(review): by their names these look like the final-block bit plus
* block-type codes (literal, fixed/dynamic huffman, header) -- confirm
* the exact encoding against the tables cited above.
*/
#define SFBT_BFINAL 0x1
#define SFBT_LIT 0x4
#define SFBT_FHT 0x5
#define SFBT_DHT 0x6
#define SFBT_HDR 0x7
/*
* NX gzip function codes. Table 6.2.
* Bits 0:4 are the FC. Bit 5 is used by the DMA controller to
* select one of the two Byte Count Limits.
*/
#define GZIP_FC_LIMIT_MASK                               0x01

/* compression */
#define GZIP_FC_COMPRESS_FHT                             0x00
#define GZIP_FC_COMPRESS_DHT                             0x02
#define GZIP_FC_COMPRESS_FHT_COUNT                       0x04
#define GZIP_FC_COMPRESS_DHT_COUNT                       0x06
#define GZIP_FC_COMPRESS_RESUME_FHT                      0x08
#define GZIP_FC_COMPRESS_RESUME_DHT                      0x0a
#define GZIP_FC_COMPRESS_RESUME_FHT_COUNT                0x0c
#define GZIP_FC_COMPRESS_RESUME_DHT_COUNT                0x0e

/* decompression */
#define GZIP_FC_DECOMPRESS                               0x10
#define GZIP_FC_DECOMPRESS_SINGLE_BLK_N_SUSPEND          0x12
#define GZIP_FC_DECOMPRESS_RESUME                        0x14
#define GZIP_FC_DECOMPRESS_RESUME_SINGLE_BLK_N_SUSPEND   0x16

/* wrap */
#define GZIP_FC_WRAP                                     0x1e

/* 1 when bit 0x10 of fc is clear (all compress codes above), else 0 */
#define fc_is_compress(fc)	(!((fc) & 0x10))
/* 1 for compress codes that also have bit 0x4 (the *_COUNT codes) */
#define fc_has_count(fc)	(fc_is_compress(fc) && ((fc) & 0x4))
/* CSB.CC Error codes
*
* Values of the 8-bit completion code field csb_cc. ERR_NX_OK (0) is
* the only non-error value: any cc != 0 means an exception occurred.
* ERR_NX_TPBC_GT_SPBC (64) is the CC=64 case where the target did not
* compress smaller than the source.
* ERR_NX_END (256) does not fit in the 8-bit field; presumably it is an
* end-of-range marker rather than a hardware code -- confirm with users.
*/
#define ERR_NX_OK 0
#define ERR_NX_ALIGNMENT 1
#define ERR_NX_OPOVERLAP 2
#define ERR_NX_DATA_LENGTH 3
#define ERR_NX_TRANSLATION 5
#define ERR_NX_PROTECTION 6
#define ERR_NX_EXTERNAL_UE7 7
#define ERR_NX_INVALID_OP 8
#define ERR_NX_PRIVILEGE 9
#define ERR_NX_INTERNAL_UE 10
#define ERR_NX_EXTERN_UE_WR 12
#define ERR_NX_TARGET_SPACE 13
#define ERR_NX_EXCESSIVE_DDE 14
#define ERR_NX_TRANSL_WR 15
#define ERR_NX_PROTECT_WR 16
#define ERR_NX_SUBFUNCTION 17
#define ERR_NX_FUNC_ABORT 18
#define ERR_NX_BYTE_MAX 19
#define ERR_NX_CORRUPT_CRB 20
#define ERR_NX_INVALID_CRB 21
#define ERR_NX_INVALID_DDE 30
#define ERR_NX_SEGMENTED_DDL 31
#define ERR_NX_DDE_OVERFLOW 33
#define ERR_NX_TPBC_GT_SPBC 64
#define ERR_NX_MISSING_CODE 66
#define ERR_NX_INVALID_DIST 67
#define ERR_NX_INVALID_DHT 68
#define ERR_NX_EXTERNAL_UE90 90
#define ERR_NX_WDOG_TIMER 224
#define ERR_NX_AT_FAULT 250
#define ERR_NX_INTR_SERVER 252
#define ERR_NX_UE253 253
#define ERR_NX_NO_HW 254
#define ERR_NX_HUNG_OP 255
#define ERR_NX_END 256
/* initial values for non-resume operations
*
* Starting checksum values, equal to what zlib's crc32()/adler32()
* return for an empty input. Presumably these seed in_crc/in_adler of
* the CPB when a stream is started -- confirm with callers.
*/
#define INIT_CRC 0 /* crc32(0L, Z_NULL, 0) */
#define INIT_ADLER 1 /* adler32(0L, Z_NULL, 0) adler is initialized to 1 */
/* prototypes
*
* The functions below are defined outside this header; the notes are
* limited to what the signatures show.
*/
/* Submit the request/parameter block pair c. NOTE(review): handle is
* presumably the opaque device handle from the open call -- confirm.
*/
int nxu_submit_job(struct nx_gzip_crb_cpb_t *c, void *handle);
/* Signature matches an SA_SIGINFO style signal handler.
* NOTE(review): siginfo_t is not declared by the headers included
* directly above; includers presumably need <signal.h> first unless
* nx.h provides it -- confirm.
*/
extern void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx);
/* NOTE(review): by name, touches buf_len bytes of buf in page_len
* steps, for write when wr is non-zero -- confirm in the definition.
*/
extern int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr);
/* Formatting helpers that print into prbuf and return a char pointer.
* caller supplies a print buffer 4*sizeof(crb)
*/
char *nx_crb_str(struct nx_gzip_crb_t *crb, char *prbuf);
char *nx_cpb_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
char *nx_prt_hex(void *cp, int sz, char *prbuf);
char *nx_lzcount_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
/* NOTE(review): e is presumably one of the ERR_NX_* codes -- confirm */
char *nx_strerror(int e);
/* Simulator entry points, declared only when NX_SIM is defined */
#ifdef NX_SIM
#include <stdio.h>
int nx_sim_init(void *ctx);
int nx_sim_end(void *ctx);
int nxu_run_sim_job(struct nx_gzip_crb_cpb_t *c, void *ctx);
#endif /* NX_SIM */
/* Deflate stream manipulation */

/* Set or clear bit 0 of the lvalue x; the value is the updated x */
#define set_final_bit(x)	((x) |= 0x01)
#define clr_final_bit(x)	((x) &= ~0x01)

/*
* Write an empty fixed-huffman block at p: append 10 bits
* 0000001b 00...... ; assumes appending starts on a byte boundary;
* b is the final bit. Stores (b & 1) | 2 into the first byte and 0
* into the second; p is evaluated twice.
*/
#define append_empty_fh_blk(p, b) \
	do { \
		(p)[0] = (((b) & 1) | 2); \
		(p)[1] = 0; \
	} while (0)
#ifdef NX_842
/* 842 Engine
*
* Request block for the 842 engine, compiled only when NX_842 is
* defined. Its first 64 bytes have the same member layout as
* struct nx_gzip_crb_t; the reserved area before the csb is 3
* quadwords here instead of 11.
*/
struct nx_eft_crb_t {
union { /* byte[0:3] */
uint32_t eft_fc; /* bits[29-31] */
};
uint32_t reserved1; /* byte[4:7] */
union {
uint64_t csb_address; /* byte[8:15] */
struct {
uint32_t reserved2;
union {
uint32_t crb_c;
/* c==0 no ccb defined */
uint32_t crb_at;
/* at==0 address type is ignored;
* all addrs effective assumed.
*/
};
};
};
struct nx_dde_t source_dde; /* byte[16:31] */
struct nx_dde_t target_dde; /* byte[32:47] */
struct nx_ccb_t ccb; /* byte[48:63] */
union {
union nx_qw_t reserved64[3]; /* byte[64:111] */
};
struct nx_csb_t csb;
} __aligned(128);
/* 842 CRB
*
* NOTE(review): the mask/offset pair below is spelled in upper case,
* whereas getnn()/putnn() paste the lower-case member name
* (eft_fc_mask / eft_fc_offset). As written those helpers cannot be
* used on eft_fc; confirm how callers access the field.
*/
#define EFT_FC_MASK size_mask(3)
#define EFT_FC_OFFSET 31
#define EFT_FC_COMPRESS 0x0
#define EFT_FC_COMPRESS_WITH_CRC 0x1
#define EFT_FC_DECOMPRESS 0x2
#define EFT_FC_DECOMPRESS_WITH_CRC 0x3
#define EFT_FC_BLK_DATA_MOVE 0x4
#endif /* NX_842 */
#endif /* _NXU_H */