465 lines
16 KiB
C
465 lines
16 KiB
C
|
/***********************license start***************
|
||
|
* Copyright (c) 2003-2008 Cavium Networks (support@cavium.com). All rights
|
||
|
* reserved.
|
||
|
*
|
||
|
*
|
||
|
* Redistribution and use in source and binary forms, with or without
|
||
|
* modification, are permitted provided that the following conditions are
|
||
|
* met:
|
||
|
*
|
||
|
* * Redistributions of source code must retain the above copyright
|
||
|
* notice, this list of conditions and the following disclaimer.
|
||
|
*
|
||
|
* * Redistributions in binary form must reproduce the above
|
||
|
* copyright notice, this list of conditions and the following
|
||
|
* disclaimer in the documentation and/or other materials provided
|
||
|
* with the distribution.
|
||
|
*
|
||
|
* * Neither the name of Cavium Networks nor the names of
|
||
|
* its contributors may be used to endorse or promote products
|
||
|
* derived from this software without specific prior written
|
||
|
* permission.
|
||
|
*
|
||
|
* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
|
||
|
* AND WITH ALL FAULTS AND CAVIUM NETWORKS MAKES NO PROMISES, REPRESENTATIONS
|
||
|
* OR WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH
|
||
|
* RESPECT TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
|
||
|
* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
|
||
|
* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
|
||
|
* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
|
||
|
* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT, QUIET
|
||
|
* POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK ARISING OUT
|
||
|
* OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
|
||
|
*
|
||
|
*
|
||
|
* For any questions regarding licensing please contact marketing@caviumnetworks.com
|
||
|
*
|
||
|
***********************license end**************************************/
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
/**
|
||
|
* @file
|
||
|
*
|
||
|
* Interface to the PCI / PCIe DMA engines. These are only available
|
||
|
* on chips with PCI / PCIe.
|
||
|
*
|
||
|
* <hr>$Revision: 41586 $<hr>
|
||
|
*/
|
||
|
#include "executive-config.h"
|
||
|
#include "cvmx-config.h"
|
||
|
#include "cvmx.h"
|
||
|
#include "cvmx-cmd-queue.h"
|
||
|
#include "cvmx-dma-engine.h"
|
||
|
|
||
|
#ifdef CVMX_ENABLE_PKO_FUNCTIONS
|
||
|
|
||
|
/**
|
||
|
* Return the number of DMA engimes supported by this chip
|
||
|
*
|
||
|
* @return Number of DMA engines
|
||
|
*/
|
||
|
int cvmx_dma_engine_get_num(void)
|
||
|
{
|
||
|
if (octeon_has_feature(OCTEON_FEATURE_PCIE))
|
||
|
{
|
||
|
if (OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X))
|
||
|
return 4;
|
||
|
else
|
||
|
return 5;
|
||
|
}
|
||
|
else
|
||
|
return 2;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Initialize the DMA engines for use
|
||
|
*
|
||
|
* @return Zero on success, negative on failure
|
||
|
*/
|
||
|
int cvmx_dma_engine_initialize(void)
|
||
|
{
|
||
|
cvmx_npei_dmax_ibuff_saddr_t dmax_ibuff_saddr;
|
||
|
int engine;
|
||
|
|
||
|
for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
|
||
|
{
|
||
|
cvmx_cmd_queue_result_t result;
|
||
|
result = cvmx_cmd_queue_initialize(CVMX_CMD_QUEUE_DMA(engine),
|
||
|
0, CVMX_FPA_OUTPUT_BUFFER_POOL,
|
||
|
CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE);
|
||
|
if (result != CVMX_CMD_QUEUE_SUCCESS)
|
||
|
return -1;
|
||
|
dmax_ibuff_saddr.u64 = 0;
|
||
|
dmax_ibuff_saddr.s.saddr = cvmx_ptr_to_phys(cvmx_cmd_queue_buffer(CVMX_CMD_QUEUE_DMA(engine))) >> 7;
|
||
|
if (octeon_has_feature(OCTEON_FEATURE_PCIE))
|
||
|
cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), dmax_ibuff_saddr.u64);
|
||
|
else
|
||
|
{
|
||
|
if (engine)
|
||
|
cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, dmax_ibuff_saddr.u64);
|
||
|
else
|
||
|
cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, dmax_ibuff_saddr.u64);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (octeon_has_feature(OCTEON_FEATURE_PCIE))
|
||
|
{
|
||
|
cvmx_npei_dma_control_t dma_control;
|
||
|
dma_control.u64 = 0;
|
||
|
if (cvmx_dma_engine_get_num() >= 5)
|
||
|
dma_control.s.dma4_enb = 1;
|
||
|
dma_control.s.dma3_enb = 1;
|
||
|
dma_control.s.dma2_enb = 1;
|
||
|
dma_control.s.dma1_enb = 1;
|
||
|
dma_control.s.dma0_enb = 1;
|
||
|
dma_control.s.o_mode = 1; /* Pull NS and RO from this register, not the pointers */
|
||
|
//dma_control.s.dwb_denb = 1;
|
||
|
//dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
|
||
|
dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
|
||
|
dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
|
||
|
cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
|
||
|
/* As a workaround for errata PCIE-811 we only allow a single
|
||
|
outstanding DMA read over PCIe at a time. This limits performance,
|
||
|
but works in all cases. If you need higher performance, remove
|
||
|
this code and implement the more complicated workaround documented
|
||
|
in the errata. This only affects CN56XX pass 2.0 chips */
|
||
|
if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_0))
|
||
|
{
|
||
|
cvmx_npei_dma_pcie_req_num_t pcie_req_num;
|
||
|
pcie_req_num.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM);
|
||
|
pcie_req_num.s.dma_cnt = 1;
|
||
|
cvmx_write_csr(CVMX_PEXP_NPEI_DMA_PCIE_REQ_NUM, pcie_req_num.u64);
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
cvmx_npi_dma_control_t dma_control;
|
||
|
dma_control.u64 = 0;
|
||
|
//dma_control.s.dwb_denb = 1;
|
||
|
//dma_control.s.dwb_ichk = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/128;
|
||
|
dma_control.s.o_add1 = 1;
|
||
|
dma_control.s.fpa_que = CVMX_FPA_OUTPUT_BUFFER_POOL;
|
||
|
dma_control.s.hp_enb = 1;
|
||
|
dma_control.s.lp_enb = 1;
|
||
|
dma_control.s.csize = CVMX_FPA_OUTPUT_BUFFER_POOL_SIZE/8;
|
||
|
cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Shutdown all DMA engines. The engeines must be idle when this
|
||
|
* function is called.
|
||
|
*
|
||
|
* @return Zero on success, negative on failure
|
||
|
*/
|
||
|
int cvmx_dma_engine_shutdown(void)
|
||
|
{
|
||
|
int engine;
|
||
|
|
||
|
for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
|
||
|
{
|
||
|
if (cvmx_cmd_queue_length(CVMX_CMD_QUEUE_DMA(engine)))
|
||
|
{
|
||
|
cvmx_dprintf("ERROR: cvmx_dma_engine_shutdown: Engine not idle.\n");
|
||
|
return -1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (octeon_has_feature(OCTEON_FEATURE_PCIE))
|
||
|
{
|
||
|
cvmx_npei_dma_control_t dma_control;
|
||
|
dma_control.u64 = cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
|
||
|
if (cvmx_dma_engine_get_num() >= 5)
|
||
|
dma_control.s.dma4_enb = 0;
|
||
|
dma_control.s.dma3_enb = 0;
|
||
|
dma_control.s.dma2_enb = 0;
|
||
|
dma_control.s.dma1_enb = 0;
|
||
|
dma_control.s.dma0_enb = 0;
|
||
|
cvmx_write_csr(CVMX_PEXP_NPEI_DMA_CONTROL, dma_control.u64);
|
||
|
/* Make sure the disable completes */
|
||
|
cvmx_read_csr(CVMX_PEXP_NPEI_DMA_CONTROL);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
cvmx_npi_dma_control_t dma_control;
|
||
|
dma_control.u64 = cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
|
||
|
dma_control.s.hp_enb = 0;
|
||
|
dma_control.s.lp_enb = 0;
|
||
|
cvmx_write_csr(CVMX_NPI_DMA_CONTROL, dma_control.u64);
|
||
|
/* Make sure the disable completes */
|
||
|
cvmx_read_csr(CVMX_NPI_DMA_CONTROL);
|
||
|
}
|
||
|
|
||
|
for (engine=0; engine < cvmx_dma_engine_get_num(); engine++)
|
||
|
{
|
||
|
cvmx_cmd_queue_shutdown(CVMX_CMD_QUEUE_DMA(engine));
|
||
|
if (octeon_has_feature(OCTEON_FEATURE_PCIE))
|
||
|
cvmx_write_csr(CVMX_PEXP_NPEI_DMAX_IBUFF_SADDR(engine), 0);
|
||
|
else
|
||
|
{
|
||
|
if (engine)
|
||
|
cvmx_write_csr(CVMX_NPI_HIGHP_IBUFF_SADDR, 0);
|
||
|
else
|
||
|
cvmx_write_csr(CVMX_NPI_LOWP_IBUFF_SADDR, 0);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Submit a series of DMA comamnd to the DMA engines.
|
||
|
*
|
||
|
* @param engine Engine to submit to (0-4)
|
||
|
* @param header Command header
|
||
|
* @param num_buffers
|
||
|
* The number of data pointers
|
||
|
* @param buffers Comamnd data pointers
|
||
|
*
|
||
|
* @return Zero on success, negative on failure
|
||
|
*/
|
||
|
int cvmx_dma_engine_submit(int engine, cvmx_dma_engine_header_t header, int num_buffers, cvmx_dma_engine_buffer_t buffers[])
|
||
|
{
|
||
|
cvmx_cmd_queue_result_t result;
|
||
|
int cmd_count = 1;
|
||
|
uint64_t cmds[num_buffers + 1];
|
||
|
|
||
|
if (OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
|
||
|
{
|
||
|
/* Check for Errata PCIe-604 */
|
||
|
if ((header.s.nfst > 11) || (header.s.nlst > 11) || (header.s.nfst + header.s.nlst > 15))
|
||
|
{
|
||
|
cvmx_dprintf("DMA engine submit too large\n");
|
||
|
return -1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
cmds[0] = header.u64;
|
||
|
while (num_buffers--)
|
||
|
{
|
||
|
cmds[cmd_count++] = buffers->u64;
|
||
|
buffers++;
|
||
|
}
|
||
|
|
||
|
/* Due to errata PCIE-13315, it is necessary to have the queue lock while we
|
||
|
ring the doorbell for the DMA engines. This prevents doorbells from
|
||
|
possibly arriving out of order with respect to the command queue
|
||
|
entries */
|
||
|
__cvmx_cmd_queue_lock(CVMX_CMD_QUEUE_DMA(engine), __cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
|
||
|
result = cvmx_cmd_queue_write(CVMX_CMD_QUEUE_DMA(engine), 0, cmd_count, cmds);
|
||
|
/* This SYNCWS is needed since the command queue didn't do locking, which
|
||
|
normally implies the SYNCWS. This one makes sure the command queue
|
||
|
updates make it to L2 before we ring the doorbell */
|
||
|
CVMX_SYNCWS;
|
||
|
/* A syncw isn't needed here since the command queue did one as part of the queue unlock */
|
||
|
if (cvmx_likely(result == CVMX_CMD_QUEUE_SUCCESS))
|
||
|
{
|
||
|
if (octeon_has_feature(OCTEON_FEATURE_PCIE))
|
||
|
{
|
||
|
/* DMA doorbells are 32bit writes in little endian space. This means we need to xor the address with 4 */
|
||
|
cvmx_write64_uint32(CVMX_PEXP_NPEI_DMAX_DBELL(engine)^4, cmd_count);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (engine)
|
||
|
cvmx_write_csr(CVMX_NPI_HIGHP_DBELL, cmd_count);
|
||
|
else
|
||
|
cvmx_write_csr(CVMX_NPI_LOWP_DBELL, cmd_count);
|
||
|
}
|
||
|
}
|
||
|
/* Here is the unlock for the above errata workaround */
|
||
|
__cvmx_cmd_queue_unlock(__cvmx_cmd_queue_get_state(CVMX_CMD_QUEUE_DMA(engine)));
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* @INTERNAL
|
||
|
* Function used by cvmx_dma_engine_transfer() to build the
|
||
|
* internal address list.
|
||
|
*
|
||
|
* @param buffers Location to store the list
|
||
|
* @param address Address to build list for
|
||
|
* @param size Length of the memory pointed to by address
|
||
|
*
|
||
|
* @return Number of internal pointer chunks created
|
||
|
*/
|
||
|
static inline int __cvmx_dma_engine_build_internal_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
|
||
|
{
|
||
|
int segments = 0;
|
||
|
while (size)
|
||
|
{
|
||
|
/* Each internal chunk can contain a maximum of 8191 bytes */
|
||
|
int chunk = size;
|
||
|
if (chunk > 8191)
|
||
|
chunk = 8191;
|
||
|
buffers[segments].u64 = 0;
|
||
|
buffers[segments].internal.size = chunk;
|
||
|
buffers[segments].internal.addr = address;
|
||
|
address += chunk;
|
||
|
size -= chunk;
|
||
|
segments++;
|
||
|
}
|
||
|
return segments;
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* @INTERNAL
|
||
|
* Function used by cvmx_dma_engine_transfer() to build the PCI / PCIe address
|
||
|
* list.
|
||
|
* @param buffers Location to store the list
|
||
|
* @param address Address to build list for
|
||
|
* @param size Length of the memory pointed to by address
|
||
|
*
|
||
|
* @return Number of PCI / PCIe address chunks created. The number of words used
|
||
|
* will be segments + (segments-1)/4 + 1.
|
||
|
*/
|
||
|
static inline int __cvmx_dma_engine_build_external_pointers(cvmx_dma_engine_buffer_t *buffers, uint64_t address, int size)
|
||
|
{
|
||
|
const int MAX_SIZE = 65535;
|
||
|
int segments = 0;
|
||
|
while (size)
|
||
|
{
|
||
|
/* Each block of 4 PCI / PCIe pointers uses one dword for lengths followed by
|
||
|
up to 4 addresses. This then repeats if more data is needed */
|
||
|
buffers[0].u64 = 0;
|
||
|
if (size <= MAX_SIZE)
|
||
|
{
|
||
|
/* Only one more segment needed */
|
||
|
buffers[0].pcie_length.len0 = size;
|
||
|
buffers[1].u64 = address;
|
||
|
segments++;
|
||
|
break;
|
||
|
}
|
||
|
else if (size <= MAX_SIZE * 2)
|
||
|
{
|
||
|
/* Two more segments needed */
|
||
|
buffers[0].pcie_length.len0 = MAX_SIZE;
|
||
|
buffers[0].pcie_length.len1 = size - MAX_SIZE;
|
||
|
buffers[1].u64 = address;
|
||
|
address += MAX_SIZE;
|
||
|
buffers[2].u64 = address;
|
||
|
segments+=2;
|
||
|
break;
|
||
|
}
|
||
|
else if (size <= MAX_SIZE * 3)
|
||
|
{
|
||
|
/* Three more segments needed */
|
||
|
buffers[0].pcie_length.len0 = MAX_SIZE;
|
||
|
buffers[0].pcie_length.len1 = MAX_SIZE;
|
||
|
buffers[0].pcie_length.len2 = size - MAX_SIZE * 2;
|
||
|
buffers[1].u64 = address;
|
||
|
address += MAX_SIZE;
|
||
|
buffers[2].u64 = address;
|
||
|
address += MAX_SIZE;
|
||
|
buffers[3].u64 = address;
|
||
|
segments+=3;
|
||
|
break;
|
||
|
}
|
||
|
else if (size <= MAX_SIZE * 4)
|
||
|
{
|
||
|
/* Four more segments needed */
|
||
|
buffers[0].pcie_length.len0 = MAX_SIZE;
|
||
|
buffers[0].pcie_length.len1 = MAX_SIZE;
|
||
|
buffers[0].pcie_length.len2 = MAX_SIZE;
|
||
|
buffers[0].pcie_length.len3 = size - MAX_SIZE * 3;
|
||
|
buffers[1].u64 = address;
|
||
|
address += MAX_SIZE;
|
||
|
buffers[2].u64 = address;
|
||
|
address += MAX_SIZE;
|
||
|
buffers[3].u64 = address;
|
||
|
address += MAX_SIZE;
|
||
|
buffers[4].u64 = address;
|
||
|
segments+=4;
|
||
|
break;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/* Five or more segments are needed */
|
||
|
buffers[0].pcie_length.len0 = MAX_SIZE;
|
||
|
buffers[0].pcie_length.len1 = MAX_SIZE;
|
||
|
buffers[0].pcie_length.len2 = MAX_SIZE;
|
||
|
buffers[0].pcie_length.len3 = MAX_SIZE;
|
||
|
buffers[1].u64 = address;
|
||
|
address += MAX_SIZE;
|
||
|
buffers[2].u64 = address;
|
||
|
address += MAX_SIZE;
|
||
|
buffers[3].u64 = address;
|
||
|
address += MAX_SIZE;
|
||
|
buffers[4].u64 = address;
|
||
|
address += MAX_SIZE;
|
||
|
size -= MAX_SIZE*4;
|
||
|
buffers += 5;
|
||
|
segments+=4;
|
||
|
}
|
||
|
}
|
||
|
return segments;
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Build the first and last pointers based on a DMA engine header
|
||
|
* and submit them to the engine. The purpose of this function is
|
||
|
* to simplify the building of DMA engine commands by automatically
|
||
|
* converting a simple address and size into the apropriate internal
|
||
|
* or PCI / PCIe address list. This function does not support gather lists,
|
||
|
* so you will need to build your own lists in that case.
|
||
|
*
|
||
|
* @param engine Engine to submit to (0-4)
|
||
|
* @param header DMA Command header. Note that the nfst and nlst fields do not
|
||
|
* need to be filled in. All other fields must be set properly.
|
||
|
* @param first_address
|
||
|
* Address to use for the first pointers. In the case of INTERNAL,
|
||
|
* INBOUND, and OUTBOUND this is an Octeon memory address. In the
|
||
|
* case of EXTERNAL, this is the source PCI / PCIe address.
|
||
|
* @param last_address
|
||
|
* Address to use for the last pointers. In the case of EXTERNAL,
|
||
|
* INBOUND, and OUTBOUND this is a PCI / PCIe address. In the
|
||
|
* case of INTERNAL, this is the Octeon memory destination address.
|
||
|
* @param size Size of the transfer to perform.
|
||
|
*
|
||
|
* @return Zero on success, negative on failure
|
||
|
*/
|
||
|
int cvmx_dma_engine_transfer(int engine, cvmx_dma_engine_header_t header,
|
||
|
uint64_t first_address, uint64_t last_address,
|
||
|
int size)
|
||
|
{
|
||
|
cvmx_dma_engine_buffer_t buffers[32];
|
||
|
int words = 0;
|
||
|
|
||
|
switch (header.s.type)
|
||
|
{
|
||
|
case CVMX_DMA_ENGINE_TRANSFER_INTERNAL:
|
||
|
header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
|
||
|
words += header.s.nfst;
|
||
|
header.s.nlst = __cvmx_dma_engine_build_internal_pointers(buffers + words, last_address, size);
|
||
|
words += header.s.nlst;
|
||
|
break;
|
||
|
case CVMX_DMA_ENGINE_TRANSFER_INBOUND:
|
||
|
case CVMX_DMA_ENGINE_TRANSFER_OUTBOUND:
|
||
|
header.s.nfst = __cvmx_dma_engine_build_internal_pointers(buffers, first_address, size);
|
||
|
words += header.s.nfst;
|
||
|
header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
|
||
|
words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
|
||
|
break;
|
||
|
case CVMX_DMA_ENGINE_TRANSFER_EXTERNAL:
|
||
|
header.s.nfst = __cvmx_dma_engine_build_external_pointers(buffers, first_address, size);
|
||
|
words += header.s.nfst + ((header.s.nfst-1) >> 2) + 1;
|
||
|
header.s.nlst = __cvmx_dma_engine_build_external_pointers(buffers + words, last_address, size);
|
||
|
words += header.s.nlst + ((header.s.nlst-1) >> 2) + 1;
|
||
|
break;
|
||
|
}
|
||
|
return cvmx_dma_engine_submit(engine, header, words, buffers);
|
||
|
}
|
||
|
|
||
|
#endif
|