pipeline: rework optimization pattern for header generation
The P4 language requires marking a header as valid before any of the header fields are written, as opposed to after the writes are done. Hence, the optimization of replacing the sequence of instructions that generate a header by reading it from the table action data with a single internal DMA instruction is reworked from "mov all + validate -> dma" to "validate + mov all -> dma". Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
This commit is contained in:
parent
ce1511f4b8
commit
a1b2afe431
@ -83,10 +83,10 @@ struct nexthop_action_args_t {
|
||||
|
||||
action nexthop_action args instanceof nexthop_action_args_t {
|
||||
//Set Ethernet header.
|
||||
validate h.ethernet
|
||||
mov h.ethernet.dst_addr t.ethernet_dst_addr
|
||||
mov h.ethernet.src_addr t.ethernet_src_addr
|
||||
mov h.ethernet.ethertype t.ethernet_ethertype
|
||||
validate h.ethernet
|
||||
|
||||
//Decrement the TTL and update the checksum within the IPv4 header.
|
||||
cksub h.ipv4.hdr_checksum h.ipv4.ttl
|
||||
|
@ -115,12 +115,13 @@ struct vxlan_encap_args_t {
|
||||
|
||||
action vxlan_encap args instanceof vxlan_encap_args_t {
|
||||
//Set the outer Ethernet header.
|
||||
validate h.outer_ethernet
|
||||
mov h.outer_ethernet.dst_addr t.ethernet_dst_addr
|
||||
mov h.outer_ethernet.src_addr t.ethernet_src_addr
|
||||
mov h.outer_ethernet.ethertype t.ethernet_ethertype
|
||||
validate h.outer_ethernet
|
||||
|
||||
//Set the outer IPv4 header.
|
||||
validate h.outer_ipv4
|
||||
mov h.outer_ipv4.ver_ihl t.ipv4_ver_ihl
|
||||
mov h.outer_ipv4.diffserv t.ipv4_diffserv
|
||||
mov h.outer_ipv4.total_len t.ipv4_total_len
|
||||
@ -131,21 +132,20 @@ action vxlan_encap args instanceof vxlan_encap_args_t {
|
||||
mov h.outer_ipv4.hdr_checksum t.ipv4_hdr_checksum
|
||||
mov h.outer_ipv4.src_addr t.ipv4_src_addr
|
||||
mov h.outer_ipv4.dst_addr t.ipv4_dst_addr
|
||||
validate h.outer_ipv4
|
||||
|
||||
//Set the outer UDP header.
|
||||
validate h.outer_udp
|
||||
mov h.outer_udp.src_port t.udp_src_port
|
||||
mov h.outer_udp.dst_port t.udp_dst_port
|
||||
mov h.outer_udp.length t.udp_length
|
||||
mov h.outer_udp.checksum t.udp_checksum
|
||||
validate h.outer_udp
|
||||
|
||||
//Set the outer VXLAN header.
|
||||
validate h.outer_vxlan
|
||||
mov h.outer_vxlan.flags t.vxlan_flags
|
||||
mov h.outer_vxlan.reserved t.vxlan_reserved
|
||||
mov h.outer_vxlan.vni t.vxlan_vni
|
||||
mov h.outer_vxlan.reserved2 t.vxlan_reserved2
|
||||
validate h.outer_vxlan
|
||||
|
||||
//Set the output port.
|
||||
mov m.port_out t.port_out
|
||||
|
@ -2284,6 +2284,7 @@ instr_hdr_validate_translate(struct rte_swx_pipeline *p,
|
||||
|
||||
instr->type = INSTR_HDR_VALIDATE;
|
||||
instr->valid.header_id = h->id;
|
||||
instr->valid.struct_id = h->struct_id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -6754,7 +6755,7 @@ action_arg_src_mov_count(struct action *a,
|
||||
uint32_t n_instructions);
|
||||
|
||||
static int
|
||||
instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
|
||||
instr_pattern_validate_mov_all_search(struct rte_swx_pipeline *p,
|
||||
struct action *a,
|
||||
struct instruction *instr,
|
||||
struct instruction_data *data,
|
||||
@ -6771,51 +6772,42 @@ instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
|
||||
if (!a || !a->st)
|
||||
return 0;
|
||||
|
||||
/* First instruction: MOV_HM. */
|
||||
if (data[0].invalid || (instr[0].type != INSTR_MOV_HM))
|
||||
/* First instruction: HDR_VALIDATE. Second instruction: MOV_HM. */
|
||||
if (data[0].invalid ||
|
||||
(instr[0].type != INSTR_HDR_VALIDATE) ||
|
||||
(n_instr < 2) ||
|
||||
data[1].invalid ||
|
||||
(instr[1].type != INSTR_MOV_HM) ||
|
||||
instr[1].mov.src.struct_id)
|
||||
return 0;
|
||||
|
||||
h = header_find_by_struct_id(p, instr[0].mov.dst.struct_id);
|
||||
if (!h || h->st->var_size)
|
||||
h = header_find_by_struct_id(p, instr[0].valid.struct_id);
|
||||
if (!h ||
|
||||
h->st->var_size ||
|
||||
(n_instr < 1 + h->st->n_fields))
|
||||
return 0;
|
||||
|
||||
for (src_field_id = 0; src_field_id < a->st->n_fields; src_field_id++)
|
||||
if (instr[0].mov.src.offset == a->st->fields[src_field_id].offset / 8)
|
||||
if (instr[1].mov.src.offset == a->st->fields[src_field_id].offset / 8)
|
||||
break;
|
||||
|
||||
if (src_field_id == a->st->n_fields)
|
||||
if (src_field_id + h->st->n_fields > a->st->n_fields)
|
||||
return 0;
|
||||
|
||||
if (instr[0].mov.dst.offset ||
|
||||
(instr[0].mov.dst.n_bits != h->st->fields[0].n_bits) ||
|
||||
instr[0].mov.src.struct_id ||
|
||||
(instr[0].mov.src.n_bits != a->st->fields[src_field_id].n_bits) ||
|
||||
(instr[0].mov.dst.n_bits != instr[0].mov.src.n_bits))
|
||||
return 0;
|
||||
|
||||
if ((n_instr < h->st->n_fields + 1) ||
|
||||
(a->st->n_fields < src_field_id + h->st->n_fields + 1))
|
||||
return 0;
|
||||
|
||||
/* Subsequent instructions: MOV_HM. */
|
||||
for (i = 1; i < h->st->n_fields; i++)
|
||||
if (data[i].invalid ||
|
||||
data[i].n_users ||
|
||||
(instr[i].type != INSTR_MOV_HM) ||
|
||||
(instr[i].mov.dst.struct_id != h->struct_id) ||
|
||||
(instr[i].mov.dst.offset != h->st->fields[i].offset / 8) ||
|
||||
(instr[i].mov.dst.n_bits != h->st->fields[i].n_bits) ||
|
||||
instr[i].mov.src.struct_id ||
|
||||
(instr[i].mov.src.offset != a->st->fields[src_field_id + i].offset / 8) ||
|
||||
(instr[i].mov.src.n_bits != a->st->fields[src_field_id + i].n_bits) ||
|
||||
(instr[i].mov.dst.n_bits != instr[i].mov.src.n_bits))
|
||||
/* Second and subsequent instructions: MOV_HM. */
|
||||
for (i = 0; i < h->st->n_fields; i++)
|
||||
if (data[1 + i].invalid ||
|
||||
data[1 + i].n_users ||
|
||||
(instr[1 + i].type != INSTR_MOV_HM) ||
|
||||
(instr[1 + i].mov.dst.struct_id != h->struct_id) ||
|
||||
(instr[1 + i].mov.dst.offset != h->st->fields[i].offset / 8) ||
|
||||
(instr[1 + i].mov.dst.n_bits != h->st->fields[i].n_bits) ||
|
||||
instr[1 + i].mov.src.struct_id ||
|
||||
(instr[1 + i].mov.src.offset != a->st->fields[src_field_id + i].offset / 8) ||
|
||||
(instr[1 + i].mov.src.n_bits != a->st->fields[src_field_id + i].n_bits) ||
|
||||
(instr[1 + i].mov.dst.n_bits != instr[1 + i].mov.src.n_bits))
|
||||
return 0;
|
||||
|
||||
/* Last instruction: HDR_VALIDATE. */
|
||||
if ((instr[i].type != INSTR_HDR_VALIDATE) ||
|
||||
(instr[i].valid.header_id != h->id))
|
||||
return 0;
|
||||
|
||||
/* Check that none of the action args that are used as source for this
|
||||
* DMA transfer are used as source in any other mov instruction.
|
||||
*/
|
||||
@ -6831,12 +6823,12 @@ instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
|
||||
return 0;
|
||||
}
|
||||
|
||||
*n_pattern_instr = 1 + i;
|
||||
*n_pattern_instr = 1 + h->st->n_fields;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
|
||||
instr_pattern_validate_mov_all_replace(struct rte_swx_pipeline *p,
|
||||
struct action *a,
|
||||
struct instruction *instr,
|
||||
struct instruction_data *data,
|
||||
@ -6846,19 +6838,16 @@ instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
|
||||
uint32_t src_field_id, src_offset, i;
|
||||
|
||||
/* Read from the instructions before they are modified. */
|
||||
h = header_find_by_struct_id(p, instr[0].mov.dst.struct_id);
|
||||
h = header_find_by_struct_id(p, instr[1].mov.dst.struct_id);
|
||||
if (!h)
|
||||
return;
|
||||
|
||||
src_offset = instr[1].mov.src.offset;
|
||||
|
||||
for (src_field_id = 0; src_field_id < a->st->n_fields; src_field_id++)
|
||||
if (instr[0].mov.src.offset == a->st->fields[src_field_id].offset / 8)
|
||||
if (src_offset == a->st->fields[src_field_id].offset / 8)
|
||||
break;
|
||||
|
||||
if (src_field_id == a->st->n_fields)
|
||||
return;
|
||||
|
||||
src_offset = instr[0].mov.src.offset;
|
||||
|
||||
/* Modify the instructions. */
|
||||
instr[0].type = INSTR_DMA_HT;
|
||||
instr[0].dma.dst.header_id[0] = h->id;
|
||||
@ -6875,7 +6864,7 @@ instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
|
||||
instr_pattern_validate_mov_all_optimize(struct rte_swx_pipeline *p,
|
||||
struct action *a,
|
||||
struct instruction *instructions,
|
||||
struct instruction_data *instruction_data,
|
||||
@ -6892,8 +6881,8 @@ instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
|
||||
uint32_t n_instr = 0;
|
||||
int detected;
|
||||
|
||||
/* Mov all + validate. */
|
||||
detected = instr_pattern_mov_all_validate_search(p,
|
||||
/* Validate + mov all. */
|
||||
detected = instr_pattern_validate_mov_all_search(p,
|
||||
a,
|
||||
instr,
|
||||
data,
|
||||
@ -6903,7 +6892,7 @@ instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
|
||||
n_instructions,
|
||||
&n_instr);
|
||||
if (detected) {
|
||||
instr_pattern_mov_all_validate_replace(p, a, instr, data, n_instr);
|
||||
instr_pattern_validate_mov_all_replace(p, a, instr, data, n_instr);
|
||||
i += n_instr;
|
||||
continue;
|
||||
}
|
||||
@ -7020,8 +7009,8 @@ instr_optimize(struct rte_swx_pipeline *p,
|
||||
instruction_data,
|
||||
n_instructions);
|
||||
|
||||
/* Mov all + validate. */
|
||||
n_instructions = instr_pattern_mov_all_validate_optimize(p,
|
||||
/* Validate + mov all. */
|
||||
n_instructions = instr_pattern_validate_mov_all_optimize(p,
|
||||
a,
|
||||
instructions,
|
||||
instruction_data,
|
||||
|
@ -632,6 +632,7 @@ struct instr_io {
|
||||
|
||||
struct instr_hdr_validity {
|
||||
uint8_t header_id;
|
||||
uint8_t struct_id;
|
||||
};
|
||||
|
||||
struct instr_table {
|
||||
@ -2228,11 +2229,22 @@ __instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
|
||||
const struct instruction *ip)
|
||||
{
|
||||
uint32_t header_id = ip->valid.header_id;
|
||||
uint32_t struct_id = ip->valid.struct_id;
|
||||
uint64_t valid_headers = t->valid_headers;
|
||||
struct header_runtime *h = &t->headers[header_id];
|
||||
|
||||
TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
|
||||
|
||||
/* If this header is already valid, then its associated t->structs[] element is also valid
|
||||
* and therefore it should not be modified. It could point to the packet buffer (in case of
|
||||
* extracted header) and setting it to the default location (h->ptr0) would be incorrect.
|
||||
*/
|
||||
if (MASK64_BIT_GET(valid_headers, header_id))
|
||||
return;
|
||||
|
||||
/* Headers. */
|
||||
t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
|
||||
t->structs[struct_id] = h->ptr0;
|
||||
t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user