Skip to content

Commit

Permalink
ndp: avoid races between control and data planes
Browse files Browse the repository at this point in the history
Do not duplicate neighbor solicitations packets to reply without going
through the control plane. This is bad design and can lead to races
between the control and data planes.

Send all valid NDP packets to control plane and if they are neighbor
solicitations, only *after* handling the request contents to update our
nexthop cache, send a neighbor advertisement for our requested local
address.

Signed-off-by: Robin Jarry <[email protected]>
Acked-by: David Marchand <[email protected]>
  • Loading branch information
rjarry committed Jan 27, 2025
1 parent ece5919 commit b51dfd9
Show file tree
Hide file tree
Showing 6 changed files with 503 additions and 436 deletions.
604 changes: 311 additions & 293 deletions docs/graph.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
93 changes: 57 additions & 36 deletions modules/ip6/control/nexthop.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,72 +111,92 @@ void ip6_nexthop_unreachable_cb(struct rte_mbuf *m) {
rte_pktmbuf_free(m);
}

static control_input_t ndp_na_output_node;

void ndp_probe_input_cb(struct rte_mbuf *m) {
const struct icmp6 *icmp6 = rte_pktmbuf_mtod(m, const struct icmp6 *);
const struct iface *iface = mbuf_data(m)->iface;
const struct rte_ipv6_addr *remote, *local;
const struct ip6_local_mbuf_data *d;
const struct icmp6_neigh_solicit *ns;
const struct icmp6_neigh_advert *na;
const struct rte_ipv6_addr *target;
const struct iface *iface;
struct rte_ether_addr mac;
struct nexthop *nh;
struct nexthop *nh = NULL;
bool lladdr_found;

d = (const struct ip6_local_mbuf_data *)control_output_mbuf_data(m)->cb_data;
iface = control_output_mbuf_data(m)->iface;

switch (icmp6->type) {
case ICMP6_TYPE_NEIGH_SOLICIT:
ns = PAYLOAD(icmp6);
// HACK: the target IP contains the *SOURCE* address of the NS sender. It was
// replaced in ndp_ns_input_process to avoid copying the whole IPv6 header.
target = &ns->target;
local = &ns->target;
remote = &d->src;
lladdr_found = icmp6_get_opt(
m, sizeof(*icmp6) + sizeof(*ns), ICMP6_OPT_SRC_LLADDR, &mac
);
break;
case ICMP6_TYPE_NEIGH_ADVERT:
na = PAYLOAD(icmp6);
target = &na->target;
local = NULL;
remote = &na->target;
lladdr_found = icmp6_get_opt(
m, sizeof(*icmp6) + sizeof(*na), ICMP6_OPT_TARGET_LLADDR, &mac
);
break;
default:
goto free;
}
if (!lladdr_found)
goto free;

nh = ip6_nexthop_lookup(iface->vrf_id, iface->id, target);
if (nh == NULL) {
// We don't have an entry for the probe sender address yet.
//
// Create one now. If the sender has requested our mac address, they
// will certainly contact us soon and it will save us an NDP solicitation.
if ((nh = ip6_nexthop_new(iface->vrf_id, iface->id, target)) == NULL) {
LOG(ERR, "ip6_nexthop_new: %s", strerror(errno));
goto free;
if (!rte_ipv6_addr_is_unspec(remote) && !rte_ipv6_addr_is_mcast(remote)) {
nh = ip6_nexthop_lookup(iface->vrf_id, iface->id, remote);
if (nh == NULL) {
// We don't have an entry for the probe sender address yet.
//
// Create one now. If the sender has requested our mac address, they
// will certainly contact us soon and it will save us an NDP solicitation.
if ((nh = ip6_nexthop_new(iface->vrf_id, iface->id, remote)) == NULL) {
LOG(ERR, "ip6_nexthop_new: %s", strerror(errno));
goto free;
}

// Add an internal /128 route to reference the newly created nexthop.
if (ip6_route_insert(
iface->vrf_id, iface->id, remote, RTE_IPV6_MAX_DEPTH, nh
)
< 0) {
LOG(ERR, "ip6_route_insert: %s", strerror(errno));
goto free;
}
}
}

// Add an internal /128 route to reference the newly created nexthop.
if (ip6_route_insert(iface->vrf_id, iface->id, target, RTE_IPV6_MAX_DEPTH, nh)
< 0) {
LOG(ERR, "ip6_route_insert: %s", strerror(errno));
if (nh && !(nh->flags & GR_NH_F_STATIC) && lladdr_found) {
// Refresh all fields.
nh->last_reply = rte_get_tsc_cycles();
nh->iface_id = iface->id;
nh->flags |= GR_NH_F_REACHABLE;
nh->flags &= ~(GR_NH_F_STALE | GR_NH_F_PENDING | GR_NH_F_FAILED);
nh->ucast_probes = 0;
nh->bcast_probes = 0;
nh->lladdr = mac;
nexthop_push_notification(NEXTHOP_EVENT_UPDATE, nh);
}

if (icmp6->type == ICMP6_TYPE_NEIGH_SOLICIT && local != NULL) {
// send a reply for our local ip
struct ndp_na_output_mbuf_data *d = ndp_na_output_mbuf_data(m);
d->local = ip6_nexthop_lookup(iface->vrf_id, iface->id, local);
d->remote = nh;
d->iface = iface;
if (post_to_stack(ndp_na_output_node, m) < 0) {
LOG(ERR, "post_to_stack: %s", strerror(errno));
goto free;
}
// prevent double free, mbuf has been re-consumed by datapath
m = NULL;
}

// Static next hops never need updating.
if (nh->flags & GR_NH_F_STATIC)
goto free;

// Refresh all fields.
nh->last_reply = rte_get_tsc_cycles();
nh->iface_id = iface->id;
nh->flags |= GR_NH_F_REACHABLE;
nh->flags &= ~(GR_NH_F_STALE | GR_NH_F_PENDING | GR_NH_F_FAILED);
nh->ucast_probes = 0;
nh->bcast_probes = 0;
nh->lladdr = mac;
nexthop_push_notification(NEXTHOP_EVENT_UPDATE, nh);

// Flush all held packets.
struct rte_mbuf *held = nh->held_pkts_head;
while (held != NULL) {
Expand Down Expand Up @@ -309,6 +329,7 @@ static void nh6_init(struct event_base *ev_base) {
ABORT("nh_pool_new(AF_INET6) failed");

ip6_output_node = gr_control_input_register_handler("ip6_output", true);
ndp_na_output_node = gr_control_input_register_handler("ndp_na_output", true);
}

static void nh6_fini(struct event_base *) {
Expand Down
5 changes: 5 additions & 0 deletions modules/ip6/datapath/gr_ip6_datapath.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ GR_MBUF_PRIV_DATA_TYPE(ip6_local_mbuf_data, {
uint8_t proto;
});

GR_MBUF_PRIV_DATA_TYPE(ndp_na_output_mbuf_data, {
const struct nexthop *local;
const struct nexthop *remote;
});

void ip6_input_local_add_proto(uint8_t proto, const char *next_node);
void ip6_output_register_interface(uint16_t iface_type_id, const char *next_node);
int ip6_nexthop_solicit(struct nexthop *nh);
Expand Down
1 change: 1 addition & 0 deletions modules/ip6/datapath/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ src += files(
'ip6_local.c',
'ip6_output.c',
'ndp_na_input.c',
'ndp_na_output.c',
'ndp_ns_input.c',
'ndp_ns_output.c',
'ndp_rs_input.c',
Expand Down
105 changes: 105 additions & 0 deletions modules/ip6/datapath/ndp_na_output.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// SPDX-License-Identifier: BSD-3-Clause
// Copyright (c) 2024 Robin Jarry

#include <gr_control_input.h>
#include <gr_graph.h>
#include <gr_icmp6.h>
#include <gr_ip6_control.h>
#include <gr_ip6_datapath.h>
#include <gr_log.h>
#include <gr_mbuf.h>
#include <gr_trace.h>

#include <rte_byteorder.h>
#include <rte_ether.h>
#include <rte_graph_worker.h>
#include <rte_ip6.h>

enum {
OUTPUT = 0,
EDGE_COUNT,
};

static uint16_t ndp_na_output_process(
struct rte_graph *graph,
struct rte_node *node,
void **objs,
uint16_t nb_objs
) {
const struct nexthop *local, *remote;
struct ip6_local_mbuf_data *d;
struct icmp6_neigh_advert *na;
struct icmp6_opt_lladdr *ll;
struct icmp6_opt *opt;
struct rte_mbuf *mbuf;
uint16_t payload_len;
struct icmp6 *icmp6;

for (uint16_t i = 0; i < nb_objs; i++) {
mbuf = objs[i];

local = ndp_na_output_mbuf_data(mbuf)->local;
remote = ndp_na_output_mbuf_data(mbuf)->remote;

rte_pktmbuf_trim(mbuf, rte_pktmbuf_pkt_len(mbuf));

// Fill ICMP6 layer.
payload_len = sizeof(*icmp6) + sizeof(*na) + sizeof(*opt) + sizeof(*ll);
icmp6 = (struct icmp6 *)rte_pktmbuf_append(mbuf, payload_len);
icmp6->type = ICMP6_TYPE_NEIGH_ADVERT;
icmp6->code = 0;
na = PAYLOAD(icmp6);
na->override = 1;
na->router = 1;
na->solicited = remote != NULL;
na->target = local->ipv6;
opt = PAYLOAD(na);
opt->type = ICMP6_OPT_TARGET_LLADDR;
opt->len = ICMP6_OPT_LEN(sizeof(*opt) + sizeof(*ll));
ll = PAYLOAD(opt);
ll->mac = local->lladdr;

// Fill in IP local data
d = ip6_local_mbuf_data(mbuf);
d->iface = iface_from_id(local->iface_id);
d->src = local->ipv6;
if (remote == NULL) {
// If the source of the solicitation is the unspecified address, the
// node MUST set the Solicited flag to zero and multicast the
// advertisement to the all-nodes address.
d->dst = (struct rte_ipv6_addr)RTE_IPV6_ADDR_ALLNODES_LINK_LOCAL;
} else {
d->dst = remote->ipv6;
}
d->len = payload_len;
d->hop_limit = IP6_DEFAULT_HOP_LIMIT;
d->proto = IPPROTO_ICMPV6;

if (gr_mbuf_is_traced(mbuf)) {
uint8_t trace_len = RTE_MIN(payload_len, GR_TRACE_ITEM_MAX_LEN);
struct icmp6 *t = gr_mbuf_trace_add(mbuf, node, trace_len);
memcpy(t, icmp6, trace_len);
}
rte_node_enqueue_x1(graph, node, OUTPUT, mbuf);
}

return nb_objs;
}

static struct rte_node_register node = {
.name = "ndp_na_output",

.process = ndp_na_output_process,

.nb_edges = EDGE_COUNT,
.next_nodes = {
[OUTPUT] = "icmp6_output",
},
};

static struct gr_node_info info = {
.node = &node,
.trace_format = (gr_trace_format_cb_t)trace_icmp6_format,
};

GR_NODE_REGISTER(info);
Loading

0 comments on commit b51dfd9

Please sign in to comment.