diff --git a/docs/Settings.md b/docs/Settings.md index 9df659b894..8d6d947245 100644 --- a/docs/Settings.md +++ b/docs/Settings.md @@ -177,6 +177,7 @@ These parameters are accessed by calling [GetParam](./api/GetParam.md) or [SetPa | `QUIC_PARAM_CONN_STATISTICS_V2`
22 | QUIC_STATISTICS_V2 | Get-only | Connection-level statistics, version 2. | | `QUIC_PARAM_CONN_STATISTICS_V2_PLAT`
23 | QUIC_STATISTICS_V2 | Get-only | Connection-level statistics with platform-specific time format, version 2. | | `QUIC_PARAM_CONN_ORIG_DEST_CID`
24 | uint8_t[] | Get-only | The original destination connection ID used by the client to connect to the server. | +| `QUIC_PARAM_CONN_DSCP`
25 | uint8_t | Both | The DiffServ Code Point put in the DiffServ field (formerly TypeOfService/TrafficClass) on packets sent from this connection. | ### QUIC_PARAM_CONN_STATISTICS_V2 diff --git a/src/core/binding.c b/src/core/binding.c index 348e1228d0..22f03f5f62 100644 --- a/src/core/binding.c +++ b/src/core/binding.c @@ -803,7 +803,7 @@ QuicBindingProcessStatelessOperation( Binding, OperationType); - CXPLAT_SEND_CONFIG SendConfig = { RecvPacket->Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { RecvPacket->Route, 0, CXPLAT_ECN_NON_ECT, 0, 0 }; CXPLAT_SEND_DATA* SendData = CxPlatSendDataAlloc(Binding->Socket, &SendConfig); if (SendData == NULL) { QuicTraceEvent( diff --git a/src/core/connection.c b/src/core/connection.c index b59d19ba45..e58ad9974c 100644 --- a/src/core/connection.c +++ b/src/core/connection.c @@ -6623,6 +6623,27 @@ QuicConnParamSet( return QUIC_STATUS_SUCCESS; } + case QUIC_PARAM_CONN_DSCP: + { + if (BufferLength != sizeof(uint8_t) || Buffer == NULL) { + Status = QUIC_STATUS_INVALID_PARAMETER; + break; + } + + uint8_t DSCP = 0; + CxPlatCopyMemory(&DSCP, Buffer, BufferLength); + + if (DSCP > CXPLAT_MAX_DSCP) { + Status = QUIC_STATUS_INVALID_PARAMETER; + break; + } + + Connection->DSCP = DSCP; + + Status = QUIC_STATUS_SUCCESS; + break; + } + // // Private // @@ -7207,27 +7228,55 @@ QuicConnParamGet( } case QUIC_PARAM_CONN_ORIG_DEST_CID: + if (Connection->OrigDestCID == NULL) { Status = QUIC_STATUS_INVALID_STATE; break; } + if (*BufferLength < Connection->OrigDestCID->Length) { Status = QUIC_STATUS_BUFFER_TOO_SMALL; *BufferLength = Connection->OrigDestCID->Length; break; } + if (Buffer == NULL) { Status = QUIC_STATUS_INVALID_PARAMETER; break; } + CxPlatCopyMemory( Buffer, Connection->OrigDestCID->Data, Connection->OrigDestCID->Length); + // // Tell app how much buffer we copied. 
// *BufferLength = Connection->OrigDestCID->Length; + + Status = QUIC_STATUS_SUCCESS; + break; + + case QUIC_PARAM_CONN_DSCP: + + if (*BufferLength < sizeof(Connection->DSCP)) { + Status = QUIC_STATUS_BUFFER_TOO_SMALL; + *BufferLength = sizeof(Connection->DSCP); + break; + } + + if (Buffer == NULL) { + Status = QUIC_STATUS_INVALID_PARAMETER; + break; + } + + CxPlatCopyMemory( + Buffer, + &Connection->DSCP, + sizeof(Connection->DSCP)); + + *BufferLength = sizeof(Connection->DSCP); Status = QUIC_STATUS_SUCCESS; break; diff --git a/src/core/connection.h b/src/core/connection.h index c754115f52..0b3dcc5017 100644 --- a/src/core/connection.h +++ b/src/core/connection.h @@ -462,6 +462,12 @@ typedef struct QUIC_CONNECTION { // uint8_t PeerReorderingThreshold; + // + // DSCP value to set on all sends from this connection. + // Default value of 0. + // + uint8_t DSCP; + // // The ACK frequency sequence number we are currently using to send. // diff --git a/src/core/packet_builder.c b/src/core/packet_builder.c index 9d380fa09a..05eb49286c 100644 --- a/src/core/packet_builder.c +++ b/src/core/packet_builder.c @@ -265,7 +265,8 @@ QuicPacketBuilderPrepare( DatagramSize), Builder->EcnEctSet ? CXPLAT_ECN_ECT_0 : CXPLAT_ECN_NON_ECT, Builder->Connection->Registration->ExecProfile == QUIC_EXECUTION_PROFILE_TYPE_MAX_THROUGHPUT ? 
- CXPLAT_SEND_FLAGS_MAX_THROUGHPUT : CXPLAT_SEND_FLAGS_NONE + CXPLAT_SEND_FLAGS_MAX_THROUGHPUT : CXPLAT_SEND_FLAGS_NONE, + Connection->DSCP }; Builder->SendData = CxPlatSendDataAlloc(Builder->Path->Binding->Socket, &SendConfig); diff --git a/src/cs/lib/msquic_generated.cs b/src/cs/lib/msquic_generated.cs index 3e6f9d96b7..e725408c40 100644 --- a/src/cs/lib/msquic_generated.cs +++ b/src/cs/lib/msquic_generated.cs @@ -3447,6 +3447,9 @@ internal static unsafe partial class MsQuic [NativeTypeName("#define QUIC_PARAM_CONN_ORIG_DEST_CID 0x05000018")] internal const uint QUIC_PARAM_CONN_ORIG_DEST_CID = 0x05000018; + [NativeTypeName("#define QUIC_PARAM_CONN_DSCP 0x05000019")] + internal const uint QUIC_PARAM_CONN_DSCP = 0x05000019; + [NativeTypeName("#define QUIC_PARAM_TLS_HANDSHAKE_INFO 0x06000000")] internal const uint QUIC_PARAM_TLS_HANDSHAKE_INFO = 0x06000000; diff --git a/src/generated/linux/datapath_winkernel.c.clog.h b/src/generated/linux/datapath_winkernel.c.clog.h index 4e2479cf34..fa75a3704b 100644 --- a/src/generated/linux/datapath_winkernel.c.clog.h +++ b/src/generated/linux/datapath_winkernel.c.clog.h @@ -213,6 +213,42 @@ tracepoint(CLOG_DATAPATH_WINKERNEL_C, DatapathQueryRecvMaxCoalescedSizeFailedAsy +/*---------------------------------------------------------- +// Decoder Ring for DatapathTestSetIpv6TrafficClassFailed +// [data] Test setting IPV6_TCLASS failed, 0x%x +// QuicTraceLogWarning( + DatapathTestSetIpv6TrafficClassFailed, + "[data] Test setting IPV6_TCLASS failed, 0x%x", + Status); +// arg2 = arg2 = Status = arg2 +----------------------------------------------------------*/ +#ifndef _clog_3_ARGS_TRACE_DatapathTestSetIpv6TrafficClassFailed +#define _clog_3_ARGS_TRACE_DatapathTestSetIpv6TrafficClassFailed(uniqueId, encoded_arg_string, arg2)\ +tracepoint(CLOG_DATAPATH_WINKERNEL_C, DatapathTestSetIpv6TrafficClassFailed , arg2);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for 
DatapathTestSetIpv6TrafficClassFailedAsync +// [data] Test setting IPV6_TCLASS failed (async), 0x%x +// QuicTraceLogWarning( + DatapathTestSetIpv6TrafficClassFailedAsync, + "[data] Test setting IPV6_TCLASS failed (async), 0x%x", + Status); +// arg2 = arg2 = Status = arg2 +----------------------------------------------------------*/ +#ifndef _clog_3_ARGS_TRACE_DatapathTestSetIpv6TrafficClassFailedAsync +#define _clog_3_ARGS_TRACE_DatapathTestSetIpv6TrafficClassFailedAsync(uniqueId, encoded_arg_string, arg2)\ +tracepoint(CLOG_DATAPATH_WINKERNEL_C, DatapathTestSetIpv6TrafficClassFailedAsync , arg2);\ + +#endif + + + + /*---------------------------------------------------------- // Decoder Ring for DatapathDropEmptyMdl // [%p] Dropping datagram with empty mdl. diff --git a/src/generated/linux/datapath_winkernel.c.clog.h.lttng.h b/src/generated/linux/datapath_winkernel.c.clog.h.lttng.h index 6efeb61ee7..704051c779 100644 --- a/src/generated/linux/datapath_winkernel.c.clog.h.lttng.h +++ b/src/generated/linux/datapath_winkernel.c.clog.h.lttng.h @@ -191,6 +191,44 @@ TRACEPOINT_EVENT(CLOG_DATAPATH_WINKERNEL_C, DatapathQueryRecvMaxCoalescedSizeFai +/*---------------------------------------------------------- +// Decoder Ring for DatapathTestSetIpv6TrafficClassFailed +// [data] Test setting IPV6_TCLASS failed, 0x%x +// QuicTraceLogWarning( + DatapathTestSetIpv6TrafficClassFailed, + "[data] Test setting IPV6_TCLASS failed, 0x%x", + Status); +// arg2 = arg2 = Status = arg2 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_WINKERNEL_C, DatapathTestSetIpv6TrafficClassFailed, + TP_ARGS( + unsigned int, arg2), + TP_FIELDS( + ctf_integer(unsigned int, arg2, arg2) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathTestSetIpv6TrafficClassFailedAsync +// [data] Test setting IPV6_TCLASS failed (async), 0x%x +// QuicTraceLogWarning( + DatapathTestSetIpv6TrafficClassFailedAsync, + "[data] Test 
setting IPV6_TCLASS failed (async), 0x%x", + Status); +// arg2 = arg2 = Status = arg2 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_WINKERNEL_C, DatapathTestSetIpv6TrafficClassFailedAsync, + TP_ARGS( + unsigned int, arg2), + TP_FIELDS( + ctf_integer(unsigned int, arg2, arg2) + ) +) + + + /*---------------------------------------------------------- // Decoder Ring for DatapathDropEmptyMdl // [%p] Dropping datagram with empty mdl. diff --git a/src/generated/linux/datapath_winuser.c.clog.h b/src/generated/linux/datapath_winuser.c.clog.h index c285e4e96b..b26f8b70b0 100644 --- a/src/generated/linux/datapath_winuser.c.clog.h +++ b/src/generated/linux/datapath_winuser.c.clog.h @@ -123,6 +123,42 @@ tracepoint(CLOG_DATAPATH_WINUSER_C, DatapathQueryRecvMaxCoalescedSizeFailed , ar +/*---------------------------------------------------------- +// Decoder Ring for DatapathOpenUdpv6SocketFailed +// [data] UDPv6 helper socket failed to open, 0x%x +// QuicTraceLogWarning( + DatapathOpenUdpv6SocketFailed, + "[data] UDPv6 helper socket failed to open, 0x%x", + WsaError); +// arg2 = arg2 = WsaError = arg2 +----------------------------------------------------------*/ +#ifndef _clog_3_ARGS_TRACE_DatapathOpenUdpv6SocketFailed +#define _clog_3_ARGS_TRACE_DatapathOpenUdpv6SocketFailed(uniqueId, encoded_arg_string, arg2)\ +tracepoint(CLOG_DATAPATH_WINUSER_C, DatapathOpenUdpv6SocketFailed , arg2);\ + +#endif + + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathTestSetIpv6TrafficClassFailed +// [data] Test setting IPV6_TCLASS failed, 0x%x +// QuicTraceLogWarning( + DatapathTestSetIpv6TrafficClassFailed, + "[data] Test setting IPV6_TCLASS failed, 0x%x", + WsaError); +// arg2 = arg2 = WsaError = arg2 +----------------------------------------------------------*/ +#ifndef _clog_3_ARGS_TRACE_DatapathTestSetIpv6TrafficClassFailed +#define _clog_3_ARGS_TRACE_DatapathTestSetIpv6TrafficClassFailed(uniqueId, 
encoded_arg_string, arg2)\ +tracepoint(CLOG_DATAPATH_WINUSER_C, DatapathTestSetIpv6TrafficClassFailed , arg2);\ + +#endif + + + + /*---------------------------------------------------------- // Decoder Ring for DatapathRecvEmpty // [data][%p] Dropping datagram with empty payload. diff --git a/src/generated/linux/datapath_winuser.c.clog.h.lttng.h b/src/generated/linux/datapath_winuser.c.clog.h.lttng.h index 6af0327b8e..bcd390782c 100644 --- a/src/generated/linux/datapath_winuser.c.clog.h.lttng.h +++ b/src/generated/linux/datapath_winuser.c.clog.h.lttng.h @@ -96,6 +96,44 @@ TRACEPOINT_EVENT(CLOG_DATAPATH_WINUSER_C, DatapathQueryRecvMaxCoalescedSizeFaile +/*---------------------------------------------------------- +// Decoder Ring for DatapathOpenUdpv6SocketFailed +// [data] UDPv6 helper socket failed to open, 0x%x +// QuicTraceLogWarning( + DatapathOpenUdpv6SocketFailed, + "[data] UDPv6 helper socket failed to open, 0x%x", + WsaError); +// arg2 = arg2 = WsaError = arg2 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_WINUSER_C, DatapathOpenUdpv6SocketFailed, + TP_ARGS( + unsigned int, arg2), + TP_FIELDS( + ctf_integer(unsigned int, arg2, arg2) + ) +) + + + +/*---------------------------------------------------------- +// Decoder Ring for DatapathTestSetIpv6TrafficClassFailed +// [data] Test setting IPV6_TCLASS failed, 0x%x +// QuicTraceLogWarning( + DatapathTestSetIpv6TrafficClassFailed, + "[data] Test setting IPV6_TCLASS failed, 0x%x", + WsaError); +// arg2 = arg2 = WsaError = arg2 +----------------------------------------------------------*/ +TRACEPOINT_EVENT(CLOG_DATAPATH_WINUSER_C, DatapathTestSetIpv6TrafficClassFailed, + TP_ARGS( + unsigned int, arg2), + TP_FIELDS( + ctf_integer(unsigned int, arg2, arg2) + ) +) + + + /*---------------------------------------------------------- // Decoder Ring for DatapathRecvEmpty // [data][%p] Dropping datagram with empty payload. 
diff --git a/src/inc/msquic.h b/src/inc/msquic.h index b8dc5471db..79e96b5199 100644 --- a/src/inc/msquic.h +++ b/src/inc/msquic.h @@ -923,6 +923,7 @@ typedef struct QUIC_SCHANNEL_CREDENTIAL_ATTRIBUTE_W { #define QUIC_PARAM_CONN_STATISTICS_V2 0x05000016 // QUIC_STATISTICS_V2 #define QUIC_PARAM_CONN_STATISTICS_V2_PLAT 0x05000017 // QUIC_STATISTICS_V2 #define QUIC_PARAM_CONN_ORIG_DEST_CID 0x05000018 // uint8_t[] +#define QUIC_PARAM_CONN_DSCP 0x05000019 // uint8_t // // Parameters for TLS. diff --git a/src/inc/quic_datapath.h b/src/inc/quic_datapath.h index 9975f762a1..ec52c85d97 100644 --- a/src/inc/quic_datapath.h +++ b/src/inc/quic_datapath.h @@ -58,6 +58,17 @@ typedef enum CXPLAT_ECN_TYPE { // #define CXPLAT_ECN_FROM_TOS(ToS) (CXPLAT_ECN_TYPE)((ToS) & 0x3) +// +// Helper to get the DSCP value from the Type of Service field of received data. +// +#define CXPLAT_DSCP_FROM_TOS(ToS) (uint8_t)((ToS) >> 2) + +// +// Define the maximum DSCP value allowed (DSCP is a 6-bit field). +// Note: this is without the ECN bits included +// +#define CXPLAT_MAX_DSCP 63 + // // The maximum IP MTU this implementation supports for QUIC. // @@ -444,6 +455,7 @@ CxPlatDataPathUpdateConfig( #define CXPLAT_DATAPATH_FEATURE_TCP 0x0020 #define CXPLAT_DATAPATH_FEATURE_RAW 0x0040 #define CXPLAT_DATAPATH_FEATURE_TTL 0x0080 +#define CXPLAT_DATAPATH_FEATURE_DSCP 0x0100 // // Queries the currently supported features of the datapath. 
@@ -675,6 +687,7 @@ typedef struct CXPLAT_SEND_CONFIG { uint16_t MaxPacketSize; uint8_t ECN; // CXPLAT_ECN_TYPE uint8_t Flags; // CXPLAT_SEND_FLAGS + uint8_t DSCP; } CXPLAT_SEND_CONFIG; // diff --git a/src/manifest/clog.sidecar b/src/manifest/clog.sidecar index 907d055946..be36e869b4 100644 --- a/src/manifest/clog.sidecar +++ b/src/manifest/clog.sidecar @@ -2695,6 +2695,18 @@ ], "macroName": "QuicTraceLogWarning" }, + "DatapathOpenUdpv6SocketFailed": { + "ModuleProperites": {}, + "TraceString": "[data] UDPv6 helper socket failed to open, 0x%x", + "UniqueId": "DatapathOpenUdpv6SocketFailed", + "splitArgs": [ + { + "DefinationEncoding": "x", + "MacroVariableName": "arg2" + } + ], + "macroName": "QuicTraceLogWarning" + }, "DatapathQueryRecvMaxCoalescedSizeFailed": { "ModuleProperites": {}, "TraceString": "[data] Query for UDP_RECV_MAX_COALESCED_SIZE failed, 0x%x", @@ -3027,6 +3039,30 @@ ], "macroName": "QuicTraceLogVerbose" }, + "DatapathTestSetIpv6TrafficClassFailed": { + "ModuleProperites": {}, + "TraceString": "[data] Test setting IPV6_TCLASS failed, 0x%x", + "UniqueId": "DatapathTestSetIpv6TrafficClassFailed", + "splitArgs": [ + { + "DefinationEncoding": "x", + "MacroVariableName": "arg2" + } + ], + "macroName": "QuicTraceLogWarning" + }, + "DatapathTestSetIpv6TrafficClassFailedAsync": { + "ModuleProperites": {}, + "TraceString": "[data] Test setting IPV6_TCLASS failed (async), 0x%x", + "UniqueId": "DatapathTestSetIpv6TrafficClassFailedAsync", + "splitArgs": [ + { + "DefinationEncoding": "x", + "MacroVariableName": "arg2" + } + ], + "macroName": "QuicTraceLogWarning" + }, "DatapathTooLarge": { "ModuleProperites": {}, "TraceString": "[data][%p] Received larger than expected datagram from %!ADDR!", @@ -14199,6 +14235,11 @@ "TraceID": "DatapathOpenUdpSocketFailedAsync", "EncodingString": "[data] UDP send segmentation helper socket failed to open (async), 0x%x" }, + { + "UniquenessHash": "9af95ba3-6d62-261f-fb94-75465f7084ff", + "TraceID": 
"DatapathOpenUdpv6SocketFailed", + "EncodingString": "[data] UDPv6 helper socket failed to open, 0x%x" + }, { "UniquenessHash": "1ba702fe-3407-9bab-a2bf-4f694b478ac0", "TraceID": "DatapathQueryRecvMaxCoalescedSizeFailed", @@ -14294,6 +14335,16 @@ "TraceID": "DatapathTcpAuxBinding", "EncodingString": "[data][%p] Binding TCP socket to %s" }, + { + "UniquenessHash": "576dad43-cdc4-7cbc-817e-f8831afa4980", + "TraceID": "DatapathTestSetIpv6TrafficClassFailed", + "EncodingString": "[data] Test setting IPV6_TCLASS failed, 0x%x" + }, + { + "UniquenessHash": "bac8baf4-caa7-65ff-f815-8e27065cc9ed", + "TraceID": "DatapathTestSetIpv6TrafficClassFailedAsync", + "EncodingString": "[data] Test setting IPV6_TCLASS failed (async), 0x%x" + }, { "UniquenessHash": "a07c9538-e6d7-3c41-1367-ce58f2df4d9e", "TraceID": "DatapathTooLarge", diff --git a/src/perf/lib/Tcp.cpp b/src/perf/lib/Tcp.cpp index 9d71308c5c..fd328b1651 100644 --- a/src/perf/lib/Tcp.cpp +++ b/src/perf/lib/Tcp.cpp @@ -1088,7 +1088,7 @@ QUIC_BUFFER* TcpConnection::NewSendBuffer() return nullptr; } if (!BatchedSendData) { - CXPLAT_SEND_CONFIG SendConfig = { &Route, TLS_BLOCK_SIZE, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Route, TLS_BLOCK_SIZE, CXPLAT_ECN_NON_ECT, 0, 0 }; BatchedSendData = CxPlatSendDataAlloc(Socket, &SendConfig); if (!BatchedSendData) { return nullptr; } } diff --git a/src/platform/datapath_epoll.c b/src/platform/datapath_epoll.c index 925880b36a..4601181cf9 100644 --- a/src/platform/datapath_epoll.c +++ b/src/platform/datapath_epoll.c @@ -340,6 +340,7 @@ CxPlatDataPathCalculateFeatureSupport( Datapath->Features |= CXPLAT_DATAPATH_FEATURE_TCP; Datapath->Features |= CXPLAT_DATAPATH_FEATURE_TTL; + Datapath->Features |= CXPLAT_DATAPATH_FEATURE_DSCP; } void @@ -887,7 +888,6 @@ CxPlatSocketContextInitialize( goto Exit; } - #ifdef UDP_GRO if (SocketContext->DatapathPartition->Datapath->Features & CXPLAT_DATAPATH_FEATURE_RECV_COALESCING) { Option = TRUE; @@ -2264,6 +2264,7 @@ SendDataAlloc( 
SendData->AlreadySentCount = 0; SendData->ControlBufferLength = 0; SendData->ECN = Config->ECN; + SendData->DSCP = Config->DSCP; SendData->Flags = Config->Flags; SendData->OnConnectedSocket = Socket->Connected; SendData->SegmentationSupported = @@ -2478,7 +2479,7 @@ CxPlatSendDataPopulateAncillaryData( CMsg->cmsg_level = SendData->LocalAddress.Ip.sa_family == AF_INET ? IPPROTO_IP : IPPROTO_IPV6; CMsg->cmsg_type = SendData->LocalAddress.Ip.sa_family == AF_INET ? IP_TOS : IPV6_TCLASS; CMsg->cmsg_len = CMSG_LEN(sizeof(int)); - *(int*)CMSG_DATA(CMsg) = SendData->ECN; + *(int*)CMSG_DATA(CMsg) = SendData->ECN | (SendData->DSCP << 2); if (!SendData->OnConnectedSocket) { if (SendData->LocalAddress.Ip.sa_family == AF_INET) { diff --git a/src/platform/datapath_kqueue.c b/src/platform/datapath_kqueue.c index 0bb64fa330..cd4e4554ca 100644 --- a/src/platform/datapath_kqueue.c +++ b/src/platform/datapath_kqueue.c @@ -1626,6 +1626,7 @@ CxPlatSendDataAlloc( CxPlatZeroMemory(SendData, sizeof(*SendData)); SendData->Owner = SocketContext->DatapathPartition; SendData->ECN = Config->ECN; + SendData->DSCP = Config->DSCP; SendData->SegmentSize = (Socket->Datapath->Features & CXPLAT_DATAPATH_FEATURE_SEND_SEGMENTATION) ? Config->MaxPacketSize : 0; @@ -1981,7 +1982,7 @@ CxPlatSocketSendInternal( CMsg->cmsg_level = RemoteAddress->Ip.sa_family == QUIC_ADDRESS_FAMILY_INET ? IPPROTO_IP : IPPROTO_IPV6; CMsg->cmsg_type = RemoteAddress->Ip.sa_family == QUIC_ADDRESS_FAMILY_INET ? 
IP_TOS : IPV6_TCLASS; CMsg->cmsg_len = CMSG_LEN(sizeof(int)); - *(int *)CMSG_DATA(CMsg) = SendData->ECN; + *(int *)CMSG_DATA(CMsg) = SendData->ECN | (SendData->DSCP << 2); if (!SocketContext->Binding->Connected) { Mhdr.msg_name = &MappedRemoteAddress; diff --git a/src/platform/datapath_raw.c b/src/platform/datapath_raw.c index 57f342d91e..e860e40c1d 100644 --- a/src/platform/datapath_raw.c +++ b/src/platform/datapath_raw.c @@ -371,7 +371,7 @@ RawSocketSend( const CXPLAT_INTERFACE* Interface = CxPlatDpRawGetInterfaceFromQueue(Route->Queue); CxPlatFramingWriteHeaders( - Socket, Route, &SendData->Buffer, SendData->ECN, + Socket, Route, &SendData->Buffer, SendData->ECN, SendData->DSCP, Interface->OffloadStatus.Transmit.NetworkLayerXsum, Interface->OffloadStatus.Transmit.TransportLayerXsum, Route->TcpState.SequenceNumber, diff --git a/src/platform/datapath_raw.h b/src/platform/datapath_raw.h index f54adc03a0..c980eee3e9 100644 --- a/src/platform/datapath_raw.h +++ b/src/platform/datapath_raw.h @@ -377,6 +377,7 @@ CxPlatFramingWriteHeaders( _In_ const CXPLAT_ROUTE* Route, _Inout_ QUIC_BUFFER* Buffer, _In_ CXPLAT_ECN_TYPE ECN, + _In_ uint8_t DSCP, _In_ BOOLEAN SkipNetworkLayerXsum, _In_ BOOLEAN SkipTransportLayerXsum, _In_ uint32_t TcpSeqNum, diff --git a/src/platform/datapath_raw_socket.c b/src/platform/datapath_raw_socket.c index 48f448e92d..d53ca7bf31 100644 --- a/src/platform/datapath_raw_socket.c +++ b/src/platform/datapath_raw_socket.c @@ -297,7 +297,7 @@ CxPlatDpRawParseIPv4( return; } - Packet->TypeOfService = IP->EcnField; + Packet->TypeOfService = IP->TypeOfServiceAndEcnField; Packet->HopLimitTTL = IP->TimeToLive; Packet->Route->RemoteAddress.Ipv4.sin_family = AF_INET; CxPlatCopyMemory(&Packet->Route->RemoteAddress.Ipv4.sin_addr, IP->Source, sizeof(IP->Source)); @@ -366,7 +366,7 @@ CxPlatDpRawParseIPv6( } VersionClassEcnFlow; VersionClassEcnFlow.Value = CxPlatByteSwapUint32(IP->VersionClassEcnFlow); - Packet->TypeOfService = 
(uint8_t)VersionClassEcnFlow.EcnField; + Packet->TypeOfService = ((uint8_t)VersionClassEcnFlow.EcnField) | (uint8_t)(VersionClassEcnFlow.Class << 2); Packet->HopLimitTTL = IP->HopLimit; Packet->Route->RemoteAddress.Ipv6.sin6_family = AF_INET6; CxPlatCopyMemory(&Packet->Route->RemoteAddress.Ipv6.sin6_addr, IP->Source, sizeof(IP->Source)); @@ -538,7 +538,7 @@ CxPlatDpRawSocketAckFin( CXPLAT_DBG_ASSERT(Socket->UseTcp); CXPLAT_ROUTE* Route = Packet->Route; - CXPLAT_SEND_CONFIG SendConfig = { Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { Route, 0, CXPLAT_ECN_NON_ECT, 0, 0 }; CXPLAT_SEND_DATA *SendData = CxPlatSendDataAlloc(CxPlatRawToSocket(Socket), &SendConfig); if (SendData == NULL) { return; @@ -558,7 +558,7 @@ CxPlatDpRawSocketAckFin( TCP_HEADER* ReceivedTcpHeader = (TCP_HEADER*)(Packet->Buffer - Packet->ReservedEx); CxPlatFramingWriteHeaders( - Socket, Route, &SendData->Buffer, SendData->ECN, + Socket, Route, &SendData->Buffer, SendData->ECN, SendData->DSCP, Interface->OffloadStatus.Transmit.NetworkLayerXsum, Interface->OffloadStatus.Transmit.TransportLayerXsum, ReceivedTcpHeader->AckNumber, @@ -577,7 +577,7 @@ CxPlatDpRawSocketAckSyn( CXPLAT_DBG_ASSERT(Socket->UseTcp); CXPLAT_ROUTE* Route = Packet->Route; - CXPLAT_SEND_CONFIG SendConfig = { Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { Route, 0, CXPLAT_ECN_NON_ECT, 0, 0 }; CXPLAT_SEND_DATA *SendData = CxPlatSendDataAlloc(CxPlatRawToSocket(Socket), &SendConfig); if (SendData == NULL) { return; @@ -599,7 +599,7 @@ CxPlatDpRawSocketAckSyn( CASTED_CLOG_BYTEARRAY(sizeof(Route->LocalAddress), &Route->LocalAddress)); CxPlatFramingWriteHeaders( - Socket, Route, &SendData->Buffer, SendData->ECN, + Socket, Route, &SendData->Buffer, SendData->ECN, SendData->DSCP, Interface->OffloadStatus.Transmit.NetworkLayerXsum, Interface->OffloadStatus.Transmit.TransportLayerXsum, ReceivedTcpHeader->AckNumber, @@ -619,7 +619,7 @@ CxPlatDpRawSocketAckSyn( 
CASTED_CLOG_BYTEARRAY(sizeof(Route->RemoteAddress), &Route->RemoteAddress), CASTED_CLOG_BYTEARRAY(sizeof(Route->LocalAddress), &Route->LocalAddress)); CxPlatFramingWriteHeaders( - Socket, Route, &SendData->Buffer, SendData->ECN, + Socket, Route, &SendData->Buffer, SendData->ECN, SendData->DSCP, Interface->OffloadStatus.Transmit.NetworkLayerXsum, Interface->OffloadStatus.Transmit.TransportLayerXsum, CxPlatByteSwapUint32(CxPlatByteSwapUint32(ReceivedTcpHeader->AckNumber) + 1), @@ -642,7 +642,7 @@ CxPlatDpRawSocketAckSyn( CASTED_CLOG_BYTEARRAY(sizeof(Route->RemoteAddress), &Route->RemoteAddress), CASTED_CLOG_BYTEARRAY(sizeof(Route->LocalAddress), &Route->LocalAddress)); CxPlatFramingWriteHeaders( - Socket, Route, &SendData->Buffer, SendData->ECN, + Socket, Route, &SendData->Buffer, SendData->ECN, SendData->DSCP, Interface->OffloadStatus.Transmit.NetworkLayerXsum, Interface->OffloadStatus.Transmit.TransportLayerXsum, ReceivedTcpHeader->AckNumber, @@ -660,7 +660,7 @@ CxPlatDpRawSocketSyn( ) { CXPLAT_DBG_ASSERT(Socket->UseTcp); - CXPLAT_SEND_CONFIG SendConfig = { (CXPLAT_ROUTE*)Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { (CXPLAT_ROUTE*)Route, 0, CXPLAT_ECN_NON_ECT, 0, 0 }; CXPLAT_SEND_DATA *SendData = CxPlatSendDataAlloc(CxPlatRawToSocket(Socket), &SendConfig); if (SendData == NULL) { return; @@ -678,7 +678,7 @@ CxPlatDpRawSocketSyn( CXPLAT_DBG_ASSERT(Route->Queue != NULL); const CXPLAT_INTERFACE* Interface = CxPlatDpRawGetInterfaceFromQueue(Route->Queue); CxPlatFramingWriteHeaders( - Socket, Route, &SendData->Buffer, SendData->ECN, + Socket, Route, &SendData->Buffer, SendData->ECN, SendData->DSCP, Interface->OffloadStatus.Transmit.NetworkLayerXsum, Interface->OffloadStatus.Transmit.TransportLayerXsum, Route->TcpState.SequenceNumber, 0, TH_SYN); @@ -692,6 +692,7 @@ CxPlatFramingWriteHeaders( _In_ const CXPLAT_ROUTE* Route, _Inout_ QUIC_BUFFER* Buffer, _In_ CXPLAT_ECN_TYPE ECN, + _In_ uint8_t DSCP, _In_ BOOLEAN SkipNetworkLayerXsum, _In_ BOOLEAN 
SkipTransportLayerXsum, _In_ uint32_t TcpSeqNum, @@ -751,7 +752,7 @@ CxPlatFramingWriteHeaders( if (Family == QUIC_ADDRESS_FAMILY_INET) { IPV4_HEADER* IPv4 = (IPV4_HEADER*)(Transport - sizeof(IPV4_HEADER)); IPv4->VersionAndHeaderLength = IPV4_DEFAULT_VERHLEN; - IPv4->TypeOfService = 0; + IPv4->TypeOfService = DSCP; IPv4->EcnField = ECN; IPv4->TotalLength = htons(sizeof(IPV4_HEADER) + TransportLength + (uint16_t)Buffer->Length); IPv4->Identification = 0; @@ -799,7 +800,7 @@ CxPlatFramingWriteHeaders( } VersionClassEcnFlow = {0}; VersionClassEcnFlow.Version = IPV6_VERSION; - VersionClassEcnFlow.Class = 0; + VersionClassEcnFlow.Class = DSCP; VersionClassEcnFlow.EcnField = ECN; VersionClassEcnFlow.Flow = (uint32_t)(uintptr_t)Socket; diff --git a/src/platform/datapath_raw_xdp_linux.c b/src/platform/datapath_raw_xdp_linux.c index 14471f91eb..0b45c993fe 100644 --- a/src/platform/datapath_raw_xdp_linux.c +++ b/src/platform/datapath_raw_xdp_linux.c @@ -1070,6 +1070,7 @@ CxPlatDpRawTxAlloc( Packet->Buffer.Length = Config->MaxPacketSize; Packet->Buffer.Buffer = &Packet->FrameBuffer[HeaderBackfill.AllLayer]; Packet->ECN = Config->ECN; + Packet->DSCP = Config->DSCP; Packet->UmemRelativeAddr = BaseAddr; Packet->DatapathType = Config->Route->DatapathType = CXPLAT_DATAPATH_TYPE_RAW; } diff --git a/src/platform/datapath_raw_xdp_win.c b/src/platform/datapath_raw_xdp_win.c index c2178bfaf9..3fac60e6e2 100644 --- a/src/platform/datapath_raw_xdp_win.c +++ b/src/platform/datapath_raw_xdp_win.c @@ -1618,6 +1618,7 @@ CxPlatDpRawTxAlloc( Packet->Buffer.Length = Config->MaxPacketSize; Packet->Buffer.Buffer = &Packet->FrameBuffer[HeaderBackfill.AllLayer]; Packet->ECN = Config->ECN; + Packet->DSCP = Config->DSCP; Packet->DatapathType = Config->Route->DatapathType = CXPLAT_DATAPATH_TYPE_RAW; } diff --git a/src/platform/datapath_winkernel.c b/src/platform/datapath_winkernel.c index 57d7862ea3..d933b80a18 100644 --- a/src/platform/datapath_winkernel.c +++ b/src/platform/datapath_winkernel.c @@ 
-471,7 +471,7 @@ CxPlatDataPathQuerySockoptSupport( Datapath->WskProviderNpi.Dispatch-> WskSocket( Datapath->WskProviderNpi.Client, - AF_INET, + AF_INET6, SOCK_DGRAM, IPPROTO_UDP, WSK_FLAG_BASIC_SOCKET, @@ -600,6 +600,54 @@ CxPlatDataPathQuerySockoptSupport( } while (FALSE); + do { + DWORD TypeOfService = 1; // Lower Effort + + IoReuseIrp(Irp, STATUS_SUCCESS); + IoSetCompletionRoutine( + Irp, + CxPlatDataPathIoCompletion, + &CompletionEvent, + TRUE, + TRUE, + TRUE); + CxPlatEventReset(CompletionEvent); + + Status = + Dispatch->WskControlSocket( + UdpSocket, + WskSetOption, + IPV6_TCLASS, + IPPROTO_IPV6, + sizeof(TypeOfService), + &TypeOfService, + 0, + NULL, + &OutputSizeReturned, + Irp); + if (Status == STATUS_PENDING) { + CxPlatEventWaitForever(CompletionEvent); + } else if (QUIC_FAILED(Status)) { + QuicTraceLogWarning( + DatapathTestSetIpv6TrafficClassFailed, + "[data] Test setting IPV6_TCLASS failed, 0x%x", + Status); + break; + } + + Status = Irp->IoStatus.Status; + if (QUIC_FAILED(Status)) { + QuicTraceLogWarning( + DatapathTestSetIpv6TrafficClassFailedAsync, + "[data] Test setting IPV6_TCLASS failed (async), 0x%x", + Status); + break; + } + + Datapath->Features |= CXPLAT_DATAPATH_FEATURE_DSCP; + + } while (FALSE); + // // Some USO/URO bug blocks TTL feature support on Windows Server 2022. 
// @@ -1441,7 +1489,7 @@ SocketCreateUdp( CxPlatDataPathSetControlSocket( Binding, WskSetOption, - IPV6_ECN, + IPV6_RECVTCLASS, IPPROTO_IPV6, sizeof(Option), &Option); @@ -1451,7 +1499,7 @@ SocketCreateUdp( "[data][%p] ERROR, %u, %s.", Binding, Status, - "Set IPV6_ECN"); + "Set IPV6_RECVTCLASS"); goto Error; } @@ -1460,7 +1508,7 @@ SocketCreateUdp( CxPlatDataPathSetControlSocket( Binding, WskSetOption, - IP_ECN, + IP_RECVTOS, IPPROTO_IP, sizeof(Option), &Option); @@ -1470,7 +1518,7 @@ SocketCreateUdp( "[data][%p] ERROR, %u, %s.", Binding, Status, - "Set IP_ECN"); + "Set IP_RECVTOS"); goto Error; } @@ -2022,7 +2070,7 @@ CxPlatDataPathSocketReceive( SOCKADDR_INET LocalAddr = { 0 }; SOCKADDR_INET RemoteAddr; UINT16 MessageLength = 0; - INT ECN = 0; + INT TOS = 0; INT HopLimitTTL = 0; // @@ -2052,9 +2100,9 @@ CxPlatDataPathSocketReceive( IsUnreachableError = TRUE; break; } - } else if (CMsg->cmsg_type == IPV6_ECN) { - ECN = *(PINT)WSA_CMSG_DATA(CMsg); - CXPLAT_DBG_ASSERT(ECN < UINT8_MAX); + } else if (CMsg->cmsg_type == IPV6_TCLASS) { + TOS = *(PINT)WSA_CMSG_DATA(CMsg); + CXPLAT_DBG_ASSERT(TOS <= UINT8_MAX); } else if (CMsg->cmsg_type == IPV6_HOPLIMIT) { HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg); CXPLAT_DBG_ASSERT(HopLimitTTL < 256); @@ -2075,9 +2123,9 @@ CxPlatDataPathSocketReceive( IsUnreachableError = TRUE; break; } - } else if (CMsg->cmsg_type == IP_ECN) { - ECN = *(PINT)WSA_CMSG_DATA(CMsg); - CXPLAT_DBG_ASSERT(ECN < UINT8_MAX); + } else if (CMsg->cmsg_type == IP_TOS) { + TOS = *(PINT)WSA_CMSG_DATA(CMsg); + CXPLAT_DBG_ASSERT(TOS <= UINT8_MAX); } else if (CMsg->cmsg_type == IP_TTL) { HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg); CXPLAT_DBG_ASSERT(HopLimitTTL < 256); @@ -2247,7 +2295,7 @@ CxPlatDataPathSocketReceive( Datagram->IoBlock = IoBlock; Datagram->Data.Next = NULL; Datagram->Data.PartitionIndex = (uint16_t)(CurProcNumber % Binding->Datapath->ProcCount); - Datagram->Data.TypeOfService = (uint8_t)ECN; + Datagram->Data.TypeOfService = (uint8_t)TOS; 
Datagram->Data.HopLimitTTL = (uint8_t)HopLimitTTL; Datagram->Data.Allocated = TRUE; Datagram->Data.QueuedOnConnection = FALSE; @@ -2439,6 +2487,7 @@ SendDataAlloc( if (SendData != NULL) { SendData->Owner = ProcContext; SendData->ECN = Config->ECN; + SendData->DSCP = Config->DSCP; SendData->WskBufs = NULL; SendData->TailBuf = NULL; SendData->TotalSize = 0; @@ -2881,6 +2930,7 @@ SocketSend( BYTE CMsgBuffer[ WSA_CMSG_SPACE(sizeof(IN6_PKTINFO)) + // IP_PKTINFO WSA_CMSG_SPACE(sizeof(INT)) + // IP_ECN + WSA_CMSG_SPACE(sizeof(INT)) + // IP_TOS/IPV6_TCLASS WSA_CMSG_SPACE(sizeof(*SegmentSize)) // UDP_SEND_MSG_SIZE ]; PWSACMSGHDR CMsg = (PWSACMSGHDR)CMsgBuffer; @@ -2923,6 +2973,19 @@ SocketSend( *(PINT)WSA_CMSG_DATA(CMsg) = SendData->ECN; } + if (Binding->Datapath->Features & CXPLAT_DATAPATH_FEATURE_DSCP) { + CMsg = (PWSACMSGHDR)&CMsgBuffer[CMsgLen]; + CMsgLen += WSA_CMSG_SPACE(sizeof(INT)); + CMsg->cmsg_level = + Route->LocalAddress.si_family == QUIC_ADDRESS_FAMILY_INET ? 
+ IP_TOS : IPV6_TCLASS; + CMsg->cmsg_len = WSA_CMSG_LEN(sizeof(INT)); + *(PINT)WSA_CMSG_DATA(CMsg) = SendData->ECN | (SendData->DSCP << 2); + } + if (SendData->SegmentSize > 0) { CMsg = (PWSACMSGHDR)&CMsgBuffer[CMsgLen]; CMsgLen += WSA_CMSG_SPACE(sizeof(*SegmentSize)); diff --git a/src/platform/datapath_winuser.c b/src/platform/datapath_winuser.c index 946ceaf271..690114a286 100644 --- a/src/platform/datapath_winuser.c +++ b/src/platform/datapath_winuser.c @@ -163,7 +163,7 @@ typedef struct DATAPATH_RX_IO_BLOCK { RIO_CMSG_BASE_SIZE + WSA_CMSG_SPACE(sizeof(IN6_PKTINFO)) + // IP_PKTINFO WSA_CMSG_SPACE(sizeof(DWORD)) + // UDP_COALESCED_INFO - WSA_CMSG_SPACE(sizeof(INT)) + // IP_ECN + WSA_CMSG_SPACE(sizeof(INT)) + // IP_TOS WSA_CMSG_SPACE(sizeof(INT)) // IP_HOP_LIMIT ]; @@ -274,6 +274,7 @@ typedef struct CXPLAT_SEND_DATA { RIO_CMSG_BASE_SIZE + WSA_CMSG_SPACE(sizeof(IN6_PKTINFO)) + // IP_PKTINFO WSA_CMSG_SPACE(sizeof(INT)) + // IP_ECN + WSA_CMSG_SPACE(sizeof(INT)) + // IP_TOS/IPV6_TCLASS WSA_CMSG_SPACE(sizeof(DWORD)) // UDP_SEND_MSG_SIZE ]; @@ -701,6 +702,41 @@ CxPlatDataPathQuerySockoptSupport( } } +{ + // + // Test ToS support with IPv6, because IPv4 just fails silently. + // + SOCKET Udpv6Socket = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP); + if (Udpv6Socket == INVALID_SOCKET) { + int WsaError = WSAGetLastError(); + QuicTraceLogWarning( + DatapathOpenUdpv6SocketFailed, + "[data] UDPv6 helper socket failed to open, 0x%x", + WsaError); + goto Error; + } + + DWORD TypeOfService = 1; // Lower Effort + OptionLength = sizeof(TypeOfService); + Result = + setsockopt( + Udpv6Socket, + IPPROTO_IPV6, + IPV6_TCLASS, + (char*)&TypeOfService, + sizeof(TypeOfService)); + if (Result != NO_ERROR) { + int WsaError = WSAGetLastError(); + QuicTraceLogWarning( + DatapathTestSetIpv6TrafficClassFailed, + "[data] Test setting IPV6_TCLASS failed, 0x%x", + WsaError); + } else { + Datapath->Features |= CXPLAT_DATAPATH_FEATURE_DSCP; + } + closesocket(Udpv6Socket); +} + // // Some USO/URO bug blocks TTL feature support on Windows Server 2022. 
// @@ -1652,7 +1688,7 @@ SocketCreateUdp( setsockopt( SocketProc->Socket, IPPROTO_IPV6, - IPV6_ECN, + IPV6_RECVTCLASS, (char*)&Option, sizeof(Option)); if (Result == SOCKET_ERROR) { @@ -1662,7 +1698,7 @@ SocketCreateUdp( "[data][%p] ERROR, %u, %s.", Socket, WsaError, - "Set IPV6_ECN"); + "Set IPV6_RECVTCLASS"); Status = HRESULT_FROM_WIN32(WsaError); goto Error; } @@ -1672,7 +1708,7 @@ SocketCreateUdp( setsockopt( SocketProc->Socket, IPPROTO_IP, - IP_ECN, + IP_RECVTOS, (char*)&Option, sizeof(Option)); if (Result == SOCKET_ERROR) { @@ -1682,7 +1718,7 @@ SocketCreateUdp( "[data][%p] ERROR, %u, %s.", Socket, WsaError, - "Set IP_ECN"); + "Set IP_RECVTOS"); Status = HRESULT_FROM_WIN32(WsaError); goto Error; } @@ -3427,7 +3463,7 @@ CxPlatDataPathUdpRecvComplete( UINT16 MessageLength = NumberOfBytesTransferred; ULONG MessageCount = 0; BOOLEAN IsCoalesced = FALSE; - INT ECN = 0; + INT TOS = 0; INT HopLimitTTL = 0; if (SocketProc->Parent->UseRio) { PRIO_CMSG_BUFFER RioRcvMsg = (PRIO_CMSG_BUFFER)IoBlock->ControlBuf; @@ -3448,9 +3484,9 @@ CxPlatDataPathUdpRecvComplete( CxPlatConvertFromMappedV6(LocalAddr, LocalAddr); LocalAddr->Ipv6.sin6_scope_id = PktInfo6->ipi6_ifindex; FoundLocalAddr = TRUE; - } else if (CMsg->cmsg_type == IPV6_ECN) { - ECN = *(PINT)WSA_CMSG_DATA(CMsg); - CXPLAT_DBG_ASSERT(ECN < UINT8_MAX); + } else if (CMsg->cmsg_type == IPV6_TCLASS) { + TOS = *(PINT)WSA_CMSG_DATA(CMsg); + CXPLAT_DBG_ASSERT(TOS <= UINT8_MAX); } else if (CMsg->cmsg_type == IPV6_HOPLIMIT) { HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg); CXPLAT_DBG_ASSERT(HopLimitTTL < 256); @@ -3464,9 +3500,9 @@ CxPlatDataPathUdpRecvComplete( LocalAddr->Ipv4.sin_port = SocketProc->Parent->LocalAddress.Ipv6.sin6_port; LocalAddr->Ipv6.sin6_scope_id = PktInfo->ipi_ifindex; FoundLocalAddr = TRUE; - } else if (CMsg->cmsg_type == IP_ECN) { - ECN = *(PINT)WSA_CMSG_DATA(CMsg); - CXPLAT_DBG_ASSERT(ECN < UINT8_MAX); + } else if (CMsg->cmsg_type == IP_TOS) { + TOS = *(PINT)WSA_CMSG_DATA(CMsg); + CXPLAT_DBG_ASSERT(TOS <= 
UINT8_MAX); } else if (CMsg->cmsg_type == IP_TTL) { HopLimitTTL = *(PINT)WSA_CMSG_DATA(CMsg); CXPLAT_DBG_ASSERT(HopLimitTTL < 256); @@ -3527,7 +3563,7 @@ CxPlatDataPathUdpRecvComplete( Datagram->Route = &IoBlock->Route; Datagram->PartitionIndex = SocketProc->DatapathProc->PartitionIndex % SocketProc->DatapathProc->Datapath->PartitionCount; - Datagram->TypeOfService = (uint8_t)ECN; + Datagram->TypeOfService = (uint8_t)TOS; Datagram->HopLimitTTL = (uint8_t) HopLimitTTL; Datagram->Allocated = TRUE; Datagram->Route->DatapathType = Datagram->DatapathType = CXPLAT_DATAPATH_TYPE_NORMAL; @@ -4004,6 +4040,7 @@ SendDataAlloc( SendData->Owner = DatapathProc; SendData->SendDataPool = SendDataPool; SendData->ECN = Config->ECN; + SendData->DSCP = Config->DSCP; SendData->SendFlags = Config->Flags; SendData->SegmentSize = (Socket->Type != CXPLAT_SOCKET_UDP || @@ -4491,6 +4528,16 @@ CxPlatSocketSendInline( CMsg->cmsg_len = WSA_CMSG_LEN(sizeof(INT)); *(PINT)WSA_CMSG_DATA(CMsg) = SendData->ECN; + if (Socket->Datapath->Features & CXPLAT_DATAPATH_FEATURE_DSCP) { + WSAMhdr.Control.len += WSA_CMSG_SPACE(sizeof(INT)); + CMsg = WSA_CMSG_NXTHDR(&WSAMhdr, CMsg); + CXPLAT_DBG_ASSERT(CMsg != NULL); + CMsg->cmsg_level = IPPROTO_IP; + CMsg->cmsg_type = IP_TOS; + CMsg->cmsg_len = WSA_CMSG_LEN(sizeof(INT)); + *(PINT)WSA_CMSG_DATA(CMsg) = SendData->DSCP; + } + } else { if (!Socket->HasFixedRemoteAddress) { @@ -4511,6 +4558,16 @@ CxPlatSocketSendInline( CMsg->cmsg_type = IPV6_ECN; CMsg->cmsg_len = WSA_CMSG_LEN(sizeof(INT)); *(PINT)WSA_CMSG_DATA(CMsg) = SendData->ECN; + + if (Socket->Datapath->Features & CXPLAT_DATAPATH_FEATURE_DSCP) { + WSAMhdr.Control.len += WSA_CMSG_SPACE(sizeof(INT)); + CMsg = WSA_CMSG_NXTHDR(&WSAMhdr, CMsg); + CXPLAT_DBG_ASSERT(CMsg != NULL); + CMsg->cmsg_level = IPPROTO_IPV6; + CMsg->cmsg_type = IPV6_TCLASS; + CMsg->cmsg_len = WSA_CMSG_LEN(sizeof(INT)); + *(PINT)WSA_CMSG_DATA(CMsg) = SendData->DSCP; + } } if (SendData->SegmentSize > 0) { diff --git a/src/platform/pcp.c 
b/src/platform/pcp.c index b91e6a54f6..e090b0bb17 100644 --- a/src/platform/pcp.c +++ b/src/platform/pcp.c @@ -391,7 +391,7 @@ CxPlatPcpSendMapRequestInternal( QUIC_ADDR LocalMappedAddress; CxPlatConvertToMappedV6(&Route.LocalAddress, &LocalMappedAddress); - CXPLAT_SEND_CONFIG SendConfig = { &Route, PCP_MAP_REQUEST_SIZE, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Route, PCP_MAP_REQUEST_SIZE, CXPLAT_ECN_NON_ECT, 0, 0 }; CXPLAT_SEND_DATA* SendData = CxPlatSendDataAlloc(Socket, &SendConfig); if (SendData == NULL) { return QUIC_STATUS_OUT_OF_MEMORY; @@ -483,7 +483,7 @@ CxPlatPcpSendPeerRequestInternal( QUIC_ADDR RemotePeerMappedAddress; CxPlatConvertToMappedV6(RemotePeerAddress, &RemotePeerMappedAddress); - CXPLAT_SEND_CONFIG SendConfig = { &Route, PCP_MAP_REQUEST_SIZE, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Route, PCP_MAP_REQUEST_SIZE, CXPLAT_ECN_NON_ECT, 0, 0 }; CXPLAT_SEND_DATA* SendData = CxPlatSendDataAlloc(Socket, &SendConfig); if (SendData == NULL) { return QUIC_STATUS_OUT_OF_MEMORY; diff --git a/src/platform/platform_internal.h b/src/platform/platform_internal.h index 4930ceb614..78e2350c04 100644 --- a/src/platform/platform_internal.h +++ b/src/platform/platform_internal.h @@ -98,6 +98,11 @@ typedef struct CXPLAT_SEND_DATA_COMMON { // uint8_t ECN; // CXPLAT_ECN_TYPE + // + // The DSCP value to use for this send. + // + uint8_t DSCP; + // // The total buffer size for WsaBuffers. 
// diff --git a/src/platform/unittest/DataPathTest.cpp b/src/platform/unittest/DataPathTest.cpp index f64d11e0c5..cbe8df2c90 100644 --- a/src/platform/unittest/DataPathTest.cpp +++ b/src/platform/unittest/DataPathTest.cpp @@ -86,7 +86,9 @@ struct UdpRecvContext { QUIC_ADDR DestinationAddress; CXPLAT_EVENT ClientCompletion; CXPLAT_ECN_TYPE EcnType {CXPLAT_ECN_NON_ECT}; + uint8_t Dscp{0}; bool TtlSupported; + bool DscpSupported; UdpRecvContext() { CxPlatEventInitialize(&ClientCompletion, FALSE, FALSE); } @@ -299,11 +301,17 @@ struct DataPathTest : public ::testing::TestWithParam ASSERT_EQ(0, RecvData->HopLimitTTL); } + if (RecvContext->DscpSupported) { + ASSERT_EQ(CXPLAT_DSCP_FROM_TOS(RecvData->TypeOfService), RecvContext->Dscp); + } else { + ASSERT_EQ(CXPLAT_DSCP_FROM_TOS(RecvData->TypeOfService), 0); + } + if (RecvData->Route->LocalAddress.Ipv4.sin_port == RecvContext->DestinationAddress.Ipv4.sin_port) { - ASSERT_EQ((CXPLAT_ECN_TYPE)RecvData->TypeOfService, RecvContext->EcnType); + ASSERT_EQ(CXPLAT_ECN_FROM_TOS(RecvData->TypeOfService), RecvContext->EcnType); - CXPLAT_SEND_CONFIG SendConfig = { RecvData->Route, 0, (uint8_t)RecvContext->EcnType, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { RecvData->Route, 0, (uint8_t)RecvContext->EcnType, 0, RecvContext->Dscp }; auto ServerSendData = CxPlatSendDataAlloc(Socket, &SendConfig); ASSERT_NE(nullptr, ServerSendData); auto ServerBuffer = CxPlatSendDataAllocBuffer(ServerSendData, ExpectedDataSize); @@ -782,9 +790,12 @@ TEST_P(DataPathTest, UdpData) UdpRecvContext RecvContext; CxPlatDataPath Datapath(&UdpRecvCallbacks); RecvContext.TtlSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_TTL); + RecvContext.DscpSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_DSCP); VERIFY_QUIC_SUCCESS(Datapath.GetInitStatus()); ASSERT_NE(nullptr, Datapath.Datapath); + RecvContext.Dscp = RecvContext.DscpSupported ? 
1 : 0; + auto unspecAddress = GetNewUnspecAddr(); CxPlatSocket Server(Datapath, &unspecAddress.SockAddr, nullptr, &RecvContext); while (Server.GetInitStatus() == QUIC_STATUS_ADDRESS_IN_USE) { @@ -803,7 +814,7 @@ TEST_P(DataPathTest, UdpData) VERIFY_QUIC_SUCCESS(Client.GetInitStatus()); ASSERT_NE(nullptr, Client.Socket); - CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_NON_ECT, 0, RecvContext.Dscp }; auto ClientSendData = CxPlatSendDataAlloc(Client, &SendConfig); ASSERT_NE(nullptr, ClientSendData); auto ClientBuffer = CxPlatSendDataAllocBuffer(ClientSendData, ExpectedDataSize); @@ -820,9 +831,12 @@ TEST_P(DataPathTest, UdpDataPolling) UdpRecvContext RecvContext; CxPlatDataPath Datapath(&UdpRecvCallbacks, nullptr, 0, &Config); RecvContext.TtlSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_TTL); + RecvContext.DscpSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_DSCP); VERIFY_QUIC_SUCCESS(Datapath.GetInitStatus()); ASSERT_NE(nullptr, Datapath.Datapath); + RecvContext.Dscp = RecvContext.DscpSupported ? 
1 : 0; + auto unspecAddress = GetNewUnspecAddr(); CxPlatSocket Server(Datapath, &unspecAddress.SockAddr, nullptr, &RecvContext); while (Server.GetInitStatus() == QUIC_STATUS_ADDRESS_IN_USE) { @@ -841,7 +855,7 @@ TEST_P(DataPathTest, UdpDataPolling) VERIFY_QUIC_SUCCESS(Client.GetInitStatus()); ASSERT_NE(nullptr, Client.Socket); - CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_NON_ECT, 0, RecvContext.Dscp }; auto ClientSendData = CxPlatSendDataAlloc(Client, &SendConfig); ASSERT_NE(nullptr, ClientSendData); auto ClientBuffer = CxPlatSendDataAllocBuffer(ClientSendData, ExpectedDataSize); @@ -857,9 +871,12 @@ TEST_P(DataPathTest, UdpDataRebind) UdpRecvContext RecvContext; CxPlatDataPath Datapath(&UdpRecvCallbacks); RecvContext.TtlSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_TTL); + RecvContext.DscpSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_DSCP); VERIFY_QUIC_SUCCESS(Datapath.GetInitStatus()); ASSERT_NE(nullptr, Datapath.Datapath); + RecvContext.Dscp = RecvContext.DscpSupported ? 
1 : 0; + auto unspecAddress = GetNewUnspecAddr(); CxPlatSocket Server(Datapath, &unspecAddress.SockAddr, nullptr, &RecvContext); while (Server.GetInitStatus() == QUIC_STATUS_ADDRESS_IN_USE) { @@ -879,7 +896,7 @@ TEST_P(DataPathTest, UdpDataRebind) VERIFY_QUIC_SUCCESS(Client.GetInitStatus()); ASSERT_NE(nullptr, Client.Socket); - CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_NON_ECT, 0, RecvContext.Dscp }; auto ClientSendData = CxPlatSendDataAlloc(Client, &SendConfig); ASSERT_NE(nullptr, ClientSendData); auto ClientBuffer = CxPlatSendDataAllocBuffer(ClientSendData, ExpectedDataSize); @@ -896,7 +913,7 @@ TEST_P(DataPathTest, UdpDataRebind) VERIFY_QUIC_SUCCESS(Client.GetInitStatus()); ASSERT_NE(nullptr, Client.Socket); - CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_NON_ECT, 0, RecvContext.Dscp }; auto ClientSendData = CxPlatSendDataAlloc(Client, &SendConfig); ASSERT_NE(nullptr, ClientSendData); auto ClientBuffer = CxPlatSendDataAllocBuffer(ClientSendData, ExpectedDataSize); @@ -914,9 +931,12 @@ TEST_P(DataPathTest, UdpDataECT0) RecvContext.EcnType = CXPLAT_ECN_ECT_0; CxPlatDataPath Datapath(&UdpRecvCallbacks); RecvContext.TtlSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_TTL); + RecvContext.DscpSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_DSCP); VERIFY_QUIC_SUCCESS(Datapath.GetInitStatus()); ASSERT_NE(nullptr, Datapath.Datapath); + RecvContext.Dscp = RecvContext.DscpSupported ? 
1 : 0; + auto unspecAddress = GetNewUnspecAddr(); CxPlatSocket Server(Datapath, &unspecAddress.SockAddr, nullptr, &RecvContext); while (Server.GetInitStatus() == QUIC_STATUS_ADDRESS_IN_USE) { @@ -935,7 +955,7 @@ TEST_P(DataPathTest, UdpDataECT0) VERIFY_QUIC_SUCCESS(Client.GetInitStatus()); ASSERT_NE(nullptr, Client.Socket); - CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_ECT_0, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_ECT_0, 0, RecvContext.Dscp }; auto ClientSendData = CxPlatSendDataAlloc(Client, &SendConfig); ASSERT_NE(nullptr, ClientSendData); auto ClientBuffer = CxPlatSendDataAllocBuffer(ClientSendData, ExpectedDataSize); @@ -951,6 +971,7 @@ TEST_P(DataPathTest, UdpShareClientSocket) UdpRecvContext RecvContext; CxPlatDataPath Datapath(&UdpRecvCallbacks); RecvContext.TtlSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_TTL); + RecvContext.DscpSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_DSCP); VERIFY_QUIC_SUCCESS(Datapath.GetInitStatus()); ASSERT_NE(nullptr, Datapath.Datapath); // TODO: Linux XDP (duonic) to support port sharing @@ -959,6 +980,8 @@ TEST_P(DataPathTest, UdpShareClientSocket) return; } + RecvContext.Dscp = RecvContext.DscpSupported ? 
1 : 0; + auto serverAddress = GetNewLocalAddr(); CxPlatSocket Server1(Datapath, &serverAddress.SockAddr, nullptr, &RecvContext); while (Server1.GetInitStatus() == QUIC_STATUS_ADDRESS_IN_USE) { @@ -984,7 +1007,7 @@ TEST_P(DataPathTest, UdpShareClientSocket) CxPlatSocket Client2(Datapath, &clientAddress, &serverAddress.SockAddr, &RecvContext, CXPLAT_SOCKET_FLAG_SHARE); VERIFY_QUIC_SUCCESS(Client2.GetInitStatus()); - CXPLAT_SEND_CONFIG SendConfig = { &Client1.Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Client1.Route, 0, CXPLAT_ECN_NON_ECT, 0, RecvContext.Dscp }; auto ClientSendData = CxPlatSendDataAlloc(Client1, &SendConfig); ASSERT_NE(nullptr, ClientSendData); auto ClientBuffer = CxPlatSendDataAllocBuffer(ClientSendData, ExpectedDataSize); @@ -996,7 +1019,7 @@ TEST_P(DataPathTest, UdpShareClientSocket) ASSERT_TRUE(CxPlatEventWaitWithTimeout(RecvContext.ClientCompletion, 2000)); CxPlatEventReset(RecvContext.ClientCompletion); - CXPLAT_SEND_CONFIG SendConfig2 = { &Client2.Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig2 = { &Client2.Route, 0, CXPLAT_ECN_NON_ECT, 0, RecvContext.Dscp }; ClientSendData = CxPlatSendDataAlloc(Client2, &SendConfig2); ASSERT_NE(nullptr, ClientSendData); ClientBuffer = CxPlatSendDataAllocBuffer(ClientSendData, ExpectedDataSize); @@ -1035,6 +1058,7 @@ TEST_P(DataPathTest, MultiBindListenerSingleProcessor) { QUIC_EXECUTION_CONFIG Config = { QUIC_EXECUTION_CONFIG_FLAG_NO_IDEAL_PROC, UINT32_MAX, 1, 0 }; CxPlatDataPath Datapath(&UdpRecvCallbacks, nullptr, 0, &Config); RecvContext.TtlSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_TTL); + RecvContext.DscpSupported = Datapath.IsSupported(CXPLAT_DATAPATH_FEATURE_DSCP); auto ServerAddress = GetNewLocalAddr(); CxPlatSocket Server1(Datapath, &ServerAddress.SockAddr, nullptr, &RecvContext); @@ -1211,7 +1235,7 @@ TEST_P(DataPathTest, TcpDataClient) ASSERT_TRUE(CxPlatEventWaitWithTimeout(ListenerContext.AcceptEvent, 500)); ASSERT_NE(nullptr, 
ListenerContext.Server); - CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Client.Route, 0, CXPLAT_ECN_NON_ECT, 0, 0 }; auto SendData = CxPlatSendDataAlloc(Client, &SendConfig); ASSERT_NE(nullptr, SendData); auto SendBuffer = CxPlatSendDataAllocBuffer(SendData, ExpectedDataSize); @@ -1256,7 +1280,7 @@ TEST_P(DataPathTest, TcpDataServer) CXPLAT_ROUTE Route = Listener.Route; Route.RemoteAddress = Client.GetLocalAddress(); - CXPLAT_SEND_CONFIG SendConfig = { &Route, 0, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Route, 0, CXPLAT_ECN_NON_ECT, 0, 0 }; auto SendData = CxPlatSendDataAlloc(ListenerContext.Server, &SendConfig); ASSERT_NE(nullptr, SendData); auto SendBuffer = CxPlatSendDataAllocBuffer(SendData, ExpectedDataSize); diff --git a/src/test/lib/QuicDrill.cpp b/src/test/lib/QuicDrill.cpp index 407f0f2caf..ccc51dcd5d 100644 --- a/src/test/lib/QuicDrill.cpp +++ b/src/test/lib/QuicDrill.cpp @@ -205,7 +205,7 @@ struct DrillSender { CxPlatSocketGetLocalAddress(Binding, &Route.LocalAddress); Route.RemoteAddress = ServerAddress; - CXPLAT_SEND_CONFIG SendConfig = { &Route, DatagramLength, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Route, DatagramLength, CXPLAT_ECN_NON_ECT, 0, 0 }; CXPLAT_SEND_DATA* SendData = CxPlatSendDataAlloc(Binding, &SendConfig); diff --git a/src/tools/attack/attack.cpp b/src/tools/attack/attack.cpp index 92428cec6f..c6da0c094d 100644 --- a/src/tools/attack/attack.cpp +++ b/src/tools/attack/attack.cpp @@ -173,7 +173,7 @@ void RunAttackRandom(CXPLAT_SOCKET* Binding, uint16_t DatagramLength, bool Valid continue; } - CXPLAT_SEND_CONFIG SendConfig = {&Route, DatagramLength, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = {&Route, DatagramLength, CXPLAT_ECN_NON_ECT, 0, 0 }; CXPLAT_SEND_DATA* SendData = CxPlatSendDataAlloc(Binding, &SendConfig); if (SendData == nullptr) { continue; @@ -288,7 +288,7 @@ void RunAttackValidInitial(CXPLAT_SOCKET* 
Binding) continue; } - CXPLAT_SEND_CONFIG SendConfig = {&Route, DatagramLength, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = {&Route, DatagramLength, CXPLAT_ECN_NON_ECT, 0, 0 }; CXPLAT_SEND_DATA* SendData = CxPlatSendDataAlloc(Binding, &SendConfig); if (SendData == nullptr) { continue; diff --git a/src/tools/lb/loadbalancer.cpp b/src/tools/lb/loadbalancer.cpp index e87ec0cc20..77be480eef 100644 --- a/src/tools/lb/loadbalancer.cpp +++ b/src/tools/lb/loadbalancer.cpp @@ -66,7 +66,7 @@ struct LbInterface { Route.LocalAddress = LocalAddress; Route.RemoteAddress = *PeerAddress; CXPLAT_SEND_DATA* Send = nullptr; - CXPLAT_SEND_CONFIG SendConfig = { &Route, MAX_UDP_PAYLOAD_LENGTH, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Route, MAX_UDP_PAYLOAD_LENGTH, CXPLAT_ECN_NON_ECT, 0, 0 }; while (RecvDataChain) { if (!Send) { Send = CxPlatSendDataAlloc(Socket, &SendConfig); diff --git a/src/tools/recvfuzz/recvfuzz.cpp b/src/tools/recvfuzz/recvfuzz.cpp index 178fb00e97..32a99246de 100644 --- a/src/tools/recvfuzz/recvfuzz.cpp +++ b/src/tools/recvfuzz/recvfuzz.cpp @@ -591,7 +591,7 @@ void sendPacket( bool fuzzing = true, TlsContext* ClientContext = nullptr) { const uint16_t DatagramLength = QUIC_MIN_INITIAL_LENGTH; - CXPLAT_SEND_CONFIG SendConfig = { &Route, DatagramLength, CXPLAT_ECN_NON_ECT, 0 }; + CXPLAT_SEND_CONFIG SendConfig = { &Route, DatagramLength, CXPLAT_ECN_NON_ECT, 0, 0 }; CXPLAT_SEND_DATA* SendData = CxPlatSendDataAlloc(Binding, &SendConfig); if (!SendData) { printf("CxPlatSendDataAlloc failed\n");