Handle back-slashes and leading whitespace in sanitizeUrl, and validate
http(s) URLs with the URL parser.

Hunk line counts below are recomputed for the hand-edited hunks. The "index"
blob-hash headers are left out because the new-side hashes are no longer
valid once hunks are edited by hand.

diff --git a/CHANGELOG.md b/CHANGELOG.md
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # CHANGELOG
 
+## UNRELEASED
+
+- Replace back-slashes with forward slashes in URLs that carry a scheme
+- Ignore leading whitespace when detecting the URL scheme
+- Parse and normalize `http:` and `https:` URLs (e.g. `https://example.com` becomes `https://example.com/`); unparsable ones are replaced with the blank URL
+
 ## 7.0.4
 
 - Updates get-func-name to 2.0.2
diff --git a/src/__tests__/index.test.ts b/src/__tests__/index.test.ts
--- a/src/__tests__/index.test.ts
+++ b/src/__tests__/index.test.ts
@@ -16,7 +16,7 @@ describe("sanitizeUrl", () => {
   });
 
-  it("does not alter https URLs with alphanumeric characters", () => {
-    expect(sanitizeUrl("https://example.com")).toBe("https://example.com");
+  it("appends a trailing slash to https URLs without a path", () => {
+    expect(sanitizeUrl("https://example.com")).toBe("https://example.com/");
   });
 
   it("does not alter https URLs with ports with alphanumeric characters", () => {
@@ -147,6 +147,28 @@ describe("sanitizeUrl", () => {
     });
   });
 
+  it("control character prefixed attack vectors", () => {
+    const attackVectors = [
+      "\fjavascript:alert()",
+      "\vjavascript:alert()",
+      "\tjavascript:alert()",
+      "\njavascript:alert()",
+      "\rjavascript:alert()",
+      "\u0000javascript:alert()",
+      "\u0001javascript:alert()",
+    ];
+
+    attackVectors.forEach((vector) => {
+      expect(sanitizeUrl(vector)).toBe(BLANK_URL);
+    });
+  });
+
+  it("reverses backslashes", () => {
+    const attack = "\\j\\av\\a\\s\\cript:alert()";
+
+    expect(sanitizeUrl(attack)).toBe("/j/av/a/s/cript:alert()");
+  });
+
   describe("invalid protocols", () => {
     describe.each(["javascript", "data", "vbscript"])("%s", (protocol) => {
       it(`replaces ${protocol} urls with ${BLANK_URL}`, () => {
diff --git a/src/index.ts b/src/index.ts
--- a/src/index.ts
+++ b/src/index.ts
@@ -21,6 +21,21 @@ function decodeHtmlCharacters(str: string) {
   });
 }
 
+/**
+ * Reports whether `url` can be parsed by the WHATWG `URL` constructor.
+ *
+ * Uses the constructor rather than `URL.canParse`, which is missing from
+ * older browsers and Node.js releases.
+ */
+function isValidUrl(url: string): boolean {
+  try {
+    new URL(url);
+    return true;
+  } catch {
+    return false;
+  }
+}
+
 function decodeURI(uri: string): string {
   try {
     return decodeURIComponent(uri);
@@ -36,8 +51,9 @@ export function sanitizeUrl(url?: string): string {
   if (!url) {
     return BLANK_URL;
   }
+
   let charsToDecode;
-  let decodedUrl = decodeURI(url);
+  let decodedUrl = decodeURI(url.trim());
 
   do {
     decodedUrl = decodeHtmlCharacters(decodedUrl)
@@ -54,7 +70,9 @@ export function sanitizeUrl(url?: string): string {
       decodedUrl.match(htmlCtrlEntityRegex) ||
       decodedUrl.match(whitespaceEscapeCharsRegex);
   } while (charsToDecode && charsToDecode.length > 0);
+
   const sanitizedUrl = decodedUrl;
+
   if (!sanitizedUrl) {
     return BLANK_URL;
   }
@@ -63,17 +81,36 @@ export function sanitizeUrl(url?: string): string {
     return sanitizedUrl;
   }
 
-  const urlSchemeParseResults = sanitizedUrl.match(urlSchemeRegex);
+  // Remove any leading whitespace before checking the URL scheme
+  const trimmedUrl = sanitizedUrl.trimStart();
+  const urlSchemeParseResults = trimmedUrl.match(urlSchemeRegex);
 
   if (!urlSchemeParseResults) {
     return sanitizedUrl;
   }
 
-  const urlScheme = urlSchemeParseResults[0];
+  const urlScheme = urlSchemeParseResults[0].toLowerCase().trim();
 
   if (invalidProtocolRegex.test(urlScheme)) {
     return BLANK_URL;
   }
 
-  return sanitizedUrl;
+  const backSanitized = trimmedUrl.replace(/\\/g, "/");
+
+  // Handle special cases for mailto: and custom deep-link protocols
+  if (urlScheme === "mailto:" || urlScheme.includes("://")) {
+    return backSanitized;
+  }
+
+  // For http and https URLs, perform additional validation
+  if (urlScheme === "http:" || urlScheme === "https:") {
+    if (!isValidUrl(backSanitized)) {
+      return BLANK_URL;
+    }
+
+    // Serializing through URL already lower-cases the scheme and host
+    return new URL(backSanitized).toString();
+  }
+
+  return backSanitized;
 }