From fc505d9bf08acea3e220286cea77ee00c4543aee Mon Sep 17 00:00:00 2001 From: "Tobias S. Josefowitz" <tobij@tobij.de> Date: Wed, 2 Dec 2020 19:58:43 +0100 Subject: [PATCH] Standards.URI: Make string representation canonical again We used to simply put the raw string input URI into the sprintf_cache for the object. Unfortunately, since __hash() depends on the output of _sprintf('s') while `==() does not, this led to identical URIs with differing hash values, as well as non-canonically-formatted string representations of the URI returned from (string)uri and _sprintf('s'). --- lib/modules/Standards.pmod/URI.pike | 33 +++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/lib/modules/Standards.pmod/URI.pike b/lib/modules/Standards.pmod/URI.pike index e7fdf1b2a8..ea5176256c 100644 --- a/lib/modules/Standards.pmod/URI.pike +++ b/lib/modules/Standards.pmod/URI.pike @@ -49,12 +49,20 @@ string raw_uri; // FIXME: What about decoding of Percent-Encoding (RFC3986 2.1)? // cf pct-encoded in the functions below. +protected enum cache_advice { + DO_NOT_CACHE, + MAY_CACHE, +}; + // Parse authority component (according to RFC 1738, § 3.1) // Updated to RFC 3986 $ 3.2. // NOTE: Censors the userinfo from the @[authority] variable. 
-protected void parse_authority() +protected cache_advice parse_authority() { string host_port = authority; + int default_port; + cache_advice advice = MAY_CACHE; + // authority = [ userinfo "@" ] host [ ":" port ] if(sscanf(authority, "%[^@]@%s", string userinfo, host_port) == 2) { @@ -63,18 +71,28 @@ protected void parse_authority() DEBUG("parse_authority(): user=%O, password=%O", user, password); } if(scheme) - port = Protocols.Ports.tcp[scheme]; // Set a good default � la RFC 1700 + default_port = port = Protocols.Ports.tcp[scheme]; // Set a good default � la RFC 1700 // host = IP-literal / IPv4address / reg-name if (has_prefix(host_port, "[")) { // IP-literal = "[" ( IPv6address / IPvFuture ) "]" // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) - sscanf(host_port, "[%s]%*[:]%d", host, port); + if (sscanf(host_port, "[%s]%*[:]%d", host, port) == 3 + && port == default_port) + { + advice = DO_NOT_CACHE; + } } else { // IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet // reg-name = *( unreserved / pct-encoded / sub-delims ) - sscanf(host_port, "%[^:]%*[:]%d", host, port); + if (sscanf(host_port, "%[^:]%*[:]%d", host, port) == 3 + && port == default_port) + { + advice = DO_NOT_CACHE; + } } DEBUG("parse_authority(): host=%O, port=%O", host, port); + + return advice; } // Inherit all properties except raw_uri and base_uri from the URI uri. :-) @@ -307,12 +325,15 @@ void reparse_uri(this_program|string|void base_uri) // scheme is inherited from the base URI's scheme component. if(scheme) { + int do_cache = 1; + if(authority) - parse_authority(); + do_cache = parse_authority() == MAY_CACHE; DEBUG("Scheme found! RFC 2396, �5.2, step 3 " "says we're absolute. Done!"); - sprintf_cache['s'] = raw_uri; + if (do_cache) + sprintf_cache['s'] = raw_uri; return; } scheme = this_program::base_uri->scheme; -- GitLab