#!/usr/bin/perl ############### # Usage: # # uri_test [ -s ] base-URI # # where base-URI is a hierarchical URI in absolute form and the -s option # indicates that we should behave strictly for same-scheme references. # # This is a simple test program that uses a non-validating URI reference # parser to input a sequence of URI references and, for each one, # # 1. parse the reference into its component parts # 2. resolve the reference relative to the base URI argument # 3. recombine and print the resolved target URI # # continuing until end-of-file is received via stdin. # # This software is placed in the Public Domain by Roy T. Fielding # and Day Software, Inc., for use as a test oracle by anyone that # is testing or implementing a Uniform Resource Identifier parser. # # More info: http://gbiv.com/protocols/uri/rev-2002/issues.html # ############### $strict = 0; # set to 1 if you want to reject bad references $base_uri = shift; if ($base_uri eq '-s') { $strict = 1; $base_uri = shift; } die "Usage: $0 [-s] base-URI\n" unless defined($base_uri); ($b_scheme, $b_auth, $b_path, $b_query, $b_frag) = &parse_uri($base_uri); print<<"END"; Base URI: $base_uri scheme: $b_scheme authority: $b_auth path: $b_path query: $b_query fragment: $b_frag END print "Enter a URI reference (^D exits): "; while ($ref = ) { chomp($ref); ($r_scheme, $r_auth, $r_path, $r_query, $r_frag) = &parse_uri($ref); print<<"END"; Reference: $ref scheme: $r_scheme authority: $r_auth path: $r_path query: $r_query fragment: $r_frag END print "Target URI: ", &abs_uri($b_scheme, $b_auth, $b_path, $b_query, $b_frag, $r_scheme, $r_auth, $r_path, $r_query, $r_frag), "\n"; } continue { print "\nEnter a URI reference (^D exits): "; } print "done\n"; exit(0); # A component parser based on the regular expression in rfc2396bis # sub parse_uri { local($_) = @_; if ( m|^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?| ) { return ($2, $4, $5, $7, $9); } return ('this', 'should', 'never', 'happen', undef); } # Resolve a URI reference relative to a base URI to obtain target URI # sub abs_uri { local($b_scheme, $b_auth, $b_path, $b_query, $b_frag, $r_scheme, $r_auth, $r_path, $r_query, $r_frag) = @_; local($t_scheme, $t_auth, $t_path, $t_query, $t_frag); if (!$strict && ($r_scheme eq $b_scheme)) { undef($r_scheme); } if (defined($r_scheme)) { $t_scheme = $r_scheme; $t_auth = $r_auth; $t_path = &remove_dot_segments($r_path); $t_query = $r_query; } else { if (defined($r_auth)) { $t_auth = $r_auth; $t_path = &remove_dot_segments($r_path); $t_query = $r_query; } else { if ($r_path =~ /^$/) { $t_path = $b_path; if (defined($r_query)) { $t_query = $r_query; } else { $t_query = $b_query; } } elsif ($r_path =~ /^\//) { $t_path = &remove_dot_segments($r_path); $t_query = $r_query; } else { $t_path = &path_merge($b_path, $r_path); $t_path = &remove_dot_segments($t_path); $t_query = $r_query; } $t_auth = $b_auth; } $t_scheme = $b_scheme; } $t_frag = $r_frag; return &recompose($t_scheme, $t_auth, $t_path, $t_query, $t_frag); } # Merge a relative-path reference's path with the base URI's path # sub path_merge { local($bpath, $rpath) = @_; if ($bpath eq "") { # base path is empty return "/$rpath"; } $bpath =~ s/[^\/]*$//; # remove last base path segment return ($bpath . $rpath); # and return with ref path appended } # Remove "." and ".." segments from a reference's path. # # Note that the use of below refers to a complete path segment, # bounded by "/" or the beginning or end of the buffer, that may be empty. # sub remove_dot_segments { local($_) = @_; local($buf) = ""; while ($_) { # remove any prefix of "../" or "./" # next if s/^\.\.?\///; # replace any prefix segment of "/./" or "/." with "/" # next if s/^\/\.(\/|$)/\//; # replace any prefix segment of "/../" or "/.." with "/" # and remove the last segment added to buffer (if any) # if (s/^\/\.\.(\/|$)/\//) { $buf =~ s/\/?[^\/]*$//; next; } # remove a trailing dot-segment if nothing else is left last if s/^\.\.?$//; # otherwise, remove the first segment and append it to buffer # s/^(\/?[^\/]*)//; $buf .= $1; } return $buf; } # Given a parsed URI reference, recompose the components into a URI reference # sub recompose { local($t_scheme, $t_auth, $t_path, $t_query, $t_frag) = @_; local($result) = ""; if (defined($t_scheme)) { $result .= $t_scheme . ':'; } if (defined($t_auth)) { $result .= '//' . $t_auth; } $result .= $t_path; if (defined($t_query)) { $result .= "?" . $t_query; } if (defined($t_frag)) { $result .= "#" . $t_frag; } return $result; }