diff --git a/doc/Changelog.md b/doc/Changelog.md index 6b82a67..c9091d8 100644 --- a/doc/Changelog.md +++ b/doc/Changelog.md @@ -4,6 +4,7 @@ # git master {#master} * Add a function to get the host corresponding to a given instance. +* Fix scheme parsing in servus::URI: the scheme separator must be "://", not only ":" # Release 1.2 (02-11-2015) {#Release010200} diff --git a/servus/uri.cpp b/servus/uri.cpp index 5bd34d2..b2ff94c 100644 --- a/servus/uri.cpp +++ b/servus/uri.cpp @@ -80,7 +80,7 @@ class uri_parse : public std::exception std::stringstream _error; }; -enum URIPart { SCHEME = 0, AUTHORITY, PATH, QUERY, FRAGMENT, HIERARCHY }; +enum URIPart { SCHEME = 0, AUTHORITY, PATH, QUERY, FRAGMENT }; bool _parseURIPart( std::string& input, const URIPart& part, std::string& output ) @@ -88,12 +88,14 @@ bool _parseURIPart( std::string& input, const URIPart& part, #ifndef NDEBUG const char requireFirst[] = { 0, 0, 0, '?', '#' }; #endif - const char* const separators[] = { ":", "/?#", "?#", "#", "" }; + const char* const separators[] = { "://", "/?#", "?#", "#", "" }; const char* const disallowed[] = { "/?#", 0, 0, 0, 0 }; + const bool fullSeparator[] = { true, false, false, false, false }; const bool needsSeparator[] = { true, false, false, false, false }; const size_t skip[] = { 0, 0, 0, 1, 1 }; - const size_t postSkip[] = { 1, 0, 0, 0, 0 }; - const size_t pos = input.find_first_of( separators[part] ); + const size_t postSkip[] = { 3, 0, 0, 0, 0 }; + const size_t pos = fullSeparator[part] ? input.find( separators[part] ) + : input.find_first_of( separators[part] ); if( pos == std::string::npos ) { @@ -194,9 +196,8 @@ class URI private: URIData _uriData; - void _parseURI( const std::string& uri ) + void _parseURI( std::string input ) { - std::string input = uri; URIPart part = SCHEME; while( !input.empty( )) { @@ -214,23 +215,11 @@ class URI throw std::invalid_argument(""); } part = _uriData.scheme == "file" || _uriData.scheme.empty() ? 
- PATH : HIERARCHY; + PATH : AUTHORITY; // from http://en.wikipedia.org/wiki/File_URI_scheme: // "file:///foo.txt" is okay, while "file://foo.txt" // is not, although some interpreters manage to handle // the latter. We are "some". - if( _uriData.scheme == "file" && input.substr( 0, 2 ) == "//" ) - input = input.substr( 2 ); - break; - case HIERARCHY: - // Distinguishing from - if( input.substr( 0, 2 ) == "//" ) - { - part = AUTHORITY; - input = input.substr( 2 ); - } - else - part = PATH; break; case AUTHORITY: { @@ -279,6 +268,7 @@ URI::URI( const URI& from ) : _impl( new detail::URI( *from._impl )) { } + servus::URI::~URI() { delete _impl; diff --git a/servus/uri.h b/servus/uri.h index 33a72f5..4edf9f4 100644 --- a/servus/uri.h +++ b/servus/uri.h @@ -50,6 +50,9 @@ namespace detail { class URI; } * Queries are parsed into key-value pairs and can be accessed using * findQuery(), queryBegin() and queryEnd(). * + * A scheme is only recognized when it is followed by the separator "://". The + * bare ":" that the RFC specification allows is not treated as a separator. 
+ * * Example: @include tests/uri.cpp */ class URI diff --git a/tests/uri.cpp b/tests/uri.cpp index 6938b42..54e1200 100644 --- a/tests/uri.cpp +++ b/tests/uri.cpp @@ -53,7 +53,7 @@ BOOST_AUTO_TEST_CASE(test_uri_parts) BOOST_CHECK_EQUAL( userHostURI.getPort(), 0 ); BOOST_CHECK_EQUAL( userHostURI.getAuthority(), "alice@hostname" ); - const servus::URI uppercaseURI( "FOO:" ); + const servus::URI uppercaseURI( "FOO://" ); BOOST_CHECK_EQUAL( uppercaseURI.getScheme(), "foo" ); servus::URI noauthority( "scheme:///path" ); @@ -145,20 +145,6 @@ BOOST_AUTO_TEST_CASE(test_file_uris) BOOST_CHECK_EQUAL( file5.getScheme(), "scheme" ); BOOST_CHECK( file5.getQuery().empty( )); BOOST_CHECK( file5.getFragment().empty( )); - - servus::URI path1( "foo:/bla.txt" ); - BOOST_CHECK( path1.getHost().empty( )); - BOOST_CHECK_EQUAL( path1.getPath(), "/bla.txt" ); - BOOST_CHECK_EQUAL( path1.getScheme(), "foo" ); - BOOST_CHECK( path1.getQuery().empty( )); - BOOST_CHECK( path1.getFragment().empty( )); - - servus::URI path2( "foo:bla.txt" ); - BOOST_CHECK( path2.getHost().empty( )); - BOOST_CHECK_EQUAL( path2.getPath(), "bla.txt" ); - BOOST_CHECK_EQUAL( path2.getScheme(), "foo" ); - BOOST_CHECK( path2.getQuery().empty( )); - BOOST_CHECK( path2.getFragment().empty( )); } BOOST_AUTO_TEST_CASE(test_uri_query) @@ -210,11 +196,11 @@ BOOST_AUTO_TEST_CASE(test_uri_comparisons) BOOST_AUTO_TEST_CASE(test_invalid_uri) { - BOOST_CHECK_THROW( servus::URI uri( "bad_schema:" ), + BOOST_CHECK_THROW( servus::URI uri( "bad_schema://" ), std::exception ); - BOOST_CHECK_THROW( servus::URI uri( "8ad-schema:" ), + BOOST_CHECK_THROW( servus::URI uri( "8ad-schema://" ), std::exception ); - BOOST_CHECK_NO_THROW( servus::URI uri( "g00d-sch+ma:" )); + BOOST_CHECK_NO_THROW( servus::URI uri( "g00d-sch+ma://" )); BOOST_CHECK_THROW( servus::URI uri( "http://host:port" ), std::exception ); BOOST_CHECK_THROW( servus::URI uri( "http://host:" ), @@ -281,5 +267,24 @@ BOOST_AUTO_TEST_CASE(test_print) uri.setFragment( "fragment" 
); BOOST_CHECK_EQUAL( std::to_string( uri ), "foo://user@localhost:1024/path?key=value#fragment" ); +} +BOOST_AUTO_TEST_CASE(test_host_port_without_schema) +{ + const servus::URI uri( "host:12345" ); + BOOST_CHECK_EQUAL( uri.getHost(), "" ); + BOOST_CHECK_EQUAL( uri.getPort(), 0 ); + BOOST_CHECK_EQUAL( uri.getPath(), "host:12345" ); + + servus::URI uri2; + uri2.setHost( "host" ); + uri2.setPort( 12345 ); + BOOST_CHECK( uri2.getScheme().empty( )); + BOOST_CHECK_EQUAL( uri2.getHost(), "host" ); + BOOST_CHECK_EQUAL( uri2.getPort(), 12345 ); + + const servus::URI uri3( uri2 ); + BOOST_CHECK( uri3.getScheme().empty( )); + BOOST_CHECK_EQUAL( uri3.getHost(), "host" ); + BOOST_CHECK_EQUAL( uri3.getPort(), 12345 ); }