diff options
Diffstat (limited to 'include/crawler/URL.hpp')
-rwxr-xr-x | include/crawler/URL.hpp | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/include/crawler/URL.hpp b/include/crawler/URL.hpp new file mode 100755 index 0000000..255a2db --- /dev/null +++ b/include/crawler/URL.hpp @@ -0,0 +1,140 @@ +#ifndef __URL_H +#define __URL_H + +#include "CrawlerExportable.hpp" + +#include <string> +#include <iostream> +#include <sstream> + +using namespace std; + +class URL { + protected: + string m_protocol; + string m_host; + unsigned short m_port; + string m_path; + string m_query; + string m_fragment; + + public: + URL( ) + : m_protocol( "" ), m_host( "" ), m_port( 0 ), m_path( "" ), m_query( "" ), m_fragment( "" ) + { + } + + URL( const URL& url ) + : m_protocol( url.m_protocol ), m_host( url.m_host ), m_port( url.m_port ), m_path( url.m_path ), m_query( url.m_query ), m_fragment( url.m_fragment ) + { + } + + URL( const std::string _protocol, const std::string _host, const unsigned short _port, const std::string _path, const std::string _query, const std::string _fragment ) + : m_protocol( _protocol ), m_host( _host ), m_port( _port ), m_path( _path ), m_query( _query ), m_fragment( _fragment ) + { + } + + URL& operator=( const URL& u ) { + if( this != &u ) { + this->m_protocol = u.m_protocol; + this->m_port = u.m_port; + this->m_host = u.m_host; + this->m_path = u.m_path; + this->m_query = u.m_query; + this->m_fragment = u.m_fragment; + } + return *this; + } + + const string protocol( ) const + { + return m_protocol; + } + + const string host( ) const + { + return m_host; + } + + unsigned short port( ) const + { + return m_port; + } + + const string path( ) const + { + return m_path; + } + + const string query( ) const + { + return m_query; + } + + std::string fragment( ) const + { + return m_fragment; + } + + std::string str( ) const + { + std::ostringstream os; + os << *this; + return os.str( ); + } + + static URL CRAWLER_DLL_VISIBLE Null; + + bool operator!=( const URL &other ) const + { + return( str( ) != other.str( ) ); + } + + bool operator==( const URL &other ) const + { + return( str( ) == other.str( ) ); + } + + bool operator<( const URL &other ) const + { + return( str( ) < other.str( ) ); + } + + template< typename CharT, typename TraitsT > friend + basic_ostream<CharT, TraitsT>& operator<<( basic_ostream<CharT, TraitsT>&s, const URL& u ); + + static unsigned short defaultPort( const std::string p ) + { + if( p == "http" ) return 80; + else if( p == "https" ) return 443; + else if( p == "ftp" ) return 21; + else return 0; + } +}; + +template< typename CharT, typename TraitsT > +inline basic_ostream<CharT, TraitsT>& operator<<( basic_ostream<CharT, TraitsT>&s, const URL& u ) { + if( u.protocol( ).empty( ) ) { + return s; + } + + s << u.protocol( ) << "://" << u.host( ); + + if( u.port( ) != URL::defaultPort( u.protocol( ) ) ) { + s << ":" << u.port( ); + } + + s << u.path( ); + + if( !u.query( ).empty( ) ) { + s << "?" << u.query( ); + } + + if( !u.fragment( ).empty( ) ) { + s << "#" << u.fragment( ); + } + + return s; +} + +#endif |