summaryrefslogtreecommitdiff
path: root/include/crawler/URL.hpp
blob: 255a2dbb2f0dd11a2a44cec4d4ff164daa60aedf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#ifndef __URL_H
#define __URL_H

#include "CrawlerExportable.hpp"

#include <string>
#include <iostream>
#include <sstream>

using namespace std;

/**
 * Value type holding the components of a URL: protocol (scheme), host, port,
 * path, query and fragment.
 *
 * The class does no parsing or validation; it stores exactly what it is given.
 * The textual form is produced by the stream insertion operator declared as a
 * friend below, and str() and all comparison operators are built on that form.
 */
class URL {
	protected:
		std::string m_protocol;   // scheme without "://", e.g. "http"
		std::string m_host;
		unsigned short m_port;
		std::string m_path;
		std::string m_query;      // stored without the leading '?'
		std::string m_fragment;   // stored without the leading '#'

	public:
		/** Creates an empty URL: every string component is empty and the port is 0. */
		URL( )
			: m_port( 0 )
		{
		}

		// Copy construction and copy assignment are deliberately left to the
		// compiler. The implicit versions perform the same member-wise copy a
		// hand-written pair would, and not declaring them keeps the implicit
		// move operations available under C++11 and later.

		/**
		 * Creates a URL from its individual components. The values are stored
		 * as given, without validation or normalisation.
		 *
		 * @param _protocol scheme name, e.g. "http"
		 * @param _host     host name or address
		 * @param _port     port number
		 * @param _path     path component
		 * @param _query    query string without the leading '?'
		 * @param _fragment fragment without the leading '#'
		 */
		URL( const std::string& _protocol, const std::string& _host, const unsigned short _port, const std::string& _path, const std::string& _query, const std::string& _fragment )
			: m_protocol( _protocol ), m_host( _host ), m_port( _port ), m_path( _path ), m_query( _query ), m_fragment( _fragment )
		{
		}

		/** @return a copy of the protocol (scheme) component. */
		const std::string protocol( ) const
		{
			return m_protocol;
		}

		/** @return a copy of the host component. */
		const std::string host( ) const
		{
			return m_host;
		}

		/** @return the stored port number. */
		unsigned short port( ) const
		{
			return m_port;
		}

		/** @return a copy of the path component. */
		const std::string path( ) const
		{
			return m_path;
		}

		/** @return a copy of the query component. */
		const std::string query( ) const
		{
			return m_query;
		}

		/** @return a copy of the fragment component. */
		std::string fragment( ) const
		{
			return m_fragment;
		}

		/**
		 * Serialises the URL by streaming it into a string.
		 *
		 * @return the text written by operator<<; this is the empty string
		 *         when the protocol is empty.
		 */
		std::string str( ) const
		{
			std::ostringstream os;
			os << *this;
			return os.str( );
		}

		// Declared here, defined outside this header.
		// NOTE(review): presumably a sentinel "no URL" value - confirm at the definition.
		static URL CRAWLER_DLL_VISIBLE Null;

		// All comparisons operate on the serialised form returned by str().
		// Consequences: every URL with an empty protocol serialises to "" and
		// therefore compares equal to every other such URL, and components
		// that are not written to the stream do not take part in comparison.

		/** @return true when the serialised forms differ. */
		bool operator!=( const URL &other ) const
		{
			return !( *this == other );
		}

		/** @return true when the serialised forms are identical. */
		bool operator==( const URL &other ) const
		{
			return( str( ) == other.str( ) );
		}

		/** Lexicographic ordering of the serialised forms. */
		bool operator<( const URL &other ) const
		{
			return( str( ) < other.str( ) );
		}

		// This declaration also serves as the forward declaration that lets
		// str() above find the operator, which is defined after the class.
		template< typename CharT, typename TraitsT > friend
			std::basic_ostream<CharT, TraitsT>& operator<<( std::basic_ostream<CharT, TraitsT>&s, const URL& u );

		/**
		 * Looks up the well-known port of a protocol.
		 *
		 * @param p protocol name; compared case-sensitively
		 * @return 80 for "http", 443 for "https", 21 for "ftp", and 0 for
		 *         anything else
		 */
		static unsigned short defaultPort( const std::string& p )
		{
			if( p == "http" )	return 80;
			if( p == "https" )	return 443;
			if( p == "ftp" )	return 21;
			return 0;
		}
};

template< typename CharT, typename TraitsT >
inline basic_ostream<CharT, TraitsT>& operator<<( basic_ostream<CharT, TraitsT>&s, const URL& u ) {
	if( u.protocol( ).empty( ) ) {
		return s;
	}
	
	s << u.protocol( ) << "://" << u.host( );

	if( u.port( ) != URL::defaultPort( u.protocol( ) ) ) {
		s << ":" << u.port( );
	}
	
	s << u.path( );
	
	if( !u.query( ).empty( ) ) {
		s << "?" << u.query( );
	}
	
	if( !u.fragment( ).empty( ) ) {
		 s << "#" << u.fragment( );
	}
	
	return s;
}

#endif