summaryrefslogtreecommitdiff
path: root/src/modules/urlnormalizer/googleurl/GoogleURLNormalizer.cpp
blob: 734afea56d23fc0a613c3715d45c95e32780f873 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include "GoogleURLNormalizer.hpp"

//#include <string>

#include "url_util.h"
#include "url_canon_stdstring.h"
#include "url_parse.h"

#ifdef WITH_LUA
#include "tolua.h"
#include "GoogleURLNormalizerLua.hpp"
#include "LuaVM.hpp"
#endif

using namespace std;
using namespace url_util;
using namespace url_canon;
using namespace url_parse;

// Constructs the normalizer. The only work done here is the call to
// Initialize( ).
// NOTE(review): Initialize is not declared in this file - presumably it is
// url_util::Initialize from googleurl (reachable through the using-directive
// for url_util) or a member declared in the header; confirm which.
GoogleURLNormalizer::GoogleURLNormalizer( )
{
	Initialize( );
}

// Destroys the normalizer. The only work done here is the call to
// Shutdown( ), the counterpart of the Initialize( ) call in the constructor.
// NOTE(review): Shutdown is not declared in this file - presumably it is
// url_util::Shutdown from googleurl or a member declared in the header;
// confirm which, and whether it is safe with several live instances.
GoogleURLNormalizer::~GoogleURLNormalizer( )
{
	Shutdown( );
}

// Returns the text of one parsed URL component.
//
// s    - the string the component offsets refer to
// comp - component describing a range of s through its begin and len fields
//
// Yields the comp.len characters of s starting at comp.begin, or an empty
// string when comp.len is zero or negative.
static string componentString( const string &s, const Component &comp )
{
	const bool hasText = comp.len > 0;
	return hasText ? s.substr( comp.begin, comp.len ) : string( );
}

// Canonicalizes the URL text in s and splits the canonical form into its
// components.
//
// Returns URL::Null when Canonicalize reports failure. Otherwise returns a
// URL built from the scheme, host, port, path and query of the canonical
// form; the last constructor argument is always passed as an empty string.
// When the canonical form carries no port component, the port is taken from
// URL::defaultPort for the scheme.
URL GoogleURLNormalizer::parseUrl( const string s )
{
	string spec;
	spec.reserve( s.size( ) + 32 );
	StdStringCanonOutput sink( &spec );
	Parsed parts;

	if( !Canonicalize( s.data( ), static_cast<int>( s.length( ) ),
		NULL, &sink, &parts ) ) {
		return URL::Null;
	}
	// Complete( ) has to run before spec is read.
	sink.Complete( );

	unsigned short port;
	if( parts.port.len < 0 ) {
		// no port component: fall back to the default of the scheme
		port = URL::defaultPort( componentString( spec, parts.scheme ) );
	} else {
		const string portText = componentString( spec, parts.port );
		port = static_cast<unsigned short>( atoi( portText.c_str( ) ) );
	}

	return URL(
		componentString( spec, parts.scheme ),
		componentString( spec, parts.host ),
		port,
		componentString( spec, parts.path ),
		componentString( spec, parts.query ),
		"" );
}

// Resolves the (possibly relative) reference s against the base URL url and
// splits the resulting canonical URL into its components.
//
// url - base URL; its string form ( url.str( ) ) is canonicalized first
// s   - reference to resolve against the base
//
// Returns URL::Null when either the canonicalization of the base or the
// resolution of s fails. Otherwise returns a URL built from the scheme,
// host, port, path and query of the resolved URL; the last constructor
// argument is always passed as an empty string. When the resolved URL
// carries no port component, the port is taken from URL::defaultPort for
// the scheme.
URL GoogleURLNormalizer::normalize( const URL url, const string s )
{
	// Step 1: canonicalize the base URL.
	string urlstr = url.str( );
	string urlCanonical;
	urlCanonical.reserve( urlstr.size( ) + 32 );
	StdStringCanonOutput urlOutput( &urlCanonical );
	Parsed urlParsed;
	bool success = Canonicalize(
		urlstr.data( ), static_cast<int>( urlstr.length( ) ),
		NULL, &urlOutput, &urlParsed );
	if( !success ) {
		return URL::Null;
	}
	// Complete( ) has to run before urlCanonical is read.
	urlOutput.Complete( );

	// Step 2: resolve s against the canonical base.
	// urlParsed holds offsets into the canonical output written by
	// Canonicalize, so the base spec handed to ResolveRelative must be
	// urlCanonical. Passing the raw urlstr would pair the offsets with the
	// wrong buffer whenever canonicalization changed the text.
	string canonical;
	canonical.reserve( urlCanonical.size( ) + s.size( ) + 32 );
	StdStringCanonOutput output( &canonical );
	Parsed parsed;
	success = ResolveRelative(
		urlCanonical.data( ), static_cast<int>( urlCanonical.length( ) ), urlParsed,
		s.data( ), static_cast<int>( s.length( ) ),
		NULL, &output, &parsed );
	if( !success ) {
		return URL::Null;
	}
	output.Complete( );

	// Step 3: pick the explicit port if present, else the scheme default.
	unsigned short port;
	if( parsed.port.len >= 0 ) {
		port = static_cast<unsigned short>( atoi(
			componentString( canonical, parsed.port ).c_str( ) ) );
	} else {
		port = URL::defaultPort(
			componentString( canonical, parsed.scheme ) );
	}

	return URL(	componentString( canonical, parsed.scheme ),
			componentString( canonical, parsed.host ),
			port,
			componentString( canonical, parsed.path ),
			componentString( canonical, parsed.query ),
			"" );
}

// Module initialisation hook passed to REGISTER_MODULE.
//
// When built with WITH_LUA, user_data is treated as a pointer to a LuaVM and
// the tolua bindings for GoogleURLNormalizer are opened on its handle.
// Without WITH_LUA the function does nothing.
static void initModule( void *user_data )
{
#ifdef WITH_LUA
	tolua_GoogleURLNormalizer_open( static_cast<LuaVM *>( user_data )->handle( ) );
#endif
}

// Module teardown hook passed to REGISTER_MODULE. Intentionally empty: the
// argument is ignored and nothing is released here.
static void destroyModule( void * /* user_data */ )
{
}

// Hands the module name "google_urlnormalizer", the init/destroy hooks and
// the URLNormalizer / GoogleURLNormalizer types to REGISTER_MODULE.
// NOTE(review): the macro is defined outside this file - presumably it
// registers GoogleURLNormalizer as a URLNormalizer implementation under that
// name; confirm against the macro definition.
REGISTER_MODULE( "google_urlnormalizer", &initModule, &destroyModule, URLNormalizer, GoogleURLNormalizer )