diff options
author | Andreas Baumann <abaumann@yahoo.com> | 2013-04-16 17:14:03 +0200 |
---|---|---|
committer | Andreas Baumann <abaumann@yahoo.com> | 2013-04-16 17:14:03 +0200 |
commit | 80208e2c59fc426e2dc2ea4c9af2fa7f064a3331 (patch) | |
tree | 1d867efe5635b9da67182ae3fc6bc96cecb82d2c | |
parent | 763274ef0019017b45969b4d4e827a13784edd14 (diff) | |
download | pgfuse-80208e2c59fc426e2dc2ea4c9af2fa7f064a3331.tar.gz pgfuse-80208e2c59fc426e2dc2ea4c9af2fa7f064a3331.tar.bz2 |
added some OIDs to path code for the blocks free operation
-rw-r--r-- | DEVELOPERS | 64 | ||||
-rw-r--r-- | INSTALL | 4 | ||||
-rw-r--r-- | TODO | 80 | ||||
-rw-r--r-- | config.h | 4 | ||||
-rw-r--r-- | pgfuse.c | 6 | ||||
-rw-r--r-- | pgsql.c | 162 | ||||
-rw-r--r-- | pgsql.h | 2 |
7 files changed, 246 insertions, 76 deletions
@@ -7,6 +7,7 @@ Internal documentation for developers Directory tree in database Transaction Policies Self-containment + StatFS statistics Testing References @@ -124,10 +125,71 @@ a certain timeout when the database doesn't appear again? EIO seems a good option (as if the disk would have temporary I/O problems). +StatFS statistics +----------------- + +Per se hard to measure, database can span many machines, disks, etc. +The information about disk usage is intentionally hidden from the user! + +Nevertheless we can get some usable data for some scenarios, mainly +if things run on a single machine with several partitions (or SANs). + +A) What to measure? + +Calculate the size of: +a) virtual things like blocks and inodes (that's the way we went) +b) get physical disk usage data from the database directly + +B) Detect location of tables on disk + +a) environment variable PGDATA + Is only really set for the postgresql startup script and for the + account service as DBA (postgres), we should maybe not assume + wrong setups for normal users.. 
+b) standard location (probed) + $PGDATA/base + show data_directory; + select setting from pg_settings where name = 'data_directory'; + /var/lib/postgres/data + ERROR: must be superuser to examine "data_directory" + Can't do this, as the db user must be underprivileged: +c) When the DBA creates dedicated tablespaces, then there is no problem + +This is a rough sketch of the algorithm: + +1) Get a list of oids containing the tablespaces of PgFuse tables and indexes + + select distinct reltablespace FROM pg_class WHERE relname in ( 'dir', 'data', 'data_dir_id_idx', 'data_block_no_idx', 'dir_parent_id_idx' ); + + [0,55877] + + If there is a '0' in this list, replace it with the OID of the default tablespace: + + select dattablespace from pg_database where datname=current_database(); + + [55025,55877] + +2) Get table space locations (version dependent) + + select spclocation from pg_tablespace where oid = 55025; + >= 9.2 + select pg_tablespace_location('55025'); + + ["/media/sd/test"] + + or we get nothing, in this case, the tablespace resides in PGDATA. + We assume nobody makes symlinks there to point to other disks! So + we add $PGDATA to the list of directories + +3) Resolve list of paths containing the relevant tablespaces to + the list of entries in /etc/mtab (getmntent_r), unique it, + then use 'statfs' to retrieve the data, eventually take the + minimum, if there are many. + Testing ------- -The makefile contains some basic functionallity tests (mostly using +The makefile contains some basic functionality tests (mostly using commands of the shell). bonnie is a good stress and performance tester. 
Don't despair because @@ -26,10 +26,6 @@ Installation psql -U someuser somedb < schema.sql -* Create special function (as PostgreSQL superuser) - - psql somedb < function.sql - * Mount the FUSE filesystem pgfuse "user=someuser dbname=somedb" <mount point> @@ -2,64 +2,34 @@ TODO list (in order of priority) --------- - integrate statfs patch: - - calculating the size: - a) virtual things like real blocks and inodes - b) select physical things in db: - http://www.postgresql.org/docs/current/interactive/functions-admin.html#FUNCTIONS-ADMIN-DBSIZE - - Can't do this, as the db user must be underprivileged: - show data_directory; - /var/lib/postgres/data - ERROR: must be superuser to examine "data_directory" - This is for objects in the default tablespace - 1) get list of oids containing the data of the tables/indexes - SELECT oid FROM pg_class WHERE relname = 'dir' AND relkind = 'r'; - select 'dir'::regclass::oid; - 3) use default data dir or tablespace location to resolve the path - SELECT reltablespace FROM pg_class WHERE relname = 'dir' AND relkind = 'r'; - => 0, means default tablespace of the database, so we have to ask - the database: - datname? 
- - - select datname,dattablespace from pg_database; - select spclocation from pg_tablespace where oid = - >= 9.2 - select pg_tablespace_location('55025'); - - other ways: + +The real df -k: - explicit tablespace in create table: - SELECT d.datname as "Name", - r.rolname as "Owner", - pg_catalog.pg_encoding_to_char(d.encoding) as "Encoding", - pg_catalog.shobj_description(d.oid, 'pg_database') as "Description", - t.spcname as "Tablespace" -FROM pg_catalog.pg_database d - JOIN pg_catalog.pg_roles r ON d.datdba = r.oid - JOIN pg_catalog.pg_tablespace t on d.dattablespace = t.oid -ORDER BY 1; +#include <mntent.h> +#include <stdio.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/vfs.h> +#include <unistd.h> +int main(void) { + FILE* mtab = setmntent("/etc/mtab", "r"); + struct mntent* m; + struct mntent mnt; + char strings[4096]; + while ((m = getmntent_r(mtab, &mnt, strings, sizeof(strings)))) { + struct statfs fs; + if ((mnt.mnt_dir != NULL) && (statfs(mnt.mnt_dir, &fs) == 0)) { + unsigned long long int size = fs.f_blocks * fs.f_bsize; + unsigned long long int free = fs.f_bfree * fs.f_bsize; + unsigned long long int avail = fs.f_bavail * fs.f_bsize; + printf("%s %s size=%lld free=%lld avail=%lld\n", + mnt.mnt_fsname, mnt.mnt_dir, size, free, avail); + } + } - --- COALESCE(TS.spclocation, XS.spclocation, '') AS "location" - SELECT N.nspname || '.' 
|| C.relname AS "relation", - CASE WHEN reltype = 0 - THEN pg_size_pretty(pg_total_relation_size(C.oid)) || ' (index)' - ELSE pg_size_pretty(pg_total_relation_size(C.oid)) || ' (' || pg_size_pretty(pg_relation_size(C.oid)) || ' data)' - END AS "size (data)", - COALESCE(T.tablespace, I.tablespace, '') AS "tablespace" -FROM pg_class C -LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) -LEFT JOIN pg_tables T ON (T.tablename = C.relname) -LEFT JOIN pg_indexes I ON (I.indexname = C.relname) -LEFT JOIN pg_tablespace TS ON TS.spcname = T.tablespace -LEFT JOIN pg_tablespace XS ON XS.spcname = I.tablespace -WHERE nspname NOT IN ('pg_catalog','pg_toast','information_schema') -ORDER BY pg_total_relation_size(C.oid) DESC; - - -default case: -$PGDATA/base + endmntent(mtab); +} - no Perl, Shell, Posix df dependency - handling of most file system metadata @@ -35,4 +35,8 @@ #define MAX_DB_CONNECTIONS 8 +/* maximum number of tablespaces, used for free blocks calculation */ + +#define MAX_TABLESPACE_OIDS 16 + #endif @@ -953,7 +953,7 @@ static int pgfuse_statfs( const char *path, struct statvfs *buf ) /* blocks */ - blocks_free = psql_get_fs_blocks_free( conn ); + blocks_free = psql_get_fs_blocks_free( conn, data->verbose ); if( blocks_free < 0 ) { PSQL_ROLLBACK( conn ); RELEASE( conn ); return -blocks_free; @@ -970,7 +970,7 @@ static int pgfuse_statfs( const char *path, struct statvfs *buf ) /* inodes */ - files_free = psql_get_fs_blocks_free( conn ); + files_free = psql_get_fs_files_free( conn ); if( files_free < 0 ) { PSQL_ROLLBACK( conn ); RELEASE( conn ); return -files_free; @@ -986,7 +986,7 @@ static int pgfuse_statfs( const char *path, struct statvfs *buf ) files_avail = files_free; if( data->verbose ) { - syslog( LOG_ERR, "Stats for '%s' are (%jd blocks total, %jd used, %jd free, " + syslog( LOG_DEBUG, "Stats for '%s' are (%jd blocks total, %jd used, %jd free, " "%jd files total, %jd files used, %jd files free, thread #%u", data->mountpoint, blocks_total, blocks_used, 
blocks_free, @@ -933,8 +933,16 @@ int64_t psql_get_fs_blocks_used( PGconn *conn ) { PGresult *res; char *data; - size_t used; + int64_t used; + /* we calculate the number of blocks occuppied by all data entries + * plus all "indoes" (in our case entries in dir), + * more like a filesystem would do it. Returning blocks as this is + * harder to overflow a size_t (in case it's 32-bit, modern + * systems shouldn't care). It's not so fast though, otherwise we + * must consider a 'stats' table which is periodically updated + * (not constantly in order to avoid a hot-spot in the database!) + */ res = PQexec( conn, "SELECT (SELECT COUNT(*) FROM data) + (SELECT COUNT(*) FROM dir)" ); if( PQresultStatus( res ) != PGRES_TUPLES_OK ) { syslog( LOG_ERR, "Error in psql_get_fs_blocks_used: %s", PQerrorMessage( conn ) ); @@ -942,12 +950,6 @@ int64_t psql_get_fs_blocks_used( PGconn *conn ) return -EIO; } - /* we calculate the number of blocks occuppied by all data entries - * plus all "indoes" (in our case entries in dir), - * more like a filesystem would do it. Returning blocks as this is - * harder to overflow a size_t (in case it's 32-bit, modern - * systems shouldn't care). 
It's slower though - */ data = PQgetvalue( res, 0, 0 ); used = atoi( data ); @@ -956,16 +958,148 @@ int64_t psql_get_fs_blocks_used( PGconn *conn ) return used; } -int64_t psql_get_fs_blocks_free( PGconn *conn ) +static int get_default_tablespace( PGconn *conn, int verbose ) { - return 9999; + PGresult *res; + char *data; + int oid; + + res = PQexec( conn, "select dattablespace::int4 from pg_database where datname=current_database( )" ); + + if( PQresultStatus( res ) != PGRES_TUPLES_OK ) { + syslog( LOG_ERR, "Error in get_default_tablespace: %s", PQerrorMessage( conn ) ); + PQclear( res ); + return -EIO; + } + + data = PQgetvalue( res, 0, 0 ); + oid = atoi( data ); + + if( verbose ) { + syslog( LOG_DEBUG, "Free blocks calculation, seen default tablespace is OID %d", oid ); + } + + PQclear( res ); + + return oid; +} + +static char *get_tablespace_location( PGconn *conn, const int oid, int verbose ) +{ + PGresult *res; + int param1 = htonl( oid ); + const char *values[1] = { (const char *)&param1 }; + int lengths[1] = { sizeof( param1 ) }; + int binary[1] = { 1 }; + char *data; + int version; + + version = PQserverVersion( conn ); + if( version >= 90200 ) { + res = PQexecParams( conn, "select pg_tablespace_location($1)", + 1, NULL, values, lengths, binary, 1 ); + } else { + res = PQexecParams( conn, "select spclocation from pg_tablespace where oid = $1", + 1, NULL, values, lengths, binary, 1 ); + } + + if( PQresultStatus( res ) != PGRES_TUPLES_OK ) { + syslog( LOG_ERR, "Error in get_tablespace_location for OID %d: %s", oid, PQerrorMessage( conn ) ); + PQclear( res ); + return NULL; + } + + data = strdup( PQgetvalue( res, 0, 0 ) ); + + PQclear( res ); + + return data; +} + +int64_t psql_get_fs_blocks_free( PGconn *conn, int verbose ) +{ + PGresult *res; + char *data; + int i; + int nof_oids; + int oid[MAX_TABLESPACE_OIDS]; + char *location[MAX_TABLESPACE_OIDS]; + + /* Get a list of oids containing the tablespaces of PgFuse tables and indexes */ + res = PQexec( conn, 
"select distinct reltablespace::int4 FROM pg_class WHERE relname in ( 'dir', 'data', 'data_dir_id_idx', 'data_block_no_idx', 'dir_parent_id_idx' )" ); + + if( PQresultStatus( res ) != PGRES_TUPLES_OK ) { + syslog( LOG_ERR, "Error in psql_get_fs_blocks_free: %s", PQerrorMessage( conn ) ); + PQclear( res ); + return -EIO; + } + + /* weird, no tablespaces? There is something wrong here, bail out */ + if( PQntuples( res ) == 0 ) { + syslog( LOG_ERR, "Error in psql_get_fs_blocks_free, no tablespace OIDs found"); + PQclear( res ); + return -EIO; + } + + nof_oids = PQntuples( res ) ; + if( nof_oids > MAX_TABLESPACE_OIDS ) { + syslog( LOG_ERR, "Error in psql_get_fs_blocks_free, too many tablespace OIDs found, increase MAX_TABLESPACE_OIDS"); + PQclear( res ); + return -EIO; + } + + for( i = 0; i < nof_oids; i++ ) { + data = PQgetvalue( res, i, 0 ); + oid[i] = atoi( data ); + } + + PQclear( res ); + + /* we have a OID = 0 in the list, so have a look at the default + * tablespace of the current database and replace the value + */ + for( i = 0; i < nof_oids; i++ ) { + if( oid[i] == 0 ) { + int res = get_default_tablespace( conn, verbose ); + if( res < 0 ) { + return res; + } + oid[i] = res; + } + } + + /* Get table space locations, since 9.2 there is a function for + * this, before we must hunt system tables for the information + */ + for( i = 0; i < nof_oids; i++ ) { + location[i] = get_tablespace_location( conn, oid[i], verbose ); + } + + for( i = 0; i < nof_oids; i++ ) { + if( !location[i] ) { + /* No location, tablespace resides in PGDATA */ + } + } + + for( i = 0; i < nof_oids; i++ ) { + if( verbose ) { + syslog( LOG_DEBUG, "Free blocks calculation, seen tablespace OID %d, %s", + oid[i], location[i] ); + } + } + + for( i = 0; i < nof_oids; i++ ) { + if( location[i] ) free( location[i] ); + } + + return 9999; } int64_t psql_get_fs_files_used( PGconn *conn ) { PGresult *res; char *data; - size_t used; + int64_t used; res = PQexec( conn, "SELECT COUNT(*) FROM dir" ); if( 
PQresultStatus( res ) != PGRES_TUPLES_OK ) { @@ -975,7 +1109,7 @@ int64_t psql_get_fs_files_used( PGconn *conn ) } data = PQgetvalue( res, 0, 0 ); - used = atoi( data ); + used = atol( data ); PQclear( res ); @@ -984,6 +1118,10 @@ int64_t psql_get_fs_files_used( PGconn *conn ) int64_t psql_get_fs_files_free( PGconn *conn ) { - return 9999; + /* no restriction on the number of files storable, we could + * add some limits later, so we would calculate the difference + * here and not in pgfuse.c. + */ + return INT64_MAX; } @@ -100,7 +100,7 @@ size_t psql_get_block_size( PGconn *conn, const size_t block_size ); int64_t psql_get_fs_blocks_used( PGconn *conn ); -int64_t psql_get_fs_blocks_free( PGconn *conn ); +int64_t psql_get_fs_blocks_free( PGconn *conn, int verbose ); int64_t psql_get_fs_files_used( PGconn *conn ); |