//	mod_gridfs / gridfs / mod_gridfs.cpp
/*
	mod_gridfs.cpp -- Apache 2.2+ module that supports serving of files from MongoDB GridFS.

	See http://www.mongodb.org/ and http://www.mongodb.org/display/DOCS/GridFS for more information.

	See LICENSE file for licensing details.
*/

#include <unistd.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <new>
#include <string>

#include "client/dbclient.h"
#include "client/gridfs.h"

#include "apr_strings.h"

#include "httpd.h"
#include "http_log.h"
#include "http_config.h"
#include "http_protocol.h"
#include "http_request.h"

//	Declare module
//	Forward declaration so gridfs_handler can look up its per-directory configuration
extern "C"
{
	extern module AP_MODULE_DECLARE_DATA gridfs_module;
}

//	Default cache max age in seconds (one week)
const int DEFAULT_CACHE_MAX_AGE = 604800;

//	Default connect timeout in seconds
const int DEFAULT_CONNECT_TIMEOUT = 30;

//	Maximum cache age in seconds (ten years)
const int MAX_CACHE_MAX_AGE = 86400 * 365 * 10;

//	Maximum connect timeout in seconds
const int MAX_CONNECT_TIMEOUT = 300;

//	Retry delay in milliseconds (sleep between MongoDB connection retries)
const int RETRY_DELAY = 300;

//	Module configuration
//	NOTE: the std::string members are placement-new'ed into APR pool memory
//	(see the command handlers below) and their destructors are never run; the
//	configuration lives as long as the pool, so the strings' internal buffers
//	are only reclaimed at process exit.
struct gridfs_config
{
	const std::string *connection_string;	// MongoDB connection string ("GridFSConnection"); 0 if unset
	const std::string *database;	// GridFS database name ("GridFSDatabase"); 0 if unset
	int cache_max_age;	// Cache-Control/Expires max age in seconds; 0 disables the headers
	bool cache_max_age_set;	// true when explicitly configured (consulted when merging)
	int connect_timeout;	// MongoDB connect timeout in seconds; also bounds the retry window
	bool connect_timeout_set;	// true when explicitly configured (consulted when merging)
	bool slave_ok;	// allow reads from replica-set secondaries ("GridFSSlaveOk")
	bool slave_ok_set;	// true when explicitly configured (consulted when merging)
	const std::string *context;	// location prefix stripped from URIs (no leading '/', trailing '/'); 0 if none
};

//	Creates module configuration
//	Called by Apache for each configuration scope; "location" is the scope
//	argument (e.g. "/files") or null/empty at server level.  For a non-root
//	location, its text minus the leading '/' and with a trailing '/' appended
//	is stored as the context prefix so the handler can strip it from URIs.
//	Allocation failures fall back to a config without a context rather than
//	failing startup.
static void *gridfs_create_config(apr_pool_t *const pool, char *const location)
{
	gridfs_config *const config = static_cast<gridfs_config *>(apr_pcalloc(pool, sizeof(gridfs_config)));
	if (config == 0)
		return 0;
	config->cache_max_age = DEFAULT_CACHE_MAX_AGE;
	config->connect_timeout = DEFAULT_CONNECT_TIMEOUT;
	if (location != 0 && location[0] == '/' && location[1] != '\0')
	{
	  	void *const context_data = apr_palloc(pool, sizeof(std::string));
	  	if (context_data == 0)
	    	return config;
		std::string *context;
		try
		{
			//	Placement-new into pool memory; the destructor is never run, so the
			//	string's heap buffer is only reclaimed at process exit.
			context = new (context_data) std::string(location + 1);
			//	Guarantee a trailing '/' so prefix stripping matches whole path segments
			if (*context->rbegin() != '/')
				context->append(1, '/');
		}
		catch (...)
		{
			//	std::string allocation failed: keep the config usable without a context
			return config;
		}
		config->context = context;
	}
	return config;
}

//	Merges module configuration
//	Produces the effective configuration for a nested scope: any setting the
//	inner ("add") scope specified wins, otherwise the outer ("base") value is
//	inherited.  The *_set flags record explicit configuration so merging can
//	distinguish "set to the default value" from "never set".
static void *gridfs_merge_config(apr_pool_t *const pool, void *const basev, void *const addv)
{
	void *const memory = apr_palloc(pool, sizeof(gridfs_config));
	if (memory == 0)
		return 0;
	gridfs_config *const merged = static_cast<gridfs_config *>(memory);
	const gridfs_config *const outer = static_cast<const gridfs_config *>(basev);
	const gridfs_config *const inner = static_cast<const gridfs_config *>(addv);
	merged->connection_string = inner->connection_string ? inner->connection_string : outer->connection_string;
	merged->database = inner->database ? inner->database : outer->database;
	if (inner->cache_max_age_set)
		merged->cache_max_age = inner->cache_max_age;
	else
		merged->cache_max_age = outer->cache_max_age;
	merged->cache_max_age_set = inner->cache_max_age_set || outer->cache_max_age_set;
	if (inner->connect_timeout_set)
		merged->connect_timeout = inner->connect_timeout;
	else
		merged->connect_timeout = outer->connect_timeout;
	merged->connect_timeout_set = inner->connect_timeout_set || outer->connect_timeout_set;
	merged->slave_ok = inner->slave_ok_set ? inner->slave_ok : outer->slave_ok;
	merged->slave_ok_set = inner->slave_ok_set || outer->slave_ok_set;
	merged->context = inner->context ? inner->context : outer->context;
	return merged;
}

//	Handles "GridFSConnection <connection string>" command
//	Copies the argument into a pool-allocated std::string via placement new;
//	the destructor is intentionally never run (the pool owns the storage).
//	Returns 0 on success or an error message for Apache to report.
static const char *gridfs_connection_command(cmd_parms *const command, void *const module_config, const char *const argument)
{
	gridfs_config *const config = static_cast<gridfs_config *>(module_config);
	void *const storage = apr_palloc(command->pool, sizeof(std::string));
	if (storage == 0)
		return "GridFSConnection failed to allocate data.";
	try
	{
		//	The assignment only happens if construction succeeds
		config->connection_string = new (storage) std::string(argument);
	}
	catch (...)
	{
		return "GridFSConnection exception.";
	}
	return 0;
}

//	Handles "GridFSDatabase <database name>" command
//	Copies the argument into a pool-allocated std::string via placement new;
//	the destructor is intentionally never run (the pool owns the storage).
//	Returns 0 on success or an error message for Apache to report.
static const char *gridfs_database_command(cmd_parms *const command, void *const module_config, const char *const argument)
{
	gridfs_config *const config = static_cast<gridfs_config *>(module_config);
	void *const storage = apr_palloc(command->pool, sizeof(std::string));
	if (storage == 0)
		return "GridFSDatabase failed to allocate data.";
	try
	{
		//	The assignment only happens if construction succeeds
		config->database = new (storage) std::string(argument);
	}
	catch (...)
	{
		return "GridFSDatabase exception.";
	}
	return 0;
}

//	Handles "GridFSCacheMaxAge <cache max age>" command
//	Parses the max age in seconds (0 disables the Cache-Control/Expires headers).
//	Uses std::strtol with full-string validation instead of std::atoi: atoi
//	silently mapped non-numeric input to 0 (unexpectedly disabling expiration)
//	and accepted trailing garbage; such arguments are now configuration errors.
static const char *gridfs_cache_max_age_command(cmd_parms *const command, void *const module_config, const char *const argument)
{
	gridfs_config *const config = static_cast<gridfs_config *>(module_config);
	char *end = 0;
	const long cache_max_age = std::strtol(argument, &end, 10);
	if (end == argument || *end != '\0')
		return "GridFSCacheMaxAge is not a valid number.";
	if (cache_max_age < 0 || cache_max_age > MAX_CACHE_MAX_AGE)
		return "GridFSCacheMaxAge out of range.";
	config->cache_max_age = static_cast<int>(cache_max_age);
	config->cache_max_age_set = true;
	return 0;
}

//	Handles "GridFSConnectTimeout <connect timeout>" command
//	Parses the timeout in seconds (0 means infinite).  Uses std::strtol with
//	full-string validation instead of the unqualified atoi the original used:
//	atoi silently mapped malformed input to 0 (infinite timeout) and accepted
//	trailing garbage; such arguments are now configuration errors.
static const char *gridfs_connect_timeout_command(cmd_parms *const command, void *const module_config, const char *const argument)
{
	gridfs_config *const config = static_cast<gridfs_config *>(module_config);
	char *end = 0;
	const long connect_timeout = std::strtol(argument, &end, 10);
	if (end == argument || *end != '\0')
		return "GridFSConnectTimeout is not a valid number.";
	if (connect_timeout < 0 || connect_timeout > MAX_CONNECT_TIMEOUT)
		return "GridFSConnectTimeout out of range.";
	config->connect_timeout = static_cast<int>(connect_timeout);
	config->connect_timeout_set = true;
	return 0;
}

//	Handles "GridFSSlaveOk <On|Off>" command
//	Records whether reads may be served by replica-set secondaries.
static const char *gridfs_slave_ok_command(cmd_parms *const command, void *const module_config, int flag)
{
	gridfs_config *const config = static_cast<gridfs_config *>(module_config);
	config->slave_ok = (flag ? true : false);
	config->slave_ok_set = true;
	return 0;
}

//	Handles request
//	Serves GET requests for files stored in GridFS.  The request URI (minus
//	the optional configured context prefix) is the GridFS filename.  The whole
//	file is copied into a bucket brigade while the MongoDB connection is held,
//	then streamed to the client after the connection is released.  Transient
//	connection errors are retried until the connect-timeout window expires.
//
//	Fixes over the original:
//	- ETag/Cache-Control/Expires are set with apr_table_set (which copies the
//	  value into the pool).  The original used apr_table_setn, which stores the
//	  pointer as-is: the md5 std::string and the stack buffers for the cache
//	  headers are destroyed before the response is written, leaving the output
//	  headers dangling.
//	- "result" is initialized (and error logs that could run before any brigade
//	  write pass 0); the original passed an indeterminate value to ap_log_rerror.
//	- The context prefix comparison uses std::strncmp, which stops at the URI's
//	  terminating NUL; string::compare with an explicit length requires that
//	  many readable characters and could read past the end of a short URI.
static int gridfs_handler(request_rec *const request)
{
	const gridfs_config *const config = static_cast<gridfs_config *>(ap_get_module_config(request->per_dir_config, &gridfs_module));
	if (config->connection_string == 0 || config->database == 0)
		return DECLINED;	// module not configured for this scope
	request->allowed |= AP_METHOD_BIT << M_GET;
	if (request->method_number != M_GET)
		return HTTP_NOT_IMPLEMENTED;
	if (*request->uri != '/' || request->uri[1] == '\0')
		return HTTP_NOT_FOUND;
	const char *filename = request->uri + 1;
	if (config->context != 0)
	{
		//	Strip the location prefix (stored without the leading '/', with a trailing '/')
		const std::string& context = *config->context;
		const size_t context_length = context.length();
		if (std::strncmp(filename, context.c_str(), context_length) == 0)
			filename += context_length;
	}
	filename = apr_pstrdup(request->pool, filename);
	if (filename == 0)
	{
		ap_log_rerror(APLOG_MARK, APLOG_CRIT, 0, request, "mod_gridfs: Failed to allocate filename memory."); 
		return HTTP_INTERNAL_SERVER_ERROR;
	}
	request->filename = const_cast<char *>(filename);
	int result = APR_SUCCESS;
	apr_bucket_brigade *brigade = 0;
	try
	{
		apr_off_t content_length = 0;
		std::string content_type;
		//	Deadline for retrying transient MongoDB connection failures
		const apr_time_t retry_threshold = request->request_time + apr_time_from_sec(config->connect_timeout);
		while (true)
		{
			std::auto_ptr<mongo::ScopedDbConnection> connection(mongo::ScopedDbConnection::getScopedDbConnection(*config->connection_string, config->connect_timeout));
			try
			{
				const mongo::GridFS gridfs(connection->conn(), *config->database);
				mongo::GridFile gridfile = gridfs.findFile(filename, config->slave_ok);
				if (!gridfile.exists())
				{
					//	With slaveOk the file may not have replicated yet: retry once on the primary
					if (config->slave_ok)
					{
						gridfile = gridfs.findFile(filename, false);
						if (!gridfile.exists())
						{
							connection->done();
							return HTTP_NOT_FOUND;
						}
					}
					else
					{
						connection->done();
						return HTTP_NOT_FOUND;
					}
				}
				const mongo::Date_t upload_date = gridfile.getUploadDate();
				request->mtime = apr_time_from_sec(upload_date.toTimeT());
				ap_set_last_modified(request);
				const std::string& md5 = gridfile.getMD5();
				//	apr_table_set copies the value into the pool (apr_table_setn would
				//	keep a pointer into md5, which does not outlive this scope)
				if (!md5.empty())
					apr_table_set(request->headers_out, "ETag", md5.c_str());
				if (ap_meets_conditions(request) == HTTP_NOT_MODIFIED)
				{
					if (!md5.empty())
						apr_table_unset(request->headers_out, "ETag");
					connection->done();
					return HTTP_NOT_MODIFIED;
				}
				const mongo::gridfs_offset file_length = gridfile.getContentLength();
				content_length = 0;
				content_type = gridfile.getContentType();
				if (file_length != 0 && request->header_only == 0)
				{
					const int num_chunks = gridfile.getNumChunks();
					if (num_chunks == 0)
					{
						ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, request, "mod_gridfs: No chunks available for file '%s'.", filename); 
						connection->done();
						return HTTP_INTERNAL_SERVER_ERROR;
					}
					brigade = apr_brigade_create(request->pool, request->connection->bucket_alloc);
					if (brigade == 0)
					{
						ap_log_rerror(APLOG_MARK, APLOG_CRIT, 0, request, "mod_gridfs: Failed to create brigade."); 
						connection->done();
						return HTTP_INTERNAL_SERVER_ERROR;
					}
					//	Copy every chunk into the brigade while the connection is held
					for (int chunk_index = 0; chunk_index < num_chunks; chunk_index++) 
					{
						const mongo::GridFSChunk& chunk = gridfile.getChunk(chunk_index);
						int chunk_length;
						const char *chunk_data = chunk.data(chunk_length);
						if (chunk_length == 0)
							continue;
						if ((result = apr_brigade_write(brigade, 0, 0, chunk_data, chunk_length)) != APR_SUCCESS)
						{
							ap_log_rerror(APLOG_MARK, APLOG_ERR, result, request, "mod_gridfs: Failed to write chunk %d for file '%s' to brigade (length: %d).", chunk_index, filename, chunk_length); 
							connection->done();
							apr_brigade_destroy(brigade);
							brigade = 0;
							return HTTP_INTERNAL_SERVER_ERROR;
						}
						content_length += chunk_length;
					}
					//	Detect files/chunks inconsistency before serving a truncated body
					if (content_length != file_length)
					{
						ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, request, "mod_gridfs: Mismatching files/chunks length for file '%s' (difference: %d).", filename, static_cast<int>(file_length - content_length));
						connection->done();
						apr_brigade_destroy(brigade);
						brigade = 0;
						return HTTP_INTERNAL_SERVER_ERROR;
					}
				}
				connection->done();
				break;
			}
			catch (const mongo::DBException& exception)
			{
				if (apr_time_now() >= retry_threshold)
					throw;	// retry window exhausted: report the error
				const int code = exception.getCode();
				switch (code)
				{
				case 9001:	//	default socket exception
				case 10009:	//	ReplicaSetMonitor no master found for set
				case 10276:	//	DBClientBase::findN: transport error
				case 11002:	//	pool socket exception
					ap_log_rerror(APLOG_MARK, APLOG_WARNING | APLOG_NOERRNO, 0, request, "mod_gridfs: Retrying after MongoDB exception for file '%s' (code: %d): %s.", filename, code, exception.what()); 
					break;

				default:
					throw;	// non-transient error: report immediately
				}
			}
			//	Discard any partially-filled brigade before retrying
			if (brigade != 0)
			{
				apr_brigade_destroy(brigade);
				brigade = 0;
			}
			apr_sleep(RETRY_DELAY * 1000);
		}
		if (content_length != 0)
			ap_set_content_length(request, content_length);
		if (content_type.empty())
		{
			//	No content type stored in GridFS: let Apache's type checker derive one
			request->finfo.filetype = APR_REG;
			if ((result = ap_run_type_checker(request)) != APR_SUCCESS)
				ap_log_rerror(APLOG_MARK, APLOG_WARNING, result, request, "mod_gridfs: Failed to run type checker for file '%s'.", filename); 
		}
		else
			ap_set_content_type(request, content_type.c_str());
		if (config->cache_max_age != 0)
		{
			//	apr_table_set copies into the pool; these buffers are stack-local and
			//	go out of scope before the headers are sent
			char cache_control[32];
			snprintf(cache_control, sizeof(cache_control), "public, max-age=%d", config->cache_max_age);
			apr_table_set(request->headers_out, "Cache-Control", cache_control);
			apr_time_t expires_time = request->request_time + apr_time_from_sec(config->cache_max_age);
			char expires[APR_RFC822_DATE_LEN];
			apr_rfc822_date(expires, expires_time);
			apr_table_set(request->headers_out, "Expires", expires);
		}
	}
	catch (const mongo::DBException& exception)
	{
		ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, request, "mod_gridfs: Unhandled MongoDB exception occured for file '%s' (code: %d): %s.", filename, exception.getCode(), exception.what()); 
		if (brigade != 0)
			apr_brigade_destroy(brigade);
		return HTTP_INTERNAL_SERVER_ERROR;
	}
	catch (const std::exception& exception)
	{
		ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, request, "mod_gridfs: Unhandled exception occured for file '%s': %s.", filename, exception.what()); 
		if (brigade != 0)
			apr_brigade_destroy(brigade);
		return HTTP_INTERNAL_SERVER_ERROR;
	}
	catch (...)
	{
		ap_log_rerror(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, request, "mod_gridfs: Unknown unhandled exception occured for file '%s'.", filename); 
		if (brigade != 0)
			apr_brigade_destroy(brigade);
		return HTTP_INTERNAL_SERVER_ERROR;
	}
	if (brigade != 0)
		return ap_pass_brigade(request->output_filters, brigade);
	return OK;
}

//	Registers hooks
//	Installs the content handler at middle priority so other handlers (and
//	this module's own DECLINED path when unconfigured) interoperate normally.
static void gridfs_register_hooks(apr_pool_t *const pool)
{
	ap_hook_handler(gridfs_handler, 0, 0, APR_HOOK_MIDDLE);
}

//	Describes module configuration commands
//	All directives are permitted wherever FileInfo overrides are allowed (OR_FILEINFO)
static const command_rec gridfs_commands[] =
{
	AP_INIT_TAKE1("GridFSConnection", reinterpret_cast<cmd_func>(gridfs_connection_command), 0, OR_FILEINFO, "GridFS connection string."),
	AP_INIT_TAKE1("GridFSDatabase", reinterpret_cast<cmd_func>(gridfs_database_command), 0, OR_FILEINFO, "GridFS database name."),
	AP_INIT_TAKE1("GridFSCacheMaxAge", reinterpret_cast<cmd_func>(gridfs_cache_max_age_command), 0, OR_FILEINFO, "GridFS cache max age (seconds, 0 to disable expiration)."),
	AP_INIT_TAKE1("GridFSConnectTimeout", reinterpret_cast<cmd_func>(gridfs_connect_timeout_command), 0, OR_FILEINFO, "GridFS connect timeout (seconds, 0 for infinite)."),
	AP_INIT_FLAG("GridFSSlaveOk", reinterpret_cast<cmd_func>(gridfs_slave_ok_command), 0, OR_FILEINFO, "GridFS slaveOk flag."),
	{0}	//	terminator
};

//	Defines module
extern "C"
{
	module AP_MODULE_DECLARE_DATA gridfs_module =
	{
		STANDARD20_MODULE_STUFF,
		gridfs_create_config,	//	per-directory config creator
		gridfs_merge_config,	//	per-directory config merger
		0,	//	no per-server config creator
		0,	//	no per-server config merger
		gridfs_commands,	//	configuration directives
		gridfs_register_hooks	//	hook registration
	};
}