Anonymous committed 269e780

Use posix_fallocate() for new WAL files, where available.

This function is more efficient than actually writing out zeroes to
the new file, per microbenchmarks by Jon Nelson. Also, it may reduce
the likelihood of WAL file fragmentation.

Jon Nelson, with review by Andres Freund, Greg Smith and me.

Comments (0)

Files changed (5)

 
 
 
-for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
+
+for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
 do
 as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 { $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
 LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
+AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat readlink setproctitle setsid sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
 
 AC_REPLACE_FUNCS(fseeko)
 case $host_os in

src/backend/access/transam/xlog.c

 {
 	char		path[MAXPGPATH];
 	char		tmppath[MAXPGPATH];
-	char	   *zbuffer;
 	XLogSegNo	installed_segno;
 	int			max_advance;
 	int			fd;
-	int			nbytes;
 
 	XLogFilePath(path, ThisTimeLineID, logsegno);
 
 
 	unlink(tmppath);
 
-	/*
-	 * Allocate a buffer full of zeros. This is done before opening the file
-	 * so that we don't leak the file descriptor if palloc fails.
-	 *
-	 * Note: palloc zbuffer, instead of just using a local char array, to
-	 * ensure it is reasonably well-aligned; this may save a few cycles
-	 * transferring data to the kernel.
-	 */
-	zbuffer = (char *) palloc0(XLOG_BLCKSZ);
-
 	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
 	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
 					   S_IRUSR | S_IWUSR);
 				(errcode_for_file_access(),
 				 errmsg("could not create file \"%s\": %m", tmppath)));
 
-	/*
-	 * Zero-fill the file.	We have to do this the hard way to ensure that all
-	 * the file space has really been allocated --- on platforms that allow
-	 * "holes" in files, just seeking to the end doesn't allocate intermediate
-	 * space.  This way, we know that we have all the space and (after the
-	 * fsync below) that all the indirect blocks are down on disk.	Therefore,
-	 * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
-	 * log file.
-	 */
-	for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
+#ifdef HAVE_POSIX_FALLOCATE
 	{
-		errno = 0;
-		if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
-		{
-			int			save_errno = errno;
+		errno = posix_fallocate(fd, 0, XLogSegSize);
 
-			/*
-			 * If we fail to make the file, delete it to release disk space
-			 */
-			unlink(tmppath);
+		if (errno)
+		{
+			int errno_saved = errno;
 
 			close(fd);
-
-			/* if write didn't set errno, assume problem is no disk space */
-			errno = save_errno ? save_errno : ENOSPC;
+			unlink(tmppath);
+			errno = errno_saved;
 
 			ereport(ERROR,
 					(errcode_for_file_access(),
-					 errmsg("could not write to file \"%s\": %m", tmppath)));
+					 errmsg("could not allocate space for file \"%s\" using posix_fallocate: %m",
+							tmppath)));
+		}
+	}
+#else /* !HAVE_POSIX_FALLOCATE */
+	{
+		/*
+		 * Allocate a buffer full of zeros. NOTE(review): the file is already
+		 * open at this point (see BasicOpenFile above), so the descriptor may
+		 * leak if palloc fails -- confirm.
+		 *
+		 * Note: palloc zbuffer, instead of just using a local char array, to
+		 * ensure it is reasonably well-aligned; this may save a few cycles
+		 * transferring data to the kernel.
+		 */
+
+		char	*zbuffer = (char *) palloc0(XLOG_BLCKSZ);
+		int		 nbytes;
+
+		/*
+		 * Zero-fill the file. We have to do this the hard way to ensure that
+		 * all the file space has really been allocated --- on platforms that
+		 * allow "holes" in files, just seeking to the end doesn't allocate
+		 * intermediate space.  This way, we know that we have all the space
+		 * and (after the fsync below) that all the indirect blocks are down on
+		 * disk. Therefore, fdatasync(2) or O_DSYNC will be sufficient to sync
+		 * future writes to the log file.
+		 */
+		for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
+		{
+			errno = 0;
+			if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
+			{
+				int			save_errno = errno;
+
+				/*
+				 * If we fail to make the file, delete it to release disk space
+				 */
+				unlink(tmppath);
+
+				close(fd);
+
+				/* if write didn't set errno, assume no disk space */
+				errno = save_errno ? save_errno : ENOSPC;
+
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not write to file \"%s\": %m",
+								tmppath)));
+			}
 		}
+		pfree(zbuffer);
 	}
-	pfree(zbuffer);
+#endif /* HAVE_POSIX_FALLOCATE */
 
 	if (pg_fsync(fd) != 0)
 	{

src/include/pg_config.h.in

 /* Define to 1 if you have the `posix_fadvise' function. */
 #undef HAVE_POSIX_FADVISE
 
+/* Define to 1 if you have the `posix_fallocate' function. */
+#undef HAVE_POSIX_FALLOCATE
+
 /* Define to 1 if you have the POSIX signal interface. */
 #undef HAVE_POSIX_SIGNALS
 

src/include/pg_config.h.win32

 /* Define to 1 if you have the <poll.h> header file. */
 /* #undef HAVE_POLL_H */
 
+/* Define to 1 if you have the `posix_fallocate' function. */
+/* #undef HAVE_POSIX_FALLOCATE */
+
 /* Define to 1 if you have the POSIX signal interface. */
 /* #undef HAVE_POSIX_SIGNALS */
 
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.