Commits

medoc committed 634cb29

use vfork when possible + small cleanups in mt init

Comments (0)

Files changed (6)

src/common/rclinit.cpp

 #include "pathut.h"
 #include "unac.h"
 #include "smallut.h"
+#include "execmd.h"
 
 static const int catchedSigs[] = {SIGHUP, SIGINT, SIGQUIT, SIGTERM, 
      SIGUSR1, SIGUSR2};
     // Init unac locking
     unac_init_mt();
 
-    // Init pathut static values
+    // Init smallut and pathut static values
     pathut_init_mt();
+    smallut_init_mt();
 
     // Init Unac translation exceptions
     string unacex;
     if (config->getConfParam("unac_except_trans", unacex) && !unacex.empty()) 
 	unac_set_except_translations(unacex.c_str());
 
-    // Init langtocode() static table
-    langtocode("");
+#ifndef IDX_THREADS
+    ExecCmd::useVfork(true);
+#endif
 
     int flushmb;
     if (config->getConfParam("idxflushmb", &flushmb) && flushmb > 0) {

src/utils/execmd.cpp

 #include <errno.h>
 #include <signal.h>
 
-#if !defined(PUTENV_ARG_CONST)
-#include <string.h>
-#endif
-
 #include <vector>
 #include <string>
 #include <sstream>
 #include "smallut.h"
 #include "netcon.h"
 #include "closefrom.h"
-#include "ptmutex.h"
 
+extern char **environ;
+
+bool ExecCmd::o_useVfork = false;
 
 /* From FreeBSD's which command */
 static bool exec_is_there(const char *candidate)
     ExecCmdRsrc(this);
 }
 
+// In child process. Set up pipes and exec command. 
+// This must not return. _exit() on error.
+// *** This can be called after a vfork, so no modification of the
+//     process memory at all is allowed ***
+// The LOGXX calls should not be there, but they occur only after "impossible"
+// errors, which we would most definitely want to have a hint about
+inline void ExecCmd::dochild(const string &cmd, const char **argv,
+			     const char **envv,
+			     bool has_input, bool has_output)
+{
+    // Start our own process group
+    if (setpgid(0, getpid())) {
+	LOGINFO(("ExecCmd::dochild: setpgid(0, %d) failed: errno %d\n",
+		 getpid(), errno));
+    }
+
+    // Restore SIGTERM to default. Really, signal handling should be
+    // specified when creating the execmd. Help Recoll get rid of its
+    // filter children though. To be fixed one day... Not sure that
+    // all of this is needed. But an ignored sigterm and the masks are
+    // normally inherited.
+    if (signal(SIGTERM, SIG_DFL) == SIG_ERR) {
+	LOGERR(("ExecCmd::dochild: signal() failed, errno %d\n", errno));
+    }
+    sigset_t sset;
+    sigfillset(&sset);
+    pthread_sigmask(SIG_UNBLOCK, &sset, 0);
+    sigprocmask(SIG_UNBLOCK, &sset, 0);
+
+    if (has_input) {
+	close(m_pipein[1]);
+	if (m_pipein[0] != 0) {
+	    dup2(m_pipein[0], 0);
+	    close(m_pipein[0]);
+	}
+    }
+    if (has_output) {
+	close(m_pipeout[0]);
+	if (m_pipeout[1] != 1) {
+	    if (dup2(m_pipeout[1], 1) < 0) {
+		LOGERR(("ExecCmd::doexec: dup2(2) failed. errno %d\n", errno));
+	    }
+	    if (close(m_pipeout[1]) < 0) {
+		LOGERR(("ExecCmd::doexec: close(2) failed. errno %d\n", errno));
+	    }
+	}
+    }
+    // Do we need to redirect stderr ?
+    if (!m_stderrFile.empty()) {
+	int fd = open(m_stderrFile.c_str(), O_WRONLY|O_CREAT
+#ifdef O_APPEND
+		      |O_APPEND
+#endif
+		      , 0600);
+	if (fd < 0) {
+	    close(2);
+	} else {
+	    if (fd != 2) {
+		dup2(fd, 2);
+	    }
+	    lseek(2, 0, 2);
+	}
+    }
+
+    // Close all descriptors except 0,1,2
+    libclf_closefrom(3);
+
+    execve(cmd.c_str(), (char *const*)argv, (char *const*)envv);
+    // Hu ho
+    LOGERR(("ExecCmd::doexec: execvp(%s) failed. errno %d\n", cmd.c_str(),
+	    errno));
+    _exit(127);
+}
+
 int ExecCmd::startExec(const string &cmd, const vector<string>& args,
 		       bool has_input, bool has_output)
 {
 	return -1;
     }
 
-    m_pid = fork();
+
+//////////// vfork setup section
+    // We do here things that we could/should do after a fork(), but
+    // not a vfork(). Does no harm to do it here in both cases, except
+    // that it needs cleanup (as compared to doing it just before
+    // exec()).
+
+    // Allocate arg vector (2 more for arg0 + final 0)
+    typedef const char *Ccharp;
+    Ccharp *argv;
+    argv = (Ccharp *)malloc((args.size()+2) * sizeof(char *));
+    if (argv == 0) {
+	LOGERR(("ExecCmd::doexec: malloc() failed. errno %d\n",	errno));
+	exit(1);
+    }
+    // Fill up argv
+    argv[0] = cmd.c_str();
+    int i = 1;
+    vector<string>::const_iterator it;
+    for (it = args.begin(); it != args.end(); it++) {
+	argv[i++] = it->c_str();
+    }
+    argv[i] = 0;
+
+    Ccharp *envv;
+    int envsize;
+    for (envsize = 0; ; envsize++) 
+	if (environ[envsize] == 0)
+	    break;
+    envv = (Ccharp *)malloc((envsize + m_env.size() + 2) * sizeof(char *));
+    int eidx;
+    for (eidx = 0; eidx < envsize; eidx++)
+	envv[eidx] = environ[eidx];
+    for (vector<string>::const_iterator it = m_env.begin(); 
+	 it != m_env.end(); it++) {
+	envv[eidx++] = it->c_str();
+    }
+    envv[eidx] = 0;
+
+    // As we are going to use execve, not execvp, do the PATH
+    // thing. If the command is not found, exe will be empty and the
+    // exec will fail, which is what we want.
+    string exe;
+    which(cmd, exe);
+////////////////////////////////
+
+    if (o_useVfork) {
+	m_pid = vfork();
+    } else {
+	m_pid = fork();
+    }
     if (m_pid < 0) {
 	LOGERR(("ExecCmd::startExec: fork(2) failed. errno %d\n", errno));
 	return -1;
     }
     if (m_pid == 0) {
-	e.inactivate(); // needed ?
-	dochild(cmd, args, has_input, has_output);
+	// e.inactivate() is not needed. As we do not return, the call
+	// stack won't be unwound and destructors of local objects
+	// won't be called.
+	dochild(exe, argv, envv, has_input, has_output);
 	// dochild does not return. Just in case...
 	_exit(1);
     }
 
     // Father process
+////////////////////
+    // Vfork cleanup section
+    free(argv);
+    free(envv);
+///////////////////
 
     // Set the process group for the child. This is also done in the
     // child process see wikipedia(Process_group)
     }
 }
 
-// In child process. Set up pipes, environment, and exec command. 
-// This must not return. exit() on error.
-void ExecCmd::dochild(const string &cmd, const vector<string>& args,
-		      bool has_input, bool has_output)
-{
-    // Start our own process group
-    if (setpgid(0, getpid())) {
-	LOGINFO(("ExecCmd::dochild: setpgid(0, %d) failed: errno %d\n",
-		 getpid(), errno));
-    }
-
-    // Restore SIGTERM to default. Really, signal handling should be
-    // specified when creating the execmd. Help Recoll get rid of its
-    // filter children though. To be fixed one day... Not sure that
-    // all of this is needed. But an ignored sigterm and the masks are
-    // normally inherited.
-    if (signal(SIGTERM, SIG_DFL) == SIG_ERR) {
-	LOGERR(("ExecCmd::dochild: signal() failed, errno %d\n", errno));
-    }
-    sigset_t sset;
-    sigfillset(&sset);
-    pthread_sigmask(SIG_UNBLOCK, &sset, 0);
-    sigprocmask(SIG_UNBLOCK, &sset, 0);
-
-    if (has_input) {
-	close(m_pipein[1]);
-	m_pipein[1] = -1;
-	if (m_pipein[0] != 0) {
-	    dup2(m_pipein[0], 0);
-	    close(m_pipein[0]);
-	    m_pipein[0] = -1;
-	}
-    }
-    if (has_output) {
-	close(m_pipeout[0]);
-	m_pipeout[0] = -1;
-	if (m_pipeout[1] != 1) {
-	    if (dup2(m_pipeout[1], 1) < 0) {
-		LOGERR(("ExecCmd::doexec: dup2(2) failed. errno %d\n", errno));
-	    }
-	    if (close(m_pipeout[1]) < 0) {
-		LOGERR(("ExecCmd::doexec: close(2) failed. errno %d\n", errno));
-	    }
-	    m_pipeout[1] = -1;
-	}
-    }
-    // Do we need to redirect stderr ?
-    if (!m_stderrFile.empty()) {
-	int fd = open(m_stderrFile.c_str(), O_WRONLY|O_CREAT
-#ifdef O_APPEND
-		      |O_APPEND
-#endif
-		      , 0600);
-	if (fd < 0) {
-	    close(2);
-	} else {
-	    if (fd != 2) {
-		dup2(fd, 2);
-	    }
-	    lseek(2, 0, 2);
-	}
-    }
-
-    // Close all descriptors except 0,1,2
-    libclf_closefrom(3);
-
-    // Allocate arg vector (2 more for arg0 + final 0)
-    typedef const char *Ccharp;
-    Ccharp *argv;
-    argv = (Ccharp *)malloc((args.size()+2) * sizeof(char *));
-    if (argv == 0) {
-	LOGERR(("ExecCmd::doexec: malloc() failed. errno %d\n",	errno));
-	exit(1);
-    }
-	
-    // Fill up argv
-    argv[0] = cmd.c_str();
-    int i = 1;
-    vector<string>::const_iterator it;
-    for (it = args.begin(); it != args.end(); it++) {
-	argv[i++] = it->c_str();
-    }
-    argv[i] = 0;
-
-#if 0
-    {int i = 0;cerr << "cmd: " << cmd << endl << "ARGS: " << endl; 
-	while (argv[i]) cerr << argv[i++] << endl;}
-#endif
-
-    for (vector<string>::const_iterator it = m_env.begin(); 
-	 it != m_env.end(); it++) {
-#ifdef PUTENV_ARG_CONST
-	::putenv(it->c_str());
-#else
-	::putenv(strdup(it->c_str()));
-#endif
-    }
-    execvp(cmd.c_str(), (char *const*)argv);
-    // Hu ho
-    LOGERR(("ExecCmd::doexec: execvp(%s) failed. errno %d\n", cmd.c_str(),
-	    errno));
-    _exit(127);
-}
-
 ReExec::ReExec(int argc, char *args[])
 {
     init(argc, args);

src/utils/execmd.h

  */
 class ExecCmd {
  public:
+    // Use vfork instead of fork. This must not be called in a multithreaded 
+    // program.
+    static void useVfork(bool on)
+    {
+	o_useVfork  = on;
+    }
     /** 
      * Add/replace environment variable before executing command. This must
      * be called before doexec() to have an effect (possibly multiple
      */
     void zapChild() {setKill(); (void)wait();}
 
-    ExecCmd() 
+    ExecCmd()
 	: m_advise(0), m_provide(0), m_timeoutMs(1000)
     {
 	reset();
 
     friend class ExecCmdRsrc;
  private:
+    static bool      o_useVfork;
+
     vector<string>   m_env;
     ExecCmdAdvise   *m_advise;
     ExecCmdProvide  *m_provide;
 	sigemptyset(&m_blkcld);
     }
     // Child process code
-    void dochild(const string &cmd, const vector<string>& args, 
-		 bool has_input, bool has_output);
+    inline void dochild(const string &cmd, const char **argv, 
+			const char **envv, bool has_input, bool has_output);
     /* Copyconst and assignment private and forbidden */
     ExecCmd(const ExecCmd &) {}
     ExecCmd& operator=(const ExecCmd &) {return *this;};

src/utils/pathut.cpp

     return true;
 }
 
-static const char *tmplocation()
+static const string& tmplocation()
 {
-    const char *tmpdir = getenv("RECOLL_TMPDIR");
-    if (!tmpdir)
-	tmpdir = getenv("TMPDIR");
-    if (!tmpdir)
-	tmpdir = "/tmp";
-    return tmpdir;
+    static string stmpdir;
+    if (stmpdir.empty()) {
+        const char *tmpdir = getenv("RECOLL_TMPDIR");
+        if (tmpdir == 0) 
+            tmpdir = getenv("TMPDIR");
+        if (tmpdir == 0)
+            tmpdir = "/tmp";
+        stmpdir = string(tmpdir);
+    }
+    return stmpdir;
 }
 
 bool maketmpdir(string& tdir, string& reason)

src/utils/smallut.cpp

     return locale.substr(0, under);
 }
 
+// Initialization for static stuff to be called from main thread before going 
+// multiple
+void smallut_init_mt()
+{
+    // Init langtocode() static table
+    langtocode("");
+}
+
 #else // TEST_SMALLUT
 
 #include <string>

src/utils/smallut.h

 #define MAX(A,B) (((A)>(B)) ? (A) : (B))
 #endif
 
+void smallut_init_mt();
+
 #endif /* _SMALLUT_H_INCLUDED_ */