Commits

Anonymous committed c8a32d0

first incarnation of indexing thread

  • Participants
  • Parent commits 3dd05c6

Comments (0)

Files changed (18)

src/common/rclconfig.cpp

 #ifndef lint
-static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.4 2005-01-25 14:37:20 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rclconfig.cpp,v 1.5 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 
 #include <iostream>
 #include "conftree.h"
 #include "debuglog.h"
 
-static DebugLog debuglog;
-DebugLog *dbl = &debuglog;
-class loginitializer {
- public:
-    loginitializer() {
-	dbl->setlogfilename("stderr");
-	dbl->setloglevel(10);
-    }
-};
-static loginitializer lgntlzr;
-
 using namespace std;
 
 RclConfig::RclConfig()
     : m_ok(false), conf(0), mimemap(0), mimeconf(0)
 {
+    static int loginit = 0;
+    if (!loginit) {
+	DebugLog::setfilename("stderr");
+	DebugLog::getdbl()->setloglevel(10);
+	loginit = 1;
+    }
+
     const char *cp = getenv("RECOLL_CONFDIR");
     if (cp) {
 	confdir = cp;

src/index/indexer.cpp

+#ifndef lint
+static char rcsid[] = "@(#$Id: indexer.cpp,v 1.1 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes";
+#endif
+#include <sys/stat.h>
+
+#include <strings.h>
+
+#include <iostream>
+#include <list>
+#include <map>
+
+#include "pathut.h"
+#include "conftree.h"
+#include "rclconfig.h"
+#include "fstreewalk.h"
+#include "mimetype.h"
+#include "rcldb.h"
+#include "readfile.h"
+#include "indexer.h"
+#include "csguess.h"
+#include "transcode.h"
+#include "mimehandler.h"
+#include "debuglog.h"
+
+using namespace std;
+
+#ifndef deleteZ
+#define deleteZ(X) {delete X;X = 0;}
+#endif
+
+/**
+ * Bunch holder for data used while indexing a directory tree
+ */
+class DbIndexer {
+    FsTreeWalker walker;
+    RclConfig *config;
+    string dbdir;
+    list<string> *topdirs;
+    Rcl::Db db;
+ public:
+    DbIndexer(RclConfig *cnf, const string &dbd, list<string> *top) 
+	: config(cnf), dbdir(dbd), topdirs(top)
+    { }
+
+    friend FsTreeWalker::Status 
+      indexfile(void *, const std::string &, const struct stat *, 
+		FsTreeWalker::CbFlag);
+
+    bool index();
+};
+
+bool DbIndexer::index()
+{
+    if (!db.open(dbdir, Rcl::Db::DbUpd)) {
+	LOGERR(("DbIndexer::index: error opening database in %s\n", 
+		dbdir.c_str()));
+	return false;
+    }
+    for (list<string>::const_iterator it = topdirs->begin();
+	 it != topdirs->end(); it++) {
+	LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(), 
+		dbdir.c_str()));
+	if (walker.walk(*it, indexfile, this) != FsTreeWalker::FtwOk) {
+	    LOGERR(("DbIndexer::index: error while indexing %s\n", 
+		    it->c_str()));
+	    db.close();
+	    return false;
+	}
+    }
+    db.purge();
+    if (!db.close()) {
+	LOGERR(("DbIndexer::index: error closing database in %s\n", 
+		dbdir.c_str()));
+	return false;
+    }
+    return true;
+}
+
+/** 
+ * This function gets called for every file and directory found by the
+ * tree walker. It checks with the db if the file has changed and needs to
+ * be reindexed. If so, it calls an appropriate handler depending on the mime
+ * type, which is responsible for populating an Rcl::Doc.
+ * Accent and case handling are performed by the db module when doing
+ * the actual indexing work.
+ */
+FsTreeWalker::Status 
+indexfile(void *cdata, const std::string &fn, const struct stat *stp, 
+	  FsTreeWalker::CbFlag flg)
+{
+    DbIndexer *me = (DbIndexer *)cdata;
+
+    // If we're changing directories, possibly adjust parameters.
+    if (flg == FsTreeWalker::FtwDirEnter || 
+	flg == FsTreeWalker::FtwDirReturn) {
+	me->config->setKeyDir(fn);
+	return FsTreeWalker::FtwOk;
+    }
+
+    string mime = mimetype(fn, me->config->getMimeMap());
+    if (mime.empty()) {
+	// No mime type ?? pass on.
+	LOGDEB(("indexfile: (no mime) [%s]\n", fn.c_str()));
+	return FsTreeWalker::FtwOk;
+    }
+
+    // Look for appropriate handler
+    MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf());
+    if (!fun) {
+	// No handler for this type, for now :(
+	LOGDEB(("indexfile: %s : no handler\n", mime.c_str()));
+	return FsTreeWalker::FtwOk;
+    }
+
+    LOGDEB(("indexfile: %s [%s]\n", mime.c_str(), fn.c_str()));
+
+    // Check db up to date ?
+    if (!me->db.needUpdate(fn, stp))
+	return FsTreeWalker::FtwOk;
+
+    // Turn file into a document. The document has fields for title, body 
+    // etc.,  all text converted to utf8
+    Rcl::Doc doc;
+    if (!fun(me->config, fn,  mime, doc))
+	return FsTreeWalker::FtwOk;
+
+    // Set up common fields:
+    doc.mimetype = mime;
+    char ascdate[20];
+    sprintf(ascdate, "%ld", long(stp->st_mtime));
+    doc.mtime = ascdate;
+
+    // Do database-specific work to update document data
+    if (!me->db.add(fn, doc))
+	return FsTreeWalker::FtwError;
+
+    return FsTreeWalker::FtwOk;
+}
+
+ConfIndexer::~ConfIndexer() 
+{
+    deleteZ(indexer);
+}
+
+bool ConfIndexer::index()
+{
+    ConfTree *conf = config->getConfig();
+
+    // Retrieve the list of directories to be indexed.
+    string topdirs;
+    if (conf->get("topdirs", topdirs, "") == 0) {
+	LOGERR(("ConfIndexer::index: no top directories in configuration\n"));
+	return false;
+    }
+
+    // Group the directories by database: it is important that all
+    // directories for a database be indexed at once so that deleted
+    // file cleanup works 
+    vector<string> tdl; // List of directories to be indexed
+    if (!ConfTree::stringToStrings(topdirs, tdl)) {
+	LOGERR(("ConfIndexer::index: parse error for directory list\n"));
+	return false;
+    }
+
+    vector<string>::iterator dirit;
+    map<string, list<string> > dbmap;
+    map<string, list<string> >::iterator dbit;
+    for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {
+	string db;
+	string dir = path_tildexpand(*dirit);
+	if (conf->get("dbdir", db, dir) == 0) {
+	    LOGERR(("ConfIndexer::index: no database directory in "
+		    "configuration for %s\n", dir.c_str()));
+	    return false;
+	}
+	db = path_tildexpand(db);
+	dbit = dbmap.find(db);
+	if (dbit == dbmap.end()) {
+	    list<string> l;
+	    l.push_back(dir);
+	    dbmap[db] = l;
+	} else {
+	    dbit->second.push_back(dir);
+	}
+    }
+
+    for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
+	//cout << dbit->first << " -> ";
+	//list<string>::const_iterator dit;
+	//for (dit = dbit->second.begin(); dit != dbit->second.end(); dit++) {
+	//    cout << *dit << " ";
+	//}
+	//cout << endl;
+
+	indexer = new DbIndexer(config, dbit->first, &dbit->second);
+	if (!indexer->index()) {
+	    deleteZ(indexer);
+	    return false;
+	}
+	deleteZ(indexer);
+    }
+    return true;
+}

src/index/indexer.h

 #ifndef _INDEXER_H_INCLUDED_
 #define _INDEXER_H_INCLUDED_
-/* @(#$Id: indexer.h,v 1.3 2005-01-25 14:37:21 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: indexer.h,v 1.4 2005-01-31 14:31:09 dockes Exp $  (C) 2004 J.F.Dockes */
 
 #include "rclconfig.h"
-
-#if 0
-class FsIndexer {
-    const ConfTree &conf;
+class DbIndexer;
+class ConfIndexer {
+    RclConfig *config;
+    DbIndexer *indexer;
  public:
     enum runStatus {IndexerOk, IndexerError};
-    Indexer(const ConfTree &cnf): conf(cnf) {}
-    virtual ~Indexer() {}
-    runStatus run() = 0;
+    ConfIndexer(RclConfig *cnf) : config(cnf), indexer(0) {}
+    virtual ~ConfIndexer();
+    bool index();
 };
-#endif
 
 #endif /* _INDEXER_H_INCLUDED_ */

src/index/recollindex.cpp

 #ifndef lint
-static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.7 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.8 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 
-#include <sys/stat.h>
+#include <stdio.h>
+#include <signal.h>
 
-#include <strings.h>
+#include "indexer.h"
 
-#include <iostream>
-#include <list>
-#include <map>
-
-#include "pathut.h"
-#include "conftree.h"
-#include "rclconfig.h"
-#include "fstreewalk.h"
-#include "mimetype.h"
-#include "rcldb.h"
-#include "readfile.h"
-#include "indexer.h"
-#include "csguess.h"
-#include "transcode.h"
-#include "mimehandler.h"
-#include "debuglog.h"
-
-using namespace std;
-
-
-/**
- * Bunch holder for data used while indexing a directory tree
- */
-class DirIndexer {
-    FsTreeWalker walker;
-    RclConfig *config;
-    list<string> *topdirs;
-    string dbdir;
-    Rcl::Db db;
- public:
-    DirIndexer(RclConfig *cnf, const string &dbd, list<string> *top) 
-	: config(cnf), topdirs(top), dbdir(dbd)
-    { }
-
-    friend FsTreeWalker::Status 
-      indexfile(void *, const std::string &, const struct stat *, 
-		FsTreeWalker::CbFlag);
-
-    bool index();
-};
-
-bool DirIndexer::index()
-{
-    if (!db.open(dbdir, Rcl::Db::DbUpd)) {
-	LOGERR(("DirIndexer::index: error opening database in %s\n", 
-		dbdir.c_str()));
-	return false;
-    }
-    for (list<string>::const_iterator it = topdirs->begin();
-	 it != topdirs->end(); it++) {
-	LOGDEB(("DirIndexer::index: Indexing %s into %s\n", it->c_str(), 
-		dbdir.c_str()));
-	if (walker.walk(*it, indexfile, this) != FsTreeWalker::FtwOk) {
-	    LOGERR(("DirIndexer::index: error while indexing %s\n", 
-		    it->c_str()));
-	    db.close();
-	    return false;
-	}
-    }
-    db.purge();
-    if (!db.close()) {
-	LOGERR(("DirIndexer::index: error closing database in %s\n", 
-		dbdir.c_str()));
-	return false;
-    }
-    return true;
-}
-
-/** 
- * This function gets called for every file and directory found by the
- * tree walker. It checks with the db if the file has changed and needs to
- * be reindexed. If so, it calls an appropriate handler depending on the mime
- * type, which is responsible for populating an Rcl::Doc.
- * Accent and majuscule handling are performed by the db module when doing
- * the actual indexing work.
- */
-FsTreeWalker::Status 
-indexfile(void *cdata, const std::string &fn, const struct stat *stp, 
-	  FsTreeWalker::CbFlag flg)
-{
-    DirIndexer *me = (DirIndexer *)cdata;
-
-    if (flg == FsTreeWalker::FtwDirEnter || 
-	flg == FsTreeWalker::FtwDirReturn) {
-	me->config->setKeyDir(fn);
-	return FsTreeWalker::FtwOk;
-    }
-
-    string mime = mimetype(fn, me->config->getMimeMap());
-    if (mime.length() == 0) {
-	LOGDEB(("indexfile: (no mime) [%s]\n", fn.c_str()));
-	// No mime type ?? pass on.
-	return FsTreeWalker::FtwOk;
-    }
-
-    LOGDEB(("indexfile: %s [%s]\n", mime.c_str(), fn.c_str()));
-
-    // Look for appropriate handler
-    MimeHandlerFunc fun = getMimeHandler(mime, me->config->getMimeConf());
-    if (!fun) {
-	// No handler for this type, for now :(
-	return FsTreeWalker::FtwOk;
-    }
-
-    if (!me->db.needUpdate(fn, stp))
-	return FsTreeWalker::FtwOk;
-
-    // Turn file into a document. The document has fields for title, body 
-    // etc.,  all text converted to utf8
-    Rcl::Doc doc;
-    if (!fun(me->config, fn,  mime, doc))
-	return FsTreeWalker::FtwOk;
-
-    // Set up common fields:
-    doc.mimetype = mime;
-    char ascdate[20];
-    sprintf(ascdate, "%ld", long(stp->st_mtime));
-    doc.mtime = ascdate;
-
-    // Set up xapian document, add postings and misc fields, 
-    // add to or update database.
-    if (!me->db.add(fn, doc))
-	return FsTreeWalker::FtwError;
-
-    return FsTreeWalker::FtwOk;
-}
-
-DirIndexer *indexer;
+ConfIndexer *indexer;
 
 static void cleanup()
 {
 	signal(SIGTERM, sigcleanup);
 
     RclConfig config;
-    if (!config.ok())
-	cerr << "Config could not be built" << endl;
-
-    ConfTree *conf = config.getConfig();
-
-    // Retrieve the list of directories to be indexed.
-    string topdirs;
-    if (conf->get("topdirs", topdirs, "") == 0) {
-	cerr << "No top directories in configuration" << endl;
+    if (!config.ok()) {
+	fprintf(stderr, "Config could not be built\n");
 	exit(1);
     }
-
-    // Group the directories by database: it is important that all
-    // directories for a database be indexed at once so that deleted
-    // file cleanup works 
-    vector<string> tdl; // List of directories to be indexed
-    if (!ConfTree::stringToStrings(topdirs, tdl)) {
-	cerr << "Parse error for directory list" << endl;
-	exit(1);
-    }
-
-    vector<string>::iterator dirit;
-    map<string, list<string> > dbmap;
-    map<string, list<string> >::iterator dbit;
-    for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {
-	string db;
-	if (conf->get("dbdir", db, *dirit) == 0) {
-	    cerr << "No database directory in configuration for " 
-		 << *dirit << endl;
-	    exit(1);
-	}
-	dbit = dbmap.find(db);
-	if (dbit == dbmap.end()) {
-	    list<string> l;
-	    l.push_back(*dirit);
-	    dbmap[db] = l;
-	} else {
-	    dbit->second.push_back(*dirit);
-	}
-    }
-
-    for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
-	cout << dbit->first << " -> ";
-	list<string>::const_iterator dit;
-	for (dit = dbit->second.begin(); dit != dbit->second.end(); dit++) {
-	    cout << *dit << " ";
-	}
-	cout << endl;
-	indexer = new DirIndexer(&config, dbit->first, &dbit->second);
-	if (!indexer->index()) {
-	    delete indexer;
-	    indexer = 0;
-	    exit(1);
-	}
-	delete indexer;
-	indexer = 0;
-    }
+    indexer = new ConfIndexer(&config);
+    
+    exit(!indexer->index());
 }
 all: $(LIBS)
 
 OBJS = conftree.o csguess.o debuglog.o \
-     fstreewalk.o html.o htmlparse.o \
+     fstreewalk.o html.o htmlparse.o indexer.o \
      mimehandler.o mimeparse.o mimetype.o myhtmlparse.o pathut.o \
      rclconfig.o rcldb.o readfile.o smallut.o \
      textsplit.o transcode.o \
      unacpp.o unac.o
 SRCS = ../utils/conftree.cpp ../index/csguess.cpp ../utils/debuglog.cpp \
      ../utils/fstreewalk.cpp ../common/html.cpp ../common/htmlparse.cpp \
+     ../index/indexer.cpp \
      ../common/mimehandler.cpp ../utils/mimeparse.cpp ../index/mimetype.cpp \
      ../common/myhtmlparse.cpp ../utils/pathut.cpp \
      ../common/rclconfig.cpp ../common/rcldb.cpp ../utils/readfile.cpp \
 	$(CXX) $(CXXFLAGS) -c $<
 htmlparse.o : ../common/htmlparse.cpp 
 	$(CXX) $(CXXFLAGS) -c $<
+indexer.o : ../index/indexer.cpp 
+	$(CXX) $(CXXFLAGS) -c $<
 mimehandler.o : ../common/mimehandler.cpp 
 	$(CXX) $(CXXFLAGS) -c $<
 mimeparse.o : ../utils/mimeparse.cpp 

src/qtgui/idxthread.cpp

+#include <stdio.h>
+#include <qthread.h>
+
+#include "indexer.h"
+#include "debuglog.h"
+
+class IdxThread : public QThread {
+    virtual void run();
+ public:
+    ConfIndexer *indexer;
+};
+
+int startindexing;
+int indexingdone;
+bool indexingstatus;
+int stopidxthread;
+
+void IdxThread::run()
+{
+    DebugLog::getdbl()->setloglevel(DEBDEB1);
+    for (;;) {
+	if (stopidxthread) {
+	    delete indexer;
+	    return;
+	}
+	if (startindexing) {
+	    indexingdone = indexingstatus = startindexing = 0;
+	    fprintf(stderr, "Index thread :start index\n");
+	    indexingstatus = indexer->index();
+	    indexingdone = 1;
+	} 
+	msleep(100);
+    }
+}
+
+static IdxThread idxthread;
+
+void start_idxthread(RclConfig *cnf)
+{
+    ConfIndexer *ix = new ConfIndexer(cnf);
+    idxthread.indexer = ix;
+    idxthread.start();
+}
+
+/**
+ * Ask the indexing thread to terminate and block until it has done so.
+ *
+ * Sets the stopidxthread flag, which IdxThread::run() polls between
+ * indexing passes, then joins the thread. Must not be called from signal
+ * context.
+ */
+void stop_idxthread()
+{
+    stopidxthread = 1;
+    // Join the thread instead of polling running() once per second: this
+    // returns as soon as run() exits (immediately if it was never started)
+    // and does not depend on sleep() being declared.
+    idxthread.wait();
+}

src/qtgui/main.cpp

 #include <signal.h>
 #include <qapplication.h>
+#include <qthread.h>
+#include <qtimer.h>
+
 #include <qmessagebox.h>
 
 #include "recollmain.h"
 #include "rcldb.h"
 #include "rclconfig.h"
+#include "pathut.h"
 
 RclConfig *rclconfig;
 Rcl::Db *rcldb;
 
-static void cleanup()
+extern void start_idxthread(RclConfig *cnf);
+extern void stop_idxthread();
+extern int startindexing;
+
+void recollCleanup()
 {
+    stop_idxthread();
     delete rcldb;
     rcldb = 0;
     delete rclconfig;
     rclconfig = 0;
 }
+
+int recollNeedsExit;
+
 static void sigcleanup(int sig)
 {
     fprintf(stderr, "sigcleanup\n");
-    cleanup();
-    exit(1);
+    // Can't call exit from here, because the atexit cleanup does some
+    // thread stuff that we can't do from signal context.
+    // Just set a flag and let the watchdog timer do the work
+    recollNeedsExit = 1;
 }
+
+
 int main( int argc, char ** argv )
 {
-    QApplication a( argc, argv );
+    QApplication a(argc, argv);
     RecollMain w;
     w.show();
-    a.connect( &a, SIGNAL( lastWindowClosed() ), &a, SLOT( quit() ) );
+    a.connect(&a, SIGNAL(lastWindowClosed()), &a, SLOT(quit()));
+    QTimer *timer = new QTimer(&a);
+    w.connect(timer, SIGNAL(timeout()), &w, SLOT(checkExit()));
+    timer->start(100);
 
-    atexit(cleanup);
+    atexit(recollCleanup);
     if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
 	signal(SIGHUP, sigcleanup);
     if (signal(SIGINT, SIG_IGN) != SIG_IGN)
 			      QString("No db directory in configuration"));
 	exit(1);
     }
-    
+    dbdir = path_tildexpand(dbdir);
+
     rcldb = new Rcl::Db;
 
     if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {
-	QMessageBox::critical(0, "Recoll",
-			      QString("Could not open database in ") + 
-			      QString(dbdir));
-	exit(1);
+	startindexing = 1;
+	QMessageBox::information(0, "Recoll",
+				 QString("Could not open database in ") + 
+				 QString(dbdir) + ". Starting indexation");
+	startindexing = 1;
     }
+
+    start_idxthread(rclconfig);
+
     return a.exec();
 }

src/qtgui/recoll.pro

 
 CONFIG	+= qt warn_on release
 
-SOURCES	+= main.cpp
+SOURCES	+= main.cpp \
+	idxthread.cpp
 
 FORMS	= recollmain.ui
 

src/qtgui/recollmain.ui

         <slot>fileExit()</slot>
     </connection>
     <connection>
-        <sender>fileExitAction</sender>
-        <signal>activated()</signal>
-        <receiver>RecollMain</receiver>
-        <slot>fileExit()</slot>
-    </connection>
-    <connection>
         <sender>queryText</sender>
         <signal>returnPressed()</signal>
         <receiver>RecollMain</receiver>
 </variables>
 <slots>
     <slot>fileExit()</slot>
+    <slot>checkExit()</slot>
     <slot>reslistTE_doubleClicked( int par, int car )</slot>
     <slot>reslistTE_clicked( int par, int car )</slot>
     <slot>queryText_returnPressed()</slot>

src/qtgui/recollmain.ui.h

 extern RclConfig *rclconfig;
 extern Rcl::Db *rcldb;
 
-
+extern void recollCleanup();
 void RecollMain::fileExit()
 {
+    LOGDEB(("RecollMain: fileExit\n"));
     exit(0);
 }
 
+extern int recollNeedsExit;
+void RecollMain::checkExit()
+{
+    if (recollNeedsExit)
+	fileExit();
+}
 
 static string plaintorich(const string &in)
 {
 void RecollMain::queryText_returnPressed()
 {
     LOGDEB(("RecollMain::queryText_returnPressed()\n"));
+    if (!rcldb->isopen()) {
+	string dbdir;
+	if (rclconfig->getConfParam(string("dbdir"), dbdir) == 0) {
+	    QMessageBox::critical(0, "Recoll",
+				  QString("No db directory in configuration"));
+	    exit(1);
+	}
+	dbdir = path_tildexpand(dbdir);
+	if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {
+	    QMessageBox::information(0, "Recoll",
+				     QString("Could not open database in ") + 
+				     QString(dbdir) + " wait for indexing " +
+				     "to complete?");
+	    return;
+	}
+
+    }
     reslist_current = -1;
     reslist_winfirst = -1;
 
     QCString u8 =  queryText->text().utf8();
-    
-    rcldb->setQuery(string((const char *)u8));
+
+    if (!rcldb->setQuery(string((const char *)u8)))
+	return;
     listNextPB_clicked();
 }
 
 	Rcl::Doc doc;
 	doc.erase();
 	int percent;
-	if (!rcldb->getDoc(reslist_winfirst + i, doc, &percent))
+	if (i == 0) {
+	    reslistTE->clear();
+	    previewTextEdit->clear();
+	}
+	if (!rcldb->getDoc(reslist_winfirst + i, doc, &percent)) {
+	    if (i == 0) 
+		reslist_winfirst = -1;
 	    break;
+	}
 	int resCnt = rcldb->getResCnt();
 	int last = MIN(resCnt, reslist_winfirst+respagesize);
 	if (i == 0) {
-	    reslistTE->clear();
-	    previewTextEdit->clear();
 	    reslistTE->append("<qt><head></head><body><p>");
 	    char line[80];
 	    sprintf(line, "<p><b>Displaying results %d-%d out of %d</b><br>",
 	    reslist_winfirst = 0;
     }
 }
+

src/query/qtry.cpp

 #ifndef lint
-static char rcsid[] = "@(#$Id: qtry.cpp,v 1.3 2005-01-26 11:47:27 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: qtry.cpp,v 1.4 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 
 // Tests with the query interface
 #include "rcldb.h"
 #include "transcode.h"
 #include "mimehandler.h"
+#include "pathut.h"
 
 using namespace std;
 
 	cerr << "No database directory in configuration" << endl;
 	exit(1);
     }
-    
+    dbdir = path_tildexpand(dbdir);
     Rcl::Db *rcldb = new Rcl::Db;
 
     if (!rcldb->open(dbdir, Rcl::Db::DbRO)) {

src/rcldb/rcldb.cpp

 #ifndef lint
-static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.13 2005-01-29 15:41:11 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.14 2005-01-31 14:31:09 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 
 #include <sys/stat.h>
 	LOGERR(("Rcl::Db::open: already open\n"));
 	return false;
     }
-
+    string ermsg;
     try {
 	switch (mode) {
 	case DbUpd:
 	ndb->isopen = true;
 	return true;
     } catch (const Xapian::Error &e) {
-	cerr << "Exception: " << e.get_msg() << endl;
+	ermsg = e.get_msg();
     } catch (const string &s) {
-	cerr << "Exception: " << s << endl;
+	ermsg = s;
     } catch (const char *s) {
-	cerr << "Exception: " << s << endl;
+	ermsg = s;
     } catch (...) {
-	cerr << "Caught unknown exception" << endl;
+	ermsg = "Caught unknown exception";
     }
-    LOGERR(("Rcl::Db::open: got exception\n"));
+    LOGERR(("Rcl::Db::open: exception while opening '%s': %s\n", 
+	    dir.c_str(), ermsg.c_str()));
     return false;
 }
 
     return false;
 }
 
+bool Rcl::Db::isopen()
+{
+    if (pdata == 0)
+	return false;
+    Native *ndb = (Native *)pdata;
+    return ndb->isopen;
+}
+
 // A small class to hold state while splitting text
 class wsData {
  public:
 bool Rcl::Db::setQuery(const std::string &querystring)
 {
     LOGDEB(("Rcl::Db::setQuery: %s\n", querystring.c_str()));
+    Native *ndb = (Native *)pdata;
+    if (!ndb)
+	return false;
+
     wsQData splitData;
     TextSplit splitter(splitQCb, &splitData);
 
     }
     splitter.text_to_words(noacc);
 
-    Native *ndb = (Native *)pdata;
 
     ndb->query = Xapian::Query(Xapian::Query::OP_OR, splitData.terms.begin(), 
 			       splitData.terms.end());

src/rcldb/rcldb.h

 #ifndef _DB_H_INCLUDED_
 #define _DB_H_INCLUDED_
-/* @(#$Id: rcldb.h,v 1.7 2005-01-29 15:41:11 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: rcldb.h,v 1.8 2005-01-31 14:31:09 dockes Exp $  (C) 2004 J.F.Dockes */
 
 #include <string>
 
     enum OpenMode {DbRO, DbUpd, DbTrunc};
     bool open(const std::string &dbdir, OpenMode mode);
     bool close();
+    bool isopen();
 
     // Update-related functions
     bool add(const std::string &filename, const Doc &doc);

src/utils/Makefile

 
-CXXFLAGS = -I.
+CXXFLAGS = -I. -g
 
 BIGLIB = ../lib/librcl.a
 
 
 PATHUT_OBJS= trpathut.o pathut.o 
 trpathut : $(PATHUT_OBJS)
-	$(CXX) -o trpathut $(PATHUT_OBJS)
+	$(CXX) $(CXXFLAGS) -o trpathut $(PATHUT_OBJS)
 trpathut.o : pathut.cpp pathut.h
 	$(CXX) -o trpathut.o -c $(CXXFLAGS) \
 	       -DTEST_PATHUT pathut.cpp

src/utils/pathut.cpp

 #ifndef lint
-static char rcsid[] = "@(#$Id: pathut.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: pathut.cpp,v 1.3 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 
 #ifndef TEST_PATHUT
 
 #include <pwd.h>
+#include <iostream>
 
 #include "pathut.h"
+using std::string;
 
-std::string path_getfather(const std::string &s) {
-    std::string father = s;
+string path_getfather(const string &s) {
+    string father = s;
 
     // ??
     if (father.empty())
 	father.erase(father.length()-1);
     }
 
-    std::string::size_type slp = father.rfind('/');
-    if (slp == std::string::npos)
+    string::size_type slp = father.rfind('/');
+    if (slp == string::npos)
 	return "./";
 
     father.erase(slp);
     return father;
 }
 
-std::string path_getsimple(const std::string &s) {
-    std::string simple = s;
+string path_getsimple(const string &s) {
+    string simple = s;
 
     if (simple.empty())
 	return simple;
 
-    std::string::size_type slp = simple.rfind('/');
-    if (slp == std::string::npos)
+    string::size_type slp = simple.rfind('/');
+    if (slp == string::npos)
 	return simple;
 
     simple.erase(0, slp+1);
     return simple;
 }
 
-std::string path_home()
+string path_home()
 {
     uid_t uid = getuid();
 
     struct passwd *entry = getpwuid(uid);
-    if (entry == 0)
+    if (entry == 0) {
+	const char *cp = getenv("HOME");
+	if (cp)
+	    return cp;
+	else 
 	return "/";
+    }
 
-    std::string homedir = entry->pw_dir;
+    string homedir = entry->pw_dir;
     path_catslash(homedir);
     return homedir;
 }
 
+/**
+ * Expand a leading tilde in a path.
+ *
+ *  - "~" and "~/rest" expand to the current user's home directory, as
+ *    returned by path_home().
+ *  - "~user" and "~user/rest" expand to that user's home directory, looked
+ *    up with getpwnam(). If the user is unknown the input is returned
+ *    unchanged.
+ *  - Anything not starting with '~' (including the empty string) is
+ *    returned unchanged.
+ *
+ * @param s path to expand
+ * @return the expanded path
+ */
+extern string path_tildexpand(const string &s) 
+{
+    if (s.empty() || s[0] != '~')
+	return s;
+    string o = s;
+    if (s.length() == 1) {
+	o.replace(0, 1, path_home());
+    } else if (s[1] == '/') {
+	// We replace both the tilde and the slash, so make sure the home
+	// directory ends with a slash: path_home() may return $HOME verbatim.
+	string home = path_home();
+	if (home.empty() || home[home.length() - 1] != '/')
+	    home += '/';
+	o.replace(0, 2, home);
+    } else {
+	// "~user[/rest]": l is the length of the user name
+	string::size_type pos = s.find('/');
+	string::size_type l = (pos == string::npos) ? s.length() - 1 : pos - 1;
+	struct passwd *entry = getpwnam(s.substr(1, l).c_str());
+	if (entry && entry->pw_dir)
+	    o.replace(0, l + 1, entry->pw_dir);
+    }
+    return o;
+}
+
 #else // TEST_PATHUT
 
 #include <iostream>
 			"/dir/.c",
 };
 
+const string ttvec[] = {"/dir", "", "~", "~/sub", "~root", "~root/sub",
+		 "~nosuch", "~nosuch/sub"};
+int nttvec = sizeof(ttvec) / sizeof(string);
+
 int main(int argc, const char **argv)
 {
-
+#if 0
     for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
 	cout << tstvec[i] << " FATHER " << path_getfather(tstvec[i]) << endl;
     }
     for (int i = 0;i < sizeof(tstvec) / sizeof(char *); i++) {
 	cout << tstvec[i] << " SIMPLE " << path_getsimple(tstvec[i]) << endl;
     }
+#endif
+    string s;
+
+    for (int i = 0; i < nttvec; i++) {
+	cout << "tildexp: '" << ttvec[i] << "' -> '" << 
+	    path_tildexpand(ttvec[i]) << "'" << endl;
+    }
+    
+
+
     return 0;
 }
 

src/utils/pathut.h

 #ifndef _PATHUT_H_INCLUDED_
 #define _PATHUT_H_INCLUDED_
-/* @(#$Id: pathut.h,v 1.2 2004-12-14 17:54:16 dockes Exp $  (C) 2004 J.F.Dockes */
+/* @(#$Id: pathut.h,v 1.3 2005-01-31 14:31:10 dockes Exp $  (C) 2004 J.F.Dockes */
 
 #include <string>
 
 extern std::string path_getsimple(const std::string &s);
 extern std::string path_getfather(const std::string &s);
 extern std::string path_home();
+extern std::string path_tildexpand(const std::string &s);
 
 #endif /* _PATHUT_H_INCLUDED_ */

src/utils/smallut.cpp

+#ifndef lint
+static char rcsid[] = "@(#$Id: smallut.cpp,v 1.1 2005-01-31 14:31:10 dockes Exp $ (C) 2004 J.F.Dockes";
+#endif
+
+#ifndef TEST_SMALLUT
+#include <string>
+#include "smallut.h"
+
+#include <ctype.h>
+
+#define MIN(A,B) ((A)<(B)?(A):(B))
+
+/**
+ * Case-insensitive comparison of two strings.
+ *
+ * Characters are folded with toupper() and compared over the common
+ * prefix only; if the prefixes match, the shorter string sorts first.
+ *
+ * @param s1 left operand
+ * @param s2 right operand
+ * @return -1, 0 or 1 when s1 is respectively less than, equal to or
+ *         greater than s2, ignoring case.
+ */
+int stringicmp(const string & s1, const string& s2) 
+{
+    string::const_iterator it1 = s1.begin();
+    string::const_iterator it2 = s2.begin();
+    string::size_type size1 = s1.length(), size2 = s2.length();
+    // Never step past the end of the shorter string: dereferencing end()
+    // is undefined behaviour.
+    string::size_type common = size1 < size2 ? size1 : size2;
+
+    for (string::size_type i = 0; i < common; ++i, ++it1, ++it2) {
+	// toupper() needs a value representable as unsigned char
+	char c1 = ::toupper((unsigned char)*it1);
+	char c2 = ::toupper((unsigned char)*it2);
+	if (c1 != c2)
+	    return c1 > c2 ? 1 : -1;
+    }
+    if (size1 == size2)
+	return 0;
+    return size1 > size2 ? 1 : -1;
+}
+
+/**
+ * Case-insensitive comparison where s1 is already lowercase.
+ *
+ * Only s2 is folded, with tolower(). The comparison covers the common
+ * prefix; if the prefixes match, the shorter string sorts first.
+ *
+ * @param s1 left operand, expected to be lowercase already
+ * @param s2 right operand, any case
+ * @return -1, 0 or 1 when s1 is respectively less than, equal to or
+ *         greater than the lowercased s2.
+ */
+int stringlowercmp(const string & s1, const string& s2) 
+{
+    string::const_iterator it1 = s1.begin();
+    string::const_iterator it2 = s2.begin();
+    string::size_type size1 = s1.length(), size2 = s2.length();
+    // Never step past the end of the shorter string: dereferencing end()
+    // is undefined behaviour.
+    string::size_type common = size1 < size2 ? size1 : size2;
+
+    for (string::size_type i = 0; i < common; ++i, ++it1, ++it2) {
+	// tolower() needs a value representable as unsigned char
+	char c2 = ::tolower((unsigned char)*it2);
+	if (*it1 != c2)
+	    return *it1 > c2 ? 1 : -1;
+    }
+    if (size1 == size2)
+	return 0;
+    return size1 > size2 ? 1 : -1;
+}
+
+/**
+ * Case-insensitive comparison where s1 is already uppercase.
+ *
+ * Only s2 is folded, with toupper(). The comparison covers the common
+ * prefix; if the prefixes match, the shorter string sorts first.
+ *
+ * @param s1 left operand, expected to be uppercase already
+ * @param s2 right operand, any case
+ * @return -1, 0 or 1 when s1 is respectively less than, equal to or
+ *         greater than the uppercased s2.
+ */
+int stringuppercmp(const string & s1, const string& s2) 
+{
+    string::const_iterator it1 = s1.begin();
+    string::const_iterator it2 = s2.begin();
+    string::size_type size1 = s1.length(), size2 = s2.length();
+    // Never step past the end of the shorter string: dereferencing end()
+    // is undefined behaviour.
+    string::size_type common = size1 < size2 ? size1 : size2;
+
+    for (string::size_type i = 0; i < common; ++i, ++it1, ++it2) {
+	// toupper() needs a value representable as unsigned char
+	char c2 = ::toupper((unsigned char)*it2);
+	if (*it1 != c2)
+	    return *it1 > c2 ? 1 : -1;
+    }
+    if (size1 == size2)
+	return 0;
+    return size1 > size2 ? 1 : -1;
+}
+
+#else
+
+#include <string>
+#include "smallut.h"
+
+struct spair {
+    const char *s1;
+    const char *s2;
+};
+struct spair pairs[] = {
+    {"", ""},
+    {"", "a"},
+    {"a", ""},
+    {"a", "a"},
+    {"A", "a"},
+    {"a", "A"},
+    {"A", "A"},
+    {"12", "12"},
+    {"a", "ab"},
+    {"ab", "a"},
+    {"A", "Ab"},
+    {"a", "Ab"},
+};
+int npairs = sizeof(pairs) / sizeof(struct spair);
+
+int main(int argc, char **argv)
+{
+    for (int i = 0; i < npairs; i++) {
+	{
+	    int c = stringicmp(pairs[i].s1, pairs[i].s2);
+	    printf("'%s' %s '%s' ", pairs[i].s1, 
+		   c == 0 ? "==" : c < 0 ? "<" : ">", pairs[i].s2);
+	}
+	{
+	    int cl = stringlowercmp(pairs[i].s1, pairs[i].s2);
+	    printf("L '%s' %s '%s' ", pairs[i].s1, 
+		   cl == 0 ? "==" : cl < 0 ? "<" : ">", pairs[i].s2);
+	}
+	{
+	    int cu = stringuppercmp(pairs[i].s1, pairs[i].s2);
+	    printf("U '%s' %s '%s' ", pairs[i].s1, 
+		   cu == 0 ? "==" : cu < 0 ? "<" : ">", pairs[i].s2);
+	}
+	printf("\n");
+    }
+}
+
+#endif

src/utils/smallut.h

+#ifndef _SMALLUT_H_INCLUDED_
+#define _SMALLUT_H_INCLUDED_
+/* @(#$Id: smallut.h,v 1.1 2005-01-31 14:31:10 dockes Exp $  (C) 2004 J.F.Dockes */
+#include <string>
+
+using std::string;
+
+extern int stringicmp(const string& s1, const string& s2);
+extern int stringlowercmp(const string& alreadylower, const string& s2);
+extern int stringuppercmp(const string& alreadyupper, const string& s2); 
+
+
+#endif /* _SMALLUT_H_INCLUDED_ */