Commits

Anonymous committed e11d9f2

Added the query fingerprint code.
Added aromaticity model selection.

Comments (0)

Files changed (1)

cyclops_mysql.cpp

                     char *is_null, char *error);
   void cyclops_substruct_fp_deinit(UDF_INIT *initid);
 
-  // cyclops_substruct_fp_noh(smiles)
-  my_bool cyclops_substruct_fp_init(UDF_INIT *initid, UDF_ARGS *args, char *message);
-  char *cyclops_substruct_fp(UDF_INIT *initid, UDF_ARGS *args,
+  // cyclops_substruct_qfp(smiles [, aromaticity])
+  my_bool cyclops_substruct_qfp_init(UDF_INIT *initid, UDF_ARGS *args, char *message);
+  char *cyclops_substruct_qfp(UDF_INIT *initid, UDF_ARGS *args,
                     char *result, unsigned long *length,
                     char *is_null, char *error);
-  void cyclops_substruct_fp_deinit(UDF_INIT *initid);
+  void cyclops_substruct_qfp_deinit(UDF_INIT *initid);
 
   
   // fp_valid(fp)
 
 }
 
+enum AroModel {      // aromaticity model choices; names are mapped by get_aro_model()
+  ARO_ERROR = -1,    // returned by get_aro_model() when the name is not recognized
+  ARO_NONE = 0,      // parse_smiles() makes no OEAssignAromaticFlags call for this value
+  ARO_OPENEYE,       // parse_smiles() applies OEAroModelOpenEye
+  ARO_DAYLIGHT,      // parse_smiles() applies OEAroModelDaylight
+  ARO_TRIPOS,        // parse_smiles() applies OEAroModelTripos
+  ARO_MMFF,          // parse_smiles() applies OEAroModelMMFF
+  ARO_MDL,           // parse_smiles() applies OEAroModelMDL
+};
+
+
+
 // Parse a SMILES string and put the molecule into normalized form
 // Return 1 on success, 0 on failure
-static int parse_smiles(const char *smiles, int length, OEGraphMol &mol) {
+static int parse_smiles(const char *smiles, int length, OEGraphMol &mol, AroModel aro_model) {
   // There is a bug in OEParseSmiles when the strict flag is true
   // and the input is the empty string. I get the warning message
   //   Strict mode without previous atom.
   if (!OEParseSmiles(mol, smiles_s, false, true)) {
     return 0;
   }
-  OEAssignAromaticFlags(mol, OEAroModelOpenEye);
+  switch (aro_model) {
+  case ARO_NONE: break;
+  case ARO_OPENEYE: OEAssignAromaticFlags(mol, OEAroModelOpenEye); break;
+  case ARO_DAYLIGHT: OEAssignAromaticFlags(mol, OEAroModelDaylight); break;
+  case ARO_TRIPOS: OEAssignAromaticFlags(mol, OEAroModelTripos); break;
+  case ARO_MMFF: OEAssignAromaticFlags(mol, OEAroModelMMFF); break;
+  case ARO_MDL: OEAssignAromaticFlags(mol, OEAroModelMDL); break;
+  default:
+    OEAssignAromaticFlags(mol, OEAroModelOpenEye); break;
+  }
   return 1;
 }
 // Parse a SMARTS string. Return 1 on success, 0 on failure
   char *smiles = args->args[0];
   if (smiles) {
     state->mol_is_constant = 1;
-    if (!parse_smiles(smiles, args->lengths[0], state->mol)) {
+    if (!parse_smiles(smiles, args->lengths[0], state->mol, ARO_OPENEYE)) {
       delete state;
       strcpy(message, "cannot parse SMILES string");
       return 1;
     goto error;
   }
   state->mol.Clear();
-  if (!parse_smiles(smiles, args->lengths[0], state->mol)) {
+  if (!parse_smiles(smiles, args->lengths[0], state->mol, ARO_OPENEYE)) {
     goto error;
   }
   return 0;
   smiles = args->args[0];
   if (smiles) {
     state->mol_is_constant = 1;
-    if (!parse_smiles(smiles, args->lengths[0], state->mol)) {
+    if (!parse_smiles(smiles, args->lengths[0], state->mol, ARO_OPENEYE)) {
       sprintf(message, "%s: cannot parse SMILES string", fname);
       goto error;
     }
       goto error;
     }
     state->mol.Clear();
-    if (!parse_smiles(smiles, args->lengths[0], state->mol)) {
+    if (!parse_smiles(smiles, args->lengths[0], state->mol, ARO_OPENEYE)) {
       goto error;
     }
   }
   int max_bonds;
   int atom_type;
   int bond_type;
+  AroModel aro_model;
   OEGraphMol mol;
   OEFingerPrint fp;
   string hex_s;
   int num_bytes = (num_bits+7)/8;
   hex_s.resize(num_bytes*2, 'X');
   for (int i=0; i<num_bytes; i++) {
-    hex_s[i*2+1] = hex_chars[byte_s[i] & 0xf];
-    hex_s[i*2] = hex_chars[byte_s[i]>>4];
+    hex_s[i*2+1] = hex_chars[((unsigned char)byte_s[i]) & 0xf];
+    hex_s[i*2] = hex_chars[((unsigned char)byte_s[i]) >> 4];
   }
 }
 
   char *smiles = args->args[0];
   if (smiles) {
     state->mol_is_constant = 1;
-    if (!parse_smiles(smiles, args->lengths[0], state->mol)) {
+    if (!parse_smiles(smiles, args->lengths[0], state->mol, ARO_OPENEYE)) {
       strcpy(message, "oe_maccs_fp: cannot parse SMILES string");
       goto error;
     }
     goto error;
   }
   state->mol.Clear();
-  if (!parse_smiles(smiles, args->lengths[0], state->mol)) {
+  if (!parse_smiles(smiles, args->lengths[0], state->mol, ARO_OPENEYE)) {
     goto error;
   }
   OEMakeMACCS166FP(state->fp, state->mol);
   char *smiles = args->args[0];
   if (smiles) {
     state->mol_is_constant = 1;
-    if (!parse_smiles(smiles, args->lengths[0], state->mol)) {
+    if (!parse_smiles(smiles, args->lengths[0], state->mol, ARO_OPENEYE)) {
       strcpy(message, "cannot parse SMILES string");
       goto error;
     }
       goto error;
     }
     state->mol.Clear();
-    if (!parse_smiles(smiles, args->lengths[0], state->mol)) {
+    if (!parse_smiles(smiles, args->lengths[0], state->mol, ARO_OPENEYE)) {
       goto error;
     }
     OEMakePathFP(state->fp, state->mol, num_bits, min_bonds, max_bonds,
   delete state;
 }
 
-// SQL: cyclops_substruct_fp(smiles)
-//    -> hex-encoded path fingerprint, as specified
-// I precompute static fields when possible.
-my_bool cyclops_substruct_fp_init(UDF_INIT *initid, UDF_ARGS *args, char *message) {
-  if (args->arg_count != 1) {
-    strcpy(message, "cyclops_substruct_fp takes a single argument: (smiles)");
+//// The Cyclops fingerprints, based on CACTVS substructure keys
+
+// Remove hydrogen information from `mol` in place: first call
+// OESuppressHydrogens(mol, false, false, false), then walk every atom and
+// force its implicit hydrogen count to zero. Called by the fingerprint code
+// when its suppress_hydrogens flag is set.
+static void _suppress_hydrogens(OEGraphMol &mol) {
+  OESuppressHydrogens(mol, false, false, false);
+  OEIter<OEAtomBase> cursor = mol.GetAtoms();
+  while (cursor) {
+    cursor->SetImplicitHCount(0);
+    ++cursor;
+  }
+}
+
+// Translate an aromaticity model name into its AroModel value.
+//   len  - number of characters of `name` to consider
+//   name - the model name; only its first `len` characters are compared
+// The match is exact and case-sensitive: the length must equal the length of
+// a known name and the characters must be identical.
+// Returns the matching AroModel, or ARO_ERROR when nothing matches.
+static AroModel get_aro_model(int len, const char *name) {
+  static const struct {
+    int size;
+    const char *label;
+    AroModel model;
+  } known_models[] = {
+    {4, "none", ARO_NONE},
+    {7, "openeye", ARO_OPENEYE},
+    {8, "daylight", ARO_DAYLIGHT},
+    {6, "tripos", ARO_TRIPOS},
+    {4, "mmff", ARO_MMFF},
+    {3, "mdl", ARO_MDL},
+  };
+  const int num_known = (int)(sizeof(known_models) / sizeof(known_models[0]));
+
+  for (int i = 0; i < num_known; i++) {
+    if (len != known_models[i].size) {
+      continue;
+    }
+    if (strncmp(name, known_models[i].label, known_models[i].size) == 0) {
+      return known_models[i].model;
+    }
+  }
+  return ARO_ERROR;
+}
+
+static my_bool
+_cyclops_substruct_fp_init(const char *fname, UDF_INIT *initid, UDF_ARGS *args,
+                           char *message, int suppress_hydrogens) {
+  int arg_count = args->arg_count;
+  AroModel aro_model = ARO_OPENEYE;
+  if (arg_count < 1 || arg_count > 2)  {
+    sprintf(message, "%s takes smiles and an optional aromaticity name", fname);
     return 1;
   }
   if (args->arg_type[0] != STRING_RESULT) {
-    strcpy(message, "cyclops_substruct_fp argument must be a SMILES string");
+    sprintf(message, "%s: argument 1 must be a SMILES string", fname);
     return 1;
   }
+  if (arg_count > 1) {
+    if (args->arg_type[1] != STRING_RESULT) {
+      sprintf(message, "%s: optional argument 2 must be an aromaticity name", fname);
+      return 1;
+    }
+    // A non-constant (per-row) aromaticity name is not supported.
+    // I can't think of why that would be reasonable, and it's slightly
+    // tricky to handle correctly (I don't want to re-re-perceive a molecule
+    // which was defined in the query initialization). So I don't
+    // handle it now. Let me know if you need it.
+    char *name = args->args[1];
+    if (name == NULL) {
+      sprintf(message, "%s: when given, argument 2 must be a constant string", fname);
+      return 1;
+    }
+    aro_model = get_aro_model(args->lengths[1], name);
+    if (aro_model == -1) {
+      sprintf(message, "%s: aromaticity model (argument 2) must be one of: "
+              "none, openeye, daylight, tripos, mmff, mdl", fname);
+      return 1;
+    }
+  }
+
   FPState *state = new FPState;
+  state->aro_model = aro_model;
 
   char *smiles = args->args[0];
   state->byte_s.resize(111); // Use this to store the bytes (not the hex characters)
   if (smiles) {
     state->mol_is_constant = 1;
-    if (!parse_smiles(smiles, args->lengths[0], state->mol)) {
-      strcpy(message, "cyclops_substruct_fp: cannot parse SMILES string");
+    if (!parse_smiles(smiles, args->lengths[0], state->mol, aro_model)) {
+      sprintf(message, "%s: cannot parse SMILES string", fname);
       goto error;
     }
+    if (suppress_hydrogens) {
+      _suppress_hydrogens(state->mol);
+    }
     Cyclops::substruct_fp(state->mol, (unsigned char *) state->byte_s.c_str());
     to_hex(881, state->byte_s, state->hex_s);
   } else {
   return 1;
 }
 
-char *cyclops_substruct_fp(UDF_INIT *initid, UDF_ARGS *args,
-                           char *result, unsigned long *length,
-                           char *is_null, char *error) {
+char *_cyclops_substruct_fp(UDF_INIT *initid, UDF_ARGS *args,
+                            char *result, unsigned long *length,
+                            char *is_null, char *error,
+                            int suppress_hydrogens) {
   FPState *state = reinterpret_cast<FPState *>(initid->ptr);
   if (state->mol_is_constant) {
     *length = (881/8+1)*2;
     goto error;
   }
   state->mol.Clear();
-  if (!parse_smiles(smiles, args->lengths[0], state->mol)) {
+  if (!parse_smiles(smiles, args->lengths[0], state->mol, state->aro_model)) {
     goto error;
   }
+  if (suppress_hydrogens) {
+    _suppress_hydrogens(state->mol);
+  }
   memset((void *)state->byte_s.c_str(), 0, 111);
   Cyclops::substruct_fp(state->mol, (unsigned char *) state->byte_s.c_str());
   to_hex(881, state->byte_s, state->hex_s);
   return NULL;
 }
 
-void cyclops_substruct_fp_deinit(UDF_INIT *initid) {
+void _cyclops_substruct_fp_deinit(UDF_INIT *initid) {  // shared deinit: deletes the FPState held in initid->ptr
   FPState *state = reinterpret_cast<FPState *>(initid->ptr);
   delete state;
 }
+
+
+// SQL: cyclops_substruct_fp(smiles [, aromaticity])
+//    -> hex-encoded cyclops substructure path fingerprint, including hydrogens
+// Thin wrapper around _cyclops_substruct_fp_init with hydrogen suppression
+// turned off. The first argument is the SQL function name, which the shared
+// initializer uses as the prefix of any error text written to `message`; it
+// must therefore be this function's own SQL name.
+// Returns whatever the shared initializer returns.
+my_bool cyclops_substruct_fp_init(UDF_INIT *initid, UDF_ARGS *args, char *message) {
+  return _cyclops_substruct_fp_init("cyclops_substruct_fp", initid, args, message, 0);
+}
+
+// Row function for SQL cyclops_substruct_fp: delegates to the shared
+// _cyclops_substruct_fp implementation with hydrogen suppression disabled
+// and returns its result unchanged.
+char *cyclops_substruct_fp(UDF_INIT *initid, UDF_ARGS *args,
+                           char *result, unsigned long *length,
+                           char *is_null, char *error) {
+  const int suppress_hydrogens = 0;  // the plain fingerprint keeps hydrogens
+  return _cyclops_substruct_fp(initid, args, result, length,
+                               is_null, error, suppress_hydrogens);
+}
+
+// Deinit entry point for SQL cyclops_substruct_fp; all cleanup is done by
+// the shared _cyclops_substruct_fp_deinit helper.
+void cyclops_substruct_fp_deinit(UDF_INIT *initid)
+{
+  _cyclops_substruct_fp_deinit(initid);
+}
+
+// SQL: cyclops_substruct_qfp(smiles [, aromaticity])
+//    -> hex-encoded cyclops substructure path fingerprint, excluding hydrogens
+//        (meant for use in a substructure query fingerprint)
+// I precompute static fields when possible.
+// Thin wrapper around _cyclops_substruct_fp_init with hydrogen suppression
+// turned on. The first argument is the SQL function name, which the shared
+// initializer uses as the prefix of any error text written to `message`; it
+// names the qfp variant so its errors can be told apart from
+// cyclops_substruct_fp.
+// Returns whatever the shared initializer returns.
+my_bool cyclops_substruct_qfp_init(UDF_INIT *initid, UDF_ARGS *args, char *message) {
+  return _cyclops_substruct_fp_init("cyclops_substruct_qfp", initid, args, message, 1);
+}
+
+// Row function for SQL cyclops_substruct_qfp: delegates to the shared
+// _cyclops_substruct_fp implementation and returns its result unchanged.
+// Hydrogen suppression must be ON here, matching cyclops_substruct_qfp_init
+// (which passes 1). Otherwise a non-constant SMILES argument would be
+// fingerprinted with hydrogens, exactly like cyclops_substruct_fp.
+char *cyclops_substruct_qfp(UDF_INIT *initid, UDF_ARGS *args,
+                            char *result, unsigned long *length,
+                            char *is_null, char *error) {
+  return _cyclops_substruct_fp(initid, args, result, length, is_null, error, 1);
+}
+
+// Deinit entry point for SQL cyclops_substruct_qfp; all cleanup is done by
+// the shared _cyclops_substruct_fp_deinit helper.
+void cyclops_substruct_qfp_deinit(UDF_INIT *initid)
+{
+  _cyclops_substruct_fp_deinit(initid);
+}