1. Daniel Lowe
  2. OPSIN

Commits

Daniel Lowe  committed 3bb5fc9

Added support for "yl" on cyclic carbohydrates

  • Participants
  • Parent commits 74b0b5f
  • Branches default

Comments (0)

Files changed (13)

File opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessor.java

View file
 		//this is of the form comma separated ids with the number of ids corresponding to the number of instances of the suffix
 		Element suffix =OpsinTools.getNextNonChargeSuffix(group);
 		if (suffix ==null){
-			throw new ComponentGenerationException("No suffix where suffix was expected");
+			if (group.getAttributeValue(TYPE_ATR).equals(ACIDSTEM_TYPE_VAL)){
+				throw new ComponentGenerationException("No suffix where suffix was expected");
+			}
 		}
 		else{
 			if (suffixes.size()>1 && group.getAttributeValue(TYPE_ATR).equals(ACIDSTEM_TYPE_VAL)){
 					chargeHydroxyGroups(atomLikelyToBeUsedBySuffix);
 					
 				}
-				else if (suffixRuleTagName.equals(SUFFIXRULES_REMOVEONEDOUBLEBONDEDOXYGEN_EL)){
+				else if (suffixRuleTagName.equals(SUFFIXRULES_REMOVETERMINALOXYGEN_EL)){
 					if (suffixFrag != null){
-						throw new ComponentGenerationException("removeOneDoubleBondedOxygen is not currently compatable with the addGroup suffix rule");
+						throw new ComponentGenerationException("removeTerminalOxygen is not currently compatible with the addGroup suffix rule");
 					}
-					removeOneDoubleBondedOxygen(atomLikelyToBeUsedBySuffix);
+					int bondOrder = Integer.parseInt(suffixRuleTag.getAttributeValue(SUFFIXRULES_ORDER_ATR));
+					removeTerminalOxygen(atomLikelyToBeUsedBySuffix, bondOrder);
 				}
             }
             if (suffixFrag != null) {
 	}
 
 	/**
-	 * Removes a double bonded Oxygen from the atom (an [N+][O-] is treated as N=O)
-	 * An exception is thrown if no double bonded oxygen could be found connected to the atom
+	 * Removes a terminal oxygen from the atom 
+	 * An exception is thrown if no suitable oxygen could be found connected to the atom
+	 * Note that [N+][O-] is treated as N=O
 	 * @param atom
 	 * @throws StructureBuildingException
 	 */
-	private void removeOneDoubleBondedOxygen(Atom atom) throws StructureBuildingException {
+	private void removeTerminalOxygen(Atom atom, int desiredBondOrder) throws StructureBuildingException {
 		//TODO prioritise [N+][O-]
 		List<Atom> neighbours = atom.getAtomNeighbours();
 		for (Atom neighbour : neighbours) {
 			if (neighbour.getElement().equals("O") && neighbour.getAtomNeighbours().size()==1){
 				Bond b = atom.getBondToAtomOrThrow(neighbour);
-				if (b.getOrder()==2 && neighbour.getCharge()==0){
+				if (b.getOrder()==desiredBondOrder && neighbour.getCharge()==0){
 					state.fragManager.removeAtomAndAssociatedBonds(neighbour);
 					if (atom.getLambdaConventionValency()!=null){//corrects valency for phosphin/arsin/stibin
-						atom.setLambdaConventionValency(atom.getLambdaConventionValency()-2);
+						atom.setLambdaConventionValency(atom.getLambdaConventionValency()-desiredBondOrder);
 					}
 					if (atom.getMinimumValency()!=null){//corrects valency for phosphin/arsin/stibin
-						atom.setMinimumValency(atom.getMinimumValency()-2);
+						atom.setMinimumValency(atom.getMinimumValency()-desiredBondOrder);
 					}
 					return;
 				}
-				else if (neighbour.getCharge() ==-1 && b.getOrder()==1){
+				else if (neighbour.getCharge() ==-1 && b.getOrder()==1 && desiredBondOrder == 2){
 					if (atom.getCharge() ==1 && atom.getElement().equals("N")){
 						state.fragManager.removeAtomAndAssociatedBonds(neighbour);
 						atom.neutraliseCharge();
 				}
 			}
 		}
-		throw new StructureBuildingException("Double bonded oxygen not found in fragment. Perhaps a suffix has been used inappropriately");
+		if (desiredBondOrder ==2){
+			throw new StructureBuildingException("Double bonded oxygen not found at suffix attachment position. Perhaps a suffix has been used inappropriately");
+		}
+		else if (desiredBondOrder ==1){
+			throw new StructureBuildingException("Hydroxy oxygen not found at suffix attachment position. Perhaps a suffix has been used inappropriately");
+		}
+		else {
+			throw new StructureBuildingException("Suitable oxygen not found at suffix attachment position. Perhaps a suffix has been used inappropriately");
+		}
+
 	}
 	
 	/**
                     //already processed
                 } else if (suffixRuleTagName.equals(SUFFIXRULES_CHARGEHYDROXYGROUPS_EL)) {
                     //already processed
-                } else if (suffixRuleTagName.equals(SUFFIXRULES_REMOVEONEDOUBLEBONDEDOXYGEN_EL)) {
+                } else if (suffixRuleTagName.equals(SUFFIXRULES_REMOVETERMINALOXYGEN_EL)) {
                     //already processed
                 } else if (suffixRuleTagName.equals(SUFFIXRULES_CONVERTHYDROXYGROUPSTOOUTATOMS_EL)) {
                     //already processed

File opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XmlDeclarations.java

View file
 	static final String SUFFIXRULES_CHANGECHARGE_EL = "changecharge";
 	static final String SUFFIXRULES_ADDFUNCTIONALATOMSTOHYDROXYGROUPS_EL = "addFunctionalAtomsToHydroxyGroups";
 	static final String SUFFIXRULES_CHARGEHYDROXYGROUPS_EL = "chargeHydroxyGroups";
-	static final String SUFFIXRULES_REMOVEONEDOUBLEBONDEDOXYGEN_EL = "removeOneDoubleBondedOxygen";
+	static final String SUFFIXRULES_REMOVETERMINALOXYGEN_EL = "removeTerminalOxygen";
 	static final String SUFFIXRULES_CONVERTHYDROXYGROUPSTOOUTATOMS_EL = "convertHydroxyGroupsToOutAtoms";
 	static final String SUFFIXRULES_CONVERTHYDROXYGROUPSTOPOSITIVECHARGE_EL = "convertHydroxyGroupsToPositiveCharge";
 	static final String SUFFIXRULES_VALUE_ATR = "value";
 	static final String SUFFIXRULES_FUNCTIONALIDS_ATR = "functionalIDs";
 	static final String SUFFIXRULES_OUTIDS_ATR = "outIDs";
 	static final String SUFFIXRULES_KETONELOCANT_ATR = "ketoneLocant";
+	static final String SUFFIXRULES_ORDER_ATR = "order";
 	static final String SUFFIXRULES_OUTVALENCY_ATR = "outValency";
 	static final String SUFFIXRULES_CHARGE_ATR = "charge";
 	static final String SUFFIXRULES_PROTONS_ATR = "protons";

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carbohydrateSuffixes.xml

View file
+<!DOCTYPE tokenLists SYSTEM "tokenLists.dtd">
+<!-- See the DTD for more details -->
+<tokenLists>
+  <!--<tokenList tagname="suffix" type="root" subType="terminal" symbol="">
+    <token value="aldehyde">ose</token>
+    <token value="aldehyde" additionalValue="aldehyde">dialdehyde</token>
+    <token value="ol" additionalValue="ol">itol</token>
+  </tokenList>
+
+  <tokenList tagname="suffix" type="root" symbol="s">
+    <token value="one">ulose</token>
+  </tokenList>
+
+  <tokenList tagname="infix" type="carbohydrateStem" symbol="">
+    <token value="on">on</token>
+    <token value="uloson">uloson</token>might be redundant
+    <token value="uron">uron</token>
+    <token value="ar">ar</token>
+ for use with symbol="å"
+  </tokenList>-->
+
+  <tokenList tagname="suffix" type="inline" symbol="v">
+    <token value="yl">yl</token>
+    <!--<token value="oyl">onoyl</token>-->
+  </tokenList>
+</tokenLists>

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carbohydrates.xml

View file
 OPSIN's code, however, is written such as to not assume that this is the case-->
 	<tokenList tagname="group" type="carbohydrateStem" subType="carbohydrate" symbol="á">
 		<!--aldoses -->
-		<token value="O=C[C@H](O)CO" labels="/1/2//3/" valType="SMILES">glycero</token>
+		<token value="O=C[C@H](O)CO" labels="/1/2//3/" valType="SMILES" suffixAppliesTo="2">glycero</token>
 
-		<token value="O=C[C@H](O)[C@H](O)CO" labels="/1/2//3//4/" valType="SMILES">erythro</token>
-		<token value="O=C[C@@H](O)[C@H](O)CO" labels="/1/2//3//4/" valType="SMILES">threo</token>
+		<token value="O=C[C@H](O)[C@H](O)CO" labels="/1/2//3//4/" valType="SMILES" suffixAppliesTo="2">erythro</token>
+		<token value="O=C[C@@H](O)[C@H](O)CO" labels="/1/2//3//4/" valType="SMILES" suffixAppliesTo="2">threo</token>
 
-		<token value="O=C[C@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES">ribo</token>
-		<token value="O=C[C@@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES">arabino</token>
-		<token value="O=C[C@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES">xylo</token>
-		<token value="O=C[C@@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES">lyxo</token>
+		<token value="O=C[C@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES" suffixAppliesTo="2">ribo</token>
+		<token value="O=C[C@@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES" suffixAppliesTo="2">arabino</token>
+		<token value="O=C[C@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES" suffixAppliesTo="2">xylo</token>
+		<token value="O=C[C@@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES" suffixAppliesTo="2">lyxo</token>
 
-		<token value="O=C[C@H](O)[C@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">allo</token>
-		<token value="O=C[C@@H](O)[C@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">altro</token>
-		<token value="O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">gluco</token>
-		<token value="O=C[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">manno</token>
-		<token value="O=C[C@H](O)[C@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">gulo</token>
-		<token value="O=C[C@@H](O)[C@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">ido</token>
-		<token value="O=C[C@H](O)[C@@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">galacto</token>
-		<token value="O=C[C@@H](O)[C@@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">talo</token>
+		<token value="O=C[C@H](O)[C@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="2">allo</token>
+		<token value="O=C[C@@H](O)[C@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="2">altro</token>
+		<token value="O=C[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="2">gluco</token>
+		<token value="O=C[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="2">manno</token>
+		<token value="O=C[C@H](O)[C@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="2">gulo</token>
+		<token value="O=C[C@@H](O)[C@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="2">ido</token>
+		<token value="O=C[C@H](O)[C@@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="2">galacto</token>
+		<token value="O=C[C@@H](O)[C@@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="2">talo</token>
 
 		<!--ketoses -->
-		<token value="OCC(=O)[C@H](O)CO" labels="/1/2//3//4/" valType="SMILES">erythrulo</token>
+		<token value="OCC(=O)[C@H](O)CO" labels="/1/2//3//4/" valType="SMILES" suffixAppliesTo="3">erythrulo</token>
 
-		<token value="OCC(=O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES">ribulo</token>
-		<token value="OCC(=O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES">xylulo</token>
+		<token value="OCC(=O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES" suffixAppliesTo="3">ribulo</token>
+		<token value="OCC(=O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5/" valType="SMILES" suffixAppliesTo="3">xylulo</token>
 
-		<token value="OCC(=O)[C@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">psico</token>
-		<token value="OCC(=O)[C@@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">fructo</token>
-		<token value="OCC(=O)[C@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">sorbo</token>
-		<token value="OCC(=O)[C@@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES">tagato</token>
+		<token value="OCC(=O)[C@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="3">psico</token>
+		<token value="OCC(=O)[C@@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="3">fructo</token>
+		<token value="OCC(=O)[C@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="3">sorbo</token>
+		<token value="OCC(=O)[C@@H](O)[C@@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6/" valType="SMILES" suffixAppliesTo="3">tagato</token>
 
-		<token value="OCC(=O)[C@@H](O)[C@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6//7/" valType="SMILES">sedoheptulo</token>
-		<token value="OCC(=O)[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6//7/" valType="SMILES">mannoheptulo</token>
+		<token value="OCC(=O)[C@@H](O)[C@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6//7/" valType="SMILES" suffixAppliesTo="3">sedoheptulo</token>
+		<token value="OCC(=O)[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO" labels="/1/2//3//4//5//6//7/" valType="SMILES" suffixAppliesTo="3">mannoheptulo</token>
 	</tokenList>
 
 	<tokenList tagname="carbohydrateRingSize" symbol="ã">
-		<token value="3">oxirose</token>
-		<token value="4">oxetose</token>
-		<token value="5">furanose</token>
-		<token value="6">pyranose</token>
-		<token value="7">septanose</token>
-		<token value="8">octanose</token>
+		<token value="3">oxiros</token>
+		<token value="4">oxetos</token>
+		<token value="5">furanos</token>
+		<token value="6">pyranos</token>
+		<token value="7">septanos</token>
+		<token value="8">octanos</token>
 	</tokenList>
 
 	<!--all possible configurations of stereocentres for carbohydrates with 1-4 stereocentres (l/r for position on a fischer projection)-->

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/index.xml

View file
   <tokenFile>inlineSuffixes.xml</tokenFile>
   <tokenFile>inlineChargeSuffixes.xml</tokenFile>
   <tokenFile>suffixPrefix.xml</tokenFile>
+  <tokenFile>carbohydrateSuffixes.xml</tokenFile>
   <tokenFile>suffixes.xml</tokenFile>
   <tokenFile>unsaturators.xml</tokenFile>
   <tokenFile>miscTokens.xml</tokenFile>

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexes.xml

View file
   <regex name="%suffix%" value="s"/>
   <regex name="%unbrackettedCisTrans%" value="t"/>
   <regex name="%unsaturator%" value="u"/>
-<!--v unused -->
+  <regex name="%carbohydrateInlineSuffix%" value="v"/>
   <regex name="%repeatableInlineSuffix%" value="w"/>
   <regex name="%simpleSubstituent%" value="x"/>
   <regex name="%hydro%" value="y"/>
   <regex name="%conjunctiveSideChainRoot%" value ="(%hyphen%?(%locant%|%alphaBetaStereochemLocant%)?%multiplier%?(%alkaneStem%%hyphen%?(%ane%%suffixGroup%|%ane%?%ylamine%)|%optionalOpenBracket%%alkaneStem%%hyphen%?%ane%?%ylamine%%optionalCloseBracket%|%acidStem%(%infixedSuffix%|%o%?%rootSuffix%)))"/>
   <regex name="%conjunctiveSideChainInline%" value ="(%multiplier%?(%acidStem%(%infixedInlineSuffix%|%inlineSuffix%|%o%%infixableInlineSuffix%)))"/><!--No hyphen/locant to avoid ambiguity with a mixture e.g. benzene-acetylbenzene-->
 
-  <regex name="%cyclicSugar%" value="(%dlStereochemistryPrefix%?%carbohydrateStem%%carbohydrateRingSize%)"/>
+  <regex name="%cyclicSugar%" value="(%dlStereochemistryPrefix%?%carbohydrateStem%%carbohydrateRingSize%%e%?)"/>
+  <regex name="%cycliSugarSubstituent%" value="(%cyclicSugar%%optLocantGroupNoOMP%%carbohydrateInlineSuffix%)"/>
 
   <!--Fused ring terms-->
   <regex name="%benzoComponent%" value ="((%bracketedLocant%|%locant%)%benzo%)"/>
   <regex name="%fusedRingSystemFromTrivialRingSubstituent%" value = "((%simpleComponent%|%multipliedComponent%|%multipliedParent%)+%trivialRingSubstituentSub%)"/>
 
   <!--Complex terms-->
-  <regex name="%ringGroup%" value = "((%semiTrivialFusedRing%|%hantzschWidmanRing%|%annulen%|(%locant%|%bracketedLocant%)?%trivialRing%)%e%?|%vonBaeyerSystem%|%spiroSystem%|%trivialRingSubstituentGroup%|%cycloChain%|%cyclicUnsaturableHydrocarbonGroup%|%naturalProductGroup%|%cyclicSugar%)"/>
+  <regex name="%ringGroup%" value = "((%semiTrivialFusedRing%|%hantzschWidmanRing%|%annulen%|(%locant%|%bracketedLocant%)?%trivialRing%)%e%?|%vonBaeyerSystem%|%spiroSystem%|%trivialRingSubstituentGroup%|%cycloChain%|%cyclicUnsaturableHydrocarbonGroup%|%naturalProductGroup%)"/>
+  <regex name="%preSuffixedRing%" value = "(%cyclicSugar%|%simpleCyclicGroup%)"/>
   <regex name="%irregularRingSubstituentStem%" value="(%vonBaeyerSystemSubstituent%|%cycloChainSubstituent%|%spiroSystemSubstituent%|%cyclicUnsaturableHydrocarbon%%a%?)"/>
-  <regex name="%ringSubstituent%" value = "((%ringGroup%|%irregularRingSubstituentStem%)(%inlineEndings%|%conjunctiveSideChainInline%)|%trivialRingSubstituentSub%|%fusedRingSystemFromTrivialRingSubstituent%)"/>
+  <regex name="%ringSubstituent%" value = "((%ringGroup%|%irregularRingSubstituentStem%)(%inlineEndings%|%conjunctiveSideChainInline%)|%trivialRingSubstituentSub%|%fusedRingSystemFromTrivialRingSubstituent%|%cycliSugarSubstituent%)"/>
 
-  <regex name="%fusedRingSystem%" value = "((%simpleComponent%|%multipliedComponent%|%multipliedParent%)+(%ringGroup%|%simpleCyclicGroup%))"/>
+  <regex name="%fusedRingSystem%" value = "((%simpleComponent%|%multipliedComponent%|%multipliedParent%)+(%ringGroup%|%preSuffixedRing%))"/>
 	
 	<regex name="%polycyclicSpiroOldMethod%" value="(%ringNonDetachableFeatures%(%ringGroup%|%fusedRingSystem%)%inlineChargeGroup%*(%hyphen%?%locant%?%spiroOldMethod%%locant%?(%ring_CanStartWithHydro_NonDetachableFeatures%(%ringGroup%|%fusedRingSystem%)%inlineChargeGroup%*|%optionalOpenBracket%%locant%?%ring_CanStartWithHydro_NonDetachableFeatures%(%ringGroup%|%fusedRingSystem%)%inlineChargeGroup%*%optionalCloseBracket%))+)"/>
 	<regex name="%vonBaeyerForSpiro%" value="(%structuralOpenBracket%%vonBaeyerMultiplier%%vonBaeyer%(%alkaneStem%|%multiplierNotGroup%%heteroStem%)%ane%%structuralCloseBracket%%unsaturation%+)" />
   <regex name="%polycyclicSpiro%" value ="(%simpleMultiplier%?%spiro%%structuralOpenBracket%%polycyclicSpiroContents%%structuralCloseBracket%)"/>
   <regex name="%polycyclicSpiroGroup%" value ="(%polycyclicSpiroIdenticalComponents%|%polycyclicSpiro%|%polycyclicSpiroOldMethod%)" />
 
-  <regex name="%ringAssemblyConjunctive%" value = "(%ring_CanStartWithHydro_NonDetachableFeatures%?((%ringGroup%|%fusedRingSystem%)%rootEnding%|%simpleCyclicGroup%))" />
+  <regex name="%ringAssemblyConjunctive%" value = "(%ring_CanStartWithHydro_NonDetachableFeatures%?((%ringGroup%|%fusedRingSystem%)%rootEnding%|%preSuffixedRing%))" />
   <regex name="%ringAssemblyAdditive%" value = "(%ring_CanStartWithHydro_NonDetachableFeatures%?(%trivialRingSubstituentSub%|%fusedRingSystemFromTrivialRingSubstituent%|%irregularRingSubstituentStem%%optLocantGroupNoOMP%%repeatableInlineSuffix%|(%ringGroup%|%fusedRingSystem%)%inlineChargeGroup%*%optLocantGroupNoOMP%%repeatableInlineSuffix%))" />
   <regex name="%ringAssemblyRing%" value = "(%ringAssemblyConjunctive%|%ringAssemblyAdditive%)" />
   <regex name="%locantForRingAssembly%" value = "(%colonSeperatedLocant%|%newLocantGroupNoStartingHyphen%|%optionalOpenBracket%(%colonSeperatedLocant%|%locant%)%optionalCloseBracket%%hyphen%?)" />
   <regex name="%amineMeaningNitriloSubstituent%" value = "(%openBracket%*%multiplier%%openBracket%*%amineMeaningNitrilo%%closeBracket%*%interSubstituentHyphen%?)"/><!--needed to avoid "amine" being parsed as a substituent-->
   <regex name="%substituent%" value= "((%locantOpenBracket%*%newLocantGroupNoStartingHyphen%?%stereochemistry%*(%relativeCisTrans%|%newLocantGroupNoStartingHyphen%)?(%multiplier%|%groupMultiplier%%hyphen%?%simpleMultiplier%)?(%subsituentBody%%closeBracket%*%interSubstituentHyphen%?|%hydroGroup%)|%amineMeaningNitriloSubstituent%)%endOfSubstituent%)"/>
 
-  <regex name="%allRingGroups%" value = "((%ringGroup%|%ringAssembly%|%structuralOpenBracket%%ringAssembly%%structuralCloseBracket%|%fusedRingSystem%|%polycyclicSpiroGroup%)(%rootEnding%|%conjunctiveSideChainRoot%)|%simpleCyclicGroup%)"/>
+  <regex name="%allRingGroups%" value = "((%ringGroup%|%ringAssembly%|%structuralOpenBracket%%ringAssembly%%structuralCloseBracket%|%fusedRingSystem%|%polycyclicSpiroGroup%)(%rootEnding%|%conjunctiveSideChainRoot%)|%preSuffixedRing%)"/>
   <regex name="%allOtherSuffixableGroups%" value = "((%groupStemAllowingAllSuffixes%%e%?|%chainGroup%)%rootEnding%|%acidGroup%|%groupStemAllowingInlineSuffixes%%e%?%inlineChargeGroup%*)"/>
   <regex name="%mainGroupBody%" value="(%ringNonDetachableFeatures%%allRingGroups%|%acyclicNonDetachableFeatures%(%allOtherSuffixableGroups%|%simpleGroups%|%elementaryAtom%%chargeOrOxidationNumberSpecifier%?))"/>
   <regex name="%mainGroupHead%" value="(%openBracket%?%multiplier%?%openBracket%?%stereochemistry%*(%newLocantGroupNoStartingHyphen%%mono%?)?)" />

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalRegexHash.txt

View file
-1479514677
+-1835332910

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalSerialisedAutomaton.aut

Binary file modified.

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_RegexHash.txt

View file
-1479514677
+-1835332910

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_SerialisedAutomaton.aut

Binary file modified.

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixApplicability.xml

View file
     <suffix value="aminoAcidYl">yl</suffix><!--locanted yl means something different to unlocanted yl!-->
   </groupType>
 
+  <groupType type="carbohydrateStem"><!--e.g. glucopyranose to glucopyranosyl-->
+    <suffix value="yl">yl_carbohydrate</suffix>
+    <!--<suffix value="oyl">oyl</suffix>-->
+  </groupType>
+
   <groupType type="chalcogenAcidStem"><!--e.g. sulfon to sulfonyl -->
     <suffix value="acylium">ylium</suffix>
     <suffix value="amide">amine</suffix>

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixRules.dtd

View file
 <!ELEMENT suffixRulesList (rule)+>
 
-<!ELEMENT rule (addgroup|addSuffixPrefixIfNonePresentAndCyclic|setOutAtom|changecharge|addFunctionalAtomsToHydroxyGroups|chargeHydroxyGroups|removeOneDoubleBondedOxygen|convertHydroxyGroupsToOutAtoms|convertHydroxyGroupsToPositiveCharge)+>
+<!ELEMENT rule (addgroup|addSuffixPrefixIfNonePresentAndCyclic|setOutAtom|changecharge|addFunctionalAtomsToHydroxyGroups|chargeHydroxyGroups|removeTerminalOxygen|convertHydroxyGroupsToOutAtoms|convertHydroxyGroupsToPositiveCharge)+>
 <!ATTLIST rule value CDATA #REQUIRED
                subType CDATA #IMPLIED>
 
 <!ATTLIST changecharge charge CDATA #REQUIRED protons CDATA #REQUIRED>
 <!ELEMENT addFunctionalAtomsToHydroxyGroups EMPTY>
 <!ELEMENT chargeHydroxyGroups EMPTY>
-<!ELEMENT removeOneDoubleBondedOxygen EMPTY>
+<!ELEMENT removeTerminalOxygen EMPTY>
+<!ATTLIST removeTerminalOxygen order CDATA #REQUIRED>
 <!ELEMENT convertHydroxyGroupsToOutAtoms EMPTY>
 <!ELEMENT convertHydroxyGroupsToPositiveCharge EMPTY>

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixRules.xml

View file
     <addgroup SMILES="[*][O-]" functionalIDs="2" />
   </rule>
   <rule value="ite_nonCarboxylic">
-    <removeOneDoubleBondedOxygen />
+    <removeTerminalOxygen order="2" />
     <addFunctionalAtomsToHydroxyGroups />
     <chargeHydroxyGroups />
   </rule>
     <addgroup SMILES="[*]O" functionalIDs="2"/>
   </rule>
   <rule value="ous_nonCarboxylic">
-    <removeOneDoubleBondedOxygen />
+    <removeTerminalOxygen order="2" />
     <addFunctionalAtomsToHydroxyGroups/>
   </rule>
   <rule value="oxoAndDiYl">
   <rule value="yl">
     <setOutAtom />
   </rule>
+  <rule value="yl_carbohydrate">
+    <removeTerminalOxygen order="1" />
+    <setOutAtom />
+  </rule>
   <rule value="diyl">
     <setOutAtom />
     <setOutAtom />