Commits

Daniel Lowe  committed 77753c0

The distinction between locanted and unlocanted yl on carbohydrates is now supported
Disallowed ortho/meta/para in substituents like prop-2-yl, e.g. prop-m-yl is unparseable

  • Participants
  • Parent commits 1e419d0

Comments (0)

Files changed (7)

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/inlineSuffixes.xml

     <token value="oyl">oyl</token>
   </tokenList>
 
-  <tokenList tagname="suffix" type="inline" symbol="»"><!--used for amino acids-->
-    <token value="aminoAcidYl">yl</token>
+  <tokenList tagname="suffix" type="inline" symbol="»"><!--used for amino acids and carbohydrates-->
+    <token value="locantedAminoAcidOrCarbohydrateYl">yl</token>
   </tokenList>
 
   <tokenList tagname="suffix" type="inline" symbol="×"><!--ylene-->

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexes.xml

   <regex name="%anhydrideFunctionalGroup%" value="¸"/>
   <regex name="%alphaBetaStereochemLocant%" value="¹"/>
   <regex name="%implicitIc%" value="º"/>
-  <regex name="%aminoAcidYl%" value="»"/>
+  <regex name="%locantedAminoAcidOrCarbohydrateYl%" value="»"/>
   <regex name="%oMeaningYl%" value="¼"/>
   <regex name="%nonCarbonAcidNoAcyl%" value="½"/>
   <regex name="%simpleCyclicGroup%" value="¾"/>
   <regex name="%stereochemistry%" value="(%stereochemistryBracket%|%locant%?%unbrackettedCisTrans%(%comma%%locant%?%unbrackettedCisTrans%)*%hyphen%?|%locant%?%unbrackettedEZ%(%comma%%locant%?%unbrackettedEZ%)*%hyphen%|%lightRotation%|%alphaBetaStereochemLocant%)"/>
   <regex name="%optLocantGroupNoOMP%" value="(%hyphen%?%locant%?)"/>
   <regex name="%optLocantGroup%" value="(%hyphen%?(%locant%|%orthoMetaPara%)?)"/>
-  <regex name="%locantGroup%" value="(%hyphen%?(%locant%|%orthoMetaPara%))"/>
   <regex name="%newLocantGroupNoStartingHyphen%" value="(%locant%|%orthoMetaPara%)"/><!--Substituents/mainGroups may not start with a hyphen (otherwise it is ambiguous as to where to break up the name so this is used for prefixes-->
   <regex name="%bracketedLocant%" value="(%optionalOpenBracket%%locantThatNeedsBrackets%%optionalCloseBracket%%hyphen%?)"/>
   <regex name="%infixReplacement%" value ="(%multiplier%?%replacementInfix%(%o%?%multiplier%?%replacementInfix%)*%o%?)"/>
   <regex name="%chainGroup%" value = "(%heteroChain%|%alternatingHeteroChain%|%alkaneStemModifier%?%alkaneStem%%a%?%unsaturationBlock%)"/>
   <regex name="%nitrogenChainSubstituent%" value = "(((%multiplierNotGroup%%nitrogenHeteroStem%%unsaturationBlock%)|(%nitrogenHeteroStem%%hyphen%?%ane%))%oMeaningYl%)"/>
   <regex name="%alkYlene%" value = "((%alkaneStemSystematic%%unsaturationBlock%?|%alkaneStemTrivial%%unsaturationBlock%)%optLocantGroupNoOMP%%ylene%)"/>
-  <regex name="%chainSubstituent%" value = "(%chainGroup%%inlineEndings%|%alkaneStemModifier%?%alkaneStem%(%optLocantGroup%|%a%%locantGroup%)%unlocantedInlineSuffixGroup%%repeatableInlineSuffixGroup%*|%nitrogenChainSubstituent%|%alkYlene%)"/>
+  <regex name="%chainSubstituent%" value = "(%chainGroup%%inlineEndings%|%alkaneStemModifier%?%alkaneStem%(%optLocantGroupNoOMP%|%a%%hyphen%?%locant%)%unlocantedInlineSuffixGroup%%repeatableInlineSuffixGroup%*|%nitrogenChainSubstituent%|%alkYlene%)"/>
 
   <regex name="%aminoMonoAcid%" value="(%dlStereochemistryPrefix%?((%aminoAcidEndsInIne%|%aminoAcidEndsInCompulsoryIne%)%ine%|%aminoAcidEndsInAn%%ane%))"/>
   <regex name="%aminoAcidSubstituentStem%" value="(%dlStereochemistryPrefix%?(%aminoAcidEndsInIne%%ine%?|%aminoAcidEndsInCompulsoryIne%%ine%|%aminoAcidEndsInAn%%ane%?|%aminoAcidEndsInIc%))"/>
   <regex name="%aminoAcidGroup%" value="(%aminoMonoAcid%%implicitIc%%inlineChargeSuffix%?|(%aminoMonoAcid%|%dlStereochemistryPrefix%?%aminoAcidEndsInIc%)(%infixedSuffix%|%rootSuffix%))"/>
 
-  <regex name="%aminoAcidLocantedYl%" value ="((%aminoMonoAcid%%implicitIc%|%dlStereochemistryPrefix%?%aminoAcidEndsInIc%%rootSuffix%)%hyphen%?%locant%%aminoAcidYl%)"/>
+  <regex name="%aminoAcidLocantedYl%" value ="((%aminoMonoAcid%%implicitIc%|%dlStereochemistryPrefix%?%aminoAcidEndsInIc%%rootSuffix%)%hyphen%?%locant%%locantedAminoAcidOrCarbohydrateYl%)"/>
   <regex name="%aminoAcidRadicalOnN%" value ="((%aminoMonoAcid%|%dlStereochemistryPrefix%?%aminoAcidEndsInIc%)%implicitIc%%oMeaningYl%)"/>
   <regex name="%aminoAcidSubstituent%" value="(%aminoAcidLocantedYl%|%aminoAcidRadicalOnN%|%aminoAcidSubstituentStem%(%infixedInlineSuffix%|%inlineSuffix%))"/>
 
   <regex name="%conjunctiveSideChainInline%" value ="(%multiplier%?(%acidStem%(%infixedInlineSuffix%|%inlineSuffix%|%o%%infixableInlineSuffix%)))"/><!--No hyphen/locant to avoid ambiguity with a mixture e.g. benzene-acetylbenzene-->
 
   <regex name="%cyclicSugar%" value="(%dlStereochemistryPrefix%?%carbohydrateStem%%carbohydrateRingSize%%e%?)"/>
-  <regex name="%cycliSugarSubstituent%" value="(%cyclicSugar%%optLocantGroupNoOMP%%carbohydrateInlineSuffix%)"/>
+  <regex name="%cyclicSugarSubstituent%" value="(%cyclicSugar%%hyphen%?(%carbohydrateInlineSuffix%|%locant%%locantedAminoAcidOrCarbohydrateYl%))"/>
 
   <!--Fused ring terms-->
   <regex name="%benzoComponent%" value ="((%bracketedLocant%|%locant%)%benzo%)"/>
   <regex name="%ringGroup%" value = "((%semiTrivialFusedRing%|%hantzschWidmanRing%|%annulen%|(%locant%|%bracketedLocant%)?%trivialRing%)%e%?|%vonBaeyerSystem%|%spiroSystem%|%trivialRingSubstituentGroup%|%cycloChain%|%cyclicUnsaturableHydrocarbonGroup%|%naturalProductGroup%)"/>
   <regex name="%preSuffixedRing%" value = "(%cyclicSugar%|%simpleCyclicGroup%)"/>
   <regex name="%irregularRingSubstituentStem%" value="(%vonBaeyerSystemSubstituent%|%cycloChainSubstituent%|%spiroSystemSubstituent%|%cyclicUnsaturableHydrocarbon%%a%?)"/>
-  <regex name="%ringSubstituent%" value = "((%ringGroup%|%irregularRingSubstituentStem%)(%inlineEndings%|%conjunctiveSideChainInline%)|%trivialRingSubstituentSub%|%fusedRingSystemFromTrivialRingSubstituent%|%cycliSugarSubstituent%)"/>
+  <regex name="%ringSubstituent%" value = "((%ringGroup%|%irregularRingSubstituentStem%)(%inlineEndings%|%conjunctiveSideChainInline%)|%trivialRingSubstituentSub%|%fusedRingSystemFromTrivialRingSubstituent%|%cyclicSugarSubstituent%)"/>
 
   <regex name="%fusedRingSystem%" value = "((%simpleComponent%|%multipliedComponent%|%multipliedParent%)+(%ringGroup%|%preSuffixedRing%))"/>
 	

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalRegexHash.txt

--1835332910
+601277201

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalSerialisedAutomaton.aut

Binary file modified.

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_RegexHash.txt

--1835332910
+601277201

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_SerialisedAutomaton.aut

Binary file modified.

File opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixApplicability.xml

     <suffix value="oyl">oyl</suffix>
     <suffix value="yl">oyl</suffix>
     <suffix value="oMeaningYl">yl</suffix><!--e.g. glycino-->
-    <suffix value="aminoAcidYl">yl</suffix><!--locanted yl means something different to unlocanted yl!-->
+    <suffix value="locantedAminoAcidOrCarbohydrateYl">yl</suffix><!--locanted yl means something different to unlocanted yl!-->
   </groupType>
 
   <groupType type="carbohydrateStem"><!--e.g. D-Glucose to D-Glucuronic acid-->
     <suffix value="yl">yl_carbohydrate</suffix>
+    <suffix value="locantedAminoAcidOrCarbohydrateYl">yl</suffix><!--locanted yl means something different to unlocanted yl!-->
     <!--<suffix value="oyl">oyl</suffix>-->
   </groupType>