Daniel Lowe committed 19aec7b

Added support for disaccharides
Added ulose suffix

Comments (0)

Files changed (8)

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carbohydrateSuffixes.xml

 <!DOCTYPE tokenLists SYSTEM "tokenLists.dtd">
 <!-- See the DTD for more details -->
 <tokenLists>
-  <!--<tokenList tagname="suffix" type="root" subType="terminal" symbol="">
-    <token value="aldehyde">ose</token>
+  <tokenList tagname="suffix" type="root" subType="terminal" symbol="ê"><!--symbol ê is named %carbohydrateUnlocantedRootSuffix% in regexes.xml; the aldehyde/itol tokens below stay commented out-->
+		<token value="glycoside">ide</token>
+		<token value="glycoside">id</token>
+   <!-- <token value="aldehyde">ose</token>
     <token value="aldehyde" additionalValue="aldehyde">dialdehyde</token>
-    <token value="ol" additionalValue="ol">itol</token>
+    <token value="ol" additionalValue="ol">itol</token>-->
   </tokenList>
 
-  <tokenList tagname="suffix" type="root" symbol="s">
-    <token value="one">ulose</token>
+  <tokenList tagname="suffix" type="root" symbol="ë"><!--symbol ë is named %carbohydrateLocantedRootSuffix% in regexes.xml-->
+    <token value="ulose">ulose</token><!--the "ulose" value is mapped to the "one" suffix for carbohydrateStem groups in suffixApplicability.xml-->
   </tokenList>
 
-  <tokenList tagname="infix" type="carbohydrateStem" symbol="">
+  <!--<tokenList tagname="infix" type="carbohydrateStem" symbol="">
     <token value="on">on</token>
     <token value="uloson">uloson</token>might be redundant
     <token value="uron">uron</token>

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexes.xml

 	<regex name="%functionalSuffixBeforeInlineSuffix%" value="ç"/>
 	<regex name="%cycleFormingSuffix%" value="è"/>
   <regex name="%endOfSubstituent%" value="é"/>
-  <!--next would be Alt 0234-->
+	<regex name="%carbohydrateUnlocantedRootSuffix%" value="ê"/><!--symbol of the carbohydrate suffix tokenList holding ide/id (glycoside)-->
+  <regex name="%carbohydrateLocantedRootSuffix%" value="ë"/><!--symbol of the carbohydrate suffix tokenList holding ulose-->
+  <!--next would be Alt 0236-->
   <regex name="%endOfFunctionalGroup%" value="û"/>
 
 <!-- composite regexes-->
   <regex name="%conjunctiveSideChainRoot%" value ="(%hyphen%?(%locant%|%alphaBetaStereochemLocant%)?%multiplier%?(%alkaneStem%%hyphen%?(%ane%%suffixGroup%|%ane%?%ylamine%)|%optionalOpenBracket%%alkaneStem%%hyphen%?%ane%?%ylamine%%optionalCloseBracket%|%acidStem%(%infixedSuffix%|%o%?%rootSuffix%)))"/>
   <regex name="%conjunctiveSideChainInline%" value ="(%multiplier%?(%acidStem%(%infixedInlineSuffix%|%inlineSuffix%|%o%%infixableInlineSuffix%)))"/><!--No hyphen/locant to avoid ambiguity with a mixture e.g. benzene-acetylbenzene-->
 
-  <regex name="%cyclicSugar%" value="(%dlStereochemistryPrefix%?%carbohydrateStem%%carbohydrateRingSize%%e%?)"/>
+  <regex name="%cyclicSugar%" value="(%dlStereochemistryPrefix%?%carbohydrateStem%%hyphen%?%carbohydrateRingSize%%e%?(%hyphen%?%carbohydrateUnlocantedRootSuffix%)?(%optLocantGroupNoOMP%%carbohydrateLocantedRootSuffix%)?)"/><!--stem and ring size may now be separated by a hyphen; may be followed by an unlocanted root suffix and/or an %optLocantGroupNoOMP% match plus a locanted root suffix-->
   <regex name="%cyclicSugarSubstituent%" value="(%cyclicSugar%%hyphen%?(%carbohydrateInlineSuffix%|%locant%%locantedAminoAcidOrCarbohydrateYl%))"/>
 
   <!--Fused ring terms-->

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalRegexHash.txt

-601277201
+-549724911
Add a comment to this file

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalSerialisedAutomaton.aut

Binary file modified.

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_RegexHash.txt

-601277201
+-549724911
Add a comment to this file

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_SerialisedAutomaton.aut

Binary file modified.

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixApplicability.xml

   </groupType>
 
   <groupType type="carbohydrateStem"><!--e.g. D-Glucose to D-Glucuronic acid-->
+		<suffix value="ulose">one</suffix>
+		<suffix value="glycoside">ic_nonCarboxylic</suffix>
     <suffix value="yl">yl_carbohydrate</suffix>
     <suffix value="locantedAminoAcidOrCarbohydrateYl">yl</suffix><!--locanted yl means something different to unlocanted yl!-->
     <!--<suffix value="oyl">oyl</suffix>-->

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/wordRules.xml

 
   <wordRule name="ester" type="full">
     <word type="substituent" />
-    <word type="full" endsWithRegex="\S(at[e]?|amid[e]?|it[e]?)[\]\)\}]*"/>
+    <word type="full" endsWithRegex="\S(at[e]?|amid[e]?|it[e]?|os[e]?id[e]?)[\]\)\}]*"/><!--os[e]?id[e]? covers glycoside endings with or without the terminal e (both ide and id are suffix tokens)-->
   </wordRule>
 
   <wordRule name="amide" type="full">
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.