Commits

Daniel Lowe committed 74b0b5f

Added sn-glycero and sn-glycero-3-phospho
Improved support for glycerol phosphates
phospho now preferentially substitutes onto hydroxy groups, which corrects some interpretations, e.g. phosphoethanolamine
Corrected interpretation of phosphoglycolic acid

  • Participants
  • Parent commits 53576c8

Comments (0)

Files changed (9)

opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerator.java

 			Element nextSubstituent = (Element) XOMTools.getNextSibling(substituent);
 			if (nextSubstituent !=null){
 				Element nextGroup = nextSubstituent.getFirstChildElement(GROUP_EL);
-				if (nextGroup !=null && (nextGroup.getAttributeValue(TYPE_ATR).equals(AMINOACID_TYPE_VAL)||BIOCHEMICAL_SUBTYPE_VAL.equals(nextGroup.getAttributeValue(SUBTYPE_ATR)) ||CARBOHYDRATE_SUBTYPE_VAL.equals(nextGroup.getAttributeValue(SUBTYPE_ATR)))){
+				String subType = nextGroup.getAttributeValue(SUBTYPE_ATR);
+				if (nextGroup !=null && (nextGroup.getAttributeValue(TYPE_ATR).equals(AMINOACID_TYPE_VAL) || 
+						BIOCHEMICAL_SUBTYPE_VAL.equals(subType) || CARBOHYDRATE_SUBTYPE_VAL.equals(subType) ||
+						(YLFORACYL_SUBTYPE_VAL.equals(subType) &&
+						("glycol".equals(nextGroup.getValue()) || "diglycol".equals(nextGroup.getValue())))
+						)){
 					group.getAttribute(VALUE_ATR).setValue("-P(=O)(O)O");
 					group.addAttribute(new Attribute(USABLEASJOINER_ATR, "yes"));
 				}

opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentTools.java

 		}
 		return true;
 	}
+	
+	/**
+	 * Searches a fragment for the oxygen of a hydroxy group attached to carbon.
+	 * <p>
+	 * Atoms are examined in the order given by the fragment's atom list. A candidate is an
+	 * oxygen with exactly one bond, that bond being of order 1. The candidate is only accepted
+	 * when its neighbour is a carbon; oxygens attached to any other element (e.g. the
+	 * nitrogen of an oxime) are skipped. The candidate is also skipped when either of the first
+	 * two other neighbours of that carbon is an oxygen double bonded to it (e.g. the
+	 * hydroxy of a carboxylic acid).
+	 *
+	 * @param biochemicalFragment the fragment whose atoms are searched
+	 * @return the first oxygen atom satisfying the above, or null if there is none
+	 * @throws StructureBuildingException declared because of the bond lookup between the carbon
+	 *         and its neighbours (presumably raised by getBondToAtomOrThrow)
+	 */
+	static Atom findHydroxyGroup(Fragment biochemicalFragment) throws StructureBuildingException {
+		for (Atom candidate : biochemicalFragment.getAtomList()) {
+			boolean singlyBondedOxygen = candidate.getElement().equals("O")
+					&& candidate.getBonds().size() == 1
+					&& candidate.getFirstBond().getOrder() == 1;
+			if (!singlyBondedOxygen) {
+				continue;
+			}
+			Atom carrier = candidate.getAtomNeighbours().get(0);
+			List<Atom> otherNeighbours = carrier.getAtomNeighbours();
+			if (!carrier.getElement().equals("C")) {
+				continue;
+			}
+			otherNeighbours.remove(candidate);
+			//only the first two remaining neighbours are inspected for a double bonded oxygen
+			int toInspect = Math.min(otherNeighbours.size(), 2);
+			boolean hasDoubleBondedOxygen = false;
+			for (int i = 0; i < toInspect && !hasDoubleBondedOxygen; i++) {
+				Atom other = otherNeighbours.get(i);
+				hasDoubleBondedOxygen = other.getElement().equals("O") && carrier.getBondToAtomOrThrow(other).getOrder() == 2;
+			}
+			if (!hasDoubleBondedOxygen) {
+				return candidate;
+			}
+		}
+		return null;
+	}
+ }
 }

opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuilder.java

 			else{
 				atomOnBiochemicalFragment = biochemicalFragment.getAtomByLocant("O5'");//take a guess at it being 5' ;-)
 				if (atomOnBiochemicalFragment==null){
-					List<Atom> atoms = biochemicalFragment.getAtomList();
-					for (Atom atom : atoms) {
-						if (atom.getElement().equals("O") && atom.getBonds().size()==1  && atom.getFirstBond().getOrder()==1){
-							Atom adjacentAtom = atom.getAtomNeighbours().get(0);
-							List<Atom> neighbours = adjacentAtom.getAtomNeighbours();
-							if (adjacentAtom.getElement().equals("C") && neighbours.size()==3){
-								neighbours.remove(atom);
-								if (neighbours.get(0).getElement().equals("O") && adjacentAtom.getBondToAtomOrThrow(neighbours.get(0)).getOrder()==2){
-									continue;
-								}
-								if (neighbours.get(1).getElement().equals("O") && adjacentAtom.getBondToAtomOrThrow(neighbours.get(1)).getOrder()==2){
-									continue;
-								}
-							}
-							atomOnBiochemicalFragment= atom;//find a hydroxy - not a carboxylic acid
-						}
-					}
+					atomOnBiochemicalFragment = FragmentTools.findHydroxyGroup(biochemicalFragment);
 				}
 			}
 			String element = atomOnBiochemicalFragment !=null ? atomOnBiochemicalFragment.getElement() : null;

opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingMethods.java

 					performPerHalogenoSubstitution(state, frag, subBracketOrRoot);
 				}
 				else{
-					Atom atomToJoinTo = findAtomForSubstitution(state, subBracketOrRoot, frag.getOutAtom(0).getValency());
+					Atom atomToJoinTo = null;
+					if (PHOSPHO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && frag.getOutAtom(0).getValency() == 1){
+						List<Fragment> possibleParents =findAlternativeFragments(state, subBracketOrRoot);
+						for (Fragment fragment : possibleParents) {
+							atomToJoinTo = FragmentTools.findHydroxyGroup(fragment);
+							break;
+						}
+					}
+					if (atomToJoinTo ==null) {
+					  atomToJoinTo = findAtomForSubstitution(state, subBracketOrRoot, frag.getOutAtom(0).getValency());
+					}
 					if (atomToJoinTo ==null){
 						throw new StructureBuildingException("Unlocanted substitution failed: unable to find suitable atom to bond atom with id:" + frag.getOutAtom(0).getAtom().getID() + " to!");
 					}
 	}
 
 	private static Atom findAtomForSubstitution(BuildState state, Element subOrBracket, int bondOrder)  {
-		//case where you should actually be substituting onto the previous element e.g. 5-(4-methylphenylcarbonyl)pentane
 		Atom to =null;
 		List<Fragment> possibleParents =findAlternativeFragments(state, subOrBracket);
 		for (Fragment fragment : possibleParents) {

opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XmlDeclarations.java

 	/**oxido/sulfido/selenido/tellurido*/
 	static final String OXIDOLIKE_SUBTYPE_VAL ="oxidoLike";
 	static final String PERHALOGENO_SUBTYPE_VAL ="perhalogeno";
+	static final String PHOSPHO_SUBTYPE_VAL ="phospho";
 	static final String SIMPLESUBSTITUENT_SUBTYPE_VAL ="simpleSubstituent";
 	static final String SUBSTITUENT_SUBTYPE_VAL ="substituent";
 	static final String TERMINAL_SUBTYPE_VAL ="terminal";

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/groupStemsAllowingInlineSuffixes.xml

 <!-- See the DTD for more details -->
 <!--These are groups that can be followed by charge modifying suffixes and should be terminated by an e or an inline suffix if used as substituent -->
 <tokenLists>
-	<tokenList tagname="group" type="substituent" subType="groupStem" symbol="g">
+	<tokenList tagname="group" type="simpleGroup" subType="groupStem" symbol="g">
 		<token value="C=C=C" valType="SMILES" usableAsAJoiner="yes">allen</token>
 		<token value="N=NC=NN" labels="1/2/3/4/5" valType="SMILES" suffixAppliesToByDefault="3">formazan</token>
-		<token value="OCC(O)CO" labels="1///2//3" valType="SMILES">glycerol</token>
-		<token value="OCC(O)CO" labels="1///2//3" valType="SMILES">glycerin</token>
 		<token value="[NH2+]=[N-]" labels="none" valType="SMILES">isodiazen</token>
 		<token value="C=C=O" labels="none" valType="SMILES">keten</token>
 		<token value="NC(=O)OCC" labels="none" valType="SMILES">urethan</token>
 		<token value="CCCCCCCCCCCCCCCC(=O)CCCCCCCCCCCCCCC" labels="1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16//17/18/19/20/21/22/23/24/25/26/27/28/29/30/31" defaultInLocant="31" valType="SMILES" usableAsAJoiner="yes">palmiton</token>
 		<token value="CCCCCCCCCCCCCCCCCC(=O)CCCCCCCCCCCCCCCCC" labels="1/2/3/4/5/6/7/8/9/10/11/12/13/14/15/16/17/18//19/20/21/22/23/24/25/26/27/28/29/30/31/32/33/34/35" defaultInLocant="35" valType="SMILES" usableAsAJoiner="yes">stearon</token>
 	</tokenList>
-	<tokenList tagname="group" type="substituent" subType="groupStem" symbol="g">
+	<tokenList tagname="group" type="simpleGroup" subType="biochemical" symbol="g">
 		<token value="N[C@H](C(=O)O)CCC(=O)N[C@@H](CS)C(=O)NCC(=O)O" labels="none" valType="SMILES">glutathion</token>
+		<token value="OCC(O)CO" labels="1///2//3" valType="SMILES">glycerol</token>
+		<token value="OCC(O)CO" labels="1///2//3" valType="SMILES">glycerin</token>
+		<token value="OC[C@@H](O)CO" labels="1///2//3" valType="SMILES">sn-glycerol</token>
 		<token value="NC(=N)N" labels="1,N//2,N''/3,N'" valType="SMILES">guanidin</token>
 		<token value="S1(=O)(=O)NC(=O)c2ccccc12" labels="1///2/3//3a/4/5/6/7/7a" valType="SMILES">saccharin</token>
 	</tokenList>

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleSubstituents.xml

 		<token value="c1(ccccc1)C(=O)Cc2ccccc2" labels="1/2/3/4/5/6///alpha/1'/2'/3'/4'/5'/6'" valType="SMILES" outIDs="9">desyl</token>
 		<token value="-[2H]" valType="SMILES">deuterio</token>
 		<token value="-[2H]" valType="SMILES">deutero</token>
-		<token value="-P(=O)(O)OP(=O)(O)O" labels="none" valType="SMILES">diphospho</token>
 		<token value="-[N+]#N" labels="none" valType="SMILES">diazonio</token>
 		<token value="-c1c(C)c(C)cc(C)c1C" labels="none" valType="SMILES">duryl</token>
 		<token value="-c1c(OC)cc(CC=C)cc1" labels="none" valType="SMILES">eugenyl</token>
 		<token value="-c1cc(C)ccc1C(C)C" labels="none" valType="SMILES">thymyl</token>
 		<token value="c12ccccc1.C2" labels="1/2,ortho/3,meta/4,para/5/6/alpha" valType="SMILES" addGroup="N locant4" outIDs="8" frontLocantsExpected="2,ortho,3,meta,4,para,5,6">toluidino</token>
 		<token value="-S(=O)(=O)c1ccc(C)cc1" labels="///1/2,ortho/3,meta/4,para//5/6" valType="SMILES">tosyl</token>
-		<token value="-P(=O)(O)OP(=O)(O)OP(=O)(O)O" labels="none" valType="SMILES">triphospho</token>
 		<token value="-[3H]" valType="SMILES">tritio</token>
 		<token value="-C(c1ccccc1)(c1ccccc1)c1ccccc1" labels="/1/2/3/4/5/6/1'/2'/3'/4'/5'/6'/1''/2''/3''/4''/5''/6''" valType="SMILES">trityl</token>
 		<token value="-NC(=O)N" labels="1/2//3" valType="SMILES" usableAsAJoiner="yes" defaultInLocant="3">ureido</token>
 		<token value="=[Se]" labels="none" valType="SMILES">selenoxo</token>
 		<token value="=[Te]" labels="none" valType="SMILES">telluroxo</token>
 
-		<token value="-[P|5](=O)(O)O" labels="none" valType="SMILES" usableAsAJoiner="yes">phosphono</token>
-		<token value="-[P|5](=O)([O-])[O-]" labels="none" valType="SMILES">phosphonato</token>
-		<token value="-[P|5](=O)=O" labels="none" valType="SMILES">phospho</token><!-- NOTE that this is P(=O)(O)O in biochemical nomenclature. This ambiguity is resolved in the ComponentGenerator-->
 		<token value="-[P|5]=N" labels="none" valType="SMILES">phosphinimyl</token>
 		<token value="-P=O" labels="none" valType="SMILES">phosphoroso</token>
 		<token value="-P" labels="none" valType="SMILES" usableAsAJoiner="yes">phosphino</token>
 		<token value="-[Zr]" labels="none" valType="SMILES">zirconio</token>
 	</tokenList>
 
+	<tokenList tagname="group" type="substituent" subType="phospho" symbol="x">
+		<token value="-P(=O)(O)OP(=O)(O)O" labels="none" valType="SMILES">diphospho</token>
+		<token value="-[P|5](=O)(O)O" labels="none" valType="SMILES" usableAsAJoiner="yes">phosphono</token>
+		<token value="-[P|5](=O)([O-])[O-]" labels="none" valType="SMILES">phosphonato</token>
+		<token value="-[P|5](=O)=O" labels="none" valType="SMILES">phospho</token><!-- NOTE that this is P(=O)(O)O in biochemical nomenclature. This ambiguity is resolved in the ComponentGenerator-->
+		<token value="OC[C@@H](O)COP(=O)O" labels="1///2//3///" valType="SMILES" outIDs="7">sn-glycero-3-phospho</token>
+		<token value="-P(=O)(O)OP(=O)(O)OP(=O)(O)O" labels="none" valType="SMILES">triphospho</token>
+	</tokenList>
+
 	<tokenList tagname="group" type="substituent" subType="biochemical" symbol="x">
 		<token value="-NC(=N)N" labels="1//2/3" valType="SMILES">guanidino</token>
 		<token value="-S(=O)(=O)CCN" labels="none" valType="SMILES">tauryl</token>

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenList.dtd

 <!ELEMENT tokenList (token)+>
 <!ATTLIST tokenList tagname (alkaneStemComponent|alkaneStemModifier|bridgeFormingO|carbohydrateRingSize|chargeSpecifier|closebracket|cyclo|dlStereochemistry|functionalClass|functionalGroup|functionalModifier|fusedRingBridge|group|heteroatom|hydro|hyphen|ine|infix|hydrocarbonFusedRingSystem|multiplier|NA|openbracket|oxidationNumberSpecifier|polyCyclicSpiro|ringAssemblyMultiplier|stereoChemistry|structuralCloseBracket|structuralOpenBracket|subtractivePrefix|suffix|suffixPrefix|unsaturator) #REQUIRED
                     type (acidStem|aminoAcid|basic|carbohydrateChainLength|carbohydrateConfigurationalPrefix|carbohydrateStem|chain|charge|cisOrTrans|chalcogenAcidStem|diValentGroup|EorZ|functionalClass|group|inline|monoValentGroup|monoValentStandaloneGroup|nonCarboxylicAcid|ring|simpleGroup|substituent|root|VonBaeyer) #IMPLIED
-                    subType (alkaneStem|anhydride|arylGroup|arylSubstituent|biochemical|carbohydrate|cycleformer|cyclicUnsaturableHydrocarbon|dedicatedFunctionalReplacementPrefix|elementaryAtom|endInAn|endInIc|endInIne|epoxyLike|functionalClassGroup|groupStem|halideOrPseudoHalide|hantzschWidman|heteroStem|fusionRing|simpleGroup|multiRadicalSubstituent|noAcyl|none|oxidoLike|perhalogeno|simpleSubstituent|substituent|terminal|ylForAcyl|ylForNothing|ylForYl) #IMPLIED
+                    subType (alkaneStem|anhydride|arylGroup|arylSubstituent|biochemical|carbohydrate|cycleformer|cyclicUnsaturableHydrocarbon|dedicatedFunctionalReplacementPrefix|elementaryAtom|endInAn|endInIc|endInIne|epoxyLike|functionalClassGroup|groupStem|halideOrPseudoHalide|hantzschWidman|heteroStem|fusionRing|simpleGroup|multiRadicalSubstituent|noAcyl|none|oxidoLike|perhalogeno|phospho|simpleSubstituent|substituent|terminal|ylForAcyl|ylForNothing|ylForYl) #IMPLIED
                     symbol CDATA #REQUIRED
                     ignoreWhenWritingXML (yes) #IMPLIED>
 

opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenLists.dtd

 <!ELEMENT tokenList (token)+>
 <!ATTLIST tokenList tagname (alkaneStemComponent|alkaneStemModifier|bridgeFormingO|carbohydrateRingSize|chargeSpecifier|closebracket|cyclo|dlStereochemistry|functionalClass|functionalGroup|functionalModifier|fusedRingBridge|group|heteroatom|hydro|hyphen|ine|infix|hydrocarbonFusedRingSystem|multiplier|NA|openbracket|oxidationNumberSpecifier|polyCyclicSpiro|ringAssemblyMultiplier|stereoChemistry|structuralCloseBracket|structuralOpenBracket|subtractivePrefix|suffix|suffixPrefix|unsaturator) #REQUIRED
                     type (acidStem|aminoAcid|basic|carbohydrateChainLength|carbohydrateConfigurationalPrefix|carbohydrateStem|chain|charge|cisOrTrans|chalcogenAcidStem|diValentGroup|EorZ|functionalClass|group|inline|monoValentGroup|monoValentStandaloneGroup|nonCarboxylicAcid|ring|simpleGroup|substituent|root|VonBaeyer) #IMPLIED
-                    subType (alkaneStem|anhydride|arylGroup|arylSubstituent|biochemical|carbohydrate|cycleformer|cyclicUnsaturableHydrocarbon|dedicatedFunctionalReplacementPrefix|elementaryAtom|endInAn|endInIc|endInIne|epoxyLike|functionalClassGroup|groupStem|halideOrPseudoHalide|hantzschWidman|heteroStem|fusionRing|simpleGroup|multiRadicalSubstituent|noAcyl|none|oxidoLike|perhalogeno|simpleSubstituent|substituent|terminal|ylForAcyl|ylForNothing|ylForYl) #IMPLIED
+                    subType (alkaneStem|anhydride|arylGroup|arylSubstituent|biochemical|carbohydrate|cycleformer|cyclicUnsaturableHydrocarbon|dedicatedFunctionalReplacementPrefix|elementaryAtom|endInAn|endInIc|endInIne|epoxyLike|functionalClassGroup|groupStem|halideOrPseudoHalide|hantzschWidman|heteroStem|fusionRing|simpleGroup|multiRadicalSubstituent|noAcyl|none|oxidoLike|perhalogeno|phospho|simpleSubstituent|substituent|terminal|ylForAcyl|ylForNothing|ylForYl) #IMPLIED
                     symbol CDATA #REQUIRED
                     ignoreWhenWritingXML (yes) #IMPLIED>
 <!-- tagname == the name of the XML element that tokens in this file will be translated to