Snippets
Created by
SeanB
Use UI
// One attribute of an HTML tag, as filled in by cHTMLParser.ParseHTMLTag.
Struct tAttrPair
// Attribute name token.
String sAttr
// Attribute value token; only set when the name was followed by "=".
String sValue
// True when the attribute was written as name=value, False for a bare name.
Boolean isAssigned
End_Struct
// Parsed form of a single tag, as returned by cHTMLParser.ParseHTMLTag.
Struct tTagData
// First token inside the angle brackets (the tag name; a closing tag keeps its leading "/").
String sValue
// Attributes in the order they appeared in the tag.
tAttrPair[] aAttributes
// True when the tag text ended in "/" before the closing bracket (e.g. <br/>).
Boolean isAloneTag
End_Struct
// One fragment of a document, as produced by cHTMLParser.ParseHTML.
Struct tParsedHTML
// Fragment kind: "text" or "tag".
String sType
// Raw fragment text: trimmed text for "text", the full <...> string for "tag".
String sValue
// Parsed tag details; filled in by ParseHTML only for fragments whose sType is "tag".
tTagData tagdata
End_Struct
Class cHTMLParser is a cObject
// Returns True when sInp consists solely of HTML whitespace characters
// (space, tab, line feed, form feed, carriage return). An empty string also
// returns True, which matches the previous Trim()-based behaviour.
//
// Trim() only strips spaces, so the old test reported tabs and line breaks as
// non-whitespace and tags spread over several lines were tokenized wrongly.
Function isHTMLSpace String sInp Returns Boolean
    Integer iIdx
    Integer iLen
    Integer iCode

    Move (Length(sInp)) to iLen
    For iIdx from 1 to iLen
        // Mid(string, length, start): pick the single character at iIdx
        Move (Ascii(Mid(sInp,1,iIdx))) to iCode
        If ((iCode<>32) and (iCode<>9) and (iCode<>10) and (iCode<>12) and (iCode<>13)) Begin
            Function_Return False
        End
    Loop
    Function_Return True
End_Function
// Splits the inside of a tag (angle brackets already removed) into tokens:
// the tag name, attribute names, "=" signs and attribute values.
//
// sText   - tag text without the surrounding "<" and ">".
// Returns - the tokens in order of appearance. Quoted values are returned
//           without their quotes; "=" is always a token of its own.
//
// The scanner is a small state machine driven by sType:
//   "text"         - inside an unquoted token
//   "space"        - between tokens
//   "single-quote" - inside a '...' value
//   "double-quote" - inside a "..." value
//
// Fixes over the previous version:
//   * whitespace met while between tokens is skipped; it used to be appended
//     to the pending token, which leaked blanks into the next value and
//     produced whitespace-only tokens;
//   * a closing quote always emits the quoted token, even when it is empty,
//     so attr="" keeps its (empty) value instead of swallowing the next token.
Function tokenizeTags String sText Returns String[]
    Integer iPos
    Integer iMax
    String sType
    String sCurrentToken
    String sChar
    Pointer pText
    String[] Tokens

    Move "" to sCurrentToken
    Move "text" to sType
    Move (AddressOf(sText)) to pText
    Move (Length(sText)) to iMax
    For iPos from 0 to (iMax-1)
        Move (Character(DeRefC(pText,iPos))) to sChar
        If (sType = "space") Begin
            // sCurrentToken is always empty while between tokens
            If (sChar = '"') Begin
                Move "double-quote" to sType
            End
            Else If (sChar = "'") Begin
                Move "single-quote" to sType
            End
            Else If (sChar = "=") Begin
                Move "=" to Tokens[(SizeOfArray(Tokens))]
            End
            Else If (not(isHTMLSpace(Self,sChar))) Begin
                // first character of a new unquoted token
                Move "text" to sType
                Move sChar to sCurrentToken
            End
            // anything else is whitespace between tokens and is skipped
        End
        Else If (sType = "text") Begin
            If (isHTMLSpace(Self,sChar)) Begin
                Move "space" to sType
                If (sCurrentToken<>"") Move sCurrentToken to Tokens[(SizeOfArray(Tokens))]
                Move "" to sCurrentToken
            End
            Else If (sChar = "=") Begin
                Move "space" to sType
                If (sCurrentToken<>"") Move sCurrentToken to Tokens[(SizeOfArray(Tokens))]
                Move "=" to Tokens[(SizeOfArray(Tokens))]
                Move "" to sCurrentToken
            End
            Else Begin
                Append sCurrentToken sChar
            End
        End
        Else Begin
            // inside a quoted value: only the matching quote ends it
            If (((sType = "single-quote") and (sChar = "'")) or ((sType = "double-quote") and (sChar = '"'))) Begin
                // emit even an empty value so that attr="" is preserved
                Move sCurrentToken to Tokens[(SizeOfArray(Tokens))]
                Move "" to sCurrentToken
                Move "space" to sType
            End
            Else Begin
                Append sCurrentToken sChar
            End
        End
    Loop
    // flush a token (or an unterminated quoted value) left at the end of the text
    If (sCurrentToken<>"") Move sCurrentToken to Tokens[(SizeOfArray(Tokens))]
    Function_Return Tokens
End_Function
// Parses one tag string such as <a href="x" disabled> into a tTagData.
//
// sText   - the tag text, normally including its angle brackets.
// Returns - tTagData with:
//             sValue      = first token (the tag name),
//             aAttributes = one entry per attribute, in order,
//             isAloneTag  = True when the tag text ends in "/" (e.g. <br/>).
//
// Fixes over the previous version:
//   * the leading "<" and trailing ">" are removed only when they are really
//     there; the first and last characters used to be dropped blindly, which
//     destroyed data for unterminated tags and broke on very short input;
//   * look-ahead into the token array is bounds-checked instead of relying on
//     a sentinel written past the end of the array.
Function ParseHTMLTag String sText Returns tTagData
    Integer iCount
    Integer iPos
    Integer iAttrNo
    String[] Tokens
    tTagData tagData

    // strip the angle brackets, when present
    If ((Left(sText,1)) = "<") Move (Right(sText,((Length(sText))-1))) to sText
    If ((Right(sText,1)) = ">") Move (Left(sText,((Length(sText))-1))) to sText

    // a trailing "/" marks a self-closing ("alone") tag
    Move False to tagData.isAloneTag
    If ((Right(sText,1)) = "/") Begin
        Move True to tagData.isAloneTag
        Move (Left(sText,((Length(sText))-1))) to sText
    End

    Get tokenizeTags sText to Tokens
    Move (SizeOfArray(Tokens)) to iCount
    If (iCount > 0) Move Tokens[0] to tagData.sValue

    // the remaining tokens are attributes: either "name" or "name" "=" "value"
    Move 1 to iPos
    Move 0 to iAttrNo
    While (iPos < iCount)
        If (Tokens[iPos] <> "") Begin
            Move Tokens[iPos] to tagData.aAttributes[iAttrNo].sAttr
            Move False to tagData.aAttributes[iAttrNo].isAssigned
            If ((iPos+1) < iCount) Begin
                If (Tokens[iPos+1] = "=") Begin
                    Move True to tagData.aAttributes[iAttrNo].isAssigned
                    // "name=" at the very end leaves the value empty
                    If ((iPos+2) < iCount) Move Tokens[iPos+2] to tagData.aAttributes[iAttrNo].sValue
                    // skip over the "=" and the value
                    Move (iPos+2) to iPos
                End
            End
            Move (iAttrNo+1) to iAttrNo
        End
        Move (iPos+1) to iPos
    Loop
    Function_Return tagData
End_Function
// Splits an HTML string into an ordered list of "text" and "tag" fragments.
//
// sText   - the HTML source.
// Returns - one tParsedHTML per fragment. For "tag" fragments sValue holds the
//           complete <...> string and tagData holds the result of ParseHTMLTag.
//           For "text" fragments sValue holds the text with leading and
//           trailing blanks removed; text that is blank after trimming is
//           dropped.
//
// A tag starts at "<" while in text and ends at the next ">"; no special
// handling is done for comments or script content.
//
// Fix over the previous version: a "<" that is never closed by ">" used to be
// stored as a "tag" fragment and parsed as if it were complete. The leftover
// is now stored as "text" so it is written back verbatim. Unused locals were
// removed.
Function ParseHTML String sText Returns tParsedHTML[]
    String sType
    String sChar
    String sCurrentToken
    tParsedHTML[] aParsed
    Integer iMax
    Integer iPos
    Integer iTokenPos
    Pointer pText

    Move "text" to sType
    Move "" to sCurrentToken
    Move (AddressOf(sText)) to pText
    Move (Length(sText)) to iMax
    For iPos from 0 to (iMax-1)
        Move (Character(DeRefC(pText,iPos))) to sChar
        If ((sChar = "<") and (sType = "text")) Begin
            // a tag starts: store the text collected so far, unless it is blank
            Move (Trim(sCurrentToken)) to sCurrentToken
            If (sCurrentToken <> "") Begin
                Move (SizeOfArray(aParsed)) to iTokenPos
                Move sCurrentToken to aParsed[iTokenPos].sValue
                Move "text" to aParsed[iTokenPos].sType
            End
            Move sChar to sCurrentToken
            Move "tag" to sType
        End
        Else If ((sChar = ">") and (sType = "tag")) Begin
            // the tag is complete: store it including both angle brackets
            Append sCurrentToken sChar
            Move (SizeOfArray(aParsed)) to iTokenPos
            Move sCurrentToken to aParsed[iTokenPos].sValue
            Move "tag" to aParsed[iTokenPos].sType
            Move "text" to sType
            Move "" to sCurrentToken
        End
        Else Begin
            Append sCurrentToken sChar
        End
    Loop

    // whatever is left is trailing text, or a tag that was never closed;
    // either way it is kept as text so that nothing is lost or altered
    Move (Trim(sCurrentToken)) to sCurrentToken
    If (sCurrentToken <> "") Begin
        Move (SizeOfArray(aParsed)) to iTokenPos
        Move sCurrentToken to aParsed[iTokenPos].sValue
        Move "text" to aParsed[iTokenPos].sType
    End

    // second pass: break every tag fragment down into name and attributes
    Move (SizeOfArray(aParsed)) to iMax
    For iPos from 0 to (iMax-1)
        If (aParsed[iPos].sType = "tag") Get ParseHTMLTag aParsed[iPos].sValue to aParsed[iPos].tagData
    Loop
    Function_Return aParsed
End_Function
// Alternative entry point: hands sInp to ParseHTML and returns its result
// unchanged.
Function SplitHTML String sInp Returns tParsedHTML[]
    tParsedHTML[] aFragments

    Get ParseHTML sInp to aFragments
    Function_Return aFragments
End_Function
// Wraps an attribute value in quotes when it cannot be written unquoted.
//
// sText   - the raw attribute value.
// Returns - sText unchanged when it is non-empty and made up only of the
//           letters a-z (either case) and digits; otherwise the quoted value.
//
// Quote selection:
//   * double quotes by default;
//   * single quotes when the value contains a double quote but no single one;
//   * when the value contains both kinds, double quotes are used and every
//     embedded double quote is written as &quot;.
//
// Fixes over the previous version:
//   * an empty value is emitted as "" instead of nothing, which used to
//     produce attr= followed directly by the next attribute;
//   * a value containing both quote kinds no longer yields broken markup.
Function QuoteIfNeeded String sText Returns String
    Integer iPos
    Integer iMax
    String sChar
    String sQuoteCode
    Boolean isQuotable

    Move (Length(sText)) to iMax
    // an empty value always needs quotes
    Move (iMax = 0) to isQuotable
    For iPos from 1 to iMax
        // Mid(string, length, start): pick the single character at iPos
        Move (Lowercase(Mid(sText,1,iPos))) to sChar
        If ((Pos(sChar,"abcdefghijklmnopqrstuvwxyz0123456789")) = 0) Move True to isQuotable
    Loop
    If (not(isQuotable)) Function_Return sText

    Move '"' to sQuoteCode
    If ((Pos('"',sText)) > 0) Begin
        If ((Pos("'",sText)) = 0) Begin
            Move "'" to sQuoteCode
        End
        Else Begin
            // both quote kinds present: escape the double quotes
            Move (Replaces('"',sText,"&quot;")) to sText
        End
    End
    Function_Return (sQuoteCode+sText+sQuoteCode)
End_Function
// Serialises a parsed tag back to its HTML text.
//
// tag     - the parsed tag.
// Returns - "<" + tag name, then " name" or " name=value" for every attribute
//           (values passed through QuoteIfNeeded), closed with "/>" when
//           tag.isAloneTag is set and with ">" otherwise.
Function TagToHtml tTagData tag Returns String
    String sHtml
    String sCloser
    Integer iAttr
    Integer iCount
    tAttrPair attr

    Move "<" to sHtml
    Append sHtml tag.sValue

    Move (SizeOfArray(tag.aAttributes)) to iCount
    Move 0 to iAttr
    While (iAttr < iCount)
        Move tag.aAttributes[iAttr] to attr
        Append sHtml " " attr.sAttr
        // bare attributes (no "=") are written as the name only
        If (attr.isAssigned) Begin
            Append sHtml "=" (QuoteIfNeeded(Self,attr.sValue))
        End
        Move (iAttr+1) to iAttr
    Loop

    If (tag.isAloneTag) Begin
        Move "/>" to sCloser
    End
    Else Begin
        Move ">" to sCloser
    End
    Append sHtml sCloser
    Function_Return sHtml
End_Function
// Rebuilds an HTML string from parsed fragments, with nothing inserted
// between them.
//
// Parsed  - the fragment list.
// Returns - the concatenation of every fragment: "tag" fragments are rendered
//           through TagToHtml from their tagdata, "text" fragments contribute
//           their sValue. Fragments of any other type add nothing.
Function toHTML tParsedHTML[] Parsed Returns String
    String sHtml
    String sKind
    Integer iIdx
    Integer iCount

    Move (SizeOfArray(Parsed)) to iCount
    Move 0 to iIdx
    While (iIdx < iCount)
        Move Parsed[iIdx].sType to sKind
        If (sKind = "tag") Begin
            Append sHtml (TagToHtml(Self,Parsed[iIdx].tagdata))
        End
        Else If (sKind = "text") Begin
            Append sHtml Parsed[iIdx].sValue
        End
        Move (iIdx+1) to iIdx
    Loop
    Function_Return sHtml
End_Function
// Rebuilds an HTML string from parsed fragments, one fragment per line.
//
// Parsed  - the fragment list.
// Returns - every fragment followed by CR_LF: "tag" fragments are rendered
//           through TagToHtml from their tagdata, "text" fragments contribute
//           their sValue. A fragment of any other type contributes only the
//           line break.
//
// Fix over the previous version: the leftover debugging statement that wrote
// the length of the result to the console on every call has been removed.
Function toFlatHTML tParsedHTML[] Parsed Returns String
    Integer iPos
    Integer iMax
    String sRet
    tParsedHTML HtmlFragment

    Move (SizeOfArray(Parsed)) to iMax
    For iPos from 0 to (iMax-1)
        Move Parsed[iPos] to HtmlFragment
        If (HtmlFragment.sType = "tag") Begin
            Append sRet (TagToHtml(Self,HtmlFragment.tagdata))
        End
        Else If (HtmlFragment.sType = "text") Begin
            Append sRet HtmlFragment.sValue
        End
        Append sRet CR_LF
    Loop
    Function_Return sRet
End_Function
End_Class
|
Comments (0)
You can clone a snippet to your computer for local editing. Learn more.