Commits

Daniel Herzog committed 57bafdd Draft

Initial implementation of HTTP header tokenizer

Comments (0)

Files changed (3)

src/client-en.xml

 <script src="./syntaxhighlight/markup/syntax.js"/>
 <script src="./syntaxhighlight/css/tokenizer.js"/>
 <script src="./syntaxhighlight/css/syntax.js"/>
+<script src="./syntaxhighlight/http-header/tokenizer.js"/>
 
 
 

src/syntaxhighlight/http-header/tokenizer.js

+"use strict";
+
// The tokenizer does not touch the DOM, so the namespace is attached to
// whichever global object is available: window in a page, otherwise the
// generic global object (e.g. a worker or a command-line test run).
(function(global_object)
{
  global_object.cls = global_object.cls || {};
})(typeof window !== "undefined" ? window : globalThis);

/**
 * Tokenizer for the raw text of an HTTP message head: the first line
 * followed by "Name: value" header lines.
 *
 * The input is consumed one character at a time by a small state machine
 * (FIRST_LINE_PART -> NAME -> VALUE -> NAME -> ... -> EOF). Tokens are
 * reported through a callback as (type, string) pairs, where type is one of
 * cls.HTTPHeaderTokenizer.types. Every input character ends up in exactly
 * one token, so concatenating the token strings gives back the input.
 *
 * @constructor
 */
cls.HTTPHeaderTokenizer = function()
{
  var LF = "\n";
  var PUNCTUATOR = ":";
  var WHITESPACE_CHARS =
  {
    '\u0009': 1, //  Tab <TAB>
    '\u0020': 1  //  Space <SP>
  };

  this._buffer = "";
  this._current_pos = 0;
  this._token_buffer = "";
  this._token_type = null;
  this._state_handler = {};

  /**
   * Tokenizes input_buffer from start to end, synchronously.
   *
   * All per-run state is reset first, so one instance can be used for any
   * number of inputs.
   *
   * @param {String} input_buffer The raw header text. null and undefined
   *                              are treated as an empty string.
   * @param {Function} ontoken Called as ontoken(token_type, token_string)
   *                           for each token, with the tokenizer as `this`.
   */
  this.tokenize = function(input_buffer, ontoken)
  {
    this._buffer = input_buffer == null ? "" : String(input_buffer);
    this._current_pos = 0;
    this._token_buffer = "";
    this._token_type = null;
    this._emitToken = ontoken;
    this._state_handler = this._state_handlers.FIRST_LINE_PART;
    while (this._state_handler !== this._state_handlers.EOF)
    {
      this._state_handler.apply(this);
    }

    this._state_handlers.EOF.apply(this);
  };

  // One handler per state. Each call consumes at most one character and may
  // switch this._state_handler; _is_EOF() switches to EOF once the input is
  // used up.
  this._state_handlers =
  {
    // First line: split into parts at tab/space, ended by LF.
    FIRST_LINE_PART: function()
    {
      if (this._is_EOF())
      {
        return false;
      }
      var c = this._buffer.charAt(this._current_pos++);
      this._token_type = cls.HTTPHeaderTokenizer.types.FIRST_LINE_PART;
      if (c in WHITESPACE_CHARS)
      {
        this._emitToken(this._token_type, this._token_buffer);
        this._token_buffer = "";
        // For now, whitespace adds to the next token. Visually that makes no difference.
      }
      else if (c === LF)
      {
        this._emitToken(this._token_type, this._token_buffer);
        this._token_buffer = "";
        this._emitToken(cls.HTTPHeaderTokenizer.types.LINE_SEPARATOR, c); // todo: don't emit your own token.
        this._state_handler = this._state_handlers.NAME;
        // The LF went out as its own token, so it must not be buffered.
        return false;
      }
      this._token_buffer += c;
    },
    // Header name: everything up to the colon, which gets its own token.
    NAME: function()
    {
      if (this._is_EOF())
      {
        return false;
      }
      var c = this._buffer.charAt(this._current_pos++);
      this._token_type = cls.HTTPHeaderTokenizer.types.NAME;
      if (c === PUNCTUATOR)
      {
        this._emitToken(this._token_type, this._token_buffer);
        this._emitToken(cls.HTTPHeaderTokenizer.types.PUNCTUATOR, c);
        this._token_buffer = "";
        this._state_handler = this._state_handlers.VALUE;
        return false;
      }
      this._token_buffer += c;
    },
    // Header value: runs until an LF that does not start a continuation line.
    VALUE: function()
    {
      if (this._is_EOF())
      {
        return false;
      }
      var c = this._buffer.charAt(this._current_pos++);
      this._token_type = cls.HTTPHeaderTokenizer.types.VALUE;
      // LF only means switching to header when the following char is not whitespace.
      if (c === LF && !(this._buffer.charAt(this._current_pos) in WHITESPACE_CHARS))
      {
        this._emitToken(this._token_type, this._token_buffer);
        this._token_buffer = "";
        this._state_handler = this._state_handlers.NAME;
        // For now, LF adds to the next token. Visually that makes no difference.
      }
      this._token_buffer += c;
    },
    // End of input: flush whatever text is still buffered. Nothing is
    // emitted when the buffer is empty, so empty input produces no token and
    // input ending on a token boundary produces no trailing empty token.
    EOF: function()
    {
      if (this._token_buffer !== "")
      {
        this._emitToken(this._token_type, this._token_buffer);
        this._token_buffer = "";
      }
    }
  };

  /**
   * Checks whether the whole input has been consumed and, if so, switches
   * the state machine to the EOF state.
   *
   * @returns {Boolean} true when no characters are left.
   */
  this._is_EOF = function()
  {
    if (this._current_pos >= this._buffer.length)
    {
      this._state_handler = this._state_handlers.EOF;
      return true;
    }
    return false;
  };
};

/**
 * Token types passed as the first argument to the tokenize() callback.
 */
cls.HTTPHeaderTokenizer.types = {
    FIRST_LINE_PART  : 1,
    NAME             : 2,
    VALUE            : 3,
    PUNCTUATOR       : 4,
    LINE_SEPARATOR   : 5
};

test-scripts/network-view/header-tokenizer.html

+<!DOCTYPE html>
+<html>
+
+<link rel="stylesheet" href="../../src/ui-style/ui.css"/>
+<link rel="stylesheet" href="../../src/network/network_style.css"/>
+<link rel="stylesheet" href="../../src/ui-scripts/tooltip/tooltip.css"/>
+
+<style>
+
+html
+{
+  margin: 5px;
+}
+
+.type-1 {
+  color: blue;
+}
+
+.type-2 {
+  color: red;
+}
+
+.type-3 {
+  color: green;
+}
+
+.type-4 {
+  color: #bada55;
+}
+
+</style>
+
+<script src="../../src/scripts/dom.js"></script>
+<script src="../../src/syntaxhighlight/http-header/tokenizer.js"></script>
+
+<script>
+
/**
 * Builds the template array for one token: a span holding the token text,
 * with a "type-<n>" class derived from the token type.
 * NOTE(review): presumably consumed by render() from dom.js - confirm.
 *
 * @param {Array} token A [token_type, token_string] pair.
 * @returns {Array} ["span", token_string, "class", "type-" + token_type]
 */
var token_template = function(token)
{
  var TYPE = 0;
  var STR = 1;
  return ["span", token[STR], "class", "type-" + token[TYPE]];
};
+
/**
 * Runs a fresh cls.HTTPHeaderTokenizer over raw_headers and collects every
 * token it reports.
 *
 * @param {String} raw_headers The raw header text to tokenize.
 * @returns {Array} [token_type, token_string] pairs in the order reported.
 */
var get_tokens = function(raw_headers)
{
  var collected = [];
  var on_token = function(token_type, token)
  {
    collected.push([token_type, token]);
  };

  new cls.HTTPHeaderTokenizer().tokenize(raw_headers, on_token);

  return collected;
};
+
// Fixture: request head with a space after each header colon and a trailing
// space on the first line. Every line but the last ends with "\n".
var raw_headers = "GET /dherzog/ HTTP/1.1 \n\
User-Agent: Opera/9.80 (Macintosh; Intel Mac OS X 10.7.4; U; en) Presto/2.10.289 Version/12.00\n\
Host: homes.oslo.osa\n\
Accept: text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1\n\
Accept-Language: en,en-US;q=0.9,de;q=0.8,ja;q=0.7,fr;q=0.6,es;q=0.5,it;q=0.4,pt;q=0.3,pt-PT;q=0.2,nl;q=0.1,sv;q=0.1,nb;q=0.1,da;q=0.1,fi;q=0.1,ru;q=0.1,pl;q=0.1,zh-CN;q=0.1,zh-TW;q=0.1,ko;q=0.1,ar;q=0.1,cs;q=0.1,hu;q=0.1,tr;q=0.1,ca;q=0.1,el;q=0.1,he;q=0.1,hr;q=0.1,ro;q=0.1,sk;q=0.1,th;q=0.1,uk;q=0.1\n\
Accept-Encoding: gzip, deflate\n\
Authorization: Basic XXX==\n\
Referer: https://homes.oslo.osa/dherzog/\n\
Cache-Control: no-cache\n\
Connection: Keep-Alive";
+
// Fixture: the same request head without a space after the header colons.
// Every line but the last ends with "\n".
var raw_headers2 = "GET /dherzog/ HTTP/1.1\n\
User-Agent:Opera/9.80 (Macintosh; Intel Mac OS X 10.7.4; U; en) Presto/2.10.289 Version/12.00\n\
Host:homes.oslo.osa\n\
Accept:text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1\n\
Accept-Language:en,en-US;q=0.9,de;q=0.8,ja;q=0.7,fr;q=0.6,es;q=0.5,it;q=0.4,pt;q=0.3,pt-PT;q=0.2,nl;q=0.1,sv;q=0.1,nb;q=0.1,da;q=0.1,fi;q=0.1,ru;q=0.1,pl;q=0.1,zh-CN;q=0.1,zh-TW;q=0.1,ko;q=0.1,ar;q=0.1,cs;q=0.1,hu;q=0.1,tr;q=0.1,ca;q=0.1,el;q=0.1,he;q=0.1,hr;q=0.1,ro;q=0.1,sk;q=0.1,th;q=0.1,uk;q=0.1\n\
Accept-Encoding:gzip, deflate\n\
Authorization:Basic XXX==\n\
Referer:https://homes.oslo.osa/dherzog/\n\
Cache-Control:no-cache\n\
Connection:Keep-Alive";
+
// Fixture: request head whose Accept and Accept-Encoding values continue on
// a following line that starts with whitespace. Every line but the last ends
// with "\n".
var raw_headers3 = "GET /dherzog/ HTTP/1.1\n\
User-Agent:Opera/9.80 (Macintosh; Intel Mac OS X 10.7.4; U; en) Presto/2.10.289 Version/12.00\n\
Host:homes.oslo.osa\n\
Accept:text/html, application/xml;q=0.9,\n\
 ooh-this-is-continued, application/xhtml+xml, image/png, image/webp, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1\n\
Accept-Language:en,en-US;q=0.9,de;q=0.8,ja;q=0.7,fr;q=0.6,es;q=0.5,it;q=0.4,pt;q=0.3,pt-PT;q=0.2,nl;q=0.1,sv;q=0.1,nb;q=0.1,da;q=0.1,fi;q=0.1,ru;q=0.1,pl;q=0.1,zh-CN;q=0.1,zh-TW;q=0.1,ko;q=0.1,ar;q=0.1,cs;q=0.1,hu;q=0.1,tr;q=0.1,ca;q=0.1,el;q=0.1,he;q=0.1,hr;q=0.1,ro;q=0.1,sk;q=0.1,th;q=0.1,uk;q=0.1\n\
Accept-Encoding:gzip, \n\
  \t  or even more continued\n\
Authorization:Basic XXX==\n\
Referer:https://homes.oslo.osa/dherzog/\n\
Cache-Control:no-cache\n\
Connection:Keep-Alive";
+
// On load, tokenize each fixture and render its tokens into the first
// element of the body, with two line breaks between consecutive fixtures.
window.onload = function()
{
  var fixtures = [raw_headers, raw_headers2, raw_headers3];
  for (var i = 0; i < fixtures.length; i++)
  {
    if (i > 0)
    {
      document.body.firstElementChild.render([["br"], ["br"]]);
    }
    document.body.firstElementChild.render(get_tokens(fixtures[i]).map(token_template));
  }
};
+
+</script>
+<body>
+  <pre class="mono"></pre>
+</body>
+</html>
+