Commits

Jason McKesson committed 6e37b12

glutil: Font UTF-8 processing work for valid UTF-8 within the expected codepoint range.

Comments (0)

Files changed (4)

glutil/Test/UTF8Text.txt

+SomeStuff: àÏƾ½Other Text¼¶©¦½¼¶©¦

glutil/Test/font_test.cpp

 #include <math.h>
 #include <stdio.h>
 #include <string.h>
+#include <iostream>
+#include <fstream>
 #include <glload/gl_3_3.hpp>
 #include <glload/gll.hpp>
 #include <GL/glfw.h>
 	PushGlyph(vecVertex, 2, positions, texCoords);
 }
 
+//Returns the first line of the file "UTF8Text.txt", without its line terminator.
+//The bytes are passed through untouched; no decoding or validation happens here.
+//If the file cannot be opened, or holds nothing, the result is an empty string.
+std::string GetString()
+{
+	std::string firstLine;
+	std::ifstream input("UTF8Text.txt");
+	std::getline(input, firstLine);
+	return firstLine;
+}
+
 void InitializeVertexData()
 {
 	std::vector<GLfloat> vecVertex;
-	std::vector<glutil::GlyphQuad> glyphs = g_pFont->LayoutLine("Hello, Jing!", 0, glm::vec2(50.0f, 250.0f),
-		glutil::REF_BASELINE);
+
+	std::string theText = GetString();
+
+	std::vector<glutil::GlyphQuad> glyphs = g_pFont->LayoutLine(theText.c_str(), theText.size(),
+		glm::vec2(50.0f, 250.0f), glutil::REF_BASELINE);
 
 	vecVertex.reserve(24 * glyphs.size());
 
 //Called after the window and OpenGL are initialized. Called exactly once, before the main loop.
 void init()
 {
-	g_pFont = glutil::CreateFont(glutil::FONT_SIZE_GIANT);
+	g_pFont = glutil::CreateFont(glutil::FONT_SIZE_MEDIUM);
 
 	InitializeProgram();
 	InitializeVertexData();

glutil/include/glutil/Font.h

 \brief Contains the \ref module_glutil_font "basic font rendering system" classes and functions.
 **/
 
+#include <exception>
+#include <stdexcept>
 #include <utility>
 #include <vector>
 #include <glm/glm.hpp>
 		REF_TOP,			///<The point is the top of the text; nothing above here will be touched.
 	};
 
+	/**
+	\brief Exception raised when text that is supposed to be UTF-8 turns out to be malformed.
+
+	Derives from std::runtime_error; what() always reports "UTF-8 text is not valid.".
+	**/
+	class InvalidEncodingException : public std::runtime_error
+	{
+	public:
+		InvalidEncodingException()
+			: std::runtime_error("UTF-8 text is not valid.")
+		{
+		}
+	};
+
 	//Deletion of this object must happen while OpenGL is still active.
 	/**
 	\brief The class that represents a series of glyphs as well as the information to layout a string of text.
 		\param eRef Defines what the Y-component of \a ptReference means.
 
 		\return An ordered set of glyphs that represent the string of text.
+
+		\throw InvalidEncodingException Thrown if \a text is not valid UTF-8.
 		**/
 		std::vector<GlyphQuad> LayoutLine(const char *text, size_t length, const glm::vec2 &ptReference,
 			PointReference eRef = REF_BASELINE) const;

glutil/source/Font.cpp

 
 				glm::vec2 baseline = CalcBaseline(ptReference, eRef);
 
-				const char *currPos = text;
-				const char * const endPos = text + length;
+				const unsigned char *currPos = reinterpret_cast<const unsigned char*>(text);
+				const unsigned char * const endPos = currPos + length;
 				for(; currPos != endPos; ++currPos)
 				{
-					//Can't handle UTF-8 yet.
-					assert(*currPos <= 0x7F);
+					unsigned int codepoint = *currPos;
+					
+					//Parse UTF-8. A byte below 0x80 already is its own codepoint; anything else
+					//must be the lead byte of a 2-, 3- or 4-byte sequence.
+					//Throws InvalidEncodingException for truncated sequences, bad lead or
+					//continuation bytes, overlong forms, surrogates and values above U+10FFFF.
+					if(codepoint & 0x80)
+					{
+						//The run of 1 bits below bit 7 of the lead byte gives the number of
+						//continuation bytes. The loop tests bit + 1 and consumes one
+						//continuation byte per set bit, stopping after three.
+						unsigned int build = codepoint;
+						int bit = 5;
+						for(; (codepoint & (0x1 << (bit + 1))) && (bit != 2); --bit)
+						{
+							//A sequence cut short by the end of the text is invalid. Test before
+							//dereferencing so endPos is never read and never stepped over.
+							++currPos;
+							if(currPos == endPos)
+								throw InvalidEncodingException();
+
+							//Continuation bytes must have the form 10xxxxxx.
+							if((*currPos & 0xC0) != 0x80)
+								throw InvalidEncodingException();
 
-					unsigned int glyphIx = ConvertCodepointToGlyphIndex(*currPos);
+							//Make room for, then append, the 6 payload bits of this byte.
+							build <<= 6;
+							build += *currPos & 0x3F;
+						}
+
+						//bit == 5: no continuation was requested, so the lead byte is itself a
+						//stray continuation byte (10xxxxxx).
+						//Terminator bit (bit + 1) still set: the lead byte is 11111xxx.
+						//Neither may start a UTF-8 sequence.
+						if((bit == 5) || ((codepoint & (0x1 << (bit + 1))) != 0))
+							throw InvalidEncodingException();
+
+						unsigned int mask = 0x1 << (bit + 1);
+						//For each extra byte, we must shift by 6 bits.
+						mask <<= 6 * (6 - (bit + 1));
+
+						//Keep every bit below this point; that drops the length marker bits
+						//of the lead byte and leaves only payload.
+						--mask;
+						codepoint = build & mask;
+
+						//Smallest value that genuinely needs this many bytes; anything lower
+						//is an overlong encoding. bit is 4, 3 or 2 for 2, 3 or 4 bytes.
+						unsigned int minCodepoint = 0x10000;
+						if(bit == 4)
+							minCodepoint = 0x80;
+						else if(bit == 3)
+							minCodepoint = 0x800;
+
+						//Reject overlong forms, UTF-16 surrogates and values past U+10FFFF.
+						if((codepoint < minCodepoint) || (codepoint > 0x10FFFF) ||
+							((codepoint >= 0xD800) && (codepoint <= 0xDFFF)))
+						{
+							throw InvalidEncodingException();
+						}
+					}
+
+					unsigned int glyphIx = ConvertCodepointToGlyphIndex(codepoint);
 
 					if(glyphIx == BAD_GLYPH_INDEX || glyphIx == IGNORE_GLYPH_INDEX)
 						continue;