Commits

Matt Hamilton committed 4843651

More accurate when locating entities in tweets with unicode characters

  • Participants
  • Parent commits 54e1386

Comments (0)

Files changed (2)

Source/Halfwit2/Extensions/TweetableExtensions.cs

                 entities = item.Entities;
             }
 
+            // Twitter counts each Unicode "character" as a single char, even though .NET strings represent
+            // some of them (e.g. surrogate pairs outside the Basic Multilingual Plane) as two chars. We need
+            // to use StringInfo to get an accurate substring and length in case the tweet contains one. ☺
+            var si = new System.Globalization.StringInfo(text);
+            var len = si.LengthInTextElements;
+
             var textBlock = new TextBlock
             {
                 TextWrapping = System.Windows.TextWrapping.Wrap,
             };
 
-
             foreach (var e in entities)
             {
-                if (e.Indices.Item1 > text.Length || e.Indices.Item2 > text.Length) continue;
+                if (e.Indices.Item1 > len || e.Indices.Item2 > len) continue;
 
                 if (e.Indices.Item1 > i)
                 {
-                    textBlock.Inlines.Add(new Run(WebUtility.HtmlDecode(text.Substring(i, e.Indices.Item1 - i))));
+                    textBlock.Inlines.Add(new Run(WebUtility.HtmlDecode(si.SubstringByTextElements(i, e.Indices.Item1 - i))));
                 }
                 i = e.Indices.Item2;
 
                 }
             }
 
-            if (i < item.Text.Length)
+            if (i < len)
             {
                 textBlock.Inlines.Add(new Run(WebUtility.HtmlDecode(text.Substring(i))));
             }

Source/Halfwit2/Halfwit2.csproj

     <PublisherName>Mad Props!</PublisherName>
     <CreateWebPageOnPublish>true</CreateWebPageOnPublish>
     <WebPage>publish.htm</WebPage>
-    <ApplicationRevision>61</ApplicationRevision>
+    <ApplicationRevision>62</ApplicationRevision>
     <ApplicationVersion>2.0.0.%2a</ApplicationVersion>
     <UseApplicationTrust>false</UseApplicationTrust>
     <PublishWizardCompleted>true</PublishWizardCompleted>