Commits

Anonymous committed f35c1ef Merge

Merging the moabrichard and mstrong forks.

Comments (0)

Files changed (3)

src/main/java/org/jhove2/module/format/xml/NumericCharacterReferenceInformation.java

      * 
      * @param code
      *            the string representation of a character's unicode code point
+     * @param jhove2
+     *            the JHOVE2 framework
+     * @throws JHOVE2Exception
      */
-    public void tally(String code) {
-        Integer codePoint = Integer.decode(code.replace("x", "0x"));
-        NumericCharacterReference reference = numericCharacterReferenceMap
-                .get(codePoint);
-        if (reference != null) {
-            reference.count++;
-        }
-        else {
-            numericCharacterReferenceMap.put(codePoint,
-                    new NumericCharacterReference(codePoint));
+    public void tally(String code, JHOVE2 jhove2) throws JHOVE2Exception {
+        try {
+            Integer codePoint;
+            if (code.substring(0,1).toLowerCase().equals("x")) {
+                // Hexadecimal
+                codePoint = Integer.decode(code.toLowerCase().replace("x", "0x"));
+            } else {
+                // Decimal
+                codePoint = new Integer(code);
+            }
+            NumericCharacterReference reference = numericCharacterReferenceMap
+                    .get(codePoint);
+            if (reference != null) {
+                reference.count++;
+            }
+            else {
+                numericCharacterReferenceMap.put(codePoint,
+                        new NumericCharacterReference(codePoint));
+            }
+        }  catch (NumberFormatException e) {
+            this.invalidCharacterForEncodingMessage = new Message(
+                    Severity.ERROR, Context.OBJECT,
+                    "org.jhove2.module.format.xml.XmlModule.invalidCharacterForEncodingMessage",
+                    jhove2.getConfigInfo());
         }
     }
 
      * constructs, which are not considered XML entities. The characters()
      * method of the ContentHandler interface, translates these codes into
      * Unicode characters before placing the data in the buffer.
-     * @param jhove2 TODO
      * @param jhove2
      *            the JHOVE2 framework
-     * @param source
-     *            the source object
-     * 
+     *
      * @throws IOException
      * @throws JHOVE2Exception
      */
                  * Found one, record the occurrence of the NCR code (pattern
                  * capture group 1)
                  */
-                tally(ncrMatcher.group(1));
+                tally(ncrMatcher.group(1), jhove2);
             }
         }
         catch (CharacterCodingException e) {

src/test/java/org/jhove2/module/format/xml/XmlCharacterReferenceTest.java

     @Test
     public void testNumericCharacterReferences() {
         TreeMap<Integer, NumericCharacterReference> ncrMap = testXmlModule.numericCharacterReferenceInformation.numericCharacterReferenceMap;
-        assertEquals(6, ncrMap.size());
+        assertEquals(7, ncrMap.size());
         assertEquals(3, ncrMap.get(0x0009).getCount().intValue());
         assertEquals(1, ncrMap.get(0x000A).getCount().intValue());
         assertEquals(1, ncrMap.get(0x000D).getCount().intValue());
         assertEquals(1, ncrMap.get(0x00B6).getCount().intValue());
         assertEquals(2, ncrMap.get(0x2021).getCount().intValue());
         assertEquals(1, ncrMap.get(0x12415).getCount().intValue());
+        assertEquals(1, ncrMap.get(0x0026).getCount().intValue());
     }
 
     /**

src/test/resources/examples/xml/samples/character-reference-sample.xml

 		<ncr>&#x00B6;&#x2021;&#x12415;</ncr>
 		<ncr>&#9;&#9;&#x2021;</ncr>
 		<ncr>&#9;&#10;&#13;</ncr>
+        <ncr>&#038;</ncr>
 	</numericCharacterReferences>
 </characterReferences>