Added functions that provide information for block and glyph coverage
CL @ http://codereview.appspot.com/6463047/


git-svn-id: http://sfntly.googlecode.com/svn/trunk@143 672e30a5-4c29-85ac-ac6d-611c735e0a51
diff --git a/java/src/com/google/typography/font/tools/fontinfo/FontInfo.java b/java/src/com/google/typography/font/tools/fontinfo/FontInfo.java
index ce4cece..8324f16 100644
--- a/java/src/com/google/typography/font/tools/fontinfo/FontInfo.java
+++ b/java/src/com/google/typography/font/tools/fontinfo/FontInfo.java
@@ -27,7 +27,10 @@
 import com.google.typography.font.sfntly.table.truetype.LocaTable;
 import com.google.typography.font.tools.fontinfo.DataDisplayTable.Align;
 
+import com.ibm.icu.impl.IllegalIcuArgumentException;
 import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.UnicodeSet;
 
 import java.text.DecimalFormat;
 import java.util.Arrays;
@@ -279,6 +282,165 @@
   // TODO public static DataDisplayTable listChars(Font font, String charString)
 
   /**
+   * Gets a table of the Unicode blocks covered by the font, with each block's coverage shown
+   * as "covered / total" over its assigned, non-control code points.
+   *
+   * @param font
+   *          the source font
+   * @return rows for blocks with at least one covered code point, plus an optional "Unknown" row
+   */
+  // FIXME Find more elegant method of retrieving block data
+  public static DataDisplayTable listCharBlockCoverage(Font font) {
+    String[] header = { "Block", "Coverage" };
+    Align[] displayAlignment = { Align.Left, Align.Right };
+    DataDisplayTable table = new DataDisplayTable(Arrays.asList(header));
+    table.setAlignment(Arrays.asList(displayAlignment));
+
+    // Walk every block listed in UnicodeBlockData and count its code points that have a glyph
+    CMap cmap = getUCSCMap(font);
+    int totalCount = 0;
+    for (int i = 0; i < UnicodeBlockData.numBlocks(); i++) {
+      String block = UnicodeBlockData.getBlockName(i);
+      UnicodeSet set = null;
+      try {
+        set = new UnicodeSet("[[:Block=" + block + ":]-[:gc=Unassigned:]-[:gc=Control:]]");
+      } catch (IllegalIcuArgumentException e) {
+        continue; // ICU could not build a set from this block name; leave the block out
+      }
+      int count = 0;
+      for (String charStr : set) {
+        if (cmap.glyphId(UCharacter.codePointAt(charStr, 0)) > 0) { // only glyph ids above 0 count
+          count++;
+        }
+      }
+      if (count > 0) { // blocks without any covered code point get no row
+        table.add(Arrays.asList(new String[] { String.format(
+            "%s [%s, %s]", block, UnicodeBlockData.getBlockStartCode(i),
+            UnicodeBlockData.getBlockEndCode(i)), String.format("%d / %d", count, set.size()) }));
+      }
+      totalCount += count;
+    }
+
+    // Control code points were subtracted from every block set above, so count the ones with
+    // valid glyphs here; totalCount then covers all Unicode characters with valid glyphs
+    UnicodeSet controlSet = new UnicodeSet("[[:gc=Control:]]");
+    for (String charStr : controlSet) {
+      if (cmap.glyphId(UCharacter.codePointAt(charStr, 0)) > 0) {
+        totalCount++;
+      }
+    }
+    int nonUnicodeCount = countChars(font) - totalCount; // whatever the tallies above missed
+    if (nonUnicodeCount > 0) {
+      table.add(Arrays.asList(new String[] { "Unknown", String.format("%d", nonUnicodeCount) }));
+    }
+
+    return table;
+  }
+  
+  /**
+   * Gets a list of scripts covered by the font and the amount each script is covered.
+   *
+   * @param font the source font
+   * @return a table of covered scripts in ascending script-code order, each as "covered / total"
+   */
+  public static DataDisplayTable listScriptCoverage(Font font) {
+    String[] header = {"Script", "Coverage"};
+    Align[] displayAlignment = {Align.Left, Align.Right};
+    DataDisplayTable table = new DataDisplayTable(Arrays.asList(header));
+    table.setAlignment(Arrays.asList(displayAlignment));
+    HashMap<Integer, Integer> coveredScripts = new HashMap<Integer, Integer>();
+
+    // Tally, per script code, how many mapped code points with a real glyph belong to it
+    CMap cmap = getUCSCMap(font);
+    for (int charId : cmap) {
+      if (cmap.glyphId(charId) != CMapTable.NOTDEF) {
+        int scriptCode = UScript.getScript(charId);
+        int scriptCount = 1;
+        if (coveredScripts.containsKey(scriptCode)) {
+          scriptCount += coveredScripts.get(scriptCode);
+        }
+        coveredScripts.put(scriptCode, scriptCount);
+      }
+    }
+
+    // For each covered script, find the total size of the script and add coverage to table
+    Set<Integer> sortedScripts = new TreeSet<Integer>(coveredScripts.keySet());
+    int unknown = 0;
+    for (Integer scriptCode : sortedScripts) {
+      UnicodeSet scriptSet = null;
+      String scriptName = UScript.getName(scriptCode);
+      try {
+        scriptSet = new UnicodeSet("[[:" + scriptName + ":]]");
+      } catch (IllegalIcuArgumentException e) {
+        unknown += coveredScripts.get(scriptCode); // no set for this name; pool into one row
+        continue;
+      }
+
+      table.add(Arrays.asList(new String[] {
+          scriptName, String.format("%d / %d", coveredScripts.get(scriptCode), scriptSet.size())}));
+    }
+    if (unknown > 0) {
+      table.add(Arrays.asList(new String[] {"Unsupported script", String.format("%d", unknown)}));
+    }
+
+    return table;
+  }
+
+  /**
+   * Gets a list of characters needed to fully cover scripts partially covered by the font.
+   *
+   * @param font the source font
+   * @return a table with one (script, code point, name) row per character still missing
+   */
+  public static DataDisplayTable listCharsNeededToCoverScript(Font font) {
+    String[] header = {"Script", "Code Point", "Name"};
+    Align[] displayAlignment = {Align.Left, Align.Right, Align.Left};
+    DataDisplayTable table = new DataDisplayTable(Arrays.asList(header));
+    table.setAlignment(Arrays.asList(displayAlignment));
+    HashMap<Integer, UnicodeSet> coveredScripts = new HashMap<Integer, UnicodeSet>();
+
+    // For every mapped character, strike it from the still-missing set of its script
+    CMap cmap = getUCSCMap(font);
+    for (int charId : cmap) {
+      if (cmap.glyphId(charId) != CMapTable.NOTDEF) {
+        int scriptCode = UScript.getScript(charId);
+        if (scriptCode == UScript.UNKNOWN) {
+          continue; // characters without a known script are not reported
+        }
+
+        UnicodeSet scriptSet = null;
+        if (!coveredScripts.containsKey(scriptCode)) {
+          // First character seen for this script: start from all its assigned, non-control chars
+          try {
+            scriptSet = new UnicodeSet(
+                "[[:" + UScript.getName(scriptCode) + ":]-[:gc=Unassigned:]-[:gc=Control:]]");
+          } catch (IllegalIcuArgumentException e) {
+            continue; // ICU could not build a set from this script name; skip the character
+          }
+          coveredScripts.put(scriptCode, scriptSet);
+        } else {
+          // Set for script already exists, retrieve for character removal
+          scriptSet = coveredScripts.get(scriptCode);
+        }
+        scriptSet.remove(UCharacter.toString(charId));
+      }
+    }
+
+    // Insert into table in ascending script-code order; what is left in each set is missing
+    Set<Integer> sortedScripts = new TreeSet<Integer>(coveredScripts.keySet());
+    for (Integer scriptCode : sortedScripts) {
+      UnicodeSet uSet = coveredScripts.get(scriptCode);
+      for (String charStr : uSet) {
+        int codePoint = UCharacter.codePointAt(charStr, 0);
+        table.add(Arrays.asList(new String[] {String.format("%s", UScript.getName(scriptCode)),
+            getFormattedCodePointString(codePoint), UCharacter.getExtendedName(codePoint)}));
+      }
+    }
+
+    return table;
+  }
+
+  /**
    * Gets a list of minimum and maximum x and y dimensions for the glyphs in the
    * font. This is based on the reported min and max values for each glyph and
    * not on the actual outline sizes.
diff --git a/java/src/com/google/typography/font/tools/fontinfo/UnicodeBlockData.java b/java/src/com/google/typography/font/tools/fontinfo/UnicodeBlockData.java
new file mode 100644
index 0000000..614e99c
--- /dev/null
+++ b/java/src/com/google/typography/font/tools/fontinfo/UnicodeBlockData.java
@@ -0,0 +1,689 @@
+package com.google.typography.font.tools.fontinfo;
+
+/**
+ * Static lookup tables of Unicode block names and their start and end codes, addressed by index.
+ *
+ * Unicode block information listed is for Unicode version 6.1.0 and is
+ * retrieved from ftp://ftp.unicode.org/Public/6.1.0/ucd/Blocks.txt
+ */
+// FIXME Find more elegant method of retrieving this data
+public class UnicodeBlockData {
+  private static String[] blockNames = { "Basic Latin", // entry i pairs with blockStartCode[i] and blockEndCode[i]
+      "Latin-1 Supplement",
+      "Latin Extended-A",
+      "Latin Extended-B",
+      "IPA Extensions",
+      "Spacing Modifier Letters",
+      "Combining Diacritical Marks",
+      "Greek and Coptic",
+      "Cyrillic",
+      "Cyrillic Supplement",
+      "Armenian",
+      "Hebrew",
+      "Arabic",
+      "Syriac",
+      "Arabic Supplement",
+      "Thaana",
+      "NKo",
+      "Samaritan",
+      "Mandaic",
+      "Arabic Extended-A",
+      "Devanagari",
+      "Bengali",
+      "Gurmukhi",
+      "Gujarati",
+      "Oriya",
+      "Tamil",
+      "Telugu",
+      "Kannada",
+      "Malayalam",
+      "Sinhala",
+      "Thai",
+      "Lao",
+      "Tibetan",
+      "Myanmar",
+      "Georgian",
+      "Hangul Jamo",
+      "Ethiopic",
+      "Ethiopic Supplement",
+      "Cherokee",
+      "Unified Canadian Aboriginal Syllabics",
+      "Ogham",
+      "Runic",
+      "Tagalog",
+      "Hanunoo",
+      "Buhid",
+      "Tagbanwa",
+      "Khmer",
+      "Mongolian",
+      "Unified Canadian Aboriginal Syllabics Extended",
+      "Limbu",
+      "Tai Le",
+      "New Tai Lue",
+      "Khmer Symbols",
+      "Buginese",
+      "Tai Tham",
+      "Balinese",
+      "Sundanese",
+      "Batak",
+      "Lepcha",
+      "Ol Chiki",
+      "Sundanese Supplement",
+      "Vedic Extensions",
+      "Phonetic Extensions",
+      "Phonetic Extensions Supplement",
+      "Combining Diacritical Marks Supplement",
+      "Latin Extended Additional",
+      "Greek Extended",
+      "General Punctuation",
+      "Superscripts and Subscripts",
+      "Currency Symbols",
+      "Combining Diacritical Marks for Symbols",
+      "Letterlike Symbols",
+      "Number Forms",
+      "Arrows",
+      "Mathematical Operators",
+      "Miscellaneous Technical",
+      "Control Pictures",
+      "Optical Character Recognition",
+      "Enclosed Alphanumerics",
+      "Box Drawing",
+      "Block Elements",
+      "Geometric Shapes",
+      "Miscellaneous Symbols",
+      "Dingbats",
+      "Miscellaneous Mathematical Symbols-A",
+      "Supplemental Arrows-A",
+      "Braille Patterns",
+      "Supplemental Arrows-B",
+      "Miscellaneous Mathematical Symbols-B",
+      "Supplemental Mathematical Operators",
+      "Miscellaneous Symbols and Arrows",
+      "Glagolitic",
+      "Latin Extended-C",
+      "Coptic",
+      "Georgian Supplement",
+      "Tifinagh",
+      "Ethiopic Extended",
+      "Cyrillic Extended-A",
+      "Supplemental Punctuation",
+      "CJK Radicals Supplement",
+      "Kangxi Radicals",
+      "Ideographic Description Characters",
+      "CJK Symbols and Punctuation",
+      "Hiragana",
+      "Katakana",
+      "Bopomofo",
+      "Hangul Compatibility Jamo",
+      "Kanbun",
+      "Bopomofo Extended",
+      "CJK Strokes",
+      "Katakana Phonetic Extensions",
+      "Enclosed CJK Letters and Months",
+      "CJK Compatibility",
+      "CJK Unified Ideographs Extension A",
+      "Yijing Hexagram Symbols",
+      "CJK Unified Ideographs",
+      "Yi Syllables",
+      "Yi Radicals",
+      "Lisu",
+      "Vai",
+      "Cyrillic Extended-B",
+      "Bamum",
+      "Modifier Tone Letters",
+      "Latin Extended-D",
+      "Syloti Nagri",
+      "Common Indic Number Forms",
+      "Phags-pa",
+      "Saurashtra",
+      "Devanagari Extended",
+      "Kayah Li",
+      "Rejang",
+      "Hangul Jamo Extended-A",
+      "Javanese",
+      "Cham",
+      "Myanmar Extended-A",
+      "Tai Viet",
+      "Meetei Mayek Extensions",
+      "Ethiopic Extended-A",
+      "Meetei Mayek",
+      "Hangul Syllables",
+      "Hangul Jamo Extended-B",
+      "High Surrogates",
+      "High Private Use Surrogates",
+      "Low Surrogates",
+      "Private Use Area",
+      "CJK Compatibility Ideographs",
+      "Alphabetic Presentation Forms",
+      "Arabic Presentation Forms-A",
+      "Variation Selectors",
+      "Vertical Forms",
+      "Combining Half Marks",
+      "CJK Compatibility Forms",
+      "Small Form Variants",
+      "Arabic Presentation Forms-B",
+      "Halfwidth and Fullwidth Forms",
+      "Specials",
+      "Linear B Syllabary",
+      "Linear B Ideograms",
+      "Aegean Numbers",
+      "Ancient Greek Numbers",
+      "Ancient Symbols",
+      "Phaistos Disc",
+      "Lycian",
+      "Carian",
+      "Old Italic",
+      "Gothic",
+      "Ugaritic",
+      "Old Persian",
+      "Deseret",
+      "Shavian",
+      "Osmanya",
+      "Cypriot Syllabary",
+      "Imperial Aramaic",
+      "Phoenician",
+      "Lydian",
+      "Meroitic Hieroglyphs",
+      "Meroitic Cursive",
+      "Kharoshthi",
+      "Old South Arabian",
+      "Avestan",
+      "Inscriptional Parthian",
+      "Inscriptional Pahlavi",
+      "Old Turkic",
+      "Rumi Numeral Symbols",
+      "Brahmi",
+      "Kaithi",
+      "Sora Sompeng",
+      "Chakma",
+      "Sharada",
+      "Takri",
+      "Cuneiform",
+      "Cuneiform Numbers and Punctuation",
+      "Egyptian Hieroglyphs",
+      "Bamum Supplement",
+      "Miao",
+      "Kana Supplement",
+      "Byzantine Musical Symbols",
+      "Musical Symbols",
+      "Ancient Greek Musical Notation",
+      "Tai Xuan Jing Symbols",
+      "Counting Rod Numerals",
+      "Mathematical Alphanumeric Symbols",
+      "Arabic Mathematical Alphabetic Symbols",
+      "Mahjong Tiles",
+      "Domino Tiles",
+      "Playing Cards",
+      "Enclosed Alphanumeric Supplement",
+      "Enclosed Ideographic Supplement",
+      "Miscellaneous Symbols And Pictographs",
+      "Emoticons",
+      "Transport And Map Symbols",
+      "Alchemical Symbols",
+      "CJK Unified Ideographs Extension B",
+      "CJK Unified Ideographs Extension C",
+      "CJK Unified Ideographs Extension D",
+      "CJK Compatibility Ideographs Supplement",
+      "Tags",
+      "Variation Selectors Supplement",
+      "Supplementary Private Use Area-A",
+      "Supplementary Private Use Area-B" };
+
+  private static String[] blockStartCode = { "U+0000", // first code of each block, "U+" plus hex digits
+      "U+0080",
+      "U+0100",
+      "U+0180",
+      "U+0250",
+      "U+02B0",
+      "U+0300",
+      "U+0370",
+      "U+0400",
+      "U+0500",
+      "U+0530",
+      "U+0590",
+      "U+0600",
+      "U+0700",
+      "U+0750",
+      "U+0780",
+      "U+07C0",
+      "U+0800",
+      "U+0840",
+      "U+08A0",
+      "U+0900",
+      "U+0980",
+      "U+0A00",
+      "U+0A80",
+      "U+0B00",
+      "U+0B80",
+      "U+0C00",
+      "U+0C80",
+      "U+0D00",
+      "U+0D80",
+      "U+0E00",
+      "U+0E80",
+      "U+0F00",
+      "U+1000",
+      "U+10A0",
+      "U+1100",
+      "U+1200",
+      "U+1380",
+      "U+13A0",
+      "U+1400",
+      "U+1680",
+      "U+16A0",
+      "U+1700",
+      "U+1720",
+      "U+1740",
+      "U+1760",
+      "U+1780",
+      "U+1800",
+      "U+18B0",
+      "U+1900",
+      "U+1950",
+      "U+1980",
+      "U+19E0",
+      "U+1A00",
+      "U+1A20",
+      "U+1B00",
+      "U+1B80",
+      "U+1BC0",
+      "U+1C00",
+      "U+1C50",
+      "U+1CC0",
+      "U+1CD0",
+      "U+1D00",
+      "U+1D80",
+      "U+1DC0",
+      "U+1E00",
+      "U+1F00",
+      "U+2000",
+      "U+2070",
+      "U+20A0",
+      "U+20D0",
+      "U+2100",
+      "U+2150",
+      "U+2190",
+      "U+2200",
+      "U+2300",
+      "U+2400",
+      "U+2440",
+      "U+2460",
+      "U+2500",
+      "U+2580",
+      "U+25A0",
+      "U+2600",
+      "U+2700",
+      "U+27C0",
+      "U+27F0",
+      "U+2800",
+      "U+2900",
+      "U+2980",
+      "U+2A00",
+      "U+2B00",
+      "U+2C00",
+      "U+2C60",
+      "U+2C80",
+      "U+2D00",
+      "U+2D30",
+      "U+2D80",
+      "U+2DE0",
+      "U+2E00",
+      "U+2E80",
+      "U+2F00",
+      "U+2FF0",
+      "U+3000",
+      "U+3040",
+      "U+30A0",
+      "U+3100",
+      "U+3130",
+      "U+3190",
+      "U+31A0",
+      "U+31C0",
+      "U+31F0",
+      "U+3200",
+      "U+3300",
+      "U+3400",
+      "U+4DC0",
+      "U+4E00",
+      "U+A000",
+      "U+A490",
+      "U+A4D0",
+      "U+A500",
+      "U+A640",
+      "U+A6A0",
+      "U+A700",
+      "U+A720",
+      "U+A800",
+      "U+A830",
+      "U+A840",
+      "U+A880",
+      "U+A8E0",
+      "U+A900",
+      "U+A930",
+      "U+A960",
+      "U+A980",
+      "U+AA00",
+      "U+AA60",
+      "U+AA80",
+      "U+AAE0",
+      "U+AB00",
+      "U+ABC0",
+      "U+AC00",
+      "U+D7B0",
+      "U+D800",
+      "U+DB80",
+      "U+DC00",
+      "U+E000",
+      "U+F900",
+      "U+FB00",
+      "U+FB50",
+      "U+FE00",
+      "U+FE10",
+      "U+FE20",
+      "U+FE30",
+      "U+FE50",
+      "U+FE70",
+      "U+FF00",
+      "U+FFF0",
+      "U+10000",
+      "U+10080",
+      "U+10100",
+      "U+10140",
+      "U+10190",
+      "U+101D0",
+      "U+10280",
+      "U+102A0",
+      "U+10300",
+      "U+10330",
+      "U+10380",
+      "U+103A0",
+      "U+10400",
+      "U+10450",
+      "U+10480",
+      "U+10800",
+      "U+10840",
+      "U+10900",
+      "U+10920",
+      "U+10980",
+      "U+109A0",
+      "U+10A00",
+      "U+10A60",
+      "U+10B00",
+      "U+10B40",
+      "U+10B60",
+      "U+10C00",
+      "U+10E60",
+      "U+11000",
+      "U+11080",
+      "U+110D0",
+      "U+11100",
+      "U+11180",
+      "U+11680",
+      "U+12000",
+      "U+12400",
+      "U+13000",
+      "U+16800",
+      "U+16F00",
+      "U+1B000",
+      "U+1D000",
+      "U+1D100",
+      "U+1D200",
+      "U+1D300",
+      "U+1D360",
+      "U+1D400",
+      "U+1EE00",
+      "U+1F000",
+      "U+1F030",
+      "U+1F0A0",
+      "U+1F100",
+      "U+1F200",
+      "U+1F300",
+      "U+1F600",
+      "U+1F680",
+      "U+1F700",
+      "U+20000",
+      "U+2A700",
+      "U+2B740",
+      "U+2F800",
+      "U+E0000",
+      "U+E0100",
+      "U+F0000",
+      "U+100000" };
+
+  private static String[] blockEndCode = { "U+007F", // last code of each block, same index order
+      "U+00FF",
+      "U+017F",
+      "U+024F",
+      "U+02AF",
+      "U+02FF",
+      "U+036F",
+      "U+03FF",
+      "U+04FF",
+      "U+052F",
+      "U+058F",
+      "U+05FF",
+      "U+06FF",
+      "U+074F",
+      "U+077F",
+      "U+07BF",
+      "U+07FF",
+      "U+083F",
+      "U+085F",
+      "U+08FF",
+      "U+097F",
+      "U+09FF",
+      "U+0A7F",
+      "U+0AFF",
+      "U+0B7F",
+      "U+0BFF",
+      "U+0C7F",
+      "U+0CFF",
+      "U+0D7F",
+      "U+0DFF",
+      "U+0E7F",
+      "U+0EFF",
+      "U+0FFF",
+      "U+109F",
+      "U+10FF",
+      "U+11FF",
+      "U+137F",
+      "U+139F",
+      "U+13FF",
+      "U+167F",
+      "U+169F",
+      "U+16FF",
+      "U+171F",
+      "U+173F",
+      "U+175F",
+      "U+177F",
+      "U+17FF",
+      "U+18AF",
+      "U+18FF",
+      "U+194F",
+      "U+197F",
+      "U+19DF",
+      "U+19FF",
+      "U+1A1F",
+      "U+1AAF",
+      "U+1B7F",
+      "U+1BBF",
+      "U+1BFF",
+      "U+1C4F",
+      "U+1C7F",
+      "U+1CCF",
+      "U+1CFF",
+      "U+1D7F",
+      "U+1DBF",
+      "U+1DFF",
+      "U+1EFF",
+      "U+1FFF",
+      "U+206F",
+      "U+209F",
+      "U+20CF",
+      "U+20FF",
+      "U+214F",
+      "U+218F",
+      "U+21FF",
+      "U+22FF",
+      "U+23FF",
+      "U+243F",
+      "U+245F",
+      "U+24FF",
+      "U+257F",
+      "U+259F",
+      "U+25FF",
+      "U+26FF",
+      "U+27BF",
+      "U+27EF",
+      "U+27FF",
+      "U+28FF",
+      "U+297F",
+      "U+29FF",
+      "U+2AFF",
+      "U+2BFF",
+      "U+2C5F",
+      "U+2C7F",
+      "U+2CFF",
+      "U+2D2F",
+      "U+2D7F",
+      "U+2DDF",
+      "U+2DFF",
+      "U+2E7F",
+      "U+2EFF",
+      "U+2FDF",
+      "U+2FFF",
+      "U+303F",
+      "U+309F",
+      "U+30FF",
+      "U+312F",
+      "U+318F",
+      "U+319F",
+      "U+31BF",
+      "U+31EF",
+      "U+31FF",
+      "U+32FF",
+      "U+33FF",
+      "U+4DBF",
+      "U+4DFF",
+      "U+9FFF",
+      "U+A48F",
+      "U+A4CF",
+      "U+A4FF",
+      "U+A63F",
+      "U+A69F",
+      "U+A6FF",
+      "U+A71F",
+      "U+A7FF",
+      "U+A82F",
+      "U+A83F",
+      "U+A87F",
+      "U+A8DF",
+      "U+A8FF",
+      "U+A92F",
+      "U+A95F",
+      "U+A97F",
+      "U+A9DF",
+      "U+AA5F",
+      "U+AA7F",
+      "U+AADF",
+      "U+AAFF",
+      "U+AB2F",
+      "U+ABFF",
+      "U+D7AF",
+      "U+D7FF",
+      "U+DB7F",
+      "U+DBFF",
+      "U+DFFF",
+      "U+F8FF",
+      "U+FAFF",
+      "U+FB4F",
+      "U+FDFF",
+      "U+FE0F",
+      "U+FE1F",
+      "U+FE2F",
+      "U+FE4F",
+      "U+FE6F",
+      "U+FEFF",
+      "U+FFEF",
+      "U+FFFF",
+      "U+1007F",
+      "U+100FF",
+      "U+1013F",
+      "U+1018F",
+      "U+101CF",
+      "U+101FF",
+      "U+1029F",
+      "U+102DF",
+      "U+1032F",
+      "U+1034F",
+      "U+1039F",
+      "U+103DF",
+      "U+1044F",
+      "U+1047F",
+      "U+104AF",
+      "U+1083F",
+      "U+1085F",
+      "U+1091F",
+      "U+1093F",
+      "U+1099F",
+      "U+109FF",
+      "U+10A5F",
+      "U+10A7F",
+      "U+10B3F",
+      "U+10B5F",
+      "U+10B7F",
+      "U+10C4F",
+      "U+10E7F",
+      "U+1107F",
+      "U+110CF",
+      "U+110FF",
+      "U+1114F",
+      "U+111DF",
+      "U+116CF",
+      "U+123FF",
+      "U+1247F",
+      "U+1342F",
+      "U+16A3F",
+      "U+16F9F",
+      "U+1B0FF",
+      "U+1D0FF",
+      "U+1D1FF",
+      "U+1D24F",
+      "U+1D35F",
+      "U+1D37F",
+      "U+1D7FF",
+      "U+1EEFF",
+      "U+1F02F",
+      "U+1F09F",
+      "U+1F0FF",
+      "U+1F1FF",
+      "U+1F2FF",
+      "U+1F5FF",
+      "U+1F64F",
+      "U+1F6FF",
+      "U+1F77F",
+      "U+2A6DF",
+      "U+2B73F",
+      "U+2B81F",
+      "U+2FA1F",
+      "U+E007F",
+      "U+E01EF",
+      "U+FFFFF",
+      "U+10FFFF", };
+
+  public static String getBlockName(int block) { // block is a 0-based index, below numBlocks()
+    return blockNames[block];
+  }
+
+  public static String getBlockStartCode(int block) { // same index as used for getBlockName
+    return blockStartCode[block];
+  }
+
+  public static String getBlockEndCode(int block) { // same index as used for getBlockName
+    return blockEndCode[block];
+  }
+
+  public static int numBlocks() { // number of entries in blockNames
+    return blockNames.length;
+  }
+}