1. /*
  2. * @(#)PatternEntry.java 1.25 03/12/19
  3. *
  4. * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  9. * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
  10. *
  11. * The original version of this source code and documentation is copyrighted
  12. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  13. * materials are provided under terms of a License Agreement between Taligent
  14. * and Sun. This technology is protected by multiple US and International
  15. * patents. This notice and attribution to Taligent may not be removed.
  16. * Taligent is a registered trademark of Taligent, Inc.
  17. *
  18. */
  19. package java.text;
  20. import java.lang.Character;
  21. /**
  22. * Utility class for normalizing and merging patterns for collation.
  23. * This is to be used with MergeCollation for adding patterns to an
  24. * existing rule table.
  25. * @see MergeCollation
  26. * @version 1.25 12/19/03
  27. * @author Mark Davis, Helena Shih
  28. */
  29. class PatternEntry {
  30. /**
  31. * Gets the current extension, quoted
  32. */
  33. public void appendQuotedExtension(StringBuffer toAddTo) {
  34. appendQuoted(extension,toAddTo);
  35. }
  36. /**
  37. * Gets the current chars, quoted
  38. */
  39. public void appendQuotedChars(StringBuffer toAddTo) {
  40. appendQuoted(chars,toAddTo);
  41. }
  42. /**
  43. * WARNING this is used for searching in a Vector.
  44. * Because Vector.indexOf doesn't take a comparator,
  45. * this method is ill-defined and ignores strength.
  46. */
  47. public boolean equals(Object obj) {
  48. if (obj == null) return false;
  49. PatternEntry other = (PatternEntry) obj;
  50. boolean result = chars.equals(other.chars);
  51. return result;
  52. }
  53. public int hashCode() {
  54. return chars.hashCode();
  55. }
  56. /**
  57. * For debugging.
  58. */
  59. public String toString() {
  60. StringBuffer result = new StringBuffer();
  61. addToBuffer(result, true, false, null);
  62. return result.toString();
  63. }
  64. /**
  65. * Gets the strength of the entry.
  66. */
  67. final int getStrength() {
  68. return strength;
  69. }
  70. /**
  71. * Gets the expanding characters of the entry.
  72. */
  73. final String getExtension() {
  74. return extension;
  75. }
  76. /**
  77. * Gets the core characters of the entry.
  78. */
  79. final String getChars() {
  80. return chars;
  81. }
  82. // ===== privates =====
  83. void addToBuffer(StringBuffer toAddTo,
  84. boolean showExtension,
  85. boolean showWhiteSpace,
  86. PatternEntry lastEntry)
  87. {
  88. if (showWhiteSpace && toAddTo.length() > 0)
  89. if (strength == Collator.PRIMARY || lastEntry != null)
  90. toAddTo.append('\n');
  91. else
  92. toAddTo.append(' ');
  93. if (lastEntry != null) {
  94. toAddTo.append('&');
  95. if (showWhiteSpace)
  96. toAddTo.append(' ');
  97. lastEntry.appendQuotedChars(toAddTo);
  98. appendQuotedExtension(toAddTo);
  99. if (showWhiteSpace)
  100. toAddTo.append(' ');
  101. }
  102. switch (strength) {
  103. case Collator.IDENTICAL: toAddTo.append('='); break;
  104. case Collator.TERTIARY: toAddTo.append(','); break;
  105. case Collator.SECONDARY: toAddTo.append(';'); break;
  106. case Collator.PRIMARY: toAddTo.append('<'); break;
  107. case RESET: toAddTo.append('&'); break;
  108. case UNSET: toAddTo.append('?'); break;
  109. }
  110. if (showWhiteSpace)
  111. toAddTo.append(' ');
  112. appendQuoted(chars,toAddTo);
  113. if (showExtension && extension.length() != 0) {
  114. toAddTo.append('/');
  115. appendQuoted(extension,toAddTo);
  116. }
  117. }
  118. static void appendQuoted(String chars, StringBuffer toAddTo) {
  119. boolean inQuote = false;
  120. char ch = chars.charAt(0);
  121. if (Character.isSpaceChar(ch)) {
  122. inQuote = true;
  123. toAddTo.append('\'');
  124. } else {
  125. if (PatternEntry.isSpecialChar(ch)) {
  126. inQuote = true;
  127. toAddTo.append('\'');
  128. } else {
  129. switch (ch) {
  130. case 0x0010: case '\f': case '\r':
  131. case '\t': case '\n': case '@':
  132. inQuote = true;
  133. toAddTo.append('\'');
  134. break;
  135. case '\'':
  136. inQuote = true;
  137. toAddTo.append('\'');
  138. break;
  139. default:
  140. if (inQuote) {
  141. inQuote = false; toAddTo.append('\'');
  142. }
  143. break;
  144. }
  145. }
  146. }
  147. toAddTo.append(chars);
  148. if (inQuote)
  149. toAddTo.append('\'');
  150. }
  151. //========================================================================
  152. // Parsing a pattern into a list of PatternEntries....
  153. //========================================================================
  154. PatternEntry(int strength,
  155. StringBuffer chars,
  156. StringBuffer extension)
  157. {
  158. this.strength = strength;
  159. this.chars = chars.toString();
  160. this.extension = (extension.length() > 0) ? extension.toString()
  161. : "";
  162. }
  163. static class Parser {
  164. private String pattern;
  165. private int i;
  166. public Parser(String pattern) {
  167. this.pattern = pattern;
  168. this.i = 0;
  169. }
  170. public PatternEntry next() throws ParseException {
  171. int newStrength = UNSET;
  172. newChars.setLength(0);
  173. newExtension.setLength(0);
  174. boolean inChars = true;
  175. boolean inQuote = false;
  176. mainLoop:
  177. while (i < pattern.length()) {
  178. char ch = pattern.charAt(i);
  179. if (inQuote) {
  180. if (ch == '\'') {
  181. inQuote = false;
  182. } else {
  183. if (newChars.length() == 0) newChars.append(ch);
  184. else if (inChars) newChars.append(ch);
  185. else newExtension.append(ch);
  186. }
  187. } else switch (ch) {
  188. case '=': if (newStrength != UNSET) break mainLoop;
  189. newStrength = Collator.IDENTICAL; break;
  190. case ',': if (newStrength != UNSET) break mainLoop;
  191. newStrength = Collator.TERTIARY; break;
  192. case ';': if (newStrength != UNSET) break mainLoop;
  193. newStrength = Collator.SECONDARY; break;
  194. case '<': if (newStrength != UNSET) break mainLoop;
  195. newStrength = Collator.PRIMARY; break;
  196. case '&': if (newStrength != UNSET) break mainLoop;
  197. newStrength = RESET; break;
  198. case '\t':
  199. case '\n':
  200. case '\f':
  201. case '\r':
  202. case ' ': break; // skip whitespace TODO use Character
  203. case '/': inChars = false; break;
  204. case '\'':
  205. inQuote = true;
  206. ch = pattern.charAt(++i);
  207. if (newChars.length() == 0) newChars.append(ch);
  208. else if (inChars) newChars.append(ch);
  209. else newExtension.append(ch);
  210. break;
  211. default:
  212. if (newStrength == UNSET) {
  213. throw new ParseException
  214. ("missing char (=,;<&) : " +
  215. pattern.substring(i,
  216. (i+10 < pattern.length()) ?
  217. i+10 : pattern.length()),
  218. i);
  219. }
  220. if (PatternEntry.isSpecialChar(ch) && (inQuote == false))
  221. throw new ParseException
  222. ("Unquoted punctuation character : " + Integer.toString(ch, 16), i);
  223. if (inChars) {
  224. newChars.append(ch);
  225. } else {
  226. newExtension.append(ch);
  227. }
  228. break;
  229. }
  230. i++;
  231. }
  232. if (newStrength == UNSET)
  233. return null;
  234. if (newChars.length() == 0) {
  235. throw new ParseException
  236. ("missing chars (=,;<&): " +
  237. pattern.substring(i,
  238. (i+10 < pattern.length()) ?
  239. i+10 : pattern.length()),
  240. i);
  241. }
  242. return new PatternEntry(newStrength, newChars, newExtension);
  243. }
  244. // We re-use these objects in order to improve performance
  245. private StringBuffer newChars = new StringBuffer();
  246. private StringBuffer newExtension = new StringBuffer();
  247. }
  248. static boolean isSpecialChar(char ch) {
  249. return ((ch == '\u0020') ||
  250. ((ch <= '\u002F') && (ch >= '\u0022')) ||
  251. ((ch <= '\u003F') && (ch >= '\u003A')) ||
  252. ((ch <= '\u0060') && (ch >= '\u005B')) ||
  253. ((ch <= '\u007E') && (ch >= '\u007B')));
  254. }
  255. static final int RESET = -2;
  256. static final int UNSET = -1;
  257. int strength = UNSET;
  258. String chars = "";
  259. String extension = "";
  260. }