1. /*
  2. * @(#)PatternEntry.java 1.21 00/01/19
  3. *
  4. * Copyright 1996-2000 Sun Microsystems, Inc. All Rights Reserved.
  5. *
  6. * This software is the proprietary information of Sun Microsystems, Inc.
  7. * Use is subject to license terms.
  8. *
  9. */
  10. /*
  11. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  12. * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
  13. *
  14. * The original version of this source code and documentation is copyrighted
  15. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  16. * materials are provided under terms of a License Agreement between Taligent
  17. * and Sun. This technology is protected by multiple US and International
  18. * patents. This notice and attribution to Taligent may not be removed.
  19. * Taligent is a registered trademark of Taligent, Inc.
  20. *
  21. */
  22. package java.text;
  23. import java.lang.Character;
/**
 * Utility class for normalizing and merging patterns for collation.
 * It is used together with MergeCollation when adding patterns to an
 * existing rule table.
 * @see MergeCollation
 * @version 1.21 01/19/00
 * @author Mark Davis, Helena Shih
 */
  32. class PatternEntry {
  33. /**
  34. * Gets the current extension, quoted
  35. */
  36. public void appendQuotedExtension(StringBuffer toAddTo) {
  37. appendQuoted(extension,toAddTo);
  38. }
  39. /**
  40. * Gets the current chars, quoted
  41. */
  42. public void appendQuotedChars(StringBuffer toAddTo) {
  43. appendQuoted(chars,toAddTo);
  44. }
  45. /**
  46. * WARNING this is used for searching in a Vector.
  47. * Because Vector.indexOf doesn't take a comparator,
  48. * this method is ill-defined and ignores strength.
  49. */
  50. public boolean equals(Object obj) {
  51. if (obj == null) return false;
  52. PatternEntry other = (PatternEntry) obj;
  53. boolean result = chars.equals(other.chars);
  54. return result;
  55. }
  56. public int hashCode() {
  57. return chars.hashCode();
  58. }
  59. /**
  60. * For debugging.
  61. */
  62. public String toString() {
  63. StringBuffer result = new StringBuffer();
  64. addToBuffer(result, true, false, null);
  65. return result.toString();
  66. }
  67. /**
  68. * Gets the strength of the entry.
  69. */
  70. final int getStrength() {
  71. return strength;
  72. }
  73. /**
  74. * Gets the expanding characters of the entry.
  75. */
  76. final String getExtension() {
  77. return extension;
  78. }
  79. /**
  80. * Gets the core characters of the entry.
  81. */
  82. final String getChars() {
  83. return chars;
  84. }
  85. // ===== privates =====
  86. void addToBuffer(StringBuffer toAddTo,
  87. boolean showExtension,
  88. boolean showWhiteSpace,
  89. PatternEntry lastEntry)
  90. {
  91. if (showWhiteSpace && toAddTo.length() > 0)
  92. if (strength == Collator.PRIMARY || lastEntry != null)
  93. toAddTo.append('\n');
  94. else
  95. toAddTo.append(' ');
  96. if (lastEntry != null) {
  97. toAddTo.append('&');
  98. if (showWhiteSpace)
  99. toAddTo.append(' ');
  100. lastEntry.appendQuotedChars(toAddTo);
  101. appendQuotedExtension(toAddTo);
  102. if (showWhiteSpace)
  103. toAddTo.append(' ');
  104. }
  105. switch (strength) {
  106. case Collator.IDENTICAL: toAddTo.append('='); break;
  107. case Collator.TERTIARY: toAddTo.append(','); break;
  108. case Collator.SECONDARY: toAddTo.append(';'); break;
  109. case Collator.PRIMARY: toAddTo.append('<'); break;
  110. case RESET: toAddTo.append('&'); break;
  111. case UNSET: toAddTo.append('?'); break;
  112. }
  113. if (showWhiteSpace)
  114. toAddTo.append(' ');
  115. appendQuoted(chars,toAddTo);
  116. if (showExtension && extension.length() != 0) {
  117. toAddTo.append('/');
  118. appendQuoted(extension,toAddTo);
  119. }
  120. }
  121. static void appendQuoted(String chars, StringBuffer toAddTo) {
  122. boolean inQuote = false;
  123. char ch = chars.charAt(0);
  124. if (Character.isSpaceChar(ch)) {
  125. inQuote = true;
  126. toAddTo.append('\'');
  127. } else {
  128. if (PatternEntry.isSpecialChar(ch)) {
  129. inQuote = true;
  130. toAddTo.append('\'');
  131. } else {
  132. switch (ch) {
  133. case 0x0010: case '\f': case '\r':
  134. case '\t': case '\n': case '@':
  135. inQuote = true;
  136. toAddTo.append('\'');
  137. break;
  138. case '\'':
  139. inQuote = true;
  140. toAddTo.append('\'');
  141. break;
  142. default:
  143. if (inQuote) {
  144. inQuote = false; toAddTo.append('\'');
  145. }
  146. break;
  147. }
  148. }
  149. }
  150. toAddTo.append(chars);
  151. if (inQuote)
  152. toAddTo.append('\'');
  153. }
  154. //========================================================================
  155. // Parsing a pattern into a list of PatternEntries....
  156. //========================================================================
  157. PatternEntry(int strength,
  158. StringBuffer chars,
  159. StringBuffer extension)
  160. {
  161. this.strength = strength;
  162. this.chars = chars.toString();
  163. this.extension = (extension.length() > 0) ? extension.toString()
  164. : "";
  165. }
  166. static class Parser {
  167. private String pattern;
  168. private int i;
  169. public Parser(String pattern) {
  170. this.pattern = pattern;
  171. this.i = 0;
  172. }
  173. public PatternEntry next() throws ParseException {
  174. int newStrength = UNSET;
  175. newChars.setLength(0);
  176. newExtension.setLength(0);
  177. boolean inChars = true;
  178. boolean inQuote = false;
  179. mainLoop:
  180. while (i < pattern.length()) {
  181. char ch = pattern.charAt(i);
  182. if (inQuote) {
  183. if (ch == '\'') {
  184. inQuote = false;
  185. } else {
  186. if (newChars.length() == 0) newChars.append(ch);
  187. else if (inChars) newChars.append(ch);
  188. else newExtension.append(ch);
  189. }
  190. } else switch (ch) {
  191. case '=': if (newStrength != UNSET) break mainLoop;
  192. newStrength = Collator.IDENTICAL; break;
  193. case ',': if (newStrength != UNSET) break mainLoop;
  194. newStrength = Collator.TERTIARY; break;
  195. case ';': if (newStrength != UNSET) break mainLoop;
  196. newStrength = Collator.SECONDARY; break;
  197. case '<': if (newStrength != UNSET) break mainLoop;
  198. newStrength = Collator.PRIMARY; break;
  199. case '&': if (newStrength != UNSET) break mainLoop;
  200. newStrength = RESET; break;
  201. case '\t':
  202. case '\n':
  203. case '\f':
  204. case '\r':
  205. case ' ': break; // skip whitespace TODO use Character
  206. case '/': inChars = false; break;
  207. case '\'':
  208. inQuote = true;
  209. ch = pattern.charAt(++i);
  210. if (newChars.length() == 0) newChars.append(ch);
  211. else if (inChars) newChars.append(ch);
  212. else newExtension.append(ch);
  213. break;
  214. default:
  215. if (newStrength == UNSET) {
  216. throw new ParseException
  217. ("missing char (=,;<&) : " +
  218. pattern.substring(i,
  219. (i+10 < pattern.length()) ?
  220. i+10 : pattern.length()),
  221. i);
  222. }
  223. if (PatternEntry.isSpecialChar(ch) && (inQuote == false))
  224. throw new ParseException
  225. ("Unquoted punctuation character : " + Integer.toString(ch, 16), i);
  226. if (inChars) {
  227. newChars.append(ch);
  228. } else {
  229. newExtension.append(ch);
  230. }
  231. break;
  232. }
  233. i++;
  234. }
  235. if (newStrength == UNSET)
  236. return null;
  237. if (newChars.length() == 0) {
  238. throw new ParseException
  239. ("missing chars (=,;<&): " +
  240. pattern.substring(i,
  241. (i+10 < pattern.length()) ?
  242. i+10 : pattern.length()),
  243. i);
  244. }
  245. return new PatternEntry(newStrength, newChars, newExtension);
  246. }
  247. // We re-use these objects in order to improve performance
  248. private StringBuffer newChars = new StringBuffer();
  249. private StringBuffer newExtension = new StringBuffer();
  250. }
  251. static boolean isSpecialChar(char ch) {
  252. return (((ch <= '\u002F') && (ch >= '\u0020')) ||
  253. ((ch <= '\u003F') && (ch >= '\u003A')) ||
  254. ((ch <= '\u0060') && (ch >= '\u005B')) ||
  255. ((ch <= '\u007E') && (ch >= '\u007B')));
  256. }
  257. static final int RESET = -2;
  258. static final int UNSET = -1;
  259. int strength = UNSET;
  260. String chars = "";
  261. String extension = "";
  262. }