1. /*
  2. * @(#)PatternEntry.java 1.20 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * @(#)PatternEntry.java 1.20 01/11/29
  9. *
  10. * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
  11. * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
  12. *
  13. * Portions copyright (c) 1996-1998 Sun Microsystems, Inc. All Rights Reserved.
  14. *
  15. * The original version of this source code and documentation is copyrighted
  16. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  17. * materials are provided under terms of a License Agreement between Taligent
  18. * and Sun. This technology is protected by multiple US and International
  19. * patents. This notice and attribution to Taligent may not be removed.
  20. * Taligent is a registered trademark of Taligent, Inc.
  21. *
  22. * Permission to use, copy, modify, and distribute this software
  23. * and its documentation for NON-COMMERCIAL purposes and without
  24. * fee is hereby granted provided that this copyright notice
  25. * appears in all copies. Please refer to the file "copyright.html"
  26. * for further important copyright and licensing information.
  27. *
  28. * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
  29. * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
  30. * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  31. * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
  32. * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
  33. * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
  34. *
  35. */
  36. package java.text;
  37. import java.lang.Character;
  38. /**
  39. * Utility class for normalizing and merging patterns for collation.
  40. * This is to be used with MergeCollation for adding patterns to an
  41. * existing rule table.
  42. * @see MergeCollation
  43. * @version 1.20 11/29/01
  44. * @author Mark Davis, Helena Shih
  45. */
  46. class PatternEntry {
  47. /**
  48. * Gets the current extension, quoted
  49. */
  50. public void appendQuotedExtension(StringBuffer toAddTo) {
  51. appendQuoted(extension,toAddTo);
  52. }
  53. /**
  54. * Gets the current chars, quoted
  55. */
  56. public void appendQuotedChars(StringBuffer toAddTo) {
  57. appendQuoted(chars,toAddTo);
  58. }
  59. /**
  60. * WARNING this is used for searching in a Vector.
  61. * Because Vector.indexOf doesn't take a comparator,
  62. * this method is ill-defined and ignores strength.
  63. */
  64. public boolean equals(Object obj) {
  65. if (obj == null) return false;
  66. PatternEntry other = (PatternEntry) obj;
  67. boolean result = chars.equals(other.chars);
  68. return result;
  69. }
  70. /**
  71. * For debugging.
  72. */
  73. public String toString() {
  74. StringBuffer result = new StringBuffer();
  75. addToBuffer(result, true, false, null);
  76. return result.toString();
  77. }
  78. /**
  79. * Gets the strength of the entry.
  80. */
  81. final int getStrength() {
  82. return strength;
  83. }
  84. /**
  85. * Gets the expanding characters of the entry.
  86. */
  87. final String getExtension() {
  88. return extension;
  89. }
  90. /**
  91. * Gets the core characters of the entry.
  92. */
  93. final String getChars() {
  94. return chars;
  95. }
  96. // ===== privates =====
  97. void addToBuffer(StringBuffer toAddTo,
  98. boolean showExtension,
  99. boolean showWhiteSpace,
  100. PatternEntry lastEntry)
  101. {
  102. if (showWhiteSpace && toAddTo.length() > 0)
  103. if (strength == Collator.PRIMARY || lastEntry != null)
  104. toAddTo.append('\n');
  105. else
  106. toAddTo.append(' ');
  107. if (lastEntry != null) {
  108. toAddTo.append('&');
  109. if (showWhiteSpace)
  110. toAddTo.append(' ');
  111. lastEntry.appendQuotedChars(toAddTo);
  112. appendQuotedExtension(toAddTo);
  113. if (showWhiteSpace)
  114. toAddTo.append(' ');
  115. }
  116. switch (strength) {
  117. case Collator.IDENTICAL: toAddTo.append('='); break;
  118. case Collator.TERTIARY: toAddTo.append(','); break;
  119. case Collator.SECONDARY: toAddTo.append(';'); break;
  120. case Collator.PRIMARY: toAddTo.append('<'); break;
  121. case RESET: toAddTo.append('&'); break;
  122. case UNSET: toAddTo.append('?'); break;
  123. }
  124. if (showWhiteSpace)
  125. toAddTo.append(' ');
  126. appendQuoted(chars,toAddTo);
  127. if (showExtension && extension.length() != 0) {
  128. toAddTo.append('/');
  129. appendQuoted(extension,toAddTo);
  130. }
  131. }
  132. static void appendQuoted(String chars, StringBuffer toAddTo) {
  133. boolean inQuote = false;
  134. char ch = chars.charAt(0);
  135. if (Character.isSpaceChar(ch)) {
  136. inQuote = true;
  137. toAddTo.append('\'');
  138. } else {
  139. if (PatternEntry.isSpecialChar(ch)) {
  140. inQuote = true;
  141. toAddTo.append('\'');
  142. } else {
  143. switch (ch) {
  144. case 0x0010: case '\f': case '\r':
  145. case '\t': case '\n': case '@':
  146. inQuote = true;
  147. toAddTo.append('\'');
  148. break;
  149. case '\'':
  150. inQuote = true;
  151. toAddTo.append('\'');
  152. break;
  153. default:
  154. if (inQuote) {
  155. inQuote = false; toAddTo.append('\'');
  156. }
  157. break;
  158. }
  159. }
  160. }
  161. toAddTo.append(chars);
  162. if (inQuote)
  163. toAddTo.append('\'');
  164. }
  165. //========================================================================
  166. // Parsing a pattern into a list of PatternEntries....
  167. //========================================================================
  168. PatternEntry(int strength,
  169. StringBuffer chars,
  170. StringBuffer extension)
  171. {
  172. this.strength = strength;
  173. this.chars = chars.toString();
  174. this.extension = (extension.length() > 0) ? extension.toString()
  175. : "";
  176. }
  177. static class Parser {
  178. private String pattern;
  179. private int i;
  180. public Parser(String pattern) {
  181. this.pattern = pattern;
  182. this.i = 0;
  183. }
  184. public PatternEntry next() throws ParseException {
  185. int newStrength = UNSET;
  186. newChars.setLength(0);
  187. newExtension.setLength(0);
  188. boolean inChars = true;
  189. boolean inQuote = false;
  190. mainLoop:
  191. while (i < pattern.length()) {
  192. char ch = pattern.charAt(i);
  193. if (inQuote) {
  194. if (ch == '\'') {
  195. inQuote = false;
  196. } else {
  197. if (newChars.length() == 0) newChars.append(ch);
  198. else if (inChars) newChars.append(ch);
  199. else newExtension.append(ch);
  200. }
  201. } else switch (ch) {
  202. case '=': if (newStrength != UNSET) break mainLoop;
  203. newStrength = Collator.IDENTICAL; break;
  204. case ',': if (newStrength != UNSET) break mainLoop;
  205. newStrength = Collator.TERTIARY; break;
  206. case ';': if (newStrength != UNSET) break mainLoop;
  207. newStrength = Collator.SECONDARY; break;
  208. case '<': if (newStrength != UNSET) break mainLoop;
  209. newStrength = Collator.PRIMARY; break;
  210. case '&': if (newStrength != UNSET) break mainLoop;
  211. newStrength = RESET; break;
  212. case '\t':
  213. case '\n':
  214. case '\f':
  215. case '\r':
  216. case ' ': break; // skip whitespace TODO use Character
  217. case '/': inChars = false; break;
  218. case '\'':
  219. inQuote = true;
  220. ch = pattern.charAt(++i);
  221. if (newChars.length() == 0) newChars.append(ch);
  222. else if (inChars) newChars.append(ch);
  223. else newExtension.append(ch);
  224. break;
  225. default:
  226. if (newStrength == UNSET) {
  227. throw new ParseException
  228. ("missing char (=,;<&) : " +
  229. pattern.substring(i,
  230. (i+10 < pattern.length()) ?
  231. i+10 : pattern.length()),
  232. i);
  233. }
  234. if (PatternEntry.isSpecialChar(ch) && (inQuote == false))
  235. throw new ParseException
  236. ("Unquoted punctuation character : " + Integer.toString(ch, 16), i);
  237. if (inChars) {
  238. newChars.append(ch);
  239. } else {
  240. newExtension.append(ch);
  241. }
  242. break;
  243. }
  244. i++;
  245. }
  246. if (newStrength == UNSET)
  247. return null;
  248. if (newChars.length() == 0) {
  249. throw new ParseException
  250. ("missing chars (=,;<&): " +
  251. pattern.substring(i,
  252. (i+10 < pattern.length()) ?
  253. i+10 : pattern.length()),
  254. i);
  255. }
  256. return new PatternEntry(newStrength, newChars, newExtension);
  257. }
  258. // We re-use these objects in order to improve performance
  259. private StringBuffer newChars = new StringBuffer();
  260. private StringBuffer newExtension = new StringBuffer();
  261. }
  262. static boolean isSpecialChar(char ch) {
  263. return (((ch <= '\u002F') && (ch >= '\u0020')) ||
  264. ((ch <= '\u003F') && (ch >= '\u003A')) ||
  265. ((ch <= '\u0060') && (ch >= '\u005B')) ||
  266. ((ch <= '\u007E') && (ch >= '\u007B')));
  267. }
  268. static final int RESET = -2;
  269. static final int UNSET = -1;
  270. int strength = UNSET;
  271. String chars = "";
  272. String extension = "";
  273. }