1. /*
  2. * @(#)CollationRules.java 1.21 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. /*
  8. * @(#)CollationRules.java 1.19 98/07/23
  9. *
  10. * (C) Copyright Taligent, Inc. 1996,1997 - All Rights Reserved
  11. * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
  12. *
  13. * Portions copyright (c) 1996-1998 Sun Microsystems, Inc. All Rights Reserved.
  14. *
  15. * The original version of this source code and documentation is copyrighted
  16. * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  17. * materials are provided under terms of a License Agreement between Taligent
  18. * and Sun. This technology is protected by multiple US and International
  19. * patents. This notice and attribution to Taligent may not be removed.
  20. * Taligent is a registered trademark of Taligent, Inc.
  21. *
  22. * Permission to use, copy, modify, and distribute this software
  23. * and its documentation for NON-COMMERCIAL purposes and without
  24. * fee is hereby granted provided that this copyright notice
  25. * appears in all copies. Please refer to the file "copyright.html"
  26. * for further important copyright and licensing information.
  27. *
  28. * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
  29. * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
  30. * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  31. * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
  32. * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
  33. * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
  34. *
  35. */
  36. package java.text;
  37. /**
  38. * CollationRules contains the default en_US collation rules as a base
  39. * for building other collation tables.
  40. * <p>Note that decompositions are done before these rules are used,
  41. * so they do not have to contain accented characters, such as A-grave.
  42. * @see RuleBasedCollator
  43. * @see LocaleElements
  44. * @version 1.19 07/23/98
  45. * @author Helena Shih, Mark Davis
  46. */
  47. final class CollationRules {
  48. final static String DEFAULTRULES = new String(
  49. "" // no FRENCH accent order by default, add in French Delta
  50. // IGNORABLES (up to first < character)
  51. // COMPLETELY IGNORE format characters
  52. + "='\u200B'=\u200C=\u200D=\u200E=\u200F"
  53. // Control Characters
  54. + "=\u0000 =\u0001 =\u0002 =\u0003 =\u0004" //null, .. eot
  55. + "=\u0005 =\u0006 =\u0007 =\u0008 ='\u0009'" //enq, ...
  56. + "='\u000b' =\u000e" //vt,, so
  57. + "=\u000f ='\u0010' =\u0011 =\u0012 =\u0013" //si, dle, dc1, dc2, dc3
  58. + "=\u0014 =\u0015 =\u0016 =\u0017 =\u0018" //dc4, nak, syn, etb, can
  59. + "=\u0019 =\u001a =\u001b =\u001c =\u001d" //em, sub, esc, fs, gs
  60. + "=\u001e =\u001f =\u007f" //rs, us, del
  61. //....then the C1 Latin 1 reserved control codes
  62. + "=\u0080 =\u0081 =\u0082 =\u0083 =\u0084 =\u0085"
  63. + "=\u0086 =\u0087 =\u0088 =\u0089 =\u008a =\u008b"
  64. + "=\u008c =\u008d =\u008e =\u008f =\u0090 =\u0091"
  65. + "=\u0092 =\u0093 =\u0094 =\u0095 =\u0096 =\u0097"
  66. + "=\u0098 =\u0099 =\u009a =\u009b =\u009c =\u009d"
  67. + "=\u009e =\u009f"
  68. // IGNORE except for secondary, tertiary difference
  69. // Spaces
  70. + ";'\u0020';'\u00A0'" // spaces
  71. + ";'\u2000';'\u2001';'\u2002';'\u2003';'\u2004'" // spaces
  72. + ";'\u2005';'\u2006';'\u2007';'\u2008';'\u2009'" // spaces
  73. + ";'\u200A';'\u3000';'\uFEFF'" // spaces
  74. + ";'\r' ;'\t' ;'\n';'\f';'\u000b'" // whitespace
  75. // Non-spacing accents
  76. + ";\u0301" // non-spacing acute accent
  77. + ";\u0300" // non-spacing grave accent
  78. + ";\u0306" // non-spacing breve accent
  79. + ";\u0302" // non-spacing circumflex accent
  80. + ";\u030c" // non-spacing caron/hacek accent
  81. + ";\u030a" // non-spacing ring above accent
  82. + ";\u030d" // non-spacing vertical line above
  83. + ";\u0308" // non-spacing diaeresis accent
  84. + ";\u030b" // non-spacing double acute accent
  85. + ";\u0303" // non-spacing tilde accent
  86. + ";\u0307" // non-spacing dot above/overdot accent
  87. + ";\u0304" // non-spacing macron accent
  88. + ";\u0337" // non-spacing short slash overlay (overstruck diacritic)
  89. + ";\u0327" // non-spacing cedilla accent
  90. + ";\u0328" // non-spacing ogonek accent
  91. + ";\u0323" // non-spacing dot-below/underdot accent
  92. + ";\u0332" // non-spacing underscore/underline accent
  93. // with the rest of the general diacritical marks in binary order
  94. + ";\u0305" // non-spacing overscore/overline
  95. + ";\u0309" // non-spacing hook above
  96. + ";\u030e" // non-spacing double vertical line above
  97. + ";\u030f" // non-spacing double grave
  98. + ";\u0310" // non-spacing chandrabindu
  99. + ";\u0311" // non-spacing inverted breve
  100. + ";\u0312" // non-spacing turned comma above/cedilla above
  101. + ";\u0313" // non-spacing comma above
  102. + ";\u0314" // non-spacing reversed comma above
  103. + ";\u0315" // non-spacing comma above right
  104. + ";\u0316" // non-spacing grave below
  105. + ";\u0317" // non-spacing acute below
  106. + ";\u0318" // non-spacing left tack below
  107. + ";\u0319" // non-spacing tack below
  108. + ";\u031a" // non-spacing left angle above
  109. + ";\u031b" // non-spacing horn
  110. + ";\u031c" // non-spacing left half ring below
  111. + ";\u031d" // non-spacing up tack below
  112. + ";\u031e" // non-spacing down tack below
  113. + ";\u031f" // non-spacing plus sign below
  114. + ";\u0320" // non-spacing minus sign below
  115. + ";\u0321" // non-spacing palatalized hook below
  116. + ";\u0322" // non-spacing retroflex hook below
  117. + ";\u0324" // non-spacing double dot below
  118. + ";\u0325" // non-spacing ring below
  119. + ";\u0326" // non-spacing comma below
  120. + ";\u0329" // non-spacing vertical line below
  121. + ";\u032a" // non-spacing bridge below
  122. + ";\u032b" // non-spacing inverted double arch below
  123. + ";\u032c" // non-spacing hacek below
  124. + ";\u032d" // non-spacing circumflex below
  125. + ";\u032e" // non-spacing breve below
  126. + ";\u032f" // non-spacing inverted breve below
  127. + ";\u0330" // non-spacing tilde below
  128. + ";\u0331" // non-spacing macron below
  129. + ";\u0333" // non-spacing double underscore
  130. + ";\u0334" // non-spacing tilde overlay
  131. + ";\u0335" // non-spacing short bar overlay
  132. + ";\u0336" // non-spacing long bar overlay
  133. + ";\u0338" // non-spacing long slash overlay
  134. + ";\u0339" // non-spacing right half ring below
  135. + ";\u033a" // non-spacing inverted bridge below
  136. + ";\u033b" // non-spacing square below
  137. + ";\u033c" // non-spacing seagull below
  138. + ";\u033d" // non-spacing x above
  139. + ";\u033e" // non-spacing vertical tilde
  140. + ";\u033f" // non-spacing double overscore
  141. + ";\u0340" // non-spacing grave tone mark
  142. + ";\u0341" // non-spacing acute tone mark
  143. + ";\u0342;\u0343;\u0344;\u0345;\u0360;\u0361" // newer
  144. + ";\u0483;\u0484;\u0485;\u0486" // Cyrillic accents
  145. + ";\u20D0;\u20D1;\u20D2" // symbol accents
  146. + ";\u20D3;\u20D4;\u20D5" // symbol accents
  147. + ";\u20D6;\u20D7;\u20D8" // symbol accents
  148. + ";\u20D9;\u20DA;\u20DB" // symbol accents
  149. + ";\u20DC;\u20DD;\u20DE" // symbol accents
  150. + ";\u20DF;\u20E0;\u20E1" // symbol accents
  151. + ",'\u002D';\u00AD" // dashes
  152. + ";\u2010;\u2011;\u2012" // dashes
  153. + ";\u2013;\u2014;\u2015" // dashes
  154. + ";\u2212" // dashes
  155. // other punctuation
  156. + "<'\u005f'" // underline/underscore (spacing)
  157. + "<\u00af" // overline or macron (spacing)
  158. + "<'\u002c'" // comma (spacing)
  159. + "<'\u003b'" // semicolon
  160. + "<'\u003a'" // colon
  161. + "<'\u0021'" // exclamation point
  162. + "<\u00a1" // inverted exclamation point
  163. + "<'\u003f'" // question mark
  164. + "<\u00bf" // inverted question mark
  165. + "<'\u002f'" // slash
  166. + "<'\u002e'" // period/full stop
  167. + "<\u00b4" // acute accent (spacing)
  168. + "<'\u0060'" // grave accent (spacing)
  169. + "<'\u005e'" // circumflex accent (spacing)
  170. + "<\u00a8" // diaresis/umlaut accent (spacing)
  171. + "<'\u007e'" // tilde accent (spacing)
  172. + "<\u00b7" // middle dot (spacing)
  173. + "<\u00b8" // cedilla accent (spacing)
  174. + "<'\u0027'" // apostrophe
  175. + "<'\"'" // quotation marks
  176. + "<\u00ab" // left angle quotes
  177. + "<\u00bb" // right angle quotes
  178. + "<'\u0028'" // left parenthesis
  179. + "<'\u0029'" // right parenthesis
  180. + "<'\u005b'" // left bracket
  181. + "<'\u005d'" // right bracket
  182. + "<'\u007b'" // left brace
  183. + "<'\u007d'" // right brace
  184. + "<\u00a7" // section symbol
  185. + "<\u00b6" // paragraph symbol
  186. + "<\u00a9" // copyright symbol
  187. + "<\u00ae" // registered trademark symbol
  188. + "<'\u0040'" // at sign
  189. + "<\u00a4" // international currency symbol
  190. + "<\u0e3f" // baht sign
  191. + "<\u00a2" // cent sign
  192. + "<\u20a1" // colon sign
  193. + "<\u20a2" // cruzeiro sign
  194. + "<'\u0024'" // dollar sign
  195. + "<\u20ab" // dong sign
  196. + "<\u20ac" // euro sign
  197. + "<\u20a3" // franc sign
  198. + "<\u20a4" // lira sign
  199. + "<\u20a5" // mill sign
  200. + "<\u20a6" // naira sign
  201. + "<\u20a7" // peseta sign
  202. + "<\u00a3" // pound-sterling sign
  203. + "<\u20a8" // rupee sign
  204. + "<\u20aa" // new shekel sign
  205. + "<\u20a9" // won sign
  206. + "<\u00a5" // yen sign
  207. + "<'\u002a'" // asterisk
  208. + "<'\\'" // backslash
  209. + "<'\u0026'" // ampersand
  210. + "<'\u0023'" // number sign
  211. + "<'\u0025'" // percent sign
  212. + "<'\u002b'" // plus sign
  213. + "<\u00b1" // plus-or-minus sign
  214. + "<\u00f7" // divide sign
  215. + "<\u00d7" // multiply sign
  216. + "<'\u003c'" // less-than sign
  217. + "<'\u003d'" // equal sign
  218. + "<'\u003e'" // greater-than sign
  219. + "<\u00ac" // end of line symbol/logical NOT symbol
  220. + "<'\u007c'" // vertical line/logical OR symbol
  221. + "<\u00a6" // broken vertical line
  222. + "<\u00b0" // degree symbol
  223. + "<\u00b5" // micro symbol
  224. // NUMERICS
  225. + "<0<1<2<3<4<5<6<7<8<9"
  226. + "<\u00bc<\u00bd<\u00be" // 1/4,1/2,3/4 fractions
  227. // NON-IGNORABLES
  228. + "<a,A"
  229. + "<b,B"
  230. + "<c,C"
  231. + "<d,D"
  232. + "<\u00F0,\u00D0" // eth
  233. + "<e,E"
  234. + "<f,F"
  235. + "<g,G"
  236. + "<h,H"
  237. + "<i,I"
  238. + "<j,J"
  239. + "<k,K"
  240. + "<l,L"
  241. + "<m,M"
  242. + "<n,N"
  243. + "<o,O"
  244. + "<p,P"
  245. + "<q,Q"
  246. + "<r,R"
  247. + "<s, S & SS,\u00DF" // s-zet
  248. + "<t,T"
  249. + "& TH, \u00DE &TH, \u00FE " // thorn
  250. + "<u,U"
  251. + "<v,V"
  252. + "<w,W"
  253. + "<x,X"
  254. + "<y,Y"
  255. + "<z,Z"
  256. + "&AE,\u00C6" // ae & AE ligature
  257. + "&AE,\u00E6"
  258. + "&OE,\u0152" // oe & OE ligature
  259. + "&OE,\u0153"
  260. );
  261. }