1. /* ====================================================================
  2. * The Apache Software License, Version 1.1
  3. *
  4. * Copyright (c) 2002-2003 The Apache Software Foundation. All rights
  5. * reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. *
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. *
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in
  16. * the documentation and/or other materials provided with the
  17. * distribution.
  18. *
  19. * 3. The end-user documentation included with the redistribution, if
  20. * any, must include the following acknowledgement:
  21. * "This product includes software developed by the
  22. * Apache Software Foundation (http://www.apache.org/)."
  23. * Alternately, this acknowledgement may appear in the software itself,
  24. * if and wherever such third-party acknowledgements normally appear.
  25. *
  26. * 4. The names "The Jakarta Project", "Commons", and "Apache Software
  27. * Foundation" must not be used to endorse or promote products derived
  28. * from this software without prior written permission. For written
  29. * permission, please contact apache@apache.org.
  30. *
  31. * 5. Products derived from this software may not be called "Apache"
  32. * nor may "Apache" appear in their names without prior written
  33. * permission of the Apache Software Foundation.
  34. *
  35. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  36. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  37. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  39. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  41. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  42. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  43. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  44. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  45. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46. * SUCH DAMAGE.
  47. * ====================================================================
  48. *
  49. * This software consists of voluntary contributions made by many
  50. * individuals on behalf of the Apache Software Foundation. For more
  51. * information on the Apache Software Foundation, please see
  52. * <http://www.apache.org/>.
  53. */
  54. package org.apache.commons.lang;
  55. import java.io.IOException;
  56. import java.io.Writer;
  57. import org.apache.commons.lang.exception.NestableRuntimeException;
  58. /**
  59. * <p>Escapes and unescapes <code>String</code>s for
  60. * Java, Java Script, HTML, XML, and SQL.</p>
  61. *
  62. * @author Apache Jakarta Turbine
  63. * @author GenerationJavaCore library
  64. * @author Purple Technology
  65. * @author <a href="mailto:bayard@generationjava.com">Henri Yandell</a>
  66. * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
  67. * @author <a href="mailto:cybertiger@cyberiantiger.org">Antony Riley</a>
  68. * @author Helge Tesgaard
  69. * @author <a href="sean@boohai.com">Sean Brown</a>
  70. * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
  71. * @author Phil Steitz
  72. * @author Pete Gieser
  73. * @since 2.0
  74. * @version $Id: StringEscapeUtils.java,v 1.25 2003/08/18 02:22:23 bayard Exp $
  75. */
  76. public class StringEscapeUtils {
  77. /**
  78. * <p><code>StringEscapeUtils</code> instances should NOT be constructed in
  79. * standard programming.</p>
  80. *
  81. * <p>Instead, the class should be used as:
  82. * <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
  83. *
  84. * <p>This constructor is public to permit tools that require a JavaBean
  85. * instance to operate.</p>
  86. */
  87. public StringEscapeUtils() {
  88. }
  89. // Java and JavaScript
  90. //--------------------------------------------------------------------------
  91. /**
  92. * <p>Escapes the characters in a <code>String</code> using Java String rules.</p>
  93. *
  94. * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  95. *
  96. * <p>So a tab becomes the characters <code>'\\'</code> and
  97. * <code>'t'</code>.</p>
  98. *
  99. * <p>The only difference between Java strings and JavaScript strings
  100. * is that in JavaScript, a single quote must be escaped.</p>
  101. *
  102. * <p>Example:
  103. * <pre>
  104. * input string: He didn't say, "Stop!"
  105. * output string: He didn't say, \"Stop!\"
  106. * </pre>
  107. * </p>
  108. *
  109. * @param str String to escape values in, may be null
  110. * @return String with escaped values, <code>null</code> if null string input
  111. */
  112. public static String escapeJava(String str) {
  113. return escapeJavaStyleString(str, false);
  114. }
  115. /**
  116. * <p>Escapes the characters in a <code>String</code> using Java String rules to
  117. * a <code>Writer</code>.</p>
  118. *
  119. * <p>A <code>null</code> string input has no effect.</p>
  120. *
  121. * @see #escapeJava(java.lang.String)
  122. * @param out Writer to write escaped string into
  123. * @param str String to escape values in, may be null
  124. * @throws IllegalArgumentException if the Writer is <code>null</code>
  125. * @throws IOException if error occurs on undelying Writer
  126. */
  127. public static void escapeJava(Writer out, String str) throws IOException {
  128. escapeJavaStyleString(out, str, false);
  129. }
  130. /**
  131. * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
  132. * <p>Escapes any values it finds into their JavaScript String form.
  133. * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
  134. *
  135. * <p>So a tab becomes the characters <code>'\\'</code> and
  136. * <code>'t'</code>.</p>
  137. *
  138. * <p>The only difference between Java strings and JavaScript strings
  139. * is that in JavaScript, a single quote must be escaped.</p>
  140. *
  141. * <p>Example:
  142. * <pre>
  143. * input string: He didn't say, "Stop!"
  144. * output string: He didn\'t say, \"Stop!\"
  145. * </pre>
  146. * </p>
  147. *
  148. * @param str String to escape values in, may be null
  149. * @return String with escaped values, <code>null</code> if null string input
  150. */
  151. public static String escapeJavaScript(String str) {
  152. return escapeJavaStyleString(str, true);
  153. }
  154. /**
  155. * <p>Escapes the characters in a <code>String</code> using JavaScript String rules
  156. * to a <code>Writer</code>.</p>
  157. *
  158. * <p>A <code>null</code> string input has no effect.</p>
  159. *
  160. * @see #escapeJavaScript(java.lang.String)
  161. * @param out Writer to write escaped string into
  162. * @param str String to escape values in, may be null
  163. * @throws IllegalArgumentException if the Writer is <code>null</code>
  164. * @throws IOException if error occurs on undelying Writer
  165. **/
  166. public static void escapeJavaScript(Writer out, String str) throws IOException {
  167. escapeJavaStyleString(out, str, true);
  168. }
  169. private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes) {
  170. if (str == null) {
  171. return null;
  172. }
  173. try {
  174. StringPrintWriter writer = new StringPrintWriter(str.length() * 2);
  175. escapeJavaStyleString(writer, str, escapeSingleQuotes);
  176. return writer.getString();
  177. } catch (IOException ioe) {
  178. // this should never ever happen while writing to a StringWriter
  179. ioe.printStackTrace();
  180. return null;
  181. }
  182. }
  183. private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote) throws IOException {
  184. if (out == null) {
  185. throw new IllegalArgumentException("The Writer must not be null");
  186. }
  187. if (str == null) {
  188. return;
  189. }
  190. int sz;
  191. sz = str.length();
  192. for (int i = 0; i < sz; i++) {
  193. char ch = str.charAt(i);
  194. // handle unicode
  195. if (ch > 0xfff) {
  196. out.write("\\u" + hex(ch));
  197. } else if (ch > 0xff) {
  198. out.write("\\u0" + hex(ch));
  199. } else if (ch > 0x7f) {
  200. out.write("\\u00" + hex(ch));
  201. } else if (ch < 32) {
  202. switch (ch) {
  203. case '\b':
  204. out.write('\\');
  205. out.write('b');
  206. break;
  207. case '\n':
  208. out.write('\\');
  209. out.write('n');
  210. break;
  211. case '\t':
  212. out.write('\\');
  213. out.write('t');
  214. break;
  215. case '\f':
  216. out.write('\\');
  217. out.write('f');
  218. break;
  219. case '\r':
  220. out.write('\\');
  221. out.write('r');
  222. break;
  223. default :
  224. if (ch > 0xf) {
  225. out.write("\\u00" + hex(ch));
  226. } else {
  227. out.write("\\u000" + hex(ch));
  228. }
  229. break;
  230. }
  231. } else {
  232. switch (ch) {
  233. case '\'':
  234. if (escapeSingleQuote) out.write('\\');
  235. out.write('\'');
  236. break;
  237. case '"':
  238. out.write('\\');
  239. out.write('"');
  240. break;
  241. case '\\':
  242. out.write('\\');
  243. out.write('\\');
  244. break;
  245. default :
  246. out.write(ch);
  247. break;
  248. }
  249. }
  250. }
  251. }
  252. /**
  253. * <p>Returns an upper case hexadecimal <code>String</code> for the given
  254. * character.</p>
  255. *
  256. * @param ch The character to convert.
  257. * @return An upper case hexadecimal <code>String</code>
  258. */
  259. private static String hex(char ch) {
  260. return Integer.toHexString(ch).toUpperCase();
  261. }
  262. /**
  263. * <p>Unescapes any Java literals found in the <code>String</code>.
  264. * For example, it will turn a sequence of <code>'\'</code> and
  265. * <code>'n'</code> into a newline character, unless the <code>'\'</code>
  266. * is preceded by another <code>'\'</code>.</p>
  267. *
  268. * @param str the <code>String</code> to unescape, may be null
  269. * @return a new unescaped <code>String</code>, <code>null</code> if null string input
  270. */
  271. public static String unescapeJava(String str) {
  272. if (str == null) {
  273. return null;
  274. }
  275. try {
  276. StringPrintWriter writer = new StringPrintWriter(str.length());
  277. unescapeJava(writer, str);
  278. return writer.getString();
  279. } catch (IOException ioe) {
  280. // this should never ever happen while writing to a StringWriter
  281. ioe.printStackTrace();
  282. return null;
  283. }
  284. }
  285. /**
  286. * <p>Unescapes any Java literals found in the <code>String</code> to a
  287. * <code>Writer</code>.</p>
  288. *
  289. * <p>For example, it will turn a sequence of <code>'\'</code> and
  290. * <code>'n'</code> into a newline character, unless the <code>'\'</code>
  291. * is preceded by another <code>'\'</code>.</p>
  292. *
  293. * <p>A <code>null</code> string input has no effect.</p>
  294. *
  295. * @param out the <code>Writer</code> used to output unescaped characters
  296. * @param str the <code>String</code> to unescape, may be null
  297. * @throws IllegalArgumentException if the Writer is <code>null</code>
  298. * @throws IOException if error occurs on undelying Writer
  299. */
  300. public static void unescapeJava(Writer out, String str) throws IOException {
  301. if (out == null) {
  302. throw new IllegalArgumentException("The Writer must not be null");
  303. }
  304. if (str == null) {
  305. return;
  306. }
  307. int sz = str.length();
  308. StringBuffer unicode = new StringBuffer(4);
  309. boolean hadSlash = false;
  310. boolean inUnicode = false;
  311. for (int i = 0; i < sz; i++) {
  312. char ch = str.charAt(i);
  313. if (inUnicode) {
  314. // if in unicode, then we're reading unicode
  315. // values in somehow
  316. unicode.append(ch);
  317. if (unicode.length() == 4) {
  318. // unicode now contains the four hex digits
  319. // which represents our unicode chacater
  320. try {
  321. int value = Integer.parseInt(unicode.toString(), 16);
  322. out.write((char) value);
  323. unicode.setLength(0);
  324. inUnicode = false;
  325. hadSlash = false;
  326. } catch (NumberFormatException nfe) {
  327. throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe);
  328. }
  329. }
  330. continue;
  331. }
  332. if (hadSlash) {
  333. // handle an escaped value
  334. hadSlash = false;
  335. switch (ch) {
  336. case '\\':
  337. out.write('\\');
  338. break;
  339. case '\'':
  340. out.write('\'');
  341. break;
  342. case '\"':
  343. out.write('"');
  344. break;
  345. case 'r':
  346. out.write('\r');
  347. break;
  348. case 'f':
  349. out.write('\f');
  350. break;
  351. case 't':
  352. out.write('\t');
  353. break;
  354. case 'n':
  355. out.write('\n');
  356. break;
  357. case 'b':
  358. out.write('\b');
  359. break;
  360. case 'u':
  361. {
  362. // uh-oh, we're in unicode country....
  363. inUnicode = true;
  364. break;
  365. }
  366. default :
  367. out.write(ch);
  368. break;
  369. }
  370. continue;
  371. } else if (ch == '\\') {
  372. hadSlash = true;
  373. continue;
  374. }
  375. out.write(ch);
  376. }
  377. if (hadSlash) {
  378. // then we're in the weird case of a \ at the end of the
  379. // string, let's output it anyway.
  380. out.write('\\');
  381. }
  382. }
  383. /**
  384. * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
  385. *
  386. * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
  387. * into a newline character, unless the <code>'\'</code> is preceded by another
  388. * <code>'\'</code>.</p>
  389. *
  390. * @see #unescapeJava(String)
  391. * @param str the <code>String</code> to unescape, may be null
  392. * @return A new unescaped <code>String</code>, <code>null</code> if null string input
  393. */
  394. public static String unescapeJavaScript(String str) {
  395. return unescapeJava(str);
  396. }
  397. /**
  398. * <p>Unescapes any JavaScript literals found in the <code>String</code> to a
  399. * <code>Writer</code>.</p>
  400. *
  401. * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
  402. * into a newline character, unless the <code>'\'</code> is preceded by another
  403. * <code>'\'</code>.</p>
  404. *
  405. * <p>A <code>null</code> string input has no effect.</p>
  406. *
  407. * @see #unescapeJava(Writer,String)
  408. * @param out the <code>Writer</code> used to output unescaped characters
  409. * @param str the <code>String</code> to unescape, may be null
  410. * @throws IllegalArgumentException if the Writer is <code>null</code>
  411. * @throws IOException if error occurs on undelying Writer
  412. */
  413. public static void unescapeJavaScript(Writer out, String str) throws IOException {
  414. unescapeJava(out, str);
  415. }
  416. // HTML and XML
  417. //--------------------------------------------------------------------------
  418. /**
  419. * <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
  420. *
  421. * <p>
  422. * For example: <tt>"bread" & "butter"</tt> => <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>.
  423. * </p>
  424. *
  425. * <p>Supports all known HTML 4.0 entities, including funky accents.</p>
  426. *
  427. * @param str the <code>String</code> to escape, may be null
  428. * @return a new escaped <code>String</code>, <code>null</code> if null string input
  429. *
  430. * @see #unescapeHtml(String)
  431. * @see </br><a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
  432. * @see </br><a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
  433. * @see </br><a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
  434. * @see </br><a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
  435. * @see </br><a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
  436. **/
  437. public static String escapeHtml(String str) {
  438. if (str == null) {
  439. return null;
  440. }
  441. //todo: add a version that takes a Writer
  442. //todo: rewrite underlying method to use a Writer instead of a StringBuffer
  443. return Entities.HTML40.escape(str);
  444. }
  445. /**
  446. * <p>Unescapes a string containing entity escapes to a string
  447. * containing the actual Unicode characters corresponding to the
  448. * escapes. Supports HTML 4.0 entities.</p>
  449. *
  450. * <p>For example, the string "&lt;Fran&ccedil;ais&gt;"
  451. * will become "<Français>"</p>
  452. *
  453. * <p>If an entity is unrecognized, it is left alone, and inserted
  454. * verbatim into the result string. e.g. "&gt;&zzzz;x" will
  455. * become ">&zzzz;x".</p>
  456. *
  457. * @param str the <code>String</code> to unescape, may be null
  458. * @return a new unescaped <code>String</code>, <code>null</code> if null string input
  459. * @see #escapeHtml(String)
  460. **/
  461. public static String unescapeHtml(String str) {
  462. if (str == null) {
  463. return null;
  464. }
  465. return Entities.HTML40.unescape(str);
  466. }
  467. /**
  468. * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
  469. *
  470. * <p>For example: <tt>"bread" & "butter"</tt> =>
  471. * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>.
  472. * </p>
  473. *
  474. * <p>Supports only the four basic XML entities (gt, lt, quot, amp).
  475. * Does not support DTDs or external entities.</p>
  476. *
  477. * @param str the <code>String</code> to escape, may be null
  478. * @return a new escaped <code>String</code>, <code>null</code> if null string input
  479. * @see #unescapeXml(java.lang.String)
  480. **/
  481. public static String escapeXml(String str) {
  482. if (str == null) {
  483. return null;
  484. }
  485. return Entities.XML.escape(str);
  486. }
  487. /**
  488. * <p>Unescapes a string containing XML entity escapes to a string
  489. * containing the actual Unicode characters corresponding to the
  490. * escapes.</p>
  491. *
  492. * <p>Supports only the four basic XML entities (gt, lt, quot, amp).
  493. * Does not support DTDs or external entities.</p>
  494. *
  495. * @param str the <code>String</code> to unescape, may be null
  496. * @return a new unescaped <code>String</code>, <code>null</code> if null string input
  497. * @see #escapeXml(String)
  498. **/
  499. public static String unescapeXml(String str) {
  500. if (str == null) {
  501. return null;
  502. }
  503. return Entities.XML.unescape(str);
  504. }
  505. /**
  506. * <p>Escapes the characters in a <code>String</code> to be suitable to pass to
  507. * an SQL query.</p>
  508. *
  509. * <p>For example,
  510. * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
  511. * StringEscapeUtils.escapeSql("McHale's Navy") +
  512. * "'");</pre>
  513. * </p>
  514. *
  515. * <p>At present, this method only turns single-quotes into doubled single-quotes
  516. * (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does not
  517. * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
  518. *
  519. * see http://www.jguru.com/faq/view.jsp?EID=8881
  520. * @param str the string to escape, may be null
  521. * @return a new String, escaped for SQL, <code>null</code> if null string input
  522. */
  523. public static String escapeSql(String str) {
  524. if (str == null) {
  525. return null;
  526. }
  527. return StringUtils.replace(str, "'", "''");
  528. }
  529. }