001 // Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 002 // The Molly framework is freely distributable under the terms of an 003 // MIT-style license. For details, see the molly pages web site at: 004 // http://www.mollypages.org/. Use, modify, have fun ! 005 006 package fc.util; 007 008 import java.util.*; 009 import java.util.regex.*; 010 011 /* 012 HTML Utility functions 013 014 @author hursh jain 015 */ 016 public final class HTMLUtil 017 { 018 /** 019 Replaces all occurences of single <i>and</i> double quotes 020 with corresponding HTML entities. This is useful when setting 021 attribute values containing those characters and maintaining 022 state for characters typed by the user. 023 For example, 024 <pre> 025 <input type=text value=<font size="+1">'</font><font color=blue>O<font size="+1"><font size="+1" color=red>'</font></font>Reilly</font><font size="+1">'</font>> 026 </pre> 027 <tt>O'Reilly</tt> could have been typed in by the user (and we are maintaining 028 state so we have to show the value back to the user) or it could have been 029 retrieved from the database. Either way, when the form is resubmitted, it would 030 not be sent properly by the browser. 031 <p> 032 The embedded single quote in the value trips up the browser, because it 033 prematurely ends the value. One would think that the following backslash-escape 034 would work: 035 <pre> 036 <input type=text value='<font color=blue>O<font size="+1" color=red>\</font><b>'</b>Reilly</font>'> 037 </pre> 038 Unfortunately, escaping like this does <b>not</b> work reliably in firefox, 039 safari or IE. 040 <p> 041 Here is another example: 042 <pre> 043 <input type=text value=<font size="+1">"</font><font color=blue>foo<font size="+1" color=red>"</font>bar</font><font size="+1">"</font>> 044 </pre> 045 The following escape does <b>not</b> work either: 046 <pre> 047 <input type=text value=<font size="+1">"</font><font color=blue>foo<font size="+1" color=red>\"</font>bar</font><font size="+1">"</font>> 048 </pre> 049 To be safe, all embedded quotes must be encoded using character escapes: 050 (<tt>single quote (') as &#39; </tt>) and double quote (<tt>double (") as 051 &#34; </tt>). So 052 <pre> 053 <input type=text value='<font color=blue>O<b>&#39;</b>Reilly</font>'> 054 </pre> 055 This works fine and is submitted by the browser as <tt>O'Reilly</tt> 056 <p> 057 This method is critically useful. <a href="http://www.imdb.com/title/tt0083929/quotes">Learn it. Live it</a>. 058 */ 059 public static String quoteToEntity(String str) 060 { 061 if (str == null || str.equals("")) 062 return str; 063 064 String ret = str 065 .replace("\"", """) 066 .replace("'", "'"); 067 068 return ret; 069 } 070 071 /** 072 Replaces embedded entities for single and double quotes, back to the 073 correspoding single and double quote characters. This method is 074 the converse of {@link #quoteToEntity(String)} 075 */ 076 public static String entityToQuote(String str) 077 { 078 if (str == null || str.equals("")) 079 return str; 080 081 String ret = str 082 .replace(""", "\"") 083 .replace("'", "'"); 084 085 return ret; 086 } 087 088 private static void test(String desc, String s) 089 { 090 String tmp = null, tmp2 = null; 091 092 tmp = HTMLUtil.quoteToEntity(s); 093 tmp2 = HTMLUtil.entityToQuote(tmp); 094 095 System.out.println( String.format("%15s",desc) + " -> " 096 + String.format("%15s", tmp) + " -> " 097 + String.format("%10s", tmp2) 098 ); 099 } 100 101 public static void main (String args[]) 102 { 103 test("empty", ""); 104 test("spaces", " "); 105 test("single-quote", "'"); 106 test("double-quote", "\""); 107 test("X'Y\"Z", "X'Y\"Z"); 108 test("\\'\\'", "\\'\\'"); 109 } 110 } //~class HTMLUtil