001// Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 
002// The Molly framework is freely distributable under the terms of an
003// MIT-style license. For details, see the molly pages web site at:
004// http://www.mollypages.org/. Use, modify, have fun !
005
006package fc.util;
007
008import java.util.*;
009import java.util.regex.*;
010
/**
HTML utility functions for converting single and double quote characters
to and from their numeric character references.
<p>
This class contains only static methods and cannot be instantiated.

@author hursh jain
*/
public final class HTMLUtil
{
/** Not instantiable: this class is only a holder for static helpers. */
private HTMLUtil()
  {
  }

/**
Replaces all occurrences of single <i>and</i> double quotes with the
corresponding numeric character references: {@code "} becomes
{@code &#34;} and {@code '} becomes {@code &#39;}. No other character
is touched; in particular {@code &}, {@code <} and {@code >} are returned
unchanged, so this is not a general purpose HTML escaper.
<p>
This is useful when writing a value (typed by the user or read from a
database) back into a quoted HTML attribute. An embedded quote ends the
attribute value prematurely:
<pre>{@code
<input type=text value='O'Reilly'>
<input type=text value="foo"bar">
}</pre>
HTML attribute values have no backslash escape, so neither of the
following helps:
<pre>{@code
<input type=text value='O\'Reilly'>
<input type=text value="foo\"bar">
}</pre>
The embedded quotes have to be written as character references instead:
<pre>{@code
<input type=text value='O&#39;Reilly'>
}</pre>
which the browser reads back as the value {@code O'Reilly}.

@param  str   the text to encode, may be <tt>null</tt> or empty
@return the text with every quote replaced by its character reference;
        a <tt>null</tt> or empty argument is returned as-is
*/
public static String quoteToEntity(String str)
  {
  if (str == null || str.length() == 0)
    return str;

  return str
    .replace("\"", "&#34;")
    .replace("'", "&#39;");
  }

/**
Replaces the character references {@code &#34;} and {@code &#39;} with
the corresponding double and single quote characters. This method undoes
the encoding applied by {@link #quoteToEntity(String)}.
<p>
Note that <i>every</i> occurrence of those two references is decoded,
including any that were already present in the text before it was passed
to {@link #quoteToEntity(String)}. Other forms of the same characters
(named or hexadecimal references) are not recognized and are left alone.

@param  str   the text to decode, may be <tt>null</tt> or empty
@return the text with both references replaced by quote characters;
        a <tt>null</tt> or empty argument is returned as-is
*/
public static String entityToQuote(String str)
  {
  if (str == null || str.length() == 0)
    return str;

  return str
    .replace("&#34;", "\"")
    .replace("&#39;", "'");
  }

/**
Prints one line showing the given string encoded with
{@link #quoteToEntity(String)} and then decoded again with
{@link #entityToQuote(String)}.

@param  desc  label printed in the first column
@param  s     the string to send through the round trip
*/
private static void test(String desc, String s)
  {
  String encoded = HTMLUtil.quoteToEntity(s);
  String decoded = HTMLUtil.entityToQuote(encoded);

  System.out.println(  String.format("%15s", desc) + " -> "
             + String.format("%15s", encoded) + " -> "
             + String.format("%10s", decoded)
             );
  }

/**
Manual smoke test: prints the encode/decode round trip for a handful
of sample strings. Command line arguments are ignored.
*/
public static void main (String[] args)
  {
  test("empty", "");
  test("spaces", "  ");
  test("single-quote", "'");
  test("double-quote", "\"");
  test("X'Y\"Z", "X'Y\"Z");
  test("\\'\\'", "\\'\\'");
  }
}     //~class HTMLUtil