001 // Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 002 // The Molly framework is freely distributable under the terms of an 003 // MIT-style license. For details, see the molly pages web site at: 004 // http://www.mollypages.org/. Use, modify, have fun ! 005 006 package fc.web.page; 007 008 import java.io.*; 009 import java.util.*; 010 011 import fc.io.*; 012 import fc.util.*; 013 014 /** 015 A Reader suitable for lexing. Supports all of: <code>peek</code>, 016 <code>read</code> and <code>unread</code>. (no JDK 1.5 reader class has 017 all of those). Mark/reset is not supported because it's too complex to 018 implement given the current <i>fixed-buffer</i> implementation of this 019 class. (on the flip-side this implementation does allow to read 020 <i>very</i> large files without risk of running out of JDK memory). 021 <p> 022 <xmp>1 4m 31337 h4x0r! ph33r |\/|y m4d sk1llz !!11!</xmp>:-] 023 <p> 024 Note 1: If this class is invoked from the command line, setting the dbg 025 flag in the code to <code>true</code> is useful. 026 027 @author hursh jain 028 */ 029 public final class PageReader extends Reader 030 { 031 /* 032 Old implemenation had a rolling buffer window and a previous spillover buffer. 033 It was too complicated, so that implemenation is pastured. 034 035 It is MUCH simpler to read the entire file into one character array...(unread, 036 read, mark, reset) etc, become trivial. By contrast, if using a 037 limited-size-read-buffer implementation (as before) all of those become much 038 harder since we loose the existing buffer contents when the buffer is *refilled* 039 (so how do you go back in the buffer if the buffer is gone ?). 040 */ 041 static final boolean dbg = false; 042 043 //our own buf/pos because most/all reader subclasses dont have mark/reset/unread 044 char[] buf = null; 045 int pos = 0; 046 int count = 0; 047 int markpos = 0; 048 //line, col and other tracking 049 int line = 1; 050 int col = 0; 051 int lastcol = 1; //for unread past a newline 052 boolean pushBackNL = false; 053 boolean lastWasCR = false; 054 String encoding; 055 static String DEFAULT_ENCODING = "UTF-8"; 056 057 /** 058 Creates a new PageReader wrapping the specified reader 059 */ 060 public PageReader(Reader r) throws IOException 061 { 062 Argcheck.notnull(r, "specified reader was null"); 063 buf = IOUtil.readerToCharArray(r); 064 this.encoding = DEFAULT_ENCODING; 065 } 066 067 /** 068 Creates a reader with the specified non-null encoding. 069 */ 070 public PageReader(File file, String encoding) throws IOException 071 { 072 Argcheck.notnull(file, "specified file was null"); 073 Argcheck.notnull(encoding, "specified encoding was null"); 074 this.encoding = encoding; 075 buf = IOUtil.fileToCharArray(file, encoding); 076 } 077 078 /** 079 Creates a reader using the UTF-8 encoding. 080 */ 081 public PageReader(File file) throws IOException 082 { 083 this(file, DEFAULT_ENCODING); 084 } 085 086 public void close() throws IOException 087 { 088 //no underlying stream since everything read into buffer. not much to do. 089 } 090 091 public int read() throws IOException 092 { 093 if (pos == buf.length) { 094 return -1; 095 } 096 097 char c = buf[pos++]; 098 099 if (dbg) System.out.println(">>>>>>>> DEBUG: read() from BUF, c=" + StringUtil.viewableAscii(c)); 100 adjustReadLineNum(c); 101 102 return c; 103 } 104 105 public int read(char[] buf, int start, int len) throws IOException 106 { 107 throw new IOException("not implemented, use the read() method instead"); 108 } 109 110 /** 111 Unreads the current character (which could be EOF) so that the next read will 112 return the current character (or EOF) again. 113 */ 114 public void unread() throws IOException 115 { 116 char c = 0; 117 118 if (pos == 0) 119 { 120 throw new IOException("I am at the beginning of the stream. Cannot unread anything because nothing has been read so far"); 121 } 122 else{ 123 c = buf[--pos]; 124 if (dbg) System.out.println(">>>>>>>> DEBUG: unread() from BUF, c=" + StringUtil.viewableAscii(c)); 125 } 126 127 adjustUnreadLineNum(c); 128 } 129 130 /** 131 Unreads the specified number of characters 132 */ 133 public void unread(int count) throws IOException 134 { 135 for (int n = 0; n < count; n++) { 136 unread(); 137 } 138 } 139 140 /** 141 Useful for inserting included files into the stream and then parsing that content in-line 142 with the rest of the file. 143 */ 144 public void insertIntoStream(File file) throws IOException 145 { 146 char[] insert = IOUtil.fileToCharArray(file, encoding); 147 148 char[] result = new char[buf.length + insert.length]; 149 System.arraycopy(buf, 0, result, 0, pos); 150 System.arraycopy(insert, 0, result, pos, insert.length); 151 System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos); 152 153 buf = result; 154 } 155 156 /** 157 Useful for inserting included files into the stream and then parsing that content in-line 158 with the rest of the file. 159 */ 160 public void insertIntoStream(Reader r) throws IOException 161 { 162 char[] insert = IOUtil.readerToCharArray(r); 163 164 char[] result = new char[buf.length + insert.length]; 165 System.arraycopy(buf, 0, result, 0, pos); 166 System.arraycopy(insert, 0, result, pos, insert.length); 167 System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos); 168 169 buf = result; 170 } 171 172 173 void adjustReadLineNum(char c) 174 { 175 // we can read: \r, \r\n , \n all of which increase line count by exactly 1 176 switch (c) 177 { 178 case '\n': 179 if (! lastWasCR) { 180 line++; 181 lastcol=col; 182 col=1; 183 } 184 else { 185 lastWasCR = false; 186 } 187 break; 188 189 case '\r': 190 line++; 191 lastcol=col; 192 col=1; 193 lastWasCR = true; 194 break; 195 196 case '\t': 197 col = col + 4; 198 break; 199 200 default: 201 col++; 202 } 203 } 204 205 206 void adjustUnreadLineNum(char c) 207 { 208 // we can unread: \r, \r\n , \n all of which reduce line count by exactly 1 209 switch (c) { 210 case '\n': 211 pushBackNL = true; 212 line--; 213 col=lastcol; 214 break; 215 case '\r': 216 if (! pushBackNL) { 217 line--; 218 col=lastcol; 219 } 220 else{ 221 pushBackNL = false; 222 } 223 break; 224 case '\t': 225 col = col - 4; 226 break; 227 default: 228 col--; 229 } 230 } 231 232 public int peek() throws IOException 233 { 234 return buf[pos]; 235 } 236 237 238 /** 239 Skips all whitespace characters such that the next {@link read} will 240 return the <b>next</b> non-whitespace character (or EOF if there are no 241 more characters). 242 */ 243 public void skipWhitespace() throws IOException 244 { 245 int c = -1; 246 while (true) 247 { 248 c = read(); 249 250 if (c == -1) { 251 break; 252 } 253 254 if (! Character.isWhitespace(c)) { 255 unread(); 256 break; 257 } 258 } 259 } 260 261 262 263 /** 264 Tries to read/consumes the specified char and returns true 265 if successful. If the specified char is not found, does not 266 consume anything and returns false. 267 */ 268 public boolean match(int target) throws IOException 269 { 270 int c = read(); 271 272 if (c == target) 273 return true; 274 else 275 unread(); 276 277 return false; 278 } 279 280 /** 281 Tries to read/consumes the specified non-null string and returns true 282 if successful. If the specified string is not found, does not 283 consume anything and returns false. 284 */ 285 public boolean match(String target) throws IOException 286 { 287 if (target == null) 288 throw new IllegalArgumentException("Specified target string was null"); 289 290 int c = -1; 291 for (int i = 0; i < target.length(); i++) 292 { 293 c = read(); 294 295 if ( c == -1 || c != target.charAt(i)) { 296 unread(i+1); 297 return false; 298 } 299 } 300 301 return true; 302 } 303 304 public boolean matchIgnoreCase(String target) throws IOException 305 { 306 if (target == null) 307 throw new IllegalArgumentException("Specified target string was null"); 308 309 int c = -1; 310 for (int i = 0; i < target.length(); i++) 311 { 312 c = read(); 313 314 if ( c == -1 || c != Character.toLowerCase(target.charAt(i))) { 315 unread(i+1); 316 return false; 317 } 318 319 } 320 321 return true; 322 } 323 324 public boolean markSupported() 325 { 326 return false; 327 } 328 329 public int getLine() { 330 return line; 331 } 332 333 public int getCol() { 334 return col; 335 } 336 337 char[] getBuf() { return buf; } 338 int getPos() { return pos; } 339 340 //other utility methods 341 342 public static void main (String args[]) throws IOException 343 { 344 //CHANGE CHAR BUFFER TO A SMALL VALUE FOR TESTING */ 345 StringReader sr = null; 346 PageReader lex = null; 347 int c = -1; 348 349 System.out.println("Reading an empty string....."); 350 sr = new StringReader(""); 351 lex = new PageReader(sr); 352 while ( (c = lex.read()) != -1) { 353 testprint(lex, c); 354 } 355 356 System.out.println("----------------- TEST 2 --------------"); 357 sr = new StringReader("abc"); 358 lex = new PageReader(sr); 359 while ( (c = lex.read()) != -1) { 360 testprint(lex, c); 361 //System.out.print(c + " "); 362 } 363 364 System.out.println("----------------- TEST 3 --------------"); 365 sr = new StringReader("abcde"); 366 lex = new PageReader(sr); 367 try { 368 c = lex.read(); 369 testprint(lex, c); 370 lex.unread(); 371 testprint(lex, -10); 372 lex.unread(); 373 testprint(lex, -10); 374 c = lex.read(); 375 testprint(lex, c); 376 } 377 catch (Exception e) { 378 e.printStackTrace(); 379 } 380 381 System.out.println("----------------- TEST 4 --------------"); 382 sr = new StringReader("abcd\ne"); 383 lex = new PageReader(sr); 384 try { 385 c = lex.read(); 386 testprint(lex, c); 387 lex.unread(); 388 testprint(lex, -10); 389 390 for (int i = 0; i < 5; i++) { 391 c = lex.read(); 392 testprint(lex, c); 393 } 394 395 for (int i = 0; i < 5; i++) { 396 lex.unread(); 397 testprint(lex, -10); 398 } 399 400 for (int i = 0; i < 5; i++) { 401 c = lex.read(); 402 testprint(lex, c); 403 } 404 405 c = lex.read(); 406 testprint(lex, c); 407 } 408 catch (Exception e) { 409 e.printStackTrace(); 410 } 411 412 System.out.println("----------------- TEST 5 --------------"); 413 sr = new StringReader("abcd\r\ne"); 414 lex = new PageReader(sr); 415 try { 416 c = lex.read(); 417 testprint(lex, c, lex.peek()); 418 lex.unread(); 419 testprint(lex, -10, lex.peek()); 420 421 for (int i = 0; i < 5; i++) { 422 c = lex.read(); 423 testprint(lex, c, lex.peek()); 424 } 425 426 for (int i = 0; i < 5; i++) { 427 lex.unread(); 428 testprint(lex, -10, lex.peek()); 429 } 430 431 for (int i = 0; i < 5; i++) { 432 c = lex.read(); 433 testprint(lex, c, lex.peek()); 434 } 435 436 c = lex.read(); 437 testprint(lex, c, lex.peek()); 438 } 439 catch (Exception e) { 440 e.printStackTrace(); 441 } 442 443 System.out.println("--------- TEST 6 ---(insert into stream middle)-------"); 444 sr = new StringReader("abc"); 445 lex = new PageReader(sr); 446 447 try { 448 c = lex.read(); 449 testprint(lex, c); 450 451 StringReader insert = new StringReader("123"); 452 System.out.println("inserting \"123\" into the stream\n"); 453 lex.insertIntoStream(insert); 454 455 while ( (c = lex.read()) != -1) { 456 testprint(lex, c); 457 } 458 } 459 catch (Exception e) { 460 e.printStackTrace(); 461 } 462 463 464 System.out.println("--------- TEST 7 ---(insert into stream begin)-------"); 465 sr = new StringReader("abc"); 466 lex = new PageReader(sr); 467 468 try { 469 StringReader insert = new StringReader("123"); 470 System.out.println("inserting \"123\" into the beginning of stream\n"); 471 lex.insertIntoStream(insert); 472 473 while ( (c = lex.read()) != -1) { 474 testprint(lex, c); 475 } 476 } 477 catch (Exception e) { 478 e.printStackTrace(); 479 } 480 481 System.out.println("--------- TEST 8 ---(insert into stream end)-------"); 482 sr = new StringReader("abc"); 483 lex = new PageReader(sr); 484 485 try { 486 while ( (c = lex.read()) != -1) { 487 testprint(lex, c); 488 } 489 StringReader insert = new StringReader("123"); 490 System.out.println("inserting \"123\" into the end of the stream\n"); 491 lex.insertIntoStream(insert); 492 493 while ( (c = lex.read()) != -1) { 494 testprint(lex, c); 495 } 496 } 497 catch (Exception e) { 498 e.printStackTrace(); 499 } 500 501 } 502 503 private static void testprint(PageReader lex, int c, int peek) 504 { 505 if (c == -1) { 506 System.out.println("====> recieved EOF (-1) from read()......."); 507 } 508 509 System.out.format( 510 "buf=%s, pos=%d, buflen=%d\nline=%d, col=%d, char=[%s]", 511 StringUtil.arrayToString(lex.getBuf()), lex.getPos(), lex.getBuf().length, 512 lex.getLine(), lex.getCol(), 513 (c == -10) ? "N/A" : StringUtil.viewableAscii((char)c)); 514 515 if (peek != -2) 516 System.out.format(", peek=[%s]", StringUtil.viewableAscii((char)peek)); 517 518 System.out.print("\n\n"); 519 } 520 521 private static void testprint(PageReader lex, int c) 522 { 523 testprint(lex, c, -2); 524 } 525 526 }