001// Copyright (c) 2001 Hursh Jain (http://www.mollypages.org) 002// The Molly framework is freely distributable under the terms of an 003// MIT-style license. For details, see the molly pages web site at: 004// http://www.mollypages.org/. Use, modify, have fun ! 005 006package fc.web.page; 007 008import java.io.*; 009import java.util.*; 010 011import fc.io.*; 012import fc.util.*; 013 014/** 015A Reader suitable for lexing. Supports all of: <code>peek</code>, 016<code>read</code> and <code>unread</code>. (no JDK 1.5 reader class has 017all of those). Mark/reset is not supported because it's too complex to 018implement given the current <i>fixed-buffer</i> implementation of this 019class. (on the flip-side this implementation does allow to read 020<i>very</i> large files without risk of running out of JDK memory). 021<p> 022<xmp>1 4m 31337 h4x0r! ph33r |\/|y m4d sk1llz !!11!</xmp>:-] 023<p> 024Note 1: If this class is invoked from the command line, setting the dbg 025flag in the code to <code>true</code> is useful. 026 027@author hursh jain 028*/ 029public final class PageReader extends Reader 030{ 031/* 032Old implemenation had a rolling buffer window and a previous spillover buffer. 033It was too complicated, so that implemenation is pastured. 034 035It is MUCH simpler to read the entire file into one character array...(unread, 036read, mark, reset) etc, become trivial. By contrast, if using a 037limited-size-read-buffer implementation (as before) all of those become much 038harder since we loose the existing buffer contents when the buffer is *refilled* 039(so how do you go back in the buffer if the buffer is gone ?). 040*/ 041static final boolean dbg = false; 042 043//our own buf/pos because most/all reader subclasses dont have mark/reset/unread 044char[] buf = null; 045int pos = 0; 046int count = 0; 047int markpos = 0; 048//line, col and other tracking 049int line = 1; 050int col = 0; 051int lastcol = 1; //for unread past a newline 052boolean pushBackNL = false; 053boolean lastWasCR = false; 054String encoding; 055static String DEFAULT_ENCODING = "UTF-8"; 056 057/** 058Creates a new PageReader wrapping the specified reader 059*/ 060public PageReader(Reader r) throws IOException 061 { 062 Argcheck.notnull(r, "specified reader was null"); 063 buf = IOUtil.readerToCharArray(r); 064 this.encoding = DEFAULT_ENCODING; 065 } 066 067/** 068Creates a reader with the specified non-null encoding. 069*/ 070public PageReader(File file, String encoding) throws IOException 071 { 072 Argcheck.notnull(file, "specified file was null"); 073 Argcheck.notnull(encoding, "specified encoding was null"); 074 this.encoding = encoding; 075 buf = IOUtil.fileToCharArray(file, encoding); 076 } 077 078/** 079Creates a reader using the UTF-8 encoding. 080*/ 081public PageReader(File file) throws IOException 082 { 083 this(file, DEFAULT_ENCODING); 084 } 085 086public void close() throws IOException 087 { 088 //no underlying stream since everything read into buffer. not much to do. 089 } 090 091public int read() throws IOException 092 { 093 if (pos == buf.length) { 094 return -1; 095 } 096 097 char c = buf[pos++]; 098 099 if (dbg) System.out.println(">>>>>>>> DEBUG: read() from BUF, c=" + StringUtil.viewableAscii(c)); 100 adjustReadLineNum(c); 101 102 return c; 103 } 104 105public int read(char[] buf, int start, int len) throws IOException 106 { 107 throw new IOException("not implemented, use the read() method instead"); 108 } 109 110/** 111Unreads the current character (which could be EOF) so that the next read will 112return the current character (or EOF) again. 113*/ 114public void unread() throws IOException 115 { 116 char c = 0; 117 118 if (pos == 0) 119 { 120 throw new IOException("I am at the beginning of the stream. Cannot unread anything because nothing has been read so far"); 121 } 122 else{ 123 c = buf[--pos]; 124 if (dbg) System.out.println(">>>>>>>> DEBUG: unread() from BUF, c=" + StringUtil.viewableAscii(c)); 125 } 126 127 adjustUnreadLineNum(c); 128 } 129 130/** 131Unreads the specified number of characters 132*/ 133public void unread(int count) throws IOException 134 { 135 for (int n = 0; n < count; n++) { 136 unread(); 137 } 138 } 139 140/** 141Useful for inserting included files into the stream and then parsing that content in-line 142with the rest of the file. 143*/ 144public void insertIntoStream(File file) throws IOException 145 { 146 char[] insert = IOUtil.fileToCharArray(file, encoding); 147 148 char[] result = new char[buf.length + insert.length]; 149 System.arraycopy(buf, 0, result, 0, pos); 150 System.arraycopy(insert, 0, result, pos, insert.length); 151 System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos); 152 153 buf = result; 154 } 155 156/** 157Useful for inserting included files into the stream and then parsing that content in-line 158with the rest of the file. 159*/ 160public void insertIntoStream(Reader r) throws IOException 161 { 162 char[] insert = IOUtil.readerToCharArray(r); 163 164 char[] result = new char[buf.length + insert.length]; 165 System.arraycopy(buf, 0, result, 0, pos); 166 System.arraycopy(insert, 0, result, pos, insert.length); 167 System.arraycopy(buf, pos, result, pos+insert.length, buf.length-pos); 168 169 buf = result; 170 } 171 172 173void adjustReadLineNum(char c) 174 { 175 // we can read: \r, \r\n , \n all of which increase line count by exactly 1 176 switch (c) 177 { 178 case '\n': 179 if (! lastWasCR) { 180 line++; 181 lastcol=col; 182 col=1; 183 } 184 else { 185 lastWasCR = false; 186 } 187 break; 188 189 case '\r': 190 line++; 191 lastcol=col; 192 col=1; 193 lastWasCR = true; 194 break; 195 196 case '\t': 197 col = col + 4; 198 break; 199 200 default: 201 col++; 202 } 203 } 204 205 206void adjustUnreadLineNum(char c) 207 { 208 // we can unread: \r, \r\n , \n all of which reduce line count by exactly 1 209 switch (c) { 210 case '\n': 211 pushBackNL = true; 212 line--; 213 col=lastcol; 214 break; 215 case '\r': 216 if (! pushBackNL) { 217 line--; 218 col=lastcol; 219 } 220 else{ 221 pushBackNL = false; 222 } 223 break; 224 case '\t': 225 col = col - 4; 226 break; 227 default: 228 col--; 229 } 230 } 231 232public int peek() throws IOException 233 { 234 return buf[pos]; 235 } 236 237 238/** 239Skips all whitespace characters such that the next {@link read} will 240return the <b>next</b> non-whitespace character (or EOF if there are no 241more characters). 242*/ 243public void skipWhitespace() throws IOException 244 { 245 int c = -1; 246 while (true) 247 { 248 c = read(); 249 250 if (c == -1) { 251 break; 252 } 253 254 if (! Character.isWhitespace(c)) { 255 unread(); 256 break; 257 } 258 } 259 } 260 261 262/** 263Tries to read/consumes the specified char and returns true 264if successful. If the specified char is not found, does not 265consume anything and returns false. 266*/ 267public boolean match(int target) throws IOException 268 { 269 int c = read(); 270 271 if (c == target) 272 return true; 273 else 274 unread(); 275 276 return false; 277 } 278 279/** 280Tries to read/consumes the specified non-null string and returns true 281if successful. If the specified string is not found, does not 282consume anything and returns false. 283*/ 284public boolean match(String target) throws IOException 285 { 286 if (target == null) 287 throw new IllegalArgumentException("Specified target string was null"); 288 289 int c = -1; 290 for (int i = 0; i < target.length(); i++) 291 { 292 c = read(); 293 294 if ( c == -1 || c != target.charAt(i)) { 295 unread(i+1); 296 return false; 297 } 298 } 299 300 return true; 301 } 302 303public boolean matchIgnoreCase(String target) throws IOException 304 { 305 if (target == null) 306 throw new IllegalArgumentException("Specified target string was null"); 307 308 int c = -1; 309 for (int i = 0; i < target.length(); i++) 310 { 311 c = read(); 312 313 if ( c == -1 || c != Character.toLowerCase(target.charAt(i))) { 314 unread(i+1); 315 return false; 316 } 317 318 } 319 320 return true; 321 } 322 323public boolean markSupported() 324 { 325 return false; 326 } 327 328public int getLine() { 329 return line; 330 } 331 332public int getCol() { 333 return col; 334 } 335 336char[] getBuf() { return buf; } 337int getPos() { return pos; } 338 339//other utility methods 340 341public static void main (String args[]) throws IOException 342 { 343 //CHANGE CHAR BUFFER TO A SMALL VALUE FOR TESTING */ 344 StringReader sr = null; 345 PageReader lex = null; 346 int c = -1; 347 348 System.out.println("Reading an empty string....."); 349 sr = new StringReader(""); 350 lex = new PageReader(sr); 351 while ( (c = lex.read()) != -1) { 352 testprint(lex, c); 353 } 354 355 System.out.println("----------------- TEST 2 --------------"); 356 sr = new StringReader("abc"); 357 lex = new PageReader(sr); 358 while ( (c = lex.read()) != -1) { 359 testprint(lex, c); 360 //System.out.print(c + " "); 361 } 362 363 System.out.println("----------------- TEST 3 --------------"); 364 sr = new StringReader("abcde"); 365 lex = new PageReader(sr); 366 try { 367 c = lex.read(); 368 testprint(lex, c); 369 lex.unread(); 370 testprint(lex, -10); 371 lex.unread(); 372 testprint(lex, -10); 373 c = lex.read(); 374 testprint(lex, c); 375 } 376 catch (Exception e) { 377 e.printStackTrace(); 378 } 379 380 System.out.println("----------------- TEST 4 --------------"); 381 sr = new StringReader("abcd\ne"); 382 lex = new PageReader(sr); 383 try { 384 c = lex.read(); 385 testprint(lex, c); 386 lex.unread(); 387 testprint(lex, -10); 388 389 for (int i = 0; i < 5; i++) { 390 c = lex.read(); 391 testprint(lex, c); 392 } 393 394 for (int i = 0; i < 5; i++) { 395 lex.unread(); 396 testprint(lex, -10); 397 } 398 399 for (int i = 0; i < 5; i++) { 400 c = lex.read(); 401 testprint(lex, c); 402 } 403 404 c = lex.read(); 405 testprint(lex, c); 406 } 407 catch (Exception e) { 408 e.printStackTrace(); 409 } 410 411 System.out.println("----------------- TEST 5 --------------"); 412 sr = new StringReader("abcd\r\ne"); 413 lex = new PageReader(sr); 414 try { 415 c = lex.read(); 416 testprint(lex, c, lex.peek()); 417 lex.unread(); 418 testprint(lex, -10, lex.peek()); 419 420 for (int i = 0; i < 5; i++) { 421 c = lex.read(); 422 testprint(lex, c, lex.peek()); 423 } 424 425 for (int i = 0; i < 5; i++) { 426 lex.unread(); 427 testprint(lex, -10, lex.peek()); 428 } 429 430 for (int i = 0; i < 5; i++) { 431 c = lex.read(); 432 testprint(lex, c, lex.peek()); 433 } 434 435 c = lex.read(); 436 testprint(lex, c, lex.peek()); 437 } 438 catch (Exception e) { 439 e.printStackTrace(); 440 } 441 442 System.out.println("--------- TEST 6 ---(insert into stream middle)-------"); 443 sr = new StringReader("abc"); 444 lex = new PageReader(sr); 445 446 try { 447 c = lex.read(); 448 testprint(lex, c); 449 450 StringReader insert = new StringReader("123"); 451 System.out.println("inserting \"123\" into the stream\n"); 452 lex.insertIntoStream(insert); 453 454 while ( (c = lex.read()) != -1) { 455 testprint(lex, c); 456 } 457 } 458 catch (Exception e) { 459 e.printStackTrace(); 460 } 461 462 463 System.out.println("--------- TEST 7 ---(insert into stream begin)-------"); 464 sr = new StringReader("abc"); 465 lex = new PageReader(sr); 466 467 try { 468 StringReader insert = new StringReader("123"); 469 System.out.println("inserting \"123\" into the beginning of stream\n"); 470 lex.insertIntoStream(insert); 471 472 while ( (c = lex.read()) != -1) { 473 testprint(lex, c); 474 } 475 } 476 catch (Exception e) { 477 e.printStackTrace(); 478 } 479 480 System.out.println("--------- TEST 8 ---(insert into stream end)-------"); 481 sr = new StringReader("abc"); 482 lex = new PageReader(sr); 483 484 try { 485 while ( (c = lex.read()) != -1) { 486 testprint(lex, c); 487 } 488 StringReader insert = new StringReader("123"); 489 System.out.println("inserting \"123\" into the end of the stream\n"); 490 lex.insertIntoStream(insert); 491 492 while ( (c = lex.read()) != -1) { 493 testprint(lex, c); 494 } 495 } 496 catch (Exception e) { 497 e.printStackTrace(); 498 } 499 500 } 501 502private static void testprint(PageReader lex, int c, int peek) 503 { 504 if (c == -1) { 505 System.out.println("====> recieved EOF (-1) from read()......."); 506 } 507 508 System.out.format( 509 "buf=%s, pos=%d, buflen=%d\nline=%d, col=%d, char=[%s]", 510 StringUtil.arrayToString(lex.getBuf()), lex.getPos(), lex.getBuf().length, 511 lex.getLine(), lex.getCol(), 512 (c == -10) ? "N/A" : StringUtil.viewableAscii((char)c)); 513 514 if (peek != -2) 515 System.out.format(", peek=[%s]", StringUtil.viewableAscii((char)peek)); 516 517 System.out.print("\n\n"); 518 } 519 520private static void testprint(PageReader lex, int c) 521 { 522 testprint(lex, c, -2); 523 } 524 525}