package org.LiveGraph.dataFile.read;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.LiveGraph.dataFile.common.DataFormatException;

import com.softnetConsult.utils.exceptions.Bug;


import static org.LiveGraph.dataFile.common.DataFormatTools.*;


/**
 * A reader for a data stream (usually, a CSV file). This reader
 * will parse the data stream and extract the file information, the data
 * series headings and the actual data.<br />
 * <br />
 * The information extracted from the data stream is passed to the application
 * using an observer pattern: after a line was parsed, the appropriate
 * {@code notifyXXXX(...)}-method of this class is called with the extracted
 * information. The {@code notifyXXXX(...)}-methods dispatch appropriate
 * notifications to all {@link DataStreamObserver}-objects registered with this
 * {@code DataStreamReader}-instance.<br />
 * If required, an application may also override the {@code notifyXXXX(...)}-methods
 * to handle data read events.<br />
 * <br />
 * Non-empty lines are classified in the following order: a separator definition line
 * (only accepted as long as no other non-empty line has been processed), a comment line,
 * a file info line, the data series labels line (the first line that is none of the
 * above), and finally data lines (every further line).<br />
 * <br />
 * See {@link org.LiveGraph.dataFile.write.DataStreamWriter} for the details of the
 * data file format.<br />
 * <br />
 * Note that this class has a different role than it did in version 1.01 of the
 * LiveGraph API. The {@code DataStreamReader} class from version 1.01 is replaced by
 * {@link org.LiveGraph.dataCache.DataStreamToCacheReader}.<br />
 * <br />
 * This class performs no synchronisation of its own.
 *
 * <p><strong>LiveGraph</strong> (http://www.live-graph.org).</p>
 * <p>Copyright (c) 2007 by G. Paperin.</p>
 * <p>File: DataStreamReader.java</p>
 * <p style="font-size:smaller;">Redistribution and use in source and binary forms, with or
 *    without modification, are permitted provided that the following terms and conditions are met:
 * </p>
 * <p style="font-size:smaller;">1. Redistributions of source code must retain the above
 *    acknowledgement of the LiveGraph project and its web-site, the above copyright notice,
 *    this list of conditions and the following disclaimer.<br />
 *    2. Redistributions in binary form must reproduce the above acknowledgement of the
 *    LiveGraph project and its web-site, the above copyright notice, this list of conditions
 *    and the following disclaimer in the documentation and/or other materials provided with
 *    the distribution.<br />
 *    3. All advertising materials mentioning features or use of this software or any derived
 *    software must display the following acknowledgement:<br />
 *    <em>This product includes software developed by the LiveGraph project and its
 *    contributors.<br />(http://www.live-graph.org)</em><br />
 *    4. All advertising materials distributed in form of HTML pages or any other technology
 *    permitting active hyper-links that mention features or use of this software or any
 *    derived software must display the acknowledgment specified in condition 3 of this
 *    agreement, and in addition, include a visible and working hyper-link to the LiveGraph
 *    homepage (http://www.live-graph.org).
 * </p>
 * <p style="font-size:smaller;">THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY
 *    OF ANY KIND, EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 *    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 *    THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 *    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
 *    IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * </p>
 *
 * @author Greg Paperin (http://www.paperin.org)
 * @version {@value org.LiveGraph.LiveGraph#version}
 * @see DataStreamObserver
 * @see DataStreamObserverAdapter
 * @see org.LiveGraph.dataCache.DataStreamToCacheReader
 */
public class DataStreamReader implements Closeable {

    /**
     * Data stream reader.
     */
    private final BufferedReader in;

    /**
     * Data values separator.
     */
    private String separator = DefaultSeparator;

    /**
     * Whether the data values separator was already finalised.
     * Once this is {@code true}, separator definition lines are no longer recognised.
     */
    private boolean separatorSet = false;

    /**
     * Whether the data series headings are already set-up.
     */
    private boolean labelsSet = false;

    /**
     * The data stream index of the next data record
     * ({@code -1} until the data series labels line was parsed).
     */
    private int nextDatasetFileIndex = -1;

    /**
     * Observers who want to know what's on the data stream.
     */
    private final List<DataStreamObserver> observers = new ArrayList<DataStreamObserver>();


    /**
     * Creates a data reader on the specified stream.
     * The stream is decoded using the platform's default charset.
     *
     * @param is The stream from which to read.
     * @throws NullPointerException If {@code is} is {@code null}.
     */
    public DataStreamReader(InputStream is) {

        if (null == is) {
            throw new NullPointerException("Cannot read from a null stream.");
        }

        this.in = new BufferedReader(new InputStreamReader(is));
    }

    /**
     * Creates a data reader on the specified stream and adds one initial observer.
     *
     * @param is The stream from which to read.
     * @param observer An observer for the data stream contents
     * (a {@code null} observer is silently ignored).
     * @throws NullPointerException If {@code is} is {@code null}.
     */
    public DataStreamReader(InputStream is, DataStreamObserver observer) {
        this(is);
        addObserver(observer);
    }


    /**
     * Tells whether this reader's underlying data stream is ready to be read.
     *
     * @return {@code true} if the next {@code readFromStream()} is guaranteed not to block for input,
     * {@code false} otherwise. Note that returning {@code false} does not guarantee that the next read
     * will block.
     * @throws IOException If an I/O error occurs.
     */
    public boolean ready() throws IOException {
        return in.ready();
    }

    /**
     * Closes the underlying data stream. Further reading is not possible after calling this method.
     *
     * @throws IOException If an I/O error occurs.
     */
    public void close() throws IOException {
        in.close();
    }

    /**
     * Reads as many data lines from the underlying stream as there are available and parses them.
     *
     * @return The number of non-empty data lines read.
     * @throws IOException If an I/O error occurs.
     * @throws DataFormatException If the data stream contents do not conform with the expected data
     * stream format.
     * @see org.LiveGraph.dataFile.write.DataStreamWriter
     * @see #readFromStream(int)
     */
    public int readFromStream() throws IOException, DataFormatException {
        return readFromStream(-1);
    }

    /**
     * Reads up to a specified number of data lines from the underlying stream, and parses the lines.
     * Reading is stopped when the specified number of lines is reached or if no more lines are available.
     * Each line is stripped of leading and trailing whitespace before it is parsed.
     *
     * @param maxLines The maximum number of data lines to read (empty lines are ignored and not counted,
     * but all other lines including comment lines are counted). If negative, all available lines will
     * be read.
     * @return The number of non-empty data lines read.
     * @throws IOException If an I/O error occurs.
     * @throws DataFormatException If the data stream contents do not conform with the expected data
     * stream format.
     * @see org.LiveGraph.dataFile.write.DataStreamWriter
     */
    public int readFromStream(int maxLines) throws IOException, DataFormatException {

        int linesRead = 0;
        while (ready() && (0 > maxLines || linesRead < maxLines)) {

            String line = in.readLine();
            if (null == line) {
                // readLine() signals the end of the stream with null; there is nothing more to parse.
                break;
            }

            line = line.trim();
            if (line.length() > 0) {
                processLine(line);
                linesRead++;
            }
        }
        return linesRead;
    }

    /**
     * Notifies observers registered with this parser of a "data values separator set"-event.
     *
     * @param separator New data separator to be passed to the observers.
     */
    protected void notifySeparatorSet(String separator) {
        for (DataStreamObserver observer : observers) {
            observer.eventSeparatorSet(separator, this);
        }
    }

    /**
     * Notifies observers registered with this parser of a "comment line parsed"-event.
     *
     * @param comment The parsed comment line to be passed to the observers.
     */
    protected void notifyCommentLine(String comment) {
        for (DataStreamObserver observer : observers) {
            observer.eventCommentLine(comment, this);
        }
    }

    /**
     * Notifies observers registered with this parser of a "file info line parsed"-event.
     *
     * @param info The parsed file info to be passed to the observers.
     */
    protected void notifyFileInfoLine(String info) {
        for (DataStreamObserver observer : observers) {
            observer.eventFileInfoLine(info, this);
        }
    }

    /**
     * Notifies observers registered with this parser of a "data series labels parsed"-event.
     *
     * @param labels The parsed data series labels to be passed to the observers.
     */
    protected void notifyLabelsSet(List<String> labels) {
        for (DataStreamObserver observer : observers) {
            observer.eventLabelsSet(labels, this);
        }
    }

    /**
     * Notifies observers registered with this parser of a "dataset parsed"-event.
     *
     * @param dataTokens The parsed data tokens to be passed to the observers.
     * @param datasetIndex The file index of the parsed dataset to be passed to the observers.
     */
    protected void notifyDataLineRead(List<String> dataTokens, int datasetIndex) {
        for (DataStreamObserver observer : observers) {
            observer.eventDataLineRead(dataTokens, datasetIndex, this);
        }
    }

    /**
     * Adds an observer to this parser.
     *
     * @param observer The observer to add.
     * @return {@code false} if the specified observer could not be added because it is {@code null}
     * or was already registered, {@code true} otherwise.
     */
    public boolean addObserver(DataStreamObserver observer) {
        if (null == observer || hasObserver(observer)) {
            return false;
        }
        return observers.add(observer);
    }

    /**
     * Checks whether the specified observer is registered with this parser.
     *
     * @param observer An observer.
     * @return {@code true} if the specified {@code observer} is not {@code null} and is registered
     * with this parser, {@code false} otherwise.
     */
    public boolean hasObserver(DataStreamObserver observer) {
        if (null == observer) {
            return false;
        }
        return observers.contains(observer);
    }

    /**
     * De-registers the specified observer from this parser.
     *
     * @param observer An observer.
     * @return {@code true} if the specified observer is not {@code null} and was on the
     * list of registered observers and is now removed from this list, {@code false} otherwise.
     */
    public boolean removeObserver(DataStreamObserver observer) {
        if (null == observer) {
            return false;
        }
        return observers.remove(observer);
    }

    /**
     * Counts this parser's observers.
     *
     * @return The number of observers registered with this parser.
     */
    public int countObservers() {
        return observers.size();
    }

    /**
     * This static utility method converts a list of {@code String} tokens (presumably just parsed
     * from a data line) to a list of {@code Double} objects containing the tokens' values; tokens
     * that cannot be parsed to a {@code Double} (including {@code null} and empty tokens) are
     * represented by {@code null}-objects in the resulting list. The resulting list therefore always
     * has the same size as the specified list, and each value keeps the position of its token.
     *
     * @param tokens A list of data tokens.
     * @return A list of the double values of the specified tokens
     * (an empty list if {@code tokens} is {@code null}).
     */
    public static List<Double> convertTokensToDoubles(List<String> tokens) {

        if (null == tokens) {
            return Collections.emptyList();
        }

        List<Double> doubles = new ArrayList<Double>(tokens.size());
        for (String tok : tokens) {
            // A placeholder is added even for unusable tokens in order to keep
            // the values aligned with the positions of their data series.
            doubles.add(parseDoubleOrNull(tok));
        }
        return doubles;
    }

    /**
     * Parses a single data token to a {@code Double}.
     *
     * @param token The token to parse (may be {@code null}).
     * @return The value of the token, or {@code null} if the token is {@code null}, contains
     * only whitespace or is not a valid representation of a {@code double}.
     */
    private static Double parseDoubleOrNull(String token) {

        if (null == token) {
            return null;
        }

        String trimmed = token.trim();
        if (0 == trimmed.length()) {
            return null;
        }

        try {
            return Double.valueOf(trimmed);
        } catch (NumberFormatException e) {
            // A token which is not a number is an expected condition and is reported as null.
            return null;
        }
    }


    /**
     * This static utility method converts a list of strings (presumably representing a list of
     * labels just parsed from the data file) to a list of strings where each string is unique
     * in respect to its {@code equals} method (case sensitive); this happens by attaching
     * counters to repeated strings: for instance, {@code ["boo", "foo", "boo"]} is converted to
     * {@code ["boo (1)", "foo", "boo (2)"]}. All labels are trimmed before they are compared.<br />
     * Note that raw labels which already look like a generated label (e.g. {@code "boo (2)"})
     * are not treated specially, so the result may still contain duplicates if such a label
     * coincides with a generated one.
     *
     * @param rawLabels The list of labels to convert; {@code null} entries are treated as
     * empty strings.
     * @param allowEmptyLabels If this is {@code false}, all empty strings ({@code ""}) are converted
     * to underscores ({@code "_"}) before possibly applying the counters.
     * @return A list of unique data series labels based on the specified list
     * (an empty list if {@code rawLabels} is {@code null}).
     */
    public static List<String> createUniqueLabels(List<String> rawLabels, boolean allowEmptyLabels) {

        if (null == rawLabels) {
            return new ArrayList<String>();
        }

        List<String> uniqueLabels = new ArrayList<String>(rawLabels.size());
        Map<String, Integer> labelCounts = new HashMap<String, Integer>();

        // Attach a counter to the second and every further occurrence of a label:
        for (String rawLabel : rawLabels) {

            String label = (null == rawLabel) ? "" : rawLabel.trim();
            if (!allowEmptyLabels && label.length() == 0) {
                label = "_";
            }

            Integer seenSoFar = labelCounts.get(label);
            if (null == seenSoFar) {
                labelCounts.put(label, 1);
                uniqueLabels.add(label);
            } else {
                int count = seenSoFar + 1;
                labelCounts.put(label, count);
                uniqueLabels.add(label + " (" + count + ")");
            }
        }

        // Change the first occurrence of "label" into "label (1)" for the labels which appear more than once:
        for (Map.Entry<String, Integer> entry : labelCounts.entrySet()) {
            if (1 < entry.getValue()) {
                String label = entry.getKey();
                int first = uniqueLabels.indexOf(label);
                uniqueLabels.set(first, label + " (1)");
            }
        }

        return uniqueLabels;
    }


    /**
     * Examines a data line and dispatches to a specialised parsing routine.
     * The checks are made in this order: separator definition (only while the separator was not
     * finalised yet), comment, file info, data series labels (only once), data.
     *
     * @param line A data line.
     * @throws DataFormatException If the data stream contents do not conform with the expected data
     * stream format.
     */
    private void processLine(String line) throws DataFormatException {

        if (!separatorSet && line.startsWith(TAGSepDefinition) && line.endsWith(TAGSepDefinition)) {
            processSeparatorDefinitionLine(line);
            return;
        }

        if (line.startsWith(TAGComment)) {
            processCommentLine(line);
            return;
        }

        if (line.startsWith(TAGFileInfo)) {
            processFileInfoLine(line);
            return;
        }

        if (!labelsSet) {
            processSeriesLabelsLine(line);
            return;
        }

        // Everything after the labels line that is not tagged is a data line.
        processDataLine(line);
    }

    /**
     * Parses a data values separator definition line. The separator is the text enclosed
     * between the leading and the trailing separator definition tag.
     *
     * @param line Data line to parse.
     * @throws DataFormatException If the data line contents are not in the expected format.
     */
    private void processSeparatorDefinitionLine(String line) throws DataFormatException {

        final int tagLength = TAGSepDefinition.length();

        // The line must be able to hold an opening and a closing tag:
        if (line.length() < tagLength * 2) {
            throw new DataFormatException("Illegal separator definition: \"" + line + "\"");
        }

        if (line.length() == tagLength * 2) {
            throw new DataFormatException("Illegal separator definition: separator may not be an empty string");
        }

        String sep = line.substring(tagLength, line.length() - tagLength);

        String problem = isValidSeparator(sep);
        if (null != problem) {
            throw new DataFormatException("Illegal separator definition: " + problem);
        }

        separator = sep;
        separatorSet = true;
        notifySeparatorSet(separator);
    }

    /**
     * Parses a comment line. The comment tag is removed and the remaining text is trimmed
     * before it is passed to the observers. Also finalises the data values separator.
     *
     * @param line Data line to parse.
     * @throws DataFormatException If the data line contents are not in the expected format.
     */
    private void processCommentLine(String line) throws DataFormatException {

        String comment = "";
        if (line.length() > TAGComment.length()) {
            comment = line.substring(TAGComment.length()).trim();
        }

        separatorSet = true;
        notifyCommentLine(comment);
    }

    /**
     * Parses a file information line. The file info tag is removed and the remaining text
     * is trimmed before it is passed to the observers. Also finalises the data values separator.
     *
     * @param line Data line to parse.
     * @throws DataFormatException If the data line contents are not in the expected format.
     */
    private void processFileInfoLine(String line) throws DataFormatException {

        String info = "";
        if (line.length() > TAGFileInfo.length()) {
            info = line.substring(TAGFileInfo.length()).trim();
        }

        separatorSet = true;
        notifyFileInfoLine(info);
    }

    /**
     * Parses a data series headings line. The labels are passed to the observers as an
     * unmodifiable list, and the index of the next dataset is reset to {@code 0}.
     *
     * @param line Data line to parse.
     * @throws DataFormatException If the data line contents are not in the expected format.
     */
    private void processSeriesLabelsLine(String line) throws DataFormatException {

        DataLineTokenizer tok = new DataLineTokenizer(line, separator);
        nextDatasetFileIndex = 0;
        labelsSet = true;
        separatorSet = true;
        notifyLabelsSet(Collections.unmodifiableList(tok.getTokens()));
    }

    /**
     * Parses a data line and passes its tokens along with the dataset's index to the observers.
     * The dataset index is incremented afterwards.
     *
     * @param line Data line to parse.
     * @throws DataFormatException If the data line contents are not in the expected format.
     */
    private void processDataLine(String line) throws DataFormatException {

        DataLineTokenizer tok = new DataLineTokenizer(line, separator);
        separatorSet = true;
        notifyDataLineRead(tok.getTokens(), nextDatasetFileIndex++);
    }

}