001/* An actor that outputs strings read from a text file or URL. 002 003 @Copyright (c) 2002-2015 The Regents of the University of California. 004 All rights reserved. 005 006 Permission is hereby granted, without written agreement and without 007 license or royalty fees, to use, copy, modify, and distribute this 008 software and its documentation for any purpose, provided that the 009 above copyright notice and the following two paragraphs appear in all 010 copies of this software. 011 012 IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 013 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 014 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 015 THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 016 SUCH DAMAGE. 017 018 THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 019 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 020 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 021 PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 022 CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 023 ENHANCEMENTS, OR MODIFICATIONS. 024 025 PT_COPYRIGHT_VERSION 2 026 COPYRIGHTENDKEY 027 */ 028package ptolemy.actor.lib.io; 029 030import java.io.IOException; 031import java.util.ArrayList; 032import java.util.HashSet; 033import java.util.Set; 034import java.util.StringTokenizer; 035 036import ptolemy.data.BooleanToken; 037import ptolemy.data.OrderedRecordToken; 038import ptolemy.data.RecordToken; 039import ptolemy.data.StringToken; 040import ptolemy.data.Token; 041import ptolemy.data.expr.ASTPtRootNode; 042import ptolemy.data.expr.FileParameter; 043import ptolemy.data.expr.ModelScope; 044import ptolemy.data.expr.Parameter; 045import ptolemy.data.expr.ParseTreeEvaluator; 046import ptolemy.data.expr.ParserScope; 047import ptolemy.data.expr.PtParser; 048import ptolemy.data.expr.SingletonParameter; 049import ptolemy.data.expr.StringParameter; 050import ptolemy.data.expr.Variable; 051import ptolemy.data.type.BaseType; 052import ptolemy.data.type.Type; 053import ptolemy.graph.Inequality; 054import ptolemy.kernel.CompositeEntity; 055import ptolemy.kernel.util.Attribute; 056import ptolemy.kernel.util.IllegalActionException; 057import ptolemy.kernel.util.NameDuplicationException; 058import ptolemy.kernel.util.Settable; 059 060/////////////////////////////////////////////////////////////////// 061//// CSVReader 062 063/** 064 <p> 065 This actor reads a file or URL, one line at a time, and outputs each line 066 except the first as a record. The first line of the file gives 067 the names of the fields of the output records. 068 The remaining lines give the values of the fields. 069 The output is an ordered 070 record token, which means that the order defined in the 071 first line is preserved. 072 </p><p> 073 <b>NOTE:</b> By default, this actor imposes no type constraints 074 on its output. To use it in a model, you must either enable 075 backward type inference (a parameter at the top level of the model), 076 or explicitly declare the output type (by selecting Configure-Ports 077 in the context menu). If you use backward type inference, then the 078 constraints are inferred from how you use the output. For example, 079 if you extract a record field of a particular type, then the output 080 will be constrained to be a record that contains that field. 081 If you declare output types specifically, then every line read 082 from the file must conform. 083 For example, if you set the output the type 084 constraint to "[x = int, y = double]" then the output will be an 085 ordered record where the first field is named "x" and has type int, 086 and the second field is named "y" and has type double. 087 If any line in the file violates this typing, then an exception 088 will be thrown. 089 </p><p> 090 If any line has more values than 091 the first line, then the trailing values will be ignored. 092 If any line has fewer values than the first line, then the 093 field values will be an empty string. 094 </p><p> 095 By default, the separator between field names and values is a comma, 096 so the file format is the standard CSV (comma-separated value) format. 097 The <i>separator</i> parameter enables changing the separator to 098 tabs or semicolons. 099 </p><p> 100 The file or URL is specified using any form acceptable 101 to FileParameter. 102 </p><p> 103 Before an end of file is reached, the <i>endOfFile</i> 104 output produces <i>false</i>. In the iteration where the last line 105 of the file is read and produced on the <i>output</i> port, this actor 106 produces <i>true</i> on the <i>endOfFile</i> port. In that iteration, 107 postfire() returns false. If the actor is iterated again, after the end 108 of file, then prefire() and postfire() will both return false, <i>output</i> 109 will produce the string "EOF", and <i>endOfFile</i> will produce <i>true</i>. 110 </p><p> 111 In some domains (such as SDF), returning false in postfire() 112 causes the model to cease executing. 113 In other domains (such as DE), this causes the director to avoid 114 further firings of this actor. So usually, the actor will not be 115 invoked again after the end of file is reached. 116 </p><p> 117 This actor reads ahead in the file so that it can produce an output 118 <i>true</i> on <i>endOfFile</i> in the same iteration where it outputs 119 the last line. It reads the first two lines in preinitialize(), and 120 subsequently reads a new line in each invocation of postfire(). The 121 data type of the output is also set in preinitialize(), after reading 122 the first line, which defines the structure of the record. 123 line read is produced on the <i>output</i> in the next iteration 124 after it is read. 125 </p> 126 127 @see FileParameter 128 @author Edward A. Lee 129 @version $Id$ 130 @since Ptolemy II 10.0 131 @Pt.ProposedRating Yellow (eal) 132 @Pt.AcceptedRating Red (cxh) 133 */ 134public class CSVReader extends LineReader { 135 /** Construct an actor with the given container and name. 136 * @param container The container. 137 * @param name The name of this actor. 138 * @exception IllegalActionException If the actor cannot be contained 139 * by the proposed container. 140 * @exception NameDuplicationException If the container already has an 141 * actor with this name. 142 */ 143 public CSVReader(CompositeEntity container, String name) 144 throws IllegalActionException, NameDuplicationException { 145 super(container, name); 146 147 numberOfLinesToSkip.setVisibility(Settable.NONE); 148 149 separator = new StringParameter(this, "separator"); 150 separator.setExpression("comma"); 151 separator.addChoice("comma"); 152 separator.addChoice("tab"); 153 separator.addChoice("semicolon"); 154 155 trimSpaces = new Parameter(this, "trimSpaces"); 156 trimSpaces.setTypeEquals(BaseType.BOOLEAN); 157 trimSpaces.setExpression("true"); 158 159 new SingletonParameter(endOfFile, "_showName") 160 .setToken(BooleanToken.TRUE); 161 162 // Base class declares the output to be of type string, so we 163 // have to first undo that. 164 output.setTypeEquals(BaseType.UNKNOWN); 165 // Do not force the output to be a record because downstream 166 // types may be general, in which case, backward type inference 167 // will want to resolve to general, which is fine. I.e., resolving 168 // to anything above record types is also OK. 169 // output.setTypeAtMost(RecordType.EMPTY_RECORD); 170 171 _attachText("_iconDescription", "<svg>\n" + "<rect x=\"-25\" y=\"-20\" " 172 + "width=\"50\" height=\"40\" " + "style=\"fill:white\"/>\n" 173 + "<polygon points=\"-15,-10 -12,-10 -8,-14 -1,-14 3,-10" 174 + " 15,-10 15,10, -15,10\" " + "style=\"fill:red\"/>\n" 175 + "<text x=\"-11\" y=\"4\"" 176 + "style=\"font-size:11; fill:white; font-family:SansSerif\">" 177 + "CSV</text>\n" + "</svg>\n"); 178 } 179 180 /////////////////////////////////////////////////////////////////// 181 //// ports and parameters //// 182 183 /** A specification of the separator between items in the table. 184 * The default is "comma", which results in assuming that fields 185 * are separated by commas. If the value is changed to "tab", then 186 * a tab separator will be used. If the value is "semicolon", then 187 * a semicolon separator will be used. If the value is anything 188 * else, then the value of the parameter, whatever it is, will 189 * be the separator. 190 */ 191 public StringParameter separator; 192 193 /** If true, then trim spaces around each field name and value. 194 * This is a boolean that defaults to true. If you change it 195 * to false, then all spaces in the field names and values are 196 * preserved. Note that if there are spaces in the field names, 197 * then the value of the record cannot be read by the 198 * expression evaluator, so spaces in field names are not 199 * recommended. 200 */ 201 public Parameter trimSpaces; 202 203 /////////////////////////////////////////////////////////////////// 204 //// public methods //// 205 206 /** If the specified attribute is <i>separator</i> then set a local 207 * variable with the value of the separator. 208 * @param attribute The attribute that has changed. 209 * @exception IllegalActionException If the specified attribute 210 * is <i>fileOrURL</i> and the file cannot be opened, or the previously 211 * opened file cannot be closed; or if the attribute is 212 * <i>numberOfLinesToSkip</i> and its value is negative. 213 */ 214 @Override 215 public void attributeChanged(Attribute attribute) 216 throws IllegalActionException { 217 if (attribute == separator) { 218 _delimiter = separator.stringValue(); 219 if (_delimiter.equals("comma")) { 220 _delimiter = ","; 221 } else if (_delimiter.equals("tab")) { 222 _delimiter = "\t"; 223 } else if (_delimiter.equals("semicolon")) { 224 _delimiter = ";"; 225 } else { 226 _delimiter = separator.stringValue(); 227 } 228 } else { 229 super.attributeChanged(attribute); 230 } 231 } 232 233 /** Output the data read in the preinitialize() or in the previous 234 * invocation of postfire(), if there is any. 235 * @exception IllegalActionException If there's no director. 236 */ 237 @Override 238 public void fire() throws IllegalActionException { 239 // Cannot invoke super.fire() because it produces the wrong 240 // output. 241 // super.fire(); 242 243 // Duplicated from the AtomicActor base class: 244 if (_debugging) { 245 _debug("Called fire()"); 246 } 247 248 // Duplicated from the Source base class: 249 for (int i = 0; i < trigger.getWidth(); i++) { 250 if (trigger.hasToken(i)) { 251 trigger.get(i); 252 } 253 } 254 255 if (_firstFiring) { 256 _openAndReadFirstTwoLines(); 257 _firstFiring = false; 258 259 if (_currentLine == null) { 260 throw new IllegalActionException("File has no data."); 261 } 262 StringTokenizer tokenizer = new StringTokenizer(_currentLine, 263 _delimiter); 264 ArrayList<String> fieldNames = new ArrayList<String>(); 265 while (tokenizer.hasMoreElements()) { 266 String nextName = tokenizer.nextToken(); 267 if (((BooleanToken) trimSpaces.getToken()).booleanValue()) { 268 nextName = nextName.trim(); 269 } 270 fieldNames.add(nextName); 271 } 272 _fieldNames = new String[1]; 273 _fieldNames = fieldNames.toArray(_fieldNames); 274 275 // Type[] fieldTypes = new Type[_fieldNames.length]; 276 // for (int i = 0; i < _fieldNames.length; i++) { 277 // fieldTypes[i] = BaseType.STRING; 278 // } 279 280 // Skip the first line, which only has header information. 281 _currentLine = _nextLine; 282 try { 283 _nextLine = _reader.readLine(); 284 } catch (IOException ex) { 285 throw new IllegalActionException(this, ex, 286 "initialize() failed"); 287 } 288 } 289 290 if (_currentLine != null) { 291 StringTokenizer tokenizer = new StringTokenizer(_currentLine, 292 _delimiter); 293 int i = 0; 294 Token[] fieldValues = new Token[_fieldNames.length]; 295 while (tokenizer.hasMoreTokens()) { 296 if (i >= _fieldNames.length) { 297 // Ignore additional fields. 298 break; 299 } 300 String nextToken = tokenizer.nextToken(); 301 if (((BooleanToken) trimSpaces.getToken()).booleanValue()) { 302 nextToken = nextToken.trim(); 303 } 304 if (_parser == null) { 305 _parser = new PtParser(); 306 } 307 308 ASTPtRootNode parseTree = null; 309 try { 310 parseTree = _parser.generateParseTree(nextToken); 311 } catch (Exception ex) { 312 // If the field cannot be parsed, then interpret 313 // the field as a string. 314 fieldValues[i] = new StringToken(nextToken); 315 } 316 if (parseTree != null) { 317 if (_parseTreeEvaluator == null) { 318 _parseTreeEvaluator = new ParseTreeEvaluator(); 319 } 320 321 if (_scope == null) { 322 _scope = new ExpressionScope(); 323 } 324 325 try { 326 fieldValues[i] = _parseTreeEvaluator 327 .evaluateParseTree(parseTree, _scope); 328 } catch (Exception ex) { 329 // If the field cannot be evaluated, then interpret 330 // the field as a string. 331 fieldValues[i] = new StringToken(nextToken); 332 } 333 } 334 335 i++; 336 } 337 while (i < _fieldNames.length) { 338 fieldValues[i] = new StringToken(""); 339 i++; 340 } 341 RecordToken outputValue = new OrderedRecordToken(_fieldNames, 342 fieldValues); 343 output.broadcast(outputValue); 344 } 345 if (_nextLine == null) { 346 endOfFile.broadcast(BooleanToken.TRUE); 347 } else { 348 endOfFile.broadcast(BooleanToken.FALSE); 349 } 350 } 351 352 /** Wrapup execution of this actor. This method overrides the 353 * base class to discard the internal parser to save memory. 354 */ 355 @Override 356 public void wrapup() { 357 _parser = null; 358 } 359 360 /////////////////////////////////////////////////////////////////// 361 //// protected methods //// 362 363 /** Override the default to eliminate the default type constraints/. 364 * @return An empty set of type constraints 365 */ 366 @Override 367 protected Set<Inequality> _defaultTypeConstraints() { 368 return new HashSet<Inequality>(); 369 } 370 371 /////////////////////////////////////////////////////////////////// 372 //// private members //// 373 374 /** The delimiter. */ 375 private String _delimiter = ","; 376 377 /** Field names for the output record. */ 378 private String[] _fieldNames; 379 380 /** The parse tree evaluator to use. */ 381 private ParseTreeEvaluator _parseTreeEvaluator = null; 382 383 /** The parser to use. */ 384 private PtParser _parser = null; 385 386 /** The scope for the parser. */ 387 private ParserScope _scope = null; 388 389 /////////////////////////////////////////////////////////////////// 390 //// inner classes //// 391 392 // FIXME: This is copied from ExpressionToToken. Some way to share? 393 private class ExpressionScope extends ModelScope { 394 /** Look up and return the attribute with the specified name in the 395 * scope. Return null if such an attribute does not exist. 396 * @return The attribute with the specified name in the scope. 397 */ 398 @Override 399 public Token get(String name) throws IllegalActionException { 400 Variable result = getScopedVariable(null, CSVReader.this, name); 401 402 if (result != null) { 403 return result.getToken(); 404 } 405 406 return null; 407 } 408 409 /** Look up and return the type of the attribute with the 410 * specified name in the scope. Return null if such an 411 * attribute does not exist. 412 * @return The attribute with the specified name in the scope. 413 */ 414 @Override 415 public Type getType(String name) throws IllegalActionException { 416 Variable result = getScopedVariable(null, CSVReader.this, name); 417 418 if (result != null) { 419 return (Type) result.getTypeTerm().getValue(); 420 } 421 422 return null; 423 } 424 425 /** Look up and return the type term for the specified name 426 * in the scope. Return null if the name is not defined in this 427 * scope, or is a constant type. 428 * @return The InequalityTerm associated with the given name in 429 * the scope. 430 * @exception IllegalActionException If a value in the scope 431 * exists with the given name, but cannot be evaluated. 432 */ 433 @Override 434 public ptolemy.graph.InequalityTerm getTypeTerm(String name) 435 throws IllegalActionException { 436 Variable result = getScopedVariable(null, CSVReader.this, name); 437 438 if (result != null) { 439 return result.getTypeTerm(); 440 } 441 442 return null; 443 } 444 445 /** Return the list of identifiers within the scope. 446 * @return The list of identifiers within the scope. 447 */ 448 @Override 449 public Set identifierSet() { 450 return getAllScopedVariableNames(null, CSVReader.this); 451 } 452 } 453}