001/* An actor that outputs strings read from a text file or URL.
002
003 @Copyright (c) 2002-2015 The Regents of the University of California.
004 All rights reserved.
005
006 Permission is hereby granted, without written agreement and without
007 license or royalty fees, to use, copy, modify, and distribute this
008 software and its documentation for any purpose, provided that the
009 above copyright notice and the following two paragraphs appear in all
010 copies of this software.
011
012 IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
013 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
014 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
015 THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
016 SUCH DAMAGE.
017
018 THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
019 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
020 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
021 PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
022 CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
023 ENHANCEMENTS, OR MODIFICATIONS.
024
025 PT_COPYRIGHT_VERSION 2
026 COPYRIGHTENDKEY
027 */
028package ptolemy.actor.lib.io;
029
030import java.io.IOException;
031import java.util.ArrayList;
032import java.util.HashSet;
033import java.util.Set;
034import java.util.StringTokenizer;
035
036import ptolemy.data.BooleanToken;
037import ptolemy.data.OrderedRecordToken;
038import ptolemy.data.RecordToken;
039import ptolemy.data.StringToken;
040import ptolemy.data.Token;
041import ptolemy.data.expr.ASTPtRootNode;
042import ptolemy.data.expr.FileParameter;
043import ptolemy.data.expr.ModelScope;
044import ptolemy.data.expr.Parameter;
045import ptolemy.data.expr.ParseTreeEvaluator;
046import ptolemy.data.expr.ParserScope;
047import ptolemy.data.expr.PtParser;
048import ptolemy.data.expr.SingletonParameter;
049import ptolemy.data.expr.StringParameter;
050import ptolemy.data.expr.Variable;
051import ptolemy.data.type.BaseType;
052import ptolemy.data.type.Type;
053import ptolemy.graph.Inequality;
054import ptolemy.kernel.CompositeEntity;
055import ptolemy.kernel.util.Attribute;
056import ptolemy.kernel.util.IllegalActionException;
057import ptolemy.kernel.util.NameDuplicationException;
058import ptolemy.kernel.util.Settable;
059
060///////////////////////////////////////////////////////////////////
061//// CSVReader
062
063/**
064 <p>
065 This actor reads a file or URL, one line at a time, and outputs each line
066 except the first as a record. The first line of the file gives
067 the names of the fields of the output records.
068 The remaining lines give the values of the fields.
069 The output is an ordered
070 record token, which means that the order defined in the
071 first line is preserved.
072 </p><p>
073 <b>NOTE:</b> By default, this actor imposes no type constraints
074 on its output. To use it in a model, you must either enable
075 backward type inference (a parameter at the top level of the model),
076 or explicitly declare the output type (by selecting Configure-Ports
077 in the context menu). If you use backward type inference, then the
078 constraints are inferred from how you use the output. For example,
079 if you extract a record field of a particular type, then the output
080 will be constrained to be a record that contains that field.
081 If you declare output types specifically, then every line read
082 from the file must conform.
 For example, if you set the output type
084 constraint to "[x = int, y = double]" then the output will be an
085 ordered record where the first field is named "x" and has type int,
086 and the second field is named "y" and has type double.
087 If any line in the file violates this typing, then an exception
088 will be thrown.
089 </p><p>
090 If any line has more values than
091 the first line, then the trailing values will be ignored.
092 If any line has fewer values than the first line, then the
093 field values will be an empty string.
094 </p><p>
095 By default, the separator between field names and values is a comma,
096 so the file format is the standard CSV (comma-separated value) format.
097 The <i>separator</i> parameter enables changing the separator to
098 tabs or semicolons.
099 </p><p>
100 The file or URL is specified using any form acceptable
101 to FileParameter.
102 </p><p>
103 Before an end of file is reached, the <i>endOfFile</i>
104 output produces <i>false</i>.  In the iteration where the last line
105 of the file is read and produced on the <i>output</i> port, this actor
106 produces <i>true</i> on the <i>endOfFile</i> port. In that iteration,
107 postfire() returns false.  If the actor is iterated again, after the end
108 of file, then prefire() and postfire() will both return false, <i>output</i>
109 will produce the string "EOF", and <i>endOfFile</i> will produce <i>true</i>.
110 </p><p>
111 In some domains (such as SDF), returning false in postfire()
112 causes the model to cease executing.
113 In other domains (such as DE), this causes the director to avoid
114 further firings of this actor.  So usually, the actor will not be
115 invoked again after the end of file is reached.
116 </p><p>
117 This actor reads ahead in the file so that it can produce an output
118 <i>true</i> on <i>endOfFile</i> in the same iteration where it outputs
 the last line.  It reads the first two lines in the first invocation of
 fire(), and subsequently reads a new line in each invocation of postfire().
 The first line defines the field names, and hence the structure, of the
 output records; the data type of the output is not set by this actor
 (see the note above).
 Each line read is produced on the <i>output</i> in the next iteration
124 after it is read.
125 </p>
126
127 @see FileParameter
128 @author  Edward A. Lee
129 @version $Id$
130 @since Ptolemy II 10.0
131 @Pt.ProposedRating Yellow (eal)
132 @Pt.AcceptedRating Red (cxh)
133 */
134public class CSVReader extends LineReader {
135    /** Construct an actor with the given container and name.
136     *  @param container The container.
137     *  @param name The name of this actor.
138     *  @exception IllegalActionException If the actor cannot be contained
139     *   by the proposed container.
140     *  @exception NameDuplicationException If the container already has an
141     *   actor with this name.
142     */
143    public CSVReader(CompositeEntity container, String name)
144            throws IllegalActionException, NameDuplicationException {
145        super(container, name);
146
147        numberOfLinesToSkip.setVisibility(Settable.NONE);
148
149        separator = new StringParameter(this, "separator");
150        separator.setExpression("comma");
151        separator.addChoice("comma");
152        separator.addChoice("tab");
153        separator.addChoice("semicolon");
154
155        trimSpaces = new Parameter(this, "trimSpaces");
156        trimSpaces.setTypeEquals(BaseType.BOOLEAN);
157        trimSpaces.setExpression("true");
158
159        new SingletonParameter(endOfFile, "_showName")
160                .setToken(BooleanToken.TRUE);
161
162        // Base class declares the output to be of type string, so we
163        // have to first undo that.
164        output.setTypeEquals(BaseType.UNKNOWN);
165        // Do not force the output to be a record because downstream
166        // types may be general, in which case, backward type inference
167        // will want to resolve to general, which is fine. I.e., resolving
168        // to anything above record types is also OK.
169        // output.setTypeAtMost(RecordType.EMPTY_RECORD);
170
171        _attachText("_iconDescription", "<svg>\n" + "<rect x=\"-25\" y=\"-20\" "
172                + "width=\"50\" height=\"40\" " + "style=\"fill:white\"/>\n"
173                + "<polygon points=\"-15,-10 -12,-10 -8,-14 -1,-14 3,-10"
174                + " 15,-10 15,10, -15,10\" " + "style=\"fill:red\"/>\n"
175                + "<text x=\"-11\" y=\"4\""
176                + "style=\"font-size:11; fill:white; font-family:SansSerif\">"
177                + "CSV</text>\n" + "</svg>\n");
178    }
179
180    ///////////////////////////////////////////////////////////////////
181    ////                     ports and parameters                  ////
182
183    /** A specification of the separator between items in the table.
184     *  The default is "comma", which results in assuming that fields
185     *  are separated by commas. If the value is changed to "tab", then
186     *  a tab separator will be used. If the value is "semicolon", then
187     *  a semicolon separator will be used. If the value is anything
188     *  else, then the value of the parameter, whatever it is, will
189     *  be the separator.
190     */
191    public StringParameter separator;
192
193    /** If true, then trim spaces around each field name and value.
194     *  This is a boolean that defaults to true. If you change it
195     *  to false, then all spaces in the field names and values are
196     *  preserved. Note that if there are spaces in the field names,
197     *  then the value of the record cannot be read by the
198     *  expression evaluator, so spaces in field names are not
199     *  recommended.
200     */
201    public Parameter trimSpaces;
202
203    ///////////////////////////////////////////////////////////////////
204    ////                         public methods                    ////
205
206    /** If the specified attribute is <i>separator</i> then set a local
207     *  variable with the value of the separator.
208     *  @param attribute The attribute that has changed.
209     *  @exception IllegalActionException If the specified attribute
210     *   is <i>fileOrURL</i> and the file cannot be opened, or the previously
211     *   opened file cannot be closed; or if the attribute is
212     *   <i>numberOfLinesToSkip</i> and its value is negative.
213     */
214    @Override
215    public void attributeChanged(Attribute attribute)
216            throws IllegalActionException {
217        if (attribute == separator) {
218            _delimiter = separator.stringValue();
219            if (_delimiter.equals("comma")) {
220                _delimiter = ",";
221            } else if (_delimiter.equals("tab")) {
222                _delimiter = "\t";
223            } else if (_delimiter.equals("semicolon")) {
224                _delimiter = ";";
225            } else {
226                _delimiter = separator.stringValue();
227            }
228        } else {
229            super.attributeChanged(attribute);
230        }
231    }
232
233    /** Output the data read in the preinitialize() or in the previous
234     *  invocation of postfire(), if there is any.
235     *  @exception IllegalActionException If there's no director.
236     */
237    @Override
238    public void fire() throws IllegalActionException {
239        // Cannot invoke super.fire() because it produces the wrong
240        // output.
241        // super.fire();
242
243        // Duplicated from the AtomicActor base class:
244        if (_debugging) {
245            _debug("Called fire()");
246        }
247
248        // Duplicated from the Source base class:
249        for (int i = 0; i < trigger.getWidth(); i++) {
250            if (trigger.hasToken(i)) {
251                trigger.get(i);
252            }
253        }
254
255        if (_firstFiring) {
256            _openAndReadFirstTwoLines();
257            _firstFiring = false;
258
259            if (_currentLine == null) {
260                throw new IllegalActionException("File has no data.");
261            }
262            StringTokenizer tokenizer = new StringTokenizer(_currentLine,
263                    _delimiter);
264            ArrayList<String> fieldNames = new ArrayList<String>();
265            while (tokenizer.hasMoreElements()) {
266                String nextName = tokenizer.nextToken();
267                if (((BooleanToken) trimSpaces.getToken()).booleanValue()) {
268                    nextName = nextName.trim();
269                }
270                fieldNames.add(nextName);
271            }
272            _fieldNames = new String[1];
273            _fieldNames = fieldNames.toArray(_fieldNames);
274
275            // Type[] fieldTypes = new Type[_fieldNames.length];
276            // for (int i = 0; i < _fieldNames.length; i++) {
277            //     fieldTypes[i] = BaseType.STRING;
278            // }
279
280            // Skip the first line, which only has header information.
281            _currentLine = _nextLine;
282            try {
283                _nextLine = _reader.readLine();
284            } catch (IOException ex) {
285                throw new IllegalActionException(this, ex,
286                        "initialize() failed");
287            }
288        }
289
290        if (_currentLine != null) {
291            StringTokenizer tokenizer = new StringTokenizer(_currentLine,
292                    _delimiter);
293            int i = 0;
294            Token[] fieldValues = new Token[_fieldNames.length];
295            while (tokenizer.hasMoreTokens()) {
296                if (i >= _fieldNames.length) {
297                    // Ignore additional fields.
298                    break;
299                }
300                String nextToken = tokenizer.nextToken();
301                if (((BooleanToken) trimSpaces.getToken()).booleanValue()) {
302                    nextToken = nextToken.trim();
303                }
304                if (_parser == null) {
305                    _parser = new PtParser();
306                }
307
308                ASTPtRootNode parseTree = null;
309                try {
310                    parseTree = _parser.generateParseTree(nextToken);
311                } catch (Exception ex) {
312                    // If the field cannot be parsed, then interpret
313                    // the field as a string.
314                    fieldValues[i] = new StringToken(nextToken);
315                }
316                if (parseTree != null) {
317                    if (_parseTreeEvaluator == null) {
318                        _parseTreeEvaluator = new ParseTreeEvaluator();
319                    }
320
321                    if (_scope == null) {
322                        _scope = new ExpressionScope();
323                    }
324
325                    try {
326                        fieldValues[i] = _parseTreeEvaluator
327                                .evaluateParseTree(parseTree, _scope);
328                    } catch (Exception ex) {
329                        // If the field cannot be evaluated, then interpret
330                        // the field as a string.
331                        fieldValues[i] = new StringToken(nextToken);
332                    }
333                }
334
335                i++;
336            }
337            while (i < _fieldNames.length) {
338                fieldValues[i] = new StringToken("");
339                i++;
340            }
341            RecordToken outputValue = new OrderedRecordToken(_fieldNames,
342                    fieldValues);
343            output.broadcast(outputValue);
344        }
345        if (_nextLine == null) {
346            endOfFile.broadcast(BooleanToken.TRUE);
347        } else {
348            endOfFile.broadcast(BooleanToken.FALSE);
349        }
350    }
351
352    /** Wrapup execution of this actor.  This method overrides the
353     *  base class to discard the internal parser to save memory.
354     */
355    @Override
356    public void wrapup() {
357        _parser = null;
358    }
359
360    ///////////////////////////////////////////////////////////////////
361    ////                         protected methods                 ////
362
363    /** Override the default to eliminate the default type constraints/.
364     *  @return An empty set of type constraints
365     */
366    @Override
367    protected Set<Inequality> _defaultTypeConstraints() {
368        return new HashSet<Inequality>();
369    }
370
371    ///////////////////////////////////////////////////////////////////
372    ////                         private members                   ////
373
374    /** The delimiter. */
375    private String _delimiter = ",";
376
377    /** Field names for the output record. */
378    private String[] _fieldNames;
379
380    /** The parse tree evaluator to use. */
381    private ParseTreeEvaluator _parseTreeEvaluator = null;
382
383    /** The parser to use. */
384    private PtParser _parser = null;
385
386    /** The scope for the parser. */
387    private ParserScope _scope = null;
388
389    ///////////////////////////////////////////////////////////////////
390    ////                         inner classes                     ////
391
392    // FIXME: This is copied from ExpressionToToken. Some way to share?
393    private class ExpressionScope extends ModelScope {
394        /** Look up and return the attribute with the specified name in the
395         *  scope. Return null if such an attribute does not exist.
396         *  @return The attribute with the specified name in the scope.
397         */
398        @Override
399        public Token get(String name) throws IllegalActionException {
400            Variable result = getScopedVariable(null, CSVReader.this, name);
401
402            if (result != null) {
403                return result.getToken();
404            }
405
406            return null;
407        }
408
409        /** Look up and return the type of the attribute with the
410         *  specified name in the scope. Return null if such an
411         *  attribute does not exist.
412         *  @return The attribute with the specified name in the scope.
413         */
414        @Override
415        public Type getType(String name) throws IllegalActionException {
416            Variable result = getScopedVariable(null, CSVReader.this, name);
417
418            if (result != null) {
419                return (Type) result.getTypeTerm().getValue();
420            }
421
422            return null;
423        }
424
425        /** Look up and return the type term for the specified name
426         *  in the scope. Return null if the name is not defined in this
427         *  scope, or is a constant type.
428         *  @return The InequalityTerm associated with the given name in
429         *  the scope.
430         *  @exception IllegalActionException If a value in the scope
431         *  exists with the given name, but cannot be evaluated.
432         */
433        @Override
434        public ptolemy.graph.InequalityTerm getTypeTerm(String name)
435                throws IllegalActionException {
436            Variable result = getScopedVariable(null, CSVReader.this, name);
437
438            if (result != null) {
439                return result.getTypeTerm();
440            }
441
442            return null;
443        }
444
445        /** Return the list of identifiers within the scope.
446         *  @return The list of identifiers within the scope.
447         */
448        @Override
449        public Set identifierSet() {
450            return getAllScopedVariableNames(null, CSVReader.this);
451        }
452    }
453}