001/* An operator for file sources.
002 * 
003 * Copyright (c) 2014 The Regents of the University of California.
004 * All rights reserved.
005 *
006 * '$Author: crawl $'
007 * '$Date: 2014-10-08 21:03:37 +0000 (Wed, 08 Oct 2014) $' 
008 * '$Revision: 32994 $'
009 * 
010 * Permission is hereby granted, without written agreement and without
011 * license or royalty fees, to use, copy, modify, and distribute this
012 * software and its documentation for any purpose, provided that the above
013 * copyright notice and the following two paragraphs appear in all copies
014 * of this software.
015 *
016 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
017 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
018 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
019 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
020 * SUCH DAMAGE.
021 *
022 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
023 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
024 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
025 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
026 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
027 * ENHANCEMENTS, OR MODIFICATIONS.
028 *
029 */
030package org.kepler.spark.operator;
031
032import java.net.URI;
033
034import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
035import org.apache.spark.api.java.JavaPairRDD;
036
037/** A data sink that reads from files.
038 * 
039 *  @author Daniel Crawl
040 *  @version $Id: FileDataSource.java 32994 2014-10-08 21:03:37Z crawl $
041 */
042public class FileDataSource extends DataSource {
043
044    /** Create a new FileDataSource.
045     *  @param inputFormatClass the format class to read the file
046     *  @param path the input file path
047     *  @param name the name of the operator  
048     */
049    public FileDataSource(Class<? extends FileInputFormat<?,?>> inputFormatClass, URI path, String name) {
050        super(inputFormatClass, name);
051        _path = path;
052    }
053    
054    /** Execute the operator. */
055    @Override
056    public JavaPairRDD<Object, ?> execute() {   
057        return _context.newAPIHadoopFile(_path.toString(),
058                (Class<? extends FileInputFormat>)_stub,
059                Object.class, Object.class, _configuration);
060    }
061
062    ///////////////////////////////////////////////////////////////////
063    ////                      private fields                     //////
064    
065    /** The path of the input file. */
066    private URI _path;
067    
068}