001/* An operator for file sources. 002 * 003 * Copyright (c) 2014 The Regents of the University of California. 004 * All rights reserved. 005 * 006 * '$Author: crawl $' 007 * '$Date: 2014-10-08 21:03:37 +0000 (Wed, 08 Oct 2014) $' 008 * '$Revision: 32994 $' 009 * 010 * Permission is hereby granted, without written agreement and without 011 * license or royalty fees, to use, copy, modify, and distribute this 012 * software and its documentation for any purpose, provided that the above 013 * copyright notice and the following two paragraphs appear in all copies 014 * of this software. 015 * 016 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 017 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 018 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 019 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 020 * SUCH DAMAGE. 021 * 022 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 023 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 024 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 025 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 026 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 027 * ENHANCEMENTS, OR MODIFICATIONS. 028 * 029 */ 030package org.kepler.spark.operator; 031 032import java.net.URI; 033 034import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 035import org.apache.spark.api.java.JavaPairRDD; 036 037/** A data sink that reads from files. 038 * 039 * @author Daniel Crawl 040 * @version $Id: FileDataSource.java 32994 2014-10-08 21:03:37Z crawl $ 041 */ 042public class FileDataSource extends DataSource { 043 044 /** Create a new FileDataSource. 045 * @param inputFormatClass the format class to read the file 046 * @param path the input file path 047 * @param name the name of the operator 048 */ 049 public FileDataSource(Class<? extends FileInputFormat<?,?>> inputFormatClass, URI path, String name) { 050 super(inputFormatClass, name); 051 _path = path; 052 } 053 054 /** Execute the operator. */ 055 @Override 056 public JavaPairRDD<Object, ?> execute() { 057 return _context.newAPIHadoopFile(_path.toString(), 058 (Class<? extends FileInputFormat>)_stub, 059 Object.class, Object.class, _configuration); 060 } 061 062 /////////////////////////////////////////////////////////////////// 063 //// private fields ////// 064 065 /** The path of the input file. */ 066 private URI _path; 067 068}