001/* 
002 * Copyright (c) 2012-2013 The Regents of the University of California.
003 * All rights reserved.
004 *
005 * '$Author: crawl $'
006 * '$Date: 2015-09-04 16:53:05 +0000 (Fri, 04 Sep 2015) $' 
007 * '$Revision: 33862 $'
008 * 
009 * Permission is hereby granted, without written agreement and without
010 * license or royalty fees, to use, copy, modify, and distribute this
011 * software and its documentation for any purpose, provided that the above
012 * copyright notice and the following two paragraphs appear in all copies
013 * of this software.
014 *
015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
019 * SUCH DAMAGE.
020 *
021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
026 * ENHANCEMENTS, OR MODIFICATIONS.
027 *
028 */
029
030package org.kepler.hadoop.mapreduce;
031
032import java.io.IOException;
033import java.net.InetAddress;
034
035import org.apache.hadoop.fs.Path;
036import org.apache.hadoop.io.IOUtils;
037import org.apache.hadoop.io.SequenceFile;
038import org.apache.hadoop.io.Writable;
039import org.apache.hadoop.io.WritableComparable;
040import org.apache.hadoop.mapreduce.Mapper;
041import org.apache.log4j.Logger;
042import org.kepler.ddp.Utilities;
043
044
045//////////////////////////////////////////////////////////////////////////
046////Mapper4DataTransform
047
048/**
049* Mapper4DataTransform is a subclass of the Mapper to read data using a certain
050* input format and save it in SequenceFile format. It is mainly a pre-processing
051* for DualInput patterns.
052* 
053* @author Jianwu Wang (jianwu@sdsc.edu)
054* @version $Id: Mapper4DataTransform.java 33862 2015-09-04 16:53:05Z crawl $
055*/
056public class Mapper4DataTransform extends
057                Mapper<WritableComparable, Writable, WritableComparable, Writable> {
058        
059        static Logger logger = Logger.getLogger(Mapper4DataTransform.class.getName());
060        
061    SequenceFile.Writer writer = null;
062    @Override
063    public void setup(Context context) {
064        try {  
065                String strPath = context.getConfiguration().get(Utilities.CONFIGURATION_KEPLER_DISTRIBUTED_CACHE_PATH);
066                final String keyClass = context.getConfiguration().get(Utilities.CONFIGURATION_KEPLER_JOB_KEY_CLASS);
067                final String valueClass = context.getConfiguration().get(Utilities.CONFIGURATION_KEPLER_JOB_VALUE_CLASS);               
068                final String ipAddress = InetAddress.getLocalHost().getHostAddress();
069                //in distributed environment, each slave node should have its own file to write in distributed cache.
070                strPath = strPath + "/" + ipAddress + "-" + context.getTaskAttemptID();
071                logger.debug(Utilities.CONFIGURATION_KEPLER_DISTRIBUTED_CACHE_PATH + " in Mapper4DataTransform : " + strPath);
072                logger.debug(Utilities.CONFIGURATION_KEPLER_JOB_KEY_CLASS + " in Mapper4DataTransform : " + keyClass);
073                logger.debug(Utilities.CONFIGURATION_KEPLER_JOB_VALUE_CLASS + " in Mapper4DataTransform : " + valueClass);
074            writer = SequenceFile.createWriter(context.getConfiguration(), SequenceFile.Writer.file(new Path(strPath)), SequenceFile.Writer.keyClass(Class.forName(keyClass)), SequenceFile.Writer.valueClass(Class.forName(valueClass)));      
075                } catch (IOException e) {
076                        e.printStackTrace();
077                } catch (ClassNotFoundException e) {
078                        e.printStackTrace();
079                }
080      }
081      
082    @Override
083    public void map(WritableComparable key, Writable value, Context context
084                    ) throws IOException, InterruptedException {
085        
086        logger.debug("key in map() of Mapper4DataTransform:" + key);
087        logger.debug("value in map() of Mapper4DataTransform:" + value);        
088        writer.append(key, value);
089    }
090    
091    @Override
092    protected void cleanup(Context context
093            ) throws IOException, InterruptedException {
094        if (writer != null)
095                IOUtils.closeStream(writer);
096    }
097    
098  }