001/* 002 * Copyright (c) 2012-2013 The Regents of the University of California. 003 * All rights reserved. 004 * 005 * '$Author: crawl $' 006 * '$Date: 2015-09-04 16:53:05 +0000 (Fri, 04 Sep 2015) $' 007 * '$Revision: 33862 $' 008 * 009 * Permission is hereby granted, without written agreement and without 010 * license or royalty fees, to use, copy, modify, and distribute this 011 * software and its documentation for any purpose, provided that the above 012 * copyright notice and the following two paragraphs appear in all copies 013 * of this software. 014 * 015 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY 016 * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 017 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 018 * THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 019 * SUCH DAMAGE. 020 * 021 * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, 022 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 023 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE 024 * PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF 025 * CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, 026 * ENHANCEMENTS, OR MODIFICATIONS. 027 * 028 */ 029 030package org.kepler.hadoop.mapreduce; 031 032import java.io.IOException; 033import java.net.InetAddress; 034 035import org.apache.hadoop.fs.Path; 036import org.apache.hadoop.io.IOUtils; 037import org.apache.hadoop.io.SequenceFile; 038import org.apache.hadoop.io.Writable; 039import org.apache.hadoop.io.WritableComparable; 040import org.apache.hadoop.mapreduce.Mapper; 041import org.apache.log4j.Logger; 042import org.kepler.ddp.Utilities; 043 044 045////////////////////////////////////////////////////////////////////////// 046////Mapper4DataTransform 047 048/** 049* Mapper4DataTransform is a subclass of the Mapper to read data using a certain 050* input format and save it in SequenceFile format. It is mainly a pre-processing 051* for DualInput patterns. 052* 053* @author Jianwu Wang (jianwu@sdsc.edu) 054* @version $Id: Mapper4DataTransform.java 33862 2015-09-04 16:53:05Z crawl $ 055*/ 056public class Mapper4DataTransform extends 057 Mapper<WritableComparable, Writable, WritableComparable, Writable> { 058 059 static Logger logger = Logger.getLogger(Mapper4DataTransform.class.getName()); 060 061 SequenceFile.Writer writer = null; 062 @Override 063 public void setup(Context context) { 064 try { 065 String strPath = context.getConfiguration().get(Utilities.CONFIGURATION_KEPLER_DISTRIBUTED_CACHE_PATH); 066 final String keyClass = context.getConfiguration().get(Utilities.CONFIGURATION_KEPLER_JOB_KEY_CLASS); 067 final String valueClass = context.getConfiguration().get(Utilities.CONFIGURATION_KEPLER_JOB_VALUE_CLASS); 068 final String ipAddress = InetAddress.getLocalHost().getHostAddress(); 069 //in distributed environment, each slave node should have its own file to write in distributed cache. 070 strPath = strPath + "/" + ipAddress + "-" + context.getTaskAttemptID(); 071 logger.debug(Utilities.CONFIGURATION_KEPLER_DISTRIBUTED_CACHE_PATH + " in Mapper4DataTransform : " + strPath); 072 logger.debug(Utilities.CONFIGURATION_KEPLER_JOB_KEY_CLASS + " in Mapper4DataTransform : " + keyClass); 073 logger.debug(Utilities.CONFIGURATION_KEPLER_JOB_VALUE_CLASS + " in Mapper4DataTransform : " + valueClass); 074 writer = SequenceFile.createWriter(context.getConfiguration(), SequenceFile.Writer.file(new Path(strPath)), SequenceFile.Writer.keyClass(Class.forName(keyClass)), SequenceFile.Writer.valueClass(Class.forName(valueClass))); 075 } catch (IOException e) { 076 e.printStackTrace(); 077 } catch (ClassNotFoundException e) { 078 e.printStackTrace(); 079 } 080 } 081 082 @Override 083 public void map(WritableComparable key, Writable value, Context context 084 ) throws IOException, InterruptedException { 085 086 logger.debug("key in map() of Mapper4DataTransform:" + key); 087 logger.debug("value in map() of Mapper4DataTransform:" + value); 088 writer.append(key, value); 089 } 090 091 @Override 092 protected void cleanup(Context context 093 ) throws IOException, InterruptedException { 094 if (writer != null) 095 IOUtils.closeStream(writer); 096 } 097 098 }