복붙노트

[HADOOP] 하둡 M/R 보조 정렬(secondary sort)이 사용자의 성(last name) 기준으로 동작하지 않음

HADOOP

하둡 M/R 보조 정렬(secondary sort)이 사용자의 성(last name) 기준으로 동작하지 않음

저는 사용자의 성(lastName)을 기준으로 출력을 정렬하고 싶은데, 키로는 firstName을 사용하고 있습니다. 아래는 제가 사용 중인 클래스들인데, 출력이 lastName 기준으로 정렬되지 않습니다. 저는 하둡을 처음 접하는 사람이고, 이 코드는 여러 인터넷 자료의 도움을 받아 작성했습니다.

메인 클래스 :-

/**
 * Driver and Map/Reduce implementations for a secondary-sort job: input lines
 * of the form "&lt;firstName&gt; &lt;lastName&gt;" are keyed by a composite
 * {@link CustomKey}, partitioned and grouped by first name, and sorted by
 * last name within each group (see CustomKeyPartinioner, CustomGroupComparator
 * and CustomKeySorter).
 */
public class WordCount {

    /**
     * Mapper: parses each line as "&lt;firstName&gt; &lt;lastName&gt;" and emits
     * a composite CustomKey(firstName, lastName) with the last name as the value.
     */
    public static class Map extends Mapper<LongWritable, Text, CustomKey, Text> {

        public static final Log log = LogFactory.getLog(Map.class);

        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // TextInputFormat already delivers one line per call; tokenizing on
            // '\n' is kept as a defensive measure against multi-line values.
            StringTokenizer tokenizer = new StringTokenizer(value.toString(), "\n");

            while (tokenizer.hasMoreTokens()) {
                // Split on runs of whitespace so double spaces don't yield
                // empty fields.
                String[] vals = tokenizer.nextToken().trim().split("\\s+");

                // Skip malformed lines rather than killing the task with an
                // ArrayIndexOutOfBoundsException (original read vals[1]
                // unconditionally).
                if (vals.length < 2) {
                    log.warn("Skipping malformed input line: " + value);
                    continue;
                }

                context.write(new CustomKey(vals[0], vals[1]), new Text(vals[1]));
            }
        }
    }

    /**
     * Reducer: keys arrive grouped by first name and, within each group, the
     * values are ordered by last name (descending) thanks to the sort
     * comparator. Emits one (firstName, lastName) pair per value.
     */
    public static class Reduce extends Reducer<CustomKey, Text, Text, Text> {

        public static final Log log = LogFactory.getLog(Reduce.class);

        @Override
        public void reduce(CustomKey key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // The first name is constant within a group (grouping comparator
            // compares first names only), so build the output key once.
            Text firstName = new Text(key.getFirstName());
            for (Text val : values) {
                context.write(firstName, val);
            }
        }
    }

    /**
     * Configures and runs the job. Exits with a non-zero status when the job
     * fails (the original ignored waitForCompletion's result and always
     * exited 0).
     *
     * @param args args[0] = input path, args[1] = output path
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            System.exit(2);
        }

        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(WordCount.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setMapOutputKeyClass(CustomKey.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Secondary-sort plumbing: partition and group by first name, order
        // within a group by last name.
        job.setPartitionerClass(CustomKeyPartinioner.class);
        job.setGroupingComparatorClass(CustomGroupComparator.class);
        job.setSortComparatorClass(CustomKeySorter.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

Composite key class :- 

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Objects;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;

/**
 * Composite map-output key holding (firstName, lastName). Natural ordering is
 * by first name, then last name; the job's custom comparators refine this for
 * grouping and secondary sort. equals/hashCode are kept consistent with
 * compareTo, as the Comparable contract recommends.
 */
public class CustomKey implements WritableComparable<CustomKey> {

    public static final Log log = LogFactory.getLog(CustomKey.class);

    private String firstName, lastName;

    /** No-arg constructor required by Hadoop for deserialization. */
    public CustomKey() {
    }

    public CustomKey(String firstName, String lastName) {
        this.firstName = firstName;
        this.lastName = lastName;
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Field order must mirror write() exactly.
        this.firstName = WritableUtils.readString(in);
        this.lastName = WritableUtils.readString(in);
    }

    @Override
    public void write(DataOutput out) throws IOException {
        WritableUtils.writeString(out, firstName);
        WritableUtils.writeString(out, lastName);
    }

    public String getFirstName() {
        return firstName;
    }

    public void setFirstName(String firstName) {
        this.firstName = firstName;
    }

    public String getLastName() {
        return lastName;
    }

    public void setLastName(String lastName) {
        this.lastName = lastName;
    }

    /** Orders by first name ascending, then last name ascending. */
    @Override
    public int compareTo(CustomKey o) {
        int result = firstName.compareTo(o.firstName);
        return result != 0 ? result : lastName.compareTo(o.lastName);
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof CustomKey)) {
            return false;
        }
        CustomKey other = (CustomKey) obj;
        return Objects.equals(firstName, other.firstName)
                && Objects.equals(lastName, other.lastName);
    }

    @Override
    public int hashCode() {
        return Objects.hash(firstName, lastName);
    }

    @Override
    public String toString() {
        return firstName + "," + lastName;
    }

}


Partitioner Class :- 

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

/**
 * Routes each record by first name only, so that every composite key sharing
 * the same first name reaches the same reducer (a prerequisite for the
 * grouping comparator and secondary sort to work).
 */
public class CustomKeyPartinioner extends Partitioner<CustomKey, Text> {

    // Delegate so partition assignment matches Hadoop's hashing of Text keys.
    private final HashPartitioner<Text, Text> hashPartitioner = new HashPartitioner<Text, Text>();
    // Reused buffer; safe because a partitioner instance is used single-threaded.
    private final Text firstNameKey = new Text();

    @Override
    public int getPartition(CustomKey key, Text value, int numPartitions) {
        // Partitioning must be deterministic. The original caught Exception and
        // returned a RANDOM partition, which could send identical keys to
        // different reducers and silently corrupt the grouping; any unexpected
        // error should fail fast instead of being swallowed.
        firstNameKey.set(key.getFirstName());
        return hashPartitioner.getPartition(firstNameKey, value, numPartitions);
    }

}


GroupComparator Class :- 

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Grouping comparator for the reduce phase: treats two composite keys as
 * equal when their first names match, so a single reduce() call receives all
 * values whose keys share the same first name.
 */
public class CustomGroupComparator extends WritableComparator {

    protected CustomGroupComparator() {
        // true => instantiate CustomKey objects so compare() gets real keys.
        super(CustomKey.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        String left = ((CustomKey) a).getFirstName();
        String right = ((CustomKey) b).getFirstName();
        return left.compareTo(right);
    }
}


Custom Key Sorter Class :- 

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

/**
 * Shuffle-phase sort comparator: orders composite keys by first name
 * ascending and, within equal first names, by last name DESCENDING — this is
 * what produces the secondary sort on the user's last name inside each
 * reduce group.
 */
public class CustomKeySorter extends WritableComparator {

    public static final Log log = LogFactory.getLog(CustomKeySorter.class);

    protected CustomKeySorter() {
        super(CustomKey.class, true);
    }

    @Override
    public int compare(WritableComparable w1, WritableComparable w2) {
        CustomKey key1 = (CustomKey) w1;
        CustomKey key2 = (CustomKey) w2;

        int byFirstName = key1.getFirstName().compareTo(key2.getFirstName());
        if (byFirstName != 0) {
            return byFirstName;
        }

        // Descending last-name order. Swap the operands rather than negating
        // the result: negating a compareTo() value is unsafe because
        // -Integer.MIN_VALUE overflows back to Integer.MIN_VALUE.
        return key2.getLastName().compareTo(key1.getLastName());
    }
}

해결법

    from https://stackoverflow.com/questions/34505078/hadoop-m-r-secondary-sort-not-working-bases-on-last-name-of-the-user by cc-by-sa and MIT license