This project has retired. For details please refer to its Attic page.
MultiVertexInputFormat xref
View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.giraph.io.formats.multi;
20  
21  import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
22  import org.apache.giraph.io.VertexInputFormat;
23  import org.apache.giraph.io.VertexReader;
24  import org.apache.giraph.io.internal.WrappedVertexReader;
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.io.Writable;
27  import org.apache.hadoop.io.WritableComparable;
28  import org.apache.hadoop.mapreduce.InputSplit;
29  import org.apache.hadoop.mapreduce.JobContext;
30  import org.apache.hadoop.mapreduce.TaskAttemptContext;
31  
32  import java.io.DataInput;
33  import java.io.DataOutput;
34  import java.io.IOException;
35  import java.util.List;
36  
37  /**
38   * Vertex input format which wraps several vertex input formats.
39   * Provides the way to read data from multiple sources,
40   * using several different input formats.
41   *
42   * @param <I> Vertex id
43   * @param <V> Vertex data
44   * @param <E> Edge data
45   */
46  public class MultiVertexInputFormat<I extends WritableComparable,
47      V extends Writable, E extends Writable> extends VertexInputFormat<I, V, E> {
48    /** Vertex input formats */
49    private List<VertexInputFormat<I, V, E>> vertexInputFormats;
50  
51    @Override public void checkInputSpecs(Configuration conf) {
52      for (VertexInputFormat vertexInputFormat : vertexInputFormats) {
53        vertexInputFormat.checkInputSpecs(conf);
54      }
55    }
56  
57    @Override
58    public void setConf(
59        ImmutableClassesGiraphConfiguration<I, V, E> conf) {
60      super.setConf(conf);
61      vertexInputFormats =
62          VertexInputFormatDescription.createVertexInputFormats(getConf());
63      if (vertexInputFormats.isEmpty()) {
64        throw new IllegalStateException("setConf: Using MultiVertexInputFormat " +
65            "without specifying vertex inputs");
66      }
67    }
68  
69    @Override
70    public VertexReader<I, V, E> createVertexReader(InputSplit inputSplit,
71        TaskAttemptContext context) throws IOException {
72      if (inputSplit instanceof InputSplitWithInputFormatIndex) {
73        // When multithreaded input is used we need to make sure other threads
74        // don't change context's configuration while we use it
75        synchronized (context) {
76          InputSplitWithInputFormatIndex split =
77              (InputSplitWithInputFormatIndex) inputSplit;
78          VertexInputFormat<I, V, E> vertexInputFormat =
79              vertexInputFormats.get(split.getInputFormatIndex());
80          VertexReader<I, V, E> vertexReader =
81              vertexInputFormat.createVertexReader(split.getSplit(), context);
82          return new WrappedVertexReader<I, V, E>(
83              vertexReader, vertexInputFormat.getConf()) {
84            @Override
85            public void initialize(InputSplit inputSplit,
86                TaskAttemptContext context) throws IOException,
87                InterruptedException {
88              // When multithreaded input is used we need to make sure other
89              // threads don't change context's configuration while we use it
90              synchronized (context) {
91                super.initialize(inputSplit, context);
92              }
93            }
94          };
95        }
96      } else {
97        throw new IllegalStateException("createVertexReader: Got InputSplit " +
98            "which was not created by this class: " +
99            inputSplit.getClass().getName());
100     }
101   }
102 
103   @Override
104   public List<InputSplit> getSplits(JobContext context,
105       int minSplitCountHint) throws IOException, InterruptedException {
106     // When multithreaded input is used we need to make sure other threads don't
107     // change context's configuration while we use it
108     synchronized (context) {
109       return MultiInputUtils.getSplits(
110           context, minSplitCountHint, vertexInputFormats);
111     }
112   }
113 
114   @Override
115   public void writeInputSplit(InputSplit inputSplit,
116       DataOutput dataOutput) throws IOException {
117     MultiInputUtils.writeInputSplit(inputSplit, dataOutput, vertexInputFormats);
118   }
119 
120   @Override
121   public InputSplit readInputSplit(
122       DataInput dataInput) throws IOException, ClassNotFoundException {
123     return MultiInputUtils.readInputSplit(dataInput, vertexInputFormats);
124   }
125 }