View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.giraph.io;
20  
21  import java.io.IOException;
22  
23  import org.apache.giraph.conf.DefaultImmutableClassesGiraphConfigurable;
24  import org.apache.hadoop.mapreduce.JobContext;
25  import org.apache.hadoop.mapreduce.OutputCommitter;
26  
27  import org.apache.hadoop.io.Writable;
28  import org.apache.hadoop.io.WritableComparable;
29  import org.apache.hadoop.mapreduce.TaskAttemptContext;
30  
31  /**
32   * Implement to output the graph after the computation.  It is modeled
33   * directly after the Hadoop OutputFormat.  The
34   * ImmutableClassesGiraphConfiguration is available to implementations.
35   *
36   * It's guaranteed that whatever parameters are set in the configuration are
37   * also going to be available in all method arguments related to this output
38   * format (context in createVertexWriter, checkOutputSpecs and
39   * getOutputCommitter; methods invoked on VertexWriter and OutputCommitter).
40   * So if backing output format relies on some parameters from configuration,
41   * you can safely set them for example in
42   * {@link #setConf(org.apache.giraph.conf.ImmutableClassesGiraphConfiguration)}.
43   *
44   * @param <I> Vertex index value
45   * @param <V> Vertex value
46   * @param <E> Edge value
47   */
48  @SuppressWarnings("rawtypes")
49  public abstract class VertexOutputFormat<
50      I extends WritableComparable, V extends Writable,
51      E extends Writable> extends
52      DefaultImmutableClassesGiraphConfigurable<I, V, E> {
53    /**
54     * Create a vertex writer for a given split. The framework will call
55     * {@link VertexWriter#initialize(TaskAttemptContext)} before
56     * the split is used.
57     *
58     * @param context the information about the task
59     * @return a new vertex writer
60     * @throws IOException may be thrown by implementations
61     * @throws InterruptedException may be thrown by implementations
62     */
63    public abstract VertexWriter<I, V, E> createVertexWriter(
64      TaskAttemptContext context) throws IOException, InterruptedException;
65  
66    /**
67     * Check for validity of the output-specification for the job.
68     * (Copied from Hadoop OutputFormat)
69     *
70     * <p>This validates the output specification when a job is submitted.
71     * Typically checks that the output does not already exist, throwing an
72     * exception when it does, so that output is not overwritten.</p>
73     *
74     * @param context information about the job
75     * @throws IOException when output should not be attempted
76     * @throws InterruptedException may be thrown by implementations
77     */
78    public abstract void checkOutputSpecs(JobContext context)
79      throws IOException, InterruptedException;
80  
81    /**
82     * Get the output committer for this output format. This is responsible
83     * for ensuring the output is committed correctly.
84     * (Copied from Hadoop OutputFormat)
85     *
86     * @param context the task context
87     * @return an output committer
88     * @throws IOException may be thrown by implementations
89     * @throws InterruptedException may be thrown by implementations
90     */
91    public abstract OutputCommitter getOutputCommitter(
92      TaskAttemptContext context) throws IOException, InterruptedException;
93  }