This project has retired. For details please refer to its Attic page.
GiraphRunner xref
View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.giraph;
19  
20  import org.apache.commons.cli.CommandLine;
21  import org.apache.giraph.io.formats.FileOutputFormatUtil;
22  import org.apache.giraph.utils.ConfigurationUtils;
23  import org.apache.giraph.conf.GiraphConfiguration;
24  import org.apache.giraph.job.GiraphJob;
25  /*if[PURE_YARN]
26  import org.apache.giraph.yarn.GiraphYarnClient;
27  end[PURE_YARN]*/
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.filecache.DistributedCache;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.util.Tool;
32  import org.apache.hadoop.util.ToolRunner;
33  import org.apache.log4j.Logger;
34  import java.net.URI;
35  
36  /**
37   * Helper class to run Giraph applications by specifying the actual class name
38   * to use (i.e. vertex, vertex input/output format, combiner, etc.).
39   *
40   * This is the default entry point for Giraph jobs running on any Hadoop
41   * cluster, MRv1 or v2, including Hadoop-specific configuration and setup.
42   */
43  public class GiraphRunner implements Tool {
44    static {
45      Configuration.addDefaultResource("giraph-site.xml");
46    }
47  
48    /** Class logger */
49    private static final Logger LOG = Logger.getLogger(GiraphRunner.class);
50    /** Writable conf */
51    private Configuration conf;
52  
53    @Override
54    public Configuration getConf() {
55      return conf;
56    }
57  
58    @Override
59    public void setConf(Configuration conf) {
60      this.conf = conf;
61    }
62  
63    @Override
64    /**
65     * Drives a job run configured for "Giraph on Hadoop MR cluster"
66     * @param args the command line arguments
67     * @return job run exit code
68     */
69    public int run(String[] args) throws Exception {
70      if (null == getConf()) { // for YARN profile
71        conf = new Configuration();
72      }
73      GiraphConfiguration giraphConf = new GiraphConfiguration(getConf());
74      CommandLine cmd = ConfigurationUtils.parseArgs(giraphConf, args);
75      if (null == cmd) {
76        return 0; // user requested help/info printout, don't run a job.
77      }
78  
79      // set up job for various platforms
80      final String vertexClassName = args[0];
81      final String jobName = "Giraph: " + vertexClassName;
82  /*if[PURE_YARN]
83      GiraphYarnClient job = new GiraphYarnClient(giraphConf, jobName);
84  else[PURE_YARN]*/
85      GiraphJob job = new GiraphJob(giraphConf, jobName);
86      prepareHadoopMRJob(job, cmd);
87  /*end[PURE_YARN]*/
88  
89      // run the job, collect results
90      if (LOG.isDebugEnabled()) {
91        LOG.debug("Attempting to run Vertex: " + vertexClassName);
92      }
93      boolean verbose = !cmd.hasOption('q');
94      return job.run(verbose) ? 0 : -1;
95    }
96  
97    /**
98     * Populate internal Hadoop Job (and Giraph IO Formats) with Hadoop-specific
99     * configuration/setup metadata, propagating exceptions to calling code.
100    * @param job the GiraphJob object to help populate Giraph IO Format data.
101    * @param cmd the CommandLine for parsing Hadoop MR-specific args.
102    */
103   private void prepareHadoopMRJob(final GiraphJob job, final CommandLine cmd)
104     throws Exception {
105     if (cmd.hasOption("vof") || cmd.hasOption("eof")) {
106       if (cmd.hasOption("op")) {
107         FileOutputFormatUtil.setOutputPath(job.getInternalJob(),
108           new Path(cmd.getOptionValue("op")));
109       }
110     }
111     if (cmd.hasOption("cf")) {
112       DistributedCache.addCacheFile(new URI(cmd.getOptionValue("cf")),
113           job.getConfiguration());
114     }
115   }
116 
117   /**
118    * Execute GiraphRunner.
119    *
120    * @param args Typically command line arguments.
121    * @throws Exception Any exceptions thrown.
122    */
123   public static void main(String[] args) throws Exception {
124     System.exit(ToolRunner.run(new GiraphRunner(), args));
125   }
126 }