View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.giraph.conf;
19  
20  import org.apache.giraph.aggregators.AggregatorWriter;
21  import org.apache.giraph.aggregators.TextAggregatorWriter;
22  import org.apache.giraph.bsp.BspOutputFormat;
23  import org.apache.giraph.bsp.checkpoints.CheckpointSupportedChecker;
24  import org.apache.giraph.bsp.checkpoints.DefaultCheckpointSupportedChecker;
25  import org.apache.giraph.combiner.MessageCombiner;
26  import org.apache.giraph.comm.messages.InMemoryMessageStoreFactory;
27  import org.apache.giraph.comm.messages.MessageEncodeAndStoreType;
28  import org.apache.giraph.comm.messages.MessageStoreFactory;
29  import org.apache.giraph.edge.ByteArrayEdges;
30  import org.apache.giraph.edge.EdgeStoreFactory;
31  import org.apache.giraph.edge.InMemoryEdgeStoreFactory;
32  import org.apache.giraph.edge.OutEdges;
33  import org.apache.giraph.factories.ComputationFactory;
34  import org.apache.giraph.factories.DefaultComputationFactory;
35  import org.apache.giraph.factories.DefaultEdgeValueFactory;
36  import org.apache.giraph.factories.DefaultMessageValueFactory;
37  import org.apache.giraph.factories.DefaultVertexIdFactory;
38  import org.apache.giraph.factories.DefaultVertexValueFactory;
39  import org.apache.giraph.factories.EdgeValueFactory;
40  import org.apache.giraph.factories.MessageValueFactory;
41  import org.apache.giraph.factories.VertexIdFactory;
42  import org.apache.giraph.factories.VertexValueFactory;
43  import org.apache.giraph.graph.Computation;
44  import org.apache.giraph.graph.DefaultVertex;
45  import org.apache.giraph.graph.DefaultVertexResolver;
46  import org.apache.giraph.graph.DefaultVertexValueCombiner;
47  import org.apache.giraph.graph.Language;
48  import org.apache.giraph.graph.MapperObserver;
49  import org.apache.giraph.graph.Vertex;
50  import org.apache.giraph.graph.VertexResolver;
51  import org.apache.giraph.graph.VertexValueCombiner;
52  import org.apache.giraph.io.EdgeInputFormat;
53  import org.apache.giraph.io.EdgeOutputFormat;
54  import org.apache.giraph.io.MappingInputFormat;
55  import org.apache.giraph.io.VertexInputFormat;
56  import org.apache.giraph.io.VertexOutputFormat;
57  import org.apache.giraph.io.filters.DefaultEdgeInputFilter;
58  import org.apache.giraph.io.filters.DefaultVertexInputFilter;
59  import org.apache.giraph.io.filters.EdgeInputFilter;
60  import org.apache.giraph.io.filters.VertexInputFilter;
61  import org.apache.giraph.job.DefaultGiraphJobRetryChecker;
62  import org.apache.giraph.job.DefaultJobObserver;
63  import org.apache.giraph.job.DefaultJobProgressTrackerService;
64  import org.apache.giraph.job.GiraphJobObserver;
65  import org.apache.giraph.job.GiraphJobRetryChecker;
66  import org.apache.giraph.job.HaltApplicationUtils;
67  import org.apache.giraph.job.JobProgressTrackerService;
68  import org.apache.giraph.mapping.MappingStore;
69  import org.apache.giraph.mapping.MappingStoreOps;
70  import org.apache.giraph.mapping.translate.TranslateEdge;
71  import org.apache.giraph.master.DefaultMasterCompute;
72  import org.apache.giraph.master.MasterCompute;
73  import org.apache.giraph.master.MasterObserver;
74  import org.apache.giraph.ooc.persistence.OutOfCoreDataAccessor;
75  import org.apache.giraph.ooc.persistence.LocalDiskDataAccessor;
76  import org.apache.giraph.ooc.policy.OutOfCoreOracle;
77  import org.apache.giraph.ooc.policy.ThresholdBasedOracle;
78  import org.apache.giraph.partition.GraphPartitionerFactory;
79  import org.apache.giraph.partition.HashPartitionerFactory;
80  import org.apache.giraph.partition.Partition;
81  import org.apache.giraph.partition.SimplePartition;
82  import org.apache.giraph.worker.DefaultWorkerContext;
83  import org.apache.giraph.worker.WorkerContext;
84  import org.apache.giraph.worker.WorkerObserver;
85  import org.apache.hadoop.io.Writable;
86  import org.apache.hadoop.io.WritableComparable;
87  import org.apache.hadoop.mapreduce.OutputFormat;
88  
89  import static java.util.concurrent.TimeUnit.MINUTES;
90  import static java.util.concurrent.TimeUnit.SECONDS;
91  
92  /**
93   * Constants used all over Giraph for configuration.
94   */
95  // CHECKSTYLE: stop InterfaceIsTypeCheck
96  public interface GiraphConstants {
97    /** 1KB in bytes */
98    int ONE_KB = 1024;
99    /** 1MB in bytes */
100   int ONE_MB = 1024 * 1024;
101 
102   /** Mapping related information */
103   ClassConfOption<MappingStore> MAPPING_STORE_CLASS =
104       ClassConfOption.create("giraph.mappingStoreClass", null,
105           MappingStore.class, "MappingStore Class");
106 
107   /** Class to use for performing read operations on mapping store */
108   ClassConfOption<MappingStoreOps> MAPPING_STORE_OPS_CLASS =
109       ClassConfOption.create("giraph.mappingStoreOpsClass", null,
110           MappingStoreOps.class, "MappingStoreOps class");
111 
112   /** Upper value of LongByteMappingStore */
113   IntConfOption LB_MAPPINGSTORE_UPPER =
114       new IntConfOption("giraph.lbMappingStoreUpper", -1,
115           "'upper' value used by lbmappingstore");
116   /** Lower value of LongByteMappingStore */
117   IntConfOption LB_MAPPINGSTORE_LOWER =
118       new IntConfOption("giraph.lbMappingStoreLower", -1,
119           "'lower' value used by lbMappingstore");
120   /** Class used to conduct expensive edge translation during vertex input */
121   ClassConfOption<TranslateEdge> EDGE_TRANSLATION_CLASS =
122       ClassConfOption.create("giraph.edgeTranslationClass", null,
123           TranslateEdge.class, "Class used to conduct expensive edge " +
124               "translation during vertex input phase");
125 
126   /** Computation class - required */
127   ClassConfOption<Computation> COMPUTATION_CLASS =
128       ClassConfOption.create("giraph.computationClass", null,
129           Computation.class, "Computation class - required");
130   /** Computation factory class - optional */
131   ClassConfOption<ComputationFactory> COMPUTATION_FACTORY_CLASS =
132       ClassConfOption.create("giraph.computation.factory.class",
133           DefaultComputationFactory.class, ComputationFactory.class,
134           "Computation factory class - optional");
135 
136   /** TypesHolder, used if Computation not set - optional */
137   ClassConfOption<TypesHolder> TYPES_HOLDER_CLASS =
138       ClassConfOption.create("giraph.typesHolder", null,
139           TypesHolder.class,
140           "TypesHolder, used if Computation not set - optional");
141 
142   /** Edge Store Factory */
143   ClassConfOption<EdgeStoreFactory> EDGE_STORE_FACTORY_CLASS =
144       ClassConfOption.create("giraph.edgeStoreFactoryClass",
145           InMemoryEdgeStoreFactory.class,
146           EdgeStoreFactory.class,
147           "Edge Store Factory class to use for creating edgeStore");
148 
149   /** Message Store Factory */
150   ClassConfOption<MessageStoreFactory> MESSAGE_STORE_FACTORY_CLASS =
151       ClassConfOption.create("giraph.messageStoreFactoryClass",
152           InMemoryMessageStoreFactory.class,
153           MessageStoreFactory.class,
154           "Message Store Factory Class that is to be used");
155 
156   /** Language user's graph types are implemented in */
157   PerGraphTypeEnumConfOption<Language> GRAPH_TYPE_LANGUAGES =
158       PerGraphTypeEnumConfOption.create("giraph.types.language",
159           Language.class, Language.JAVA,
160           "Language user graph types (IVEMM) are implemented in");
161 
162   /** Whether user graph types need wrappers */
163   PerGraphTypeBooleanConfOption GRAPH_TYPES_NEEDS_WRAPPERS =
164       new PerGraphTypeBooleanConfOption("giraph.jython.type.wrappers",
165           false, "Whether user graph types (IVEMM) need Jython wrappers");
166 
167   /** Vertex id factory class - optional */
168   ClassConfOption<VertexIdFactory> VERTEX_ID_FACTORY_CLASS =
169       ClassConfOption.create("giraph.vertexIdFactoryClass",
170           DefaultVertexIdFactory.class, VertexIdFactory.class,
171           "Vertex ID factory class - optional");
172   /** Vertex value factory class - optional */
173   ClassConfOption<VertexValueFactory> VERTEX_VALUE_FACTORY_CLASS =
174       ClassConfOption.create("giraph.vertexValueFactoryClass",
175           DefaultVertexValueFactory.class, VertexValueFactory.class,
176           "Vertex value factory class - optional");
177   /** Edge value factory class - optional */
178   ClassConfOption<EdgeValueFactory> EDGE_VALUE_FACTORY_CLASS =
179       ClassConfOption.create("giraph.edgeValueFactoryClass",
180           DefaultEdgeValueFactory.class, EdgeValueFactory.class,
181           "Edge value factory class - optional");
182   /** Outgoing message value factory class - optional */
183   ClassConfOption<MessageValueFactory>
184   OUTGOING_MESSAGE_VALUE_FACTORY_CLASS =
185       ClassConfOption.create("giraph.outgoingMessageValueFactoryClass",
186           DefaultMessageValueFactory.class, MessageValueFactory.class,
187           "Outgoing message value factory class - optional");
188 
189   /** Vertex edges class - optional */
190   ClassConfOption<OutEdges> VERTEX_EDGES_CLASS =
191       ClassConfOption.create("giraph.outEdgesClass", ByteArrayEdges.class,
192           OutEdges.class, "Vertex edges class - optional");
193   /** Vertex edges class to be used during edge input only - optional */
194   ClassConfOption<OutEdges> INPUT_VERTEX_EDGES_CLASS =
195       ClassConfOption.create("giraph.inputOutEdgesClass",
196           ByteArrayEdges.class, OutEdges.class,
197           "Vertex edges class to be used during edge input only - optional");
198 
199   /** Class for Master - optional */
200   ClassConfOption<MasterCompute> MASTER_COMPUTE_CLASS =
201       ClassConfOption.create("giraph.masterComputeClass",
202           DefaultMasterCompute.class, MasterCompute.class,
203           "Class for Master - optional");
204   /** Classes for Master Observer - optional */
205   ClassConfOption<MasterObserver> MASTER_OBSERVER_CLASSES =
206       ClassConfOption.create("giraph.master.observers",
207           null, MasterObserver.class, "Classes for Master Observer - optional");
208   /** Classes for Worker Observer - optional */
209   ClassConfOption<WorkerObserver> WORKER_OBSERVER_CLASSES =
210       ClassConfOption.create("giraph.worker.observers", null,
211           WorkerObserver.class, "Classes for Worker Observer - optional");
212   /** Classes for Mapper Observer - optional */
213   ClassConfOption<MapperObserver> MAPPER_OBSERVER_CLASSES =
214       ClassConfOption.create("giraph.mapper.observers", null,
215           MapperObserver.class, "Classes for Mapper Observer - optional");
216   /** Message combiner class - optional */
217   ClassConfOption<MessageCombiner> MESSAGE_COMBINER_CLASS =
218       ClassConfOption.create("giraph.messageCombinerClass", null,
219           MessageCombiner.class, "Message combiner class - optional");
220   /** Vertex resolver class - optional */
221   ClassConfOption<VertexResolver> VERTEX_RESOLVER_CLASS =
222       ClassConfOption.create("giraph.vertexResolverClass",
223           DefaultVertexResolver.class, VertexResolver.class,
224           "Vertex resolver class - optional");
225   /** Vertex value combiner class - optional */
226   ClassConfOption<VertexValueCombiner> VERTEX_VALUE_COMBINER_CLASS =
227       ClassConfOption.create("giraph.vertexValueCombinerClass",
228           DefaultVertexValueCombiner.class, VertexValueCombiner.class,
229           "Vertex value combiner class - optional");
230 
231   /** Which language computation is implemented in */
232   EnumConfOption<Language> COMPUTATION_LANGUAGE =
233       EnumConfOption.create("giraph.computation.language",
234           Language.class, Language.JAVA,
235           "Which language computation is implemented in");
236 
237   /**
238    * Option of whether to create vertexes that were not existent before but
239    * received messages
240    */
241   BooleanConfOption RESOLVER_CREATE_VERTEX_ON_MSGS =
242       new BooleanConfOption("giraph.vertex.resolver.create.on.msgs", true,
243           "Option of whether to create vertexes that were not existent " +
244           "before but received messages");
245   /** Graph partitioner factory class - optional */
246   ClassConfOption<GraphPartitionerFactory> GRAPH_PARTITIONER_FACTORY_CLASS =
247       ClassConfOption.create("giraph.graphPartitionerFactoryClass",
248           HashPartitionerFactory.class, GraphPartitionerFactory.class,
249           "Graph partitioner factory class - optional");
250 
251   /** Observer class to watch over job status - optional */
252   ClassConfOption<GiraphJobObserver> JOB_OBSERVER_CLASS =
253       ClassConfOption.create("giraph.jobObserverClass",
254           DefaultJobObserver.class, GiraphJobObserver.class,
255           "Observer class to watch over job status - optional");
256 
257   /** Class which decides whether a failed job should be retried - optional */
258   ClassConfOption<GiraphJobRetryChecker> JOB_RETRY_CHECKER_CLASS =
259       ClassConfOption.create("giraph.jobRetryCheckerClass",
260           DefaultGiraphJobRetryChecker.class, GiraphJobRetryChecker.class,
261           "Class which decides whether a failed job should be retried - " +
262               "optional");
263 
264   /**
265    * Maximum allowed time for job to run after getting all resources before it
266    * will be killed, in milliseconds (-1 if it has no limit)
267    */
268   LongConfOption MAX_ALLOWED_JOB_TIME_MS =
269       new LongConfOption("giraph.maxAllowedJobTimeMilliseconds", -1,
270           "Maximum allowed time for job to run after getting all resources " +
271               "before it will be killed, in milliseconds " +
272               "(-1 if it has no limit)");
273 
274   // At least one of the input format classes is required.
275   /** VertexInputFormat class */
276   ClassConfOption<VertexInputFormat> VERTEX_INPUT_FORMAT_CLASS =
277       ClassConfOption.create("giraph.vertexInputFormatClass", null,
278           VertexInputFormat.class, "VertexInputFormat class (at least " +
279           "one of the input format classes is required)");
280   /** EdgeInputFormat class */
281   ClassConfOption<EdgeInputFormat> EDGE_INPUT_FORMAT_CLASS =
282       ClassConfOption.create("giraph.edgeInputFormatClass", null,
283           EdgeInputFormat.class, "EdgeInputFormat class");
284   /** MappingInputFormat class */
285   ClassConfOption<MappingInputFormat> MAPPING_INPUT_FORMAT_CLASS =
286       ClassConfOption.create("giraph.mappingInputFormatClass", null,
287           MappingInputFormat.class, "MappingInputFormat class");
288 
289   /** EdgeInputFilter class */
290   ClassConfOption<EdgeInputFilter> EDGE_INPUT_FILTER_CLASS =
291       ClassConfOption.create("giraph.edgeInputFilterClass",
292           DefaultEdgeInputFilter.class, EdgeInputFilter.class,
293           "EdgeInputFilter class");
294   /** VertexInputFilter class */
295   ClassConfOption<VertexInputFilter> VERTEX_INPUT_FILTER_CLASS =
296       ClassConfOption.create("giraph.vertexInputFilterClass",
297           DefaultVertexInputFilter.class, VertexInputFilter.class,
298           "VertexInputFilter class");
299   /** Vertex class */
300   ClassConfOption<Vertex> VERTEX_CLASS =
301       ClassConfOption.create("giraph.vertexClass",
302           DefaultVertex.class, Vertex.class,
303           "Vertex class");
304   /** VertexOutputFormat class */
305   ClassConfOption<VertexOutputFormat> VERTEX_OUTPUT_FORMAT_CLASS =
306       ClassConfOption.create("giraph.vertexOutputFormatClass", null,
307           VertexOutputFormat.class, "VertexOutputFormat class");
308   /** VertexOutputFormat sub-directory */
309   StrConfOption VERTEX_OUTPUT_FORMAT_SUBDIR =
310     new StrConfOption("giraph.vertex.output.subdir", "",
311                       "VertexOutputFormat sub-directory");
312   /** EdgeOutputFormat class */
313   ClassConfOption<EdgeOutputFormat> EDGE_OUTPUT_FORMAT_CLASS =
314       ClassConfOption.create("giraph.edgeOutputFormatClass", null,
315           EdgeOutputFormat.class, "EdgeOutputFormat class");
316   /** EdgeOutputFormat sub-directory */
317   StrConfOption EDGE_OUTPUT_FORMAT_SUBDIR =
318     new StrConfOption("giraph.edge.output.subdir", "",
319                       "EdgeOutputFormat sub-directory");
320 
321   /** GiraphTextOutputFormat Separator */
322   StrConfOption GIRAPH_TEXT_OUTPUT_FORMAT_SEPARATOR =
323     new StrConfOption("giraph.textoutputformat.separator", "\t",
324                       "GiraphTextOutputFormat Separator");
325   /** Reverse values in the output */
326   BooleanConfOption GIRAPH_TEXT_OUTPUT_FORMAT_REVERSE =
327       new BooleanConfOption("giraph.textoutputformat.reverse", false,
328                             "Reverse values in the output");
329 
330   /**
331    * If you use this option, instead of saving vertices at the end of the
332    * application, saveVertex will be called right after each vertex.compute()
333    * is called.
334    * NOTE: This feature doesn't work well with checkpointing - if you restart
335    * from a checkpoint you won't have any output from previous supersteps.
336    */
337   BooleanConfOption DO_OUTPUT_DURING_COMPUTATION =
338       new BooleanConfOption("giraph.doOutputDuringComputation", false,
339           "If you use this option, instead of saving vertices at the end of " +
340           "the application, saveVertex will be called right after each " +
341           "vertex.compute() is called. " +
342           "NOTE: This feature doesn't work well with checkpointing - if you " +
343           "restart from a checkpoint you won't have any output from previous " +
344           "supersteps.");
345   /**
346    * Vertex output format thread-safe - if your VertexOutputFormat allows
347    * several vertexWriters to be created and written to in parallel,
348    * you should set this to true.
349    */
350   BooleanConfOption VERTEX_OUTPUT_FORMAT_THREAD_SAFE =
351       new BooleanConfOption("giraph.vertexOutputFormatThreadSafe", false,
352           "Vertex output format thread-safe - if your VertexOutputFormat " +
353           "allows several vertexWriters to be created and written to in " +
354           "parallel, you should set this to true.");
355   /** Number of threads for writing output in the end of the application */
356   IntConfOption NUM_OUTPUT_THREADS =
357       new IntConfOption("giraph.numOutputThreads", 1,
358           "Number of threads for writing output in the end of the application");
359 
360   /** conf key for comma-separated list of jars to export to YARN workers */
361   StrConfOption GIRAPH_YARN_LIBJARS =
362     new StrConfOption("giraph.yarn.libjars", "",
363         "conf key for comma-separated list of jars to export to YARN workers");
364   /** Name of the XML file that will export our Configuration to YARN workers */
365   String GIRAPH_YARN_CONF_FILE = "giraph-conf.xml";
366   /** Giraph default heap size for all tasks when running on YARN profile */
367   int GIRAPH_YARN_TASK_HEAP_MB_DEFAULT = 1024;
368   /** Name of Giraph property for user-configurable heap memory per worker */
369   IntConfOption GIRAPH_YARN_TASK_HEAP_MB = new IntConfOption(
370     "giraph.yarn.task.heap.mb", GIRAPH_YARN_TASK_HEAP_MB_DEFAULT,
371     "Name of Giraph property for user-configurable heap memory per worker");
372   /** Default priority level in YARN for our task containers */
373   int GIRAPH_YARN_PRIORITY = 10;
374   /** Is this a pure YARN job (i.e. no MapReduce layer managing Giraph tasks) */
375   BooleanConfOption IS_PURE_YARN_JOB =
376     new BooleanConfOption("giraph.pure.yarn.job", false,
377         "Is this a pure YARN job (i.e. no MapReduce layer managing Giraph " +
378         "tasks)");
379 
380   /** Vertex index class */
381   ClassConfOption<WritableComparable> VERTEX_ID_CLASS =
382       ClassConfOption.create("giraph.vertexIdClass", null,
383           WritableComparable.class, "Vertex index class");
384   /** Vertex value class */
385   ClassConfOption<Writable> VERTEX_VALUE_CLASS =
386       ClassConfOption.create("giraph.vertexValueClass", null, Writable.class,
387           "Vertex value class");
388   /** Edge value class */
389   ClassConfOption<Writable> EDGE_VALUE_CLASS =
390       ClassConfOption.create("giraph.edgeValueClass", null, Writable.class,
391           "Edge value class");
392   /** Outgoing message value class */
393   ClassConfOption<Writable> OUTGOING_MESSAGE_VALUE_CLASS =
394       ClassConfOption.create("giraph.outgoingMessageValueClass", null,
395           Writable.class, "Outgoing message value class");
396   /** Worker context class */
397   ClassConfOption<WorkerContext> WORKER_CONTEXT_CLASS =
398       ClassConfOption.create("giraph.workerContextClass",
399           DefaultWorkerContext.class, WorkerContext.class,
400           "Worker context class");
401   /** AggregatorWriter class - optional */
402   ClassConfOption<AggregatorWriter> AGGREGATOR_WRITER_CLASS =
403       ClassConfOption.create("giraph.aggregatorWriterClass",
404           TextAggregatorWriter.class, AggregatorWriter.class,
405           "AggregatorWriter class - optional");
406 
407   /** Partition class - optional */
408   ClassConfOption<Partition> PARTITION_CLASS =
409       ClassConfOption.create("giraph.partitionClass", SimplePartition.class,
410           Partition.class, "Partition class - optional");
411 
412   /**
413    * Minimum number of simultaneous workers before this job can run (int)
414    */
415   String MIN_WORKERS = "giraph.minWorkers";
416   /**
417    * Maximum number of simultaneous worker tasks started by this job (int).
418    */
419   String MAX_WORKERS = "giraph.maxWorkers";
420 
421   /**
422    * Separate the workers and the master tasks.  This is required
423    * to support dynamic recovery. (boolean)
424    */
425   BooleanConfOption SPLIT_MASTER_WORKER =
426       new BooleanConfOption("giraph.SplitMasterWorker", true,
427           "Separate the workers and the master tasks.  This is required to " +
428           "support dynamic recovery. (boolean)");
429 
430   /** Indicates whether this job is run in an internal unit test */
431   BooleanConfOption LOCAL_TEST_MODE =
432       new BooleanConfOption("giraph.localTestMode", false,
433           "Indicates whether this job is run in an internal unit test");
434 
435   /** Override the Hadoop log level and set the desired log level. */
436   StrConfOption LOG_LEVEL = new StrConfOption("giraph.logLevel", "info",
437       "Override the Hadoop log level and set the desired log level.");
438 
439   /** Use thread level debugging? */
440   BooleanConfOption LOG_THREAD_LAYOUT =
441       new BooleanConfOption("giraph.logThreadLayout", false,
442           "Use thread level debugging?");
443 
444   /** Configuration key to enable jmap printing */
445   BooleanConfOption JMAP_ENABLE =
446       new BooleanConfOption("giraph.jmap.histo.enable", false,
447           "Configuration key to enable jmap printing");
448 
449   /** Configuration key for msec to sleep between calls */
450   IntConfOption JMAP_SLEEP_MILLIS =
451       new IntConfOption("giraph.jmap.histo.msec", SECONDS.toMillis(30),
452           "Configuration key for msec to sleep between calls");
453 
454   /** Configuration key for how many lines to print */
455   IntConfOption JMAP_PRINT_LINES =
456       new IntConfOption("giraph.jmap.histo.print_lines", 30,
457           "Configuration key for how many lines to print");
458 
459   /**
460    * Configuration key for printing live objects only
461    * This option will trigger Full GC for every jmap dump
462    * and so can significantly hinder performance.
463    */
464   BooleanConfOption JMAP_LIVE_ONLY =
465       new BooleanConfOption("giraph.jmap.histo.live", false,
466           "Only print live objects in jmap?");
467 
468   /**
469    * Option used by ReactiveJMapHistoDumper to check for an imminent
470    * OOM in worker or master process
471    */
472   IntConfOption MIN_FREE_MBS_ON_HEAP =
473       new IntConfOption("giraph.heap.minFreeMb", 128, "Option used by " +
474           "worker and master observers to check for imminent OOM exception");
475   /**
476    * Option can be used to enable reactively dumping jmap histo when
477    * OOM is imminent
478    */
479   BooleanConfOption REACTIVE_JMAP_ENABLE =
480       new BooleanConfOption("giraph.heap.enableReactiveJmapDumping", false,
481           "Option to enable dumping jmap histogram reactively based on " +
482               "free memory on heap");
483 
484   /**
485    * Minimum percent of the maximum number of workers that have responded
486    * in order to continue progressing. (float)
487    */
488   FloatConfOption MIN_PERCENT_RESPONDED =
489       new FloatConfOption("giraph.minPercentResponded", 100.0f,
490           "Minimum percent of the maximum number of workers that have " +
491           "responded in order to continue progressing. (float)");
492 
493   /** Enable the Metrics system */
494   BooleanConfOption METRICS_ENABLE =
495       new BooleanConfOption("giraph.metrics.enable", false,
496           "Enable the Metrics system");
497 
498   /** Directory in HDFS to write master metrics to, instead of stderr */
499   StrConfOption METRICS_DIRECTORY =
500       new StrConfOption("giraph.metrics.directory", "",
501           "Directory in HDFS to write master metrics to, instead of stderr");
502 
503   /**
504    *  ZooKeeper comma-separated list (if not set,
505    *  will start up ZooKeeper locally). Consider that after locally-starting
506    *  zookeeper, this parameter will update the configuration with the correct
507    *  configuration value.
508    */
509   StrConfOption ZOOKEEPER_LIST =
510       new StrConfOption("giraph.zkList", "",
511           "ZooKeeper comma-separated list (if not set, will start up " +
512           "ZooKeeper locally). Consider that after locally-starting " +
513           "zookeeper, this parameter will update the configuration with " +
514           "the correct configuration value.");
515 
516   /**
517    * Zookeeper List will always hold a value during the computation while
518    * this option provides information regarding whether the zookeeper was
519    * internally started or externally provided.
520    */
521   BooleanConfOption ZOOKEEPER_IS_EXTERNAL =
522     new BooleanConfOption("giraph.zkIsExternal", true,
523                           "Zookeeper List will always hold a value during " +
524                           "the computation while this option provides " +
525                           "information regarding whether the zookeeper was " +
526                           "internally started or externally provided.");
527 
528   /** ZooKeeper session millisecond timeout */
529   IntConfOption ZOOKEEPER_SESSION_TIMEOUT =
530       new IntConfOption("giraph.zkSessionMsecTimeout", MINUTES.toMillis(1),
531           "ZooKeeper session millisecond timeout");
532 
533   /** Polling interval to check for the ZooKeeper server data */
534   IntConfOption ZOOKEEPER_SERVERLIST_POLL_MSECS =
535       new IntConfOption("giraph.zkServerlistPollMsecs", SECONDS.toMillis(3),
536           "Polling interval to check for the ZooKeeper server data");
537 
538   /** ZooKeeper port to use */
539   IntConfOption ZOOKEEPER_SERVER_PORT =
540       new IntConfOption("giraph.zkServerPort", 22181, "ZooKeeper port to use");
541 
542   /** Local ZooKeeper directory to use */
543   String ZOOKEEPER_DIR = "giraph.zkDir";
544 
545   /** Max attempts for handling ZooKeeper connection loss */
546   IntConfOption ZOOKEEPER_OPS_MAX_ATTEMPTS =
547       new IntConfOption("giraph.zkOpsMaxAttempts", 3,
548           "Max attempts for handling ZooKeeper connection loss");
549 
550   /**
551    * Msecs to wait before retrying a failed ZooKeeper op due to connection loss.
552    */
553   IntConfOption ZOOKEEPER_OPS_RETRY_WAIT_MSECS =
554       new IntConfOption("giraph.zkOpsRetryWaitMsecs", SECONDS.toMillis(5),
555           "Msecs to wait before retrying a failed ZooKeeper op due to " +
556           "connection loss.");
557 
558   /** TCP backlog (defaults to number of workers) */
559   IntConfOption TCP_BACKLOG = new IntConfOption("giraph.tcpBacklog", 1,
560       "TCP backlog (defaults to number of workers)");
561 
562   /** Use netty pooled memory buffer allocator */
563   BooleanConfOption NETTY_USE_POOLED_ALLOCATOR = new BooleanConfOption(
564       "giraph.useNettyPooledAllocator", false, "Should netty use pooled " +
565       "memory allocator?");
566 
567   /** Use direct memory buffers in netty */
568   BooleanConfOption NETTY_USE_DIRECT_MEMORY = new BooleanConfOption(
569       "giraph.useNettyDirectMemory", false, "Should netty use direct " +
570       "memory buffers");
571 
572   /** How big to make the encoder buffer? */
573   IntConfOption NETTY_REQUEST_ENCODER_BUFFER_SIZE =
574       new IntConfOption("giraph.nettyRequestEncoderBufferSize", 32 * ONE_KB,
575           "How big to make the encoder buffer?");
576 
577   /** Netty client threads */
578   IntConfOption NETTY_CLIENT_THREADS =
579       new IntConfOption("giraph.nettyClientThreads", 4, "Netty client threads");
580 
581   /** Netty server threads */
582   IntConfOption NETTY_SERVER_THREADS =
583       new IntConfOption("giraph.nettyServerThreads", 16,
584           "Netty server threads");
585 
586   /** Use the execution handler in netty on the client? */
587   BooleanConfOption NETTY_CLIENT_USE_EXECUTION_HANDLER =
588       new BooleanConfOption("giraph.nettyClientUseExecutionHandler", true,
589           "Use the execution handler in netty on the client?");
590 
591   /** Netty client execution threads (execution handler) */
592   IntConfOption NETTY_CLIENT_EXECUTION_THREADS =
593       new IntConfOption("giraph.nettyClientExecutionThreads", 8,
594           "Netty client execution threads (execution handler)");
595 
596   /** Where to place the netty client execution handle? */
597   StrConfOption NETTY_CLIENT_EXECUTION_AFTER_HANDLER =
598       new StrConfOption("giraph.nettyClientExecutionAfterHandler",
599           "request-encoder",
600           "Where to place the netty client execution handle?");
601 
602   /** Use the execution handler in netty on the server? */
603   BooleanConfOption NETTY_SERVER_USE_EXECUTION_HANDLER =
604       new BooleanConfOption("giraph.nettyServerUseExecutionHandler", true,
605           "Use the execution handler in netty on the server?");
606 
607   /** Netty server execution threads (execution handler) */
608   IntConfOption NETTY_SERVER_EXECUTION_THREADS =
609       new IntConfOption("giraph.nettyServerExecutionThreads", 8,
610           "Netty server execution threads (execution handler)");
611 
612   /** Where to place the netty server execution handle? */
613   StrConfOption NETTY_SERVER_EXECUTION_AFTER_HANDLER =
614       new StrConfOption("giraph.nettyServerExecutionAfterHandler",
615           "requestFrameDecoder",
616           "Where to place the netty server execution handle?");
617 
618   /** Netty simulate a first request closed */
619   BooleanConfOption NETTY_SIMULATE_FIRST_REQUEST_CLOSED =
620       new BooleanConfOption("giraph.nettySimulateFirstRequestClosed", false,
621           "Netty simulate a first request closed");
622 
623   /** Netty simulate a first response failed */
624   BooleanConfOption NETTY_SIMULATE_FIRST_RESPONSE_FAILED =
625       new BooleanConfOption("giraph.nettySimulateFirstResponseFailed", false,
626           "Netty simulate a first response failed");
627 
628   /** Netty - set which compression to use */
629   StrConfOption NETTY_COMPRESSION_ALGORITHM =
630       new StrConfOption("giraph.nettyCompressionAlgorithm", "",
631           "Which compression algorithm to use in netty");
632 
633   /**
634    * Whether netty should pro-actively read requests and feed them to its
635    * processing pipeline
636    */
637   BooleanConfOption NETTY_AUTO_READ =
638       new BooleanConfOption("giraph.nettyAutoRead", true,
639           "Whether netty should pro-actively read requests and feed them to " +
640               "its processing pipeline");
641 
642   /** Max resolve address attempts */
643   IntConfOption MAX_RESOLVE_ADDRESS_ATTEMPTS =
644       new IntConfOption("giraph.maxResolveAddressAttempts", 5,
645           "Max resolve address attempts");
646 
647   /** Msecs to wait between waiting for all requests to finish */
648   IntConfOption WAITING_REQUEST_MSECS =
649       new IntConfOption("giraph.waitingRequestMsecs", SECONDS.toMillis(15),
650           "Msecs to wait between waiting for all requests to finish");
651 
652   /** Milliseconds to wait for an event before continuing */
653   IntConfOption EVENT_WAIT_MSECS =
654       new IntConfOption("giraph.eventWaitMsecs", SECONDS.toMillis(30),
655           "Milliseconds to wait for an event before continuing");
656 
657   /**
658    * Maximum milliseconds to wait before giving up trying to get the minimum
659    * number of workers before a superstep (int).
660    */
661   IntConfOption MAX_MASTER_SUPERSTEP_WAIT_MSECS =
662       new IntConfOption("giraph.maxMasterSuperstepWaitMsecs",
663           MINUTES.toMillis(10),
664           "Maximum milliseconds to wait before giving up trying to get the " +
665           "minimum number of workers before a superstep (int).");
666 
667   /** Milliseconds for a request to complete (or else resend) */
668   IntConfOption MAX_REQUEST_MILLISECONDS =
669       new IntConfOption("giraph.maxRequestMilliseconds", MINUTES.toMillis(10),
670           "Milliseconds for a request to complete (or else resend)");
671 
672   /** Netty max connection failures */
673   IntConfOption NETTY_MAX_CONNECTION_FAILURES =
674       new IntConfOption("giraph.nettyMaxConnectionFailures", 1000,
675           "Netty max connection failures");
676 
677   /** How long to wait before trying to reconnect failed connections */
678   IntConfOption WAIT_TIME_BETWEEN_CONNECTION_RETRIES_MS =
679       new IntConfOption("giraph.waitTimeBetweenConnectionRetriesMs", 500,
680           "How long to wait before trying to reconnect failed connections");
681 
682   /** Initial port to start using for the IPC communication */
683   IntConfOption IPC_INITIAL_PORT =
684       new IntConfOption("giraph.ipcInitialPort", 30000,
685           "Initial port to start using for the IPC communication");
686 
687   /** Maximum bind attempts for different IPC ports */
688   IntConfOption MAX_IPC_PORT_BIND_ATTEMPTS =
689       new IntConfOption("giraph.maxIpcPortBindAttempts", 20,
690           "Maximum bind attempts for different IPC ports");
691   /**
692    * Fail first IPC port binding attempt, simulate binding failure
693    * on real grid testing
694    */
695   BooleanConfOption FAIL_FIRST_IPC_PORT_BIND_ATTEMPT =
696       new BooleanConfOption("giraph.failFirstIpcPortBindAttempt", false,
697           "Fail first IPC port binding attempt, simulate binding failure " +
698           "on real grid testing");
699 
700   /** Client send buffer size */
701   IntConfOption CLIENT_SEND_BUFFER_SIZE =
702       new IntConfOption("giraph.clientSendBufferSize", 512 * ONE_KB,
703           "Client send buffer size");
704 
705   /** Client receive buffer size */
706   IntConfOption CLIENT_RECEIVE_BUFFER_SIZE =
707       new IntConfOption("giraph.clientReceiveBufferSize", 32 * ONE_KB,
708           "Client receive buffer size");
709 
710   /** Server send buffer size */
711   IntConfOption SERVER_SEND_BUFFER_SIZE =
712       new IntConfOption("giraph.serverSendBufferSize", 32 * ONE_KB,
713           "Server send buffer size");
714 
715   /** Server receive buffer size */
716   IntConfOption SERVER_RECEIVE_BUFFER_SIZE =
717       new IntConfOption("giraph.serverReceiveBufferSize", 512 * ONE_KB,
718           "Server receive buffer size");
719 
720   /** Maximum size of messages (in bytes) per peer before flush */
721   IntConfOption MAX_MSG_REQUEST_SIZE =
722       new IntConfOption("giraph.msgRequestSize", 512 * ONE_KB,
723           "Maximum size of messages (in bytes) per peer before flush");
724 
725   /**
726    * How much bigger than the average per partition size to make initial per
727    * partition buffers.
728    * If this value is A, message request size is M,
729    * and a worker has P partitions, then its initial partition buffer size
730    * will be (M / P) * (1 + A).
731    */
732   FloatConfOption ADDITIONAL_MSG_REQUEST_SIZE =
733       new FloatConfOption("giraph.additionalMsgRequestSize", 0.2f,
734           "How much bigger than the average per partition size to make " +
735           "initial per partition buffers. If this value is A, message " +
736           "request size is M, and a worker has P partitions, then its " +
737           "initial partition buffer size will be (M / P) * (1 + A).");
738 
739 
740   /** Warn if msg request size exceeds default size by this factor */
741   FloatConfOption REQUEST_SIZE_WARNING_THRESHOLD = new FloatConfOption(
742       "giraph.msgRequestWarningThreshold", 2.0f,
743       "If request sizes are bigger than the buffer size by this factor " +
744       "warnings are printed to the log and to the command line");
745 
746   /** Maximum size of vertices (in bytes) per peer before flush */
747   IntConfOption MAX_VERTEX_REQUEST_SIZE =
748       new IntConfOption("giraph.vertexRequestSize", 512 * ONE_KB,
749           "Maximum size of vertices (in bytes) per peer before flush");
750 
751   /**
752    * Additional size (expressed as a ratio) of each per-partition buffer on
753    * top of the average size for vertices.
754    */
755   FloatConfOption ADDITIONAL_VERTEX_REQUEST_SIZE =
756       new FloatConfOption("giraph.additionalVertexRequestSize", 0.2f,
757           "Additional size (expressed as a ratio) of each per-partition " +
758               "buffer on top of the average size.");
759 
760   /** Maximum size of edges (in bytes) per peer before flush */
761   IntConfOption MAX_EDGE_REQUEST_SIZE =
762       new IntConfOption("giraph.edgeRequestSize", 512 * ONE_KB,
763           "Maximum size of edges (in bytes) per peer before flush");
764 
765   /**
766    * Additional size (expressed as a ratio) of each per-partition buffer on
767    * top of the average size for edges.
768    */
769   FloatConfOption ADDITIONAL_EDGE_REQUEST_SIZE =
770       new FloatConfOption("giraph.additionalEdgeRequestSize", 0.2f,
771           "Additional size (expressed as a ratio) of each per-partition " +
772           "buffer on top of the average size.");
773 
774   /** Maximum number of mutations per partition before flush */
775   IntConfOption MAX_MUTATIONS_PER_REQUEST =
776       new IntConfOption("giraph.maxMutationsPerRequest", 100,
777           "Maximum number of mutations per partition before flush");
778 
779   /**
780    * Use message size encoding (typically better for complex objects,
781    * not meant for primitive wrapped messages)
782    */
783   BooleanConfOption USE_MESSAGE_SIZE_ENCODING =
784       new BooleanConfOption("giraph.useMessageSizeEncoding", false,
785           "Use message size encoding (typically better for complex objects, " +
786           "not meant for primitive wrapped messages)");
787 
788   /** Number of channels used per server */
789   IntConfOption CHANNELS_PER_SERVER =
790       new IntConfOption("giraph.channelsPerServer", 1,
791           "Number of channels used per server");
792 
793   /** Number of flush threads per peer */
794   String MSG_NUM_FLUSH_THREADS = "giraph.msgNumFlushThreads";
795 
796   /** Number of threads for vertex computation */
797   IntConfOption NUM_COMPUTE_THREADS =
798       new IntConfOption("giraph.numComputeThreads", 1,
799           "Number of threads for vertex computation");
800 
801   /** Number of threads for input split loading */
802   IntConfOption NUM_INPUT_THREADS =
803       new IntConfOption("giraph.numInputThreads", 1,
804           "Number of threads for input split loading");
805 
806   /** Minimum stragglers of the superstep before printing them out */
807   IntConfOption PARTITION_LONG_TAIL_MIN_PRINT =
808       new IntConfOption("giraph.partitionLongTailMinPrint", 1,
809           "Minimum stragglers of the superstep before printing them out");
810 
811   /** Use superstep counters? (boolean) */
812   BooleanConfOption USE_SUPERSTEP_COUNTERS =
813       new BooleanConfOption("giraph.useSuperstepCounters", true,
814           "Use superstep counters? (boolean)");
815 
816   /**
817    * Input split sample percent - Used only for sampling and testing, rather
818    * than an actual job.  The idea is that to test, you might only want a
819    * fraction of the actual input splits from your VertexInputFormat to
820    * load (values should be [0, 100]).
821    */
822   FloatConfOption INPUT_SPLIT_SAMPLE_PERCENT =
823       new FloatConfOption("giraph.inputSplitSamplePercent", 100f,
824           "Input split sample percent - Used only for sampling and testing, " +
825           "rather than an actual job.  The idea is that to test, you might " +
826           "only want a fraction of the actual input splits from your " +
827           "VertexInputFormat to load (values should be [0, 100]).");
828 
829   /**
830    * To limit outlier vertex input splits from producing too many vertices or
831    * to help with testing, the number of vertices loaded from an input split
832    * can be limited.  By default, everything is loaded.
833    */
834   LongConfOption INPUT_SPLIT_MAX_VERTICES =
835       new LongConfOption("giraph.InputSplitMaxVertices", -1,
836           "To limit outlier vertex input splits from producing too many " +
837               "vertices or to help with testing, the number of vertices " +
838               "loaded from an input split can be limited. By default, " +
839               "everything is loaded.");
840 
841   /**
842    * To limit outlier vertex input splits from producing too many vertices or
843    * to help with testing, the number of edges loaded from an input split
844    * can be limited.  By default, everything is loaded.
845    */
846   LongConfOption INPUT_SPLIT_MAX_EDGES =
847       new LongConfOption("giraph.InputSplitMaxEdges", -1,
848           "To limit outlier vertex input splits from producing too many " +
849               "vertices or to help with testing, the number of edges loaded " +
850               "from an input split can be limited. By default, everything is " +
851               "loaded.");
852 
853   /**
854    * To minimize network usage when reading input splits,
855    * each worker can prioritize splits that reside on its host.
856    * This, however, comes at the cost of increased load on ZooKeeper.
857    * Hence, users with a lot of splits and input threads (or with
858    * configurations that can't exploit locality) may want to disable it.
859    */
860   BooleanConfOption USE_INPUT_SPLIT_LOCALITY =
861       new BooleanConfOption("giraph.useInputSplitLocality", true,
862           "To minimize network usage when reading input splits, each worker " +
863           "can prioritize splits that reside on its host. " +
864           "This, however, comes at the cost of increased load on ZooKeeper. " +
865           "Hence, users with a lot of splits and input threads (or with " +
866           "configurations that can't exploit locality) may want to disable " +
867           "it.");
868 
869   /** Multiplier for the current workers squared */
870   FloatConfOption PARTITION_COUNT_MULTIPLIER =
871       new FloatConfOption("giraph.masterPartitionCountMultiplier", 1.0f,
872           "Multiplier for the current workers squared");
873 
874   /** Minimum number of partitions to have per compute thread */
875   IntConfOption MIN_PARTITIONS_PER_COMPUTE_THREAD =
876       new IntConfOption("giraph.minPartitionsPerComputeThread", 1,
877           "Minimum number of partitions to have per compute thread");
878 
879   /** Overrides default partition count calculation if not -1 */
880   IntConfOption USER_PARTITION_COUNT =
881       new IntConfOption("giraph.userPartitionCount", -1,
882           "Overrides default partition count calculation if not -1");
883 
884   /** Vertex key space size for
885    * {@link org.apache.giraph.partition.WorkerGraphPartitionerImpl}
886    */
887   String PARTITION_VERTEX_KEY_SPACE_SIZE = "giraph.vertexKeySpaceSize";
888 
889   /**
890    *  How often to checkpoint (i.e. 0, means no checkpoint,
891    *  1 means every superstep, 2 is every two supersteps, etc.).
892    */
893   IntConfOption CHECKPOINT_FREQUENCY =
894       new IntConfOption("giraph.checkpointFrequency", 0,
895           "How often to checkpoint (i.e. 0, means no checkpoint, 1 means " +
896           "every superstep, 2 is every two supersteps, etc.).");
897 
898   /**
899    * Delete checkpoints after a successful job run?
900    */
901   BooleanConfOption CLEANUP_CHECKPOINTS_AFTER_SUCCESS =
902       new BooleanConfOption("giraph.cleanupCheckpointsAfterSuccess", true,
903           "Delete checkpoints after a successful job run?");
904 
905   /**
906    * An application can be restarted manually by selecting a superstep.  The
907    * corresponding checkpoint must exist for this to work.  The user should
908    * set a long value.  Default is start from scratch.
909    */
910   String RESTART_SUPERSTEP = "giraph.restartSuperstep";
911 
912   /**
913    * If application is restarted manually we need to specify job ID
914    * to restart from.
915    */
916   StrConfOption RESTART_JOB_ID = new StrConfOption("giraph.restart.jobId",
917       null, "Which job ID should I try to restart?");
918 
919   /**
920    * Base ZNode for Giraph's state in the ZooKeeper cluster.  Must be a root
921    * znode on the cluster beginning with "/"
922    */
923   String BASE_ZNODE_KEY = "giraph.zkBaseZNode";
924 
925   /**
926    * If ZOOKEEPER_LIST is not set, then use this directory to manage
927    * ZooKeeper
928    */
929   StrConfOption ZOOKEEPER_MANAGER_DIRECTORY =
930       new StrConfOption("giraph.zkManagerDirectory",
931           "_bsp/_defaultZkManagerDir",
932           "If ZOOKEEPER_LIST is not set, then use this directory to manage " +
933           "ZooKeeper");
934 
935   /** Number of ZooKeeper client connection attempts before giving up. */
936   IntConfOption ZOOKEEPER_CONNECTION_ATTEMPTS =
937       new IntConfOption("giraph.zkConnectionAttempts", 10,
938           "Number of ZooKeeper client connection attempts before giving up.");
939 
940   /** This directory has/stores the available checkpoint files in HDFS. */
941   StrConfOption CHECKPOINT_DIRECTORY =
942       new StrConfOption("giraph.checkpointDirectory", "_bsp/_checkpoints/",
943           "This directory has/stores the available checkpoint files in HDFS.");
944 
945   /**
946    * Comma-separated list of directories in the local filesystem for
947    * out-of-core partitions.
948    */
949   StrConfOption PARTITIONS_DIRECTORY =
950       new StrConfOption("giraph.partitionsDirectory", "_bsp/_partitions",
951           "Comma-separated list of directories in the local filesystem for " +
952           "out-of-core partitions.");
953 
954   /**
955    * Number of IO threads used in out-of-core mechanism. If local disk is used
956    * for spilling data to and reading data from, this number should be equal to
957    * the number of available disks on each machine. In such case, one should
958    * use giraph.partitionsDirectory to specify directories mounted on different
959    * disks.
960    */
961   IntConfOption NUM_OUT_OF_CORE_THREADS =
962       new IntConfOption("giraph.numOutOfCoreThreads", 1, "Number of IO " +
963           "threads used in out-of-core mechanism. If using local disk to " +
964           "spill data, this should be equal to the number of available " +
965           "disks. In such case, use giraph.partitionsDirectory to specify " +
966           "mount points on different disks.");
967 
968   /** Enable out-of-core graph. */
969   BooleanConfOption USE_OUT_OF_CORE_GRAPH =
970       new BooleanConfOption("giraph.useOutOfCoreGraph", false,
971           "Enable out-of-core graph.");
972 
973   /** Data accessor resource/object */
974   ClassConfOption<OutOfCoreDataAccessor> OUT_OF_CORE_DATA_ACCESSOR =
975       ClassConfOption.create("giraph.outOfCoreDataAccessor",
976           LocalDiskDataAccessor.class, OutOfCoreDataAccessor.class,
977           "Data accessor used in out-of-core computation (local-disk, " +
978               "in-memory, HDFS, etc.)");
979 
980   /**
981    * Out-of-core oracle that is to be used for adaptive out-of-core engine. If
982    * the `MAX_PARTITIONS_IN_MEMORY` is already set, this will be over-written
983    * to be `FixedPartitionsOracle`.
984    */
985   ClassConfOption<OutOfCoreOracle> OUT_OF_CORE_ORACLE =
986       ClassConfOption.create("giraph.outOfCoreOracle",
987           ThresholdBasedOracle.class, OutOfCoreOracle.class,
988           "Out-of-core oracle that is to be used for adaptive out-of-core " +
989               "engine");
990 
991   /** Maximum number of partitions to hold in memory for each worker. */
992   IntConfOption MAX_PARTITIONS_IN_MEMORY =
993       new IntConfOption("giraph.maxPartitionsInMemory", 0,
994           "Maximum number of partitions to hold in memory for each worker. By" +
995               " default it is set to 0 (for adaptive out-of-core mechanism)");
996 
997   /** Directory to write YourKit snapshots to */
998   String YOURKIT_OUTPUT_DIR = "giraph.yourkit.outputDir";
999   /** Default directory to write YourKit snapshots to */
1000   String YOURKIT_OUTPUT_DIR_DEFAULT = "/tmp/giraph/%JOB_ID%/%TASK_ID%";
1001 
1002   /** Keep the zookeeper output for debugging? Default is to remove it. */
1003   BooleanConfOption KEEP_ZOOKEEPER_DATA =
1004       new BooleanConfOption("giraph.keepZooKeeperData", false,
1005           "Keep the zookeeper output for debugging? Default is to remove it.");
1006   /** Default ZooKeeper snap count. */
1007   int DEFAULT_ZOOKEEPER_SNAP_COUNT = 50000;
1008   /** Default ZooKeeper tick time. */
1009   int DEFAULT_ZOOKEEPER_TICK_TIME = 6000;
1010   /** Default ZooKeeper maximum client connections. */
1011   int DEFAULT_ZOOKEEPER_MAX_CLIENT_CNXNS = 10000;
1012   /** Number of snapshots to be retained after purge */
1013   int ZOOKEEPER_SNAP_RETAIN_COUNT = 3;
1014   /** Zookeeper purge interval in hours */
1015   int ZOOKEEPER_PURGE_INTERVAL = 1;
1016   /** ZooKeeper minimum session timeout; default is 10 minutes in msecs */
1017   IntConfOption ZOOKEEPER_MIN_SESSION_TIMEOUT =
1018       new IntConfOption("giraph.zKMinSessionTimeout", MINUTES.toMillis(10),
1019           "ZooKeeper minimum session timeout");
1020   /** ZooKeeper maximum session timeout; default is 15 minutes in msecs */
1021   IntConfOption ZOOKEEPER_MAX_SESSION_TIMEOUT =
1022       new IntConfOption("giraph.zkMaxSessionTimeout", MINUTES.toMillis(15),
1023           "ZooKeeper maximum session timeout");
1024 
1025   /** ZooKeeper force sync */
1026   BooleanConfOption ZOOKEEPER_FORCE_SYNC =
1027       new BooleanConfOption("giraph.zKForceSync", false,
1028           "ZooKeeper force sync");
1029 
1030   /** ZooKeeper skip ACLs */
1031   BooleanConfOption ZOOKEEPER_SKIP_ACL =
1032       new BooleanConfOption("giraph.ZkSkipAcl", true, "ZooKeeper skip ACLs");
1033 
1034   /**
1035    * Whether to use SASL with DIGEST and Hadoop Job Tokens to authenticate
1036    * and authorize Netty BSP Clients to Servers.
1037    */
1038   BooleanConfOption AUTHENTICATE =
1039       new BooleanConfOption("giraph.authenticate", false,
1040           "Whether to use SASL with DIGEST and Hadoop Job Tokens to " +
1041           "authenticate and authorize Netty BSP Clients to Servers.");
1042 
1043   /** Use unsafe serialization? */
1044   BooleanConfOption USE_UNSAFE_SERIALIZATION =
1045       new BooleanConfOption("giraph.useUnsafeSerialization", true,
1046           "Use unsafe serialization?");
1047 
1048   /**
1049    * Use BigDataIO for messages? If there are super-vertices in the
1050    * graph which receive a lot of messages (total serialized size of messages
1051    * goes beyond the maximum size of a byte array), setting this option to true
1052    * will remove that limit. The maximum memory available for a single vertex
1053    * will be limited to the maximum heap size available.
1054    */
1055   BooleanConfOption USE_BIG_DATA_IO_FOR_MESSAGES =
1056       new BooleanConfOption("giraph.useBigDataIOForMessages", false,
1057           "Use BigDataIO for messages?");
1058 
1059   /**
1060    * Maximum number of attempts a master/worker will retry before killing
1061    * the job.  This directly maps to the number of map task attempts in
1062    * Hadoop.
1063    */
1064   IntConfOption MAX_TASK_ATTEMPTS =
1065       new IntConfOption("mapred.map.max.attempts", -1,
1066           "Maximum number of attempts a master/worker will retry before " +
1067           "killing the job.  This directly maps to the number of map task " +
1068           "attempts in Hadoop.");
1069 
1070   /** Interface to use for hostname resolution */
1071   StrConfOption DNS_INTERFACE =
1072       new StrConfOption("giraph.dns.interface", "default",
1073           "Interface to use for hostname resolution");
1074   /** Server for hostname resolution */
1075   StrConfOption DNS_NAMESERVER =
1076       new StrConfOption("giraph.dns.nameserver", "default",
1077           "Server for hostname resolution");
1078 
1079   /**
1080    * The application will halt after this many supersteps is completed.  For
1081    * instance, if it is set to 3, the application will run at most 0, 1,
1082    * and 2 supersteps and then go into the shutdown superstep.
1083    */
1084   IntConfOption MAX_NUMBER_OF_SUPERSTEPS =
1085       new IntConfOption("giraph.maxNumberOfSupersteps", 1,
1086           "The application will halt after this many supersteps is " +
1087           "completed. For instance, if it is set to 3, the application will " +
1088           "run at most 0, 1, and 2 supersteps and then go into the shutdown " +
1089           "superstep.");
1090 
1091   /**
1092    * The application will not mutate the graph topology (the edges). It is used
1093    * to optimise out-of-core graph, by not writing back edges every time.
1094    */
1095   BooleanConfOption STATIC_GRAPH =
1096       new BooleanConfOption("giraph.isStaticGraph", false,
1097           "The application will not mutate the graph topology (the edges). " +
1098           "It is used to optimise out-of-core graph, by not writing back " +
1099           "edges every time.");
1100 
1101   /**
1102    * This option will tell which message encode &amp; store enum to use when
1103    * combining is not enabled
1104    */
1105   EnumConfOption<MessageEncodeAndStoreType> MESSAGE_ENCODE_AND_STORE_TYPE =
1106       EnumConfOption.create("giraph.messageEncodeAndStoreType",
1107           MessageEncodeAndStoreType.class,
1108           MessageEncodeAndStoreType.BYTEARRAY_PER_PARTITION,
1109           "Select the message_encode_and_store_type to use");
1110 
1111   /**
1112    * This option can be used to specify if a source vertex present in edge
1113    * input but not in vertex input can be created
1114    */
1115   BooleanConfOption CREATE_EDGE_SOURCE_VERTICES =
1116       new BooleanConfOption("giraph.createEdgeSourceVertices", true,
1117           "Create a source vertex if present in edge input but not " +
1118           "necessarily in vertex input");
1119 
1120   /**
1121    * This counter group will contain one counter whose name is the ZooKeeper
1122    * server:port which this job is using
1123    */
1124   String ZOOKEEPER_SERVER_PORT_COUNTER_GROUP = "Zookeeper server:port";
1125 
1126   /**
1127    * This counter group will contain one counter whose name is the ZooKeeper
1128    * node path which should be created to trigger computation halt
1129    */
1130   String ZOOKEEPER_HALT_NODE_COUNTER_GROUP = "Zookeeper halt node";
1131 
1132   /**
1133    * This counter group will contain one counter whose name is the ZooKeeper
1134    * node path which contains all data about this job
1135    */
1136   String ZOOKEEPER_BASE_PATH_COUNTER_GROUP = "Zookeeper base path";
1137 
1138   /**
1139    * Which class to use to write instructions on how to halt the application
1140    */
1141   ClassConfOption<HaltApplicationUtils.HaltInstructionsWriter>
1142   HALT_INSTRUCTIONS_WRITER_CLASS = ClassConfOption.create(
1143       "giraph.haltInstructionsWriter",
1144       HaltApplicationUtils.DefaultHaltInstructionsWriter.class,
1145       HaltApplicationUtils.HaltInstructionsWriter.class,
1146       "Class used to write instructions on how to halt the application");
1147 
1148   /**
1149    * Maximum timeout (in milliseconds) for waiting for all tasks
1150    * to complete after the job is done.  Defaults to 15 minutes.
1151    */
1152   IntConfOption WAIT_TASK_DONE_TIMEOUT_MS =
1153       new IntConfOption("giraph.waitTaskDoneTimeoutMs", MINUTES.toMillis(15),
1154           "Maximum timeout (in ms) for waiting for all tasks to " +
1155               "complete");
1156 
1157   /** Whether to track job progress on client or not */
1158   BooleanConfOption TRACK_JOB_PROGRESS_ON_CLIENT =
1159       new BooleanConfOption("giraph.trackJobProgressOnClient", false,
1160           "Whether to track job progress on client or not");
1161 
1162   /** Class to use to track job progress on client */
1163   ClassConfOption<JobProgressTrackerService> JOB_PROGRESS_TRACKER_CLASS =
1164       ClassConfOption.create("giraph.jobProgressTrackerClass",
1165           DefaultJobProgressTrackerService.class,
1166           JobProgressTrackerService.class,
1167           "Class to use to track job progress on client");
1168 
1169   /** Number of retries for creating the HDFS files */
1170   IntConfOption HDFS_FILE_CREATION_RETRIES =
1171       new IntConfOption("giraph.hdfs.file.creation.retries", 10,
1172           "Retries to create an HDFS file before failing");
1173 
1174   /** Number of milliseconds to wait before retrying HDFS file creation */
1175   IntConfOption HDFS_FILE_CREATION_RETRY_WAIT_MS =
1176       new IntConfOption("giraph.hdfs.file.creation.retry.wait.ms", 30_000,
1177           "Milliseconds to wait prior to retrying creation of an HDFS file");
1178 
1179   /** Number of threads for writing and reading checkpoints */
1180   IntConfOption NUM_CHECKPOINT_IO_THREADS =
1181       new IntConfOption("giraph.checkpoint.io.threads", 8,
1182           "Number of threads for writing and reading checkpoints");
1183 
1184   /**
1185    * Compression algorithm to be used for checkpointing.
1186    * Defined by extension for hadoop compatibility reasons.
1187    */
1188   StrConfOption CHECKPOINT_COMPRESSION_CODEC =
1189       new StrConfOption("giraph.checkpoint.compression.codec",
1190           ".deflate",
1191           "Defines compression algorithm we will be using for " +
1192               "storing checkpoint. Available options include but " +
1193               "not restricted to: .deflate, .gz, .bz2, .lzo");
1194 
1195   /**
1196    * Defines if and when checkpointing is supported by this job.
1197    * By default checkpointing is always supported unless output during the
1198    * computation is enabled.
1199    */
1200   ClassConfOption<CheckpointSupportedChecker> CHECKPOINT_SUPPORTED_CHECKER =
1201       ClassConfOption.create("giraph.checkpoint.supported.checker",
1202           DefaultCheckpointSupportedChecker.class,
1203           CheckpointSupportedChecker.class,
1204           "This is the way to specify if checkpointing is " +
1205               "supported by the job");
1206 
1207 
1208   /** Number of threads to use in async message store, 0 means
1209    * we should not use async message processing */
1210   IntConfOption ASYNC_MESSAGE_STORE_THREADS_COUNT =
1211       new IntConfOption("giraph.async.message.store.threads", 0,
1212           "Number of threads to be used in async message store.");
1213 
1214   /** Output format class for hadoop to use (for committing) */
1215   ClassConfOption<OutputFormat> HADOOP_OUTPUT_FORMAT_CLASS =
1216       ClassConfOption.create("giraph.hadoopOutputFormatClass",
1217           BspOutputFormat.class, OutputFormat.class,
1218           "Output format class for hadoop to use (for committing)");
1219 
1220   /**
1221    * For worker to worker communication we can use IPs or host names; the
1222    * default is false, i.e. IP addresses are not preferred over host names.
1223    */
1224   BooleanConfOption PREFER_IP_ADDRESSES =
1225       new BooleanConfOption("giraph.preferIP", false,
1226       "Prefer IP addresses instead of host names");
1227 }
1228 // CHECKSTYLE: resume InterfaceIsTypeCheck