View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.giraph.comm.netty;
20  
21  import org.apache.giraph.comm.flow_control.FlowControl;
22  /*if_not[HADOOP_NON_SECURE]*/
23  import org.apache.giraph.comm.netty.handler.AuthorizeServerHandler;
24  /*end[HADOOP_NON_SECURE]*/
25  import org.apache.giraph.comm.netty.handler.RequestDecoder;
26  import org.apache.giraph.comm.netty.handler.RequestServerHandler;
27  /*if_not[HADOOP_NON_SECURE]*/
28  import org.apache.giraph.comm.netty.handler.ResponseEncoder;
29  import org.apache.giraph.comm.netty.handler.SaslServerHandler;
30  /*end[HADOOP_NON_SECURE]*/
31  import org.apache.giraph.comm.netty.handler.WorkerRequestReservedMap;
32  import org.apache.giraph.conf.GiraphConstants;
33  import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
34  import org.apache.giraph.graph.TaskInfo;
35  import org.apache.giraph.utils.PipelineUtils;
36  import org.apache.giraph.utils.ProgressableUtils;
37  import org.apache.giraph.utils.ThreadUtils;
38  import org.apache.hadoop.util.Progressable;
39  import org.apache.log4j.Logger;
40  import io.netty.bootstrap.ServerBootstrap;
41  import io.netty.channel.group.ChannelGroup;
42  import io.netty.channel.group.DefaultChannelGroup;
43  import io.netty.channel.nio.NioEventLoopGroup;
44  import io.netty.channel.socket.SocketChannel;
45  import io.netty.channel.ChannelHandlerContext;
46  import io.netty.channel.EventLoopGroup;
47  import io.netty.channel.ChannelOption;
48  import io.netty.channel.ChannelInitializer;
49  import io.netty.channel.ChannelInboundHandlerAdapter;
50  import io.netty.channel.ChannelFuture;
51  import io.netty.channel.socket.nio.NioServerSocketChannel;
52  import io.netty.handler.codec.LengthFieldBasedFrameDecoder;
53  /*if_not[HADOOP_NON_SECURE]*/
54  import io.netty.util.AttributeKey;
55  /*end[HADOOP_NON_SECURE]*/
56  import io.netty.util.concurrent.DefaultEventExecutorGroup;
57  import io.netty.util.concurrent.EventExecutorGroup;
58  import io.netty.util.concurrent.ImmediateEventExecutor;
59  import io.netty.channel.AdaptiveRecvByteBufAllocator;
60  
61  import java.net.InetSocketAddress;
62  import java.net.UnknownHostException;
63  
64  import static com.google.common.base.Preconditions.checkState;
65  import static org.apache.giraph.conf.GiraphConstants.MAX_IPC_PORT_BIND_ATTEMPTS;
66  
67  /**
68   * This server uses Netty and will implement all Giraph communication
69   */
70  public class NettyServer {
71    /** Default maximum thread pool size */
72    public static final int MAXIMUM_THREAD_POOL_SIZE_DEFAULT = 32;
73  
74  
75  /*if_not[HADOOP_NON_SECURE]*/
76    /** Used to authenticate with netty clients */
77    public static final AttributeKey<SaslNettyServer>
78    CHANNEL_SASL_NETTY_SERVERS = AttributeKey.valueOf("channelSaslServers");
79  /*end[HADOOP_NON_SECURE]*/
80  
81    /** Class logger */
82    private static final Logger LOG = Logger.getLogger(NettyServer.class);
83    /** Configuration */
84    private final ImmutableClassesGiraphConfiguration conf;
85    /** Progressable for reporting progress */
86    private final Progressable progressable;
87    /** Accepted channels */
88    private final ChannelGroup accepted = new DefaultChannelGroup(
89        ImmediateEventExecutor.INSTANCE);
90    /** Local hostname */
91    private final String localHostOrIp;
92    /** Address of the server */
93    private InetSocketAddress myAddress;
94    /** Current task info */
95    private TaskInfo myTaskInfo;
96    /** Maximum number of threads */
97    private final int maxPoolSize;
98    /** TCP backlog */
99    private final int tcpBacklog;
100   /** Factory for {@link RequestServerHandler} */
101   private final RequestServerHandler.Factory requestServerHandlerFactory;
102 /*if_not[HADOOP_NON_SECURE]*/
103   /** Factory for {@link RequestServerHandler} */
104   private SaslServerHandler.Factory saslServerHandlerFactory;
105 /*end[HADOOP_NON_SECURE]*/
106   /** Server bootstrap */
107   private ServerBootstrap bootstrap;
108   /** Inbound byte counter for this client */
109   private final InboundByteCounter inByteCounter = new InboundByteCounter();
110   /** Outbound byte counter for this client */
111   private final OutboundByteCounter outByteCounter = new OutboundByteCounter();
112   /** Send buffer size */
113   private final int sendBufferSize;
114   /** Receive buffer size */
115   private final int receiveBufferSize;
116   /** Boss eventloop group */
117   private final EventLoopGroup bossGroup;
118   /** Worker eventloop group */
119   private final EventLoopGroup workerGroup;
120   /** Request completed map per worker */
121   private final WorkerRequestReservedMap workerRequestReservedMap;
122   /** Use execution group? */
123   private final boolean useExecutionGroup;
124   /** Execution handler (if used) */
125   private final EventExecutorGroup executionGroup;
126   /** Name of the handler before the execution handler (if used) */
127   private final String handlerToUseExecutionGroup;
128   /** Handles all uncaught exceptions in netty threads */
129   private final Thread.UncaughtExceptionHandler exceptionHandler;
130 
131   /**
132    * Constructor for creating the server
133    *
134    * @param conf Configuration to use
135    * @param requestServerHandlerFactory Factory for request handlers
136    * @param myTaskInfo Current task info
137    * @param progressable Progressable for reporting progress
138    * @param exceptionHandler handle uncaught exceptions
139    */
140   public NettyServer(ImmutableClassesGiraphConfiguration conf,
141       RequestServerHandler.Factory requestServerHandlerFactory,
142       TaskInfo myTaskInfo, Progressable progressable,
143       Thread.UncaughtExceptionHandler exceptionHandler) {
144     this.conf = conf;
145     this.progressable = progressable;
146     this.requestServerHandlerFactory = requestServerHandlerFactory;
147 /*if_not[HADOOP_NON_SECURE]*/
148     this.saslServerHandlerFactory = new SaslServerHandler.Factory();
149 /*end[HADOOP_NON_SECURE]*/
150     this.myTaskInfo = myTaskInfo;
151     this.exceptionHandler = exceptionHandler;
152     sendBufferSize = GiraphConstants.SERVER_SEND_BUFFER_SIZE.get(conf);
153     receiveBufferSize = GiraphConstants.SERVER_RECEIVE_BUFFER_SIZE.get(conf);
154 
155     workerRequestReservedMap = new WorkerRequestReservedMap(conf);
156 
157     maxPoolSize = GiraphConstants.NETTY_SERVER_THREADS.get(conf);
158 
159     bossGroup = new NioEventLoopGroup(4,
160         ThreadUtils.createThreadFactory(
161             "netty-server-boss-%d", exceptionHandler));
162 
163     workerGroup = new NioEventLoopGroup(maxPoolSize,
164         ThreadUtils.createThreadFactory(
165             "netty-server-worker-%d", exceptionHandler));
166 
167     try {
168       this.localHostOrIp = conf.getLocalHostOrIp();
169     } catch (UnknownHostException e) {
170       throw new IllegalStateException("NettyServer: unable to get hostname");
171     }
172 
173     tcpBacklog = conf.getInt(GiraphConstants.TCP_BACKLOG.getKey(),
174         conf.getInt(GiraphConstants.MAX_WORKERS,
175             GiraphConstants.TCP_BACKLOG.getDefaultValue()));
176 
177     handlerToUseExecutionGroup =
178         GiraphConstants.NETTY_SERVER_EXECUTION_AFTER_HANDLER.get(conf);
179     useExecutionGroup =
180         GiraphConstants.NETTY_SERVER_USE_EXECUTION_HANDLER.get(conf);
181     if (useExecutionGroup) {
182       int executionThreads = conf.getNettyServerExecutionThreads();
183       executionGroup = new DefaultEventExecutorGroup(executionThreads,
184           ThreadUtils.createThreadFactory(
185               "netty-server-exec-%d", exceptionHandler));
186       if (LOG.isInfoEnabled()) {
187         LOG.info("NettyServer: Using execution group with " +
188             executionThreads + " threads for " +
189             handlerToUseExecutionGroup + ".");
190       }
191     } else {
192       executionGroup = null;
193     }
194   }
195 
196 /*if_not[HADOOP_NON_SECURE]*/
197   /**
198    * Constructor for creating the server
199    *
200    * @param conf Configuration to use
201    * @param requestServerHandlerFactory Factory for request handlers
202    * @param myTaskInfo Current task info
203    * @param progressable Progressable for reporting progress
204    * @param saslServerHandlerFactory  Factory for SASL handlers
205    * @param exceptionHandler handle uncaught exceptions
206    */
207   public NettyServer(ImmutableClassesGiraphConfiguration conf,
208                      RequestServerHandler.Factory requestServerHandlerFactory,
209                      TaskInfo myTaskInfo,
210                      Progressable progressable,
211                      SaslServerHandler.Factory saslServerHandlerFactory,
212                      Thread.UncaughtExceptionHandler exceptionHandler) {
213     this(conf, requestServerHandlerFactory, myTaskInfo,
214         progressable, exceptionHandler);
215     this.saslServerHandlerFactory = saslServerHandlerFactory;
216   }
217 /*end[HADOOP_NON_SECURE]*/
218 
219   /**
220    * Start the server with the appropriate port
221    */
222   public void start() {
223     bootstrap = new ServerBootstrap();
224     bootstrap.group(bossGroup, workerGroup)
225         .channel(NioServerSocketChannel.class)
226         .option(ChannelOption.SO_BACKLOG, tcpBacklog)
227         .option(ChannelOption.ALLOCATOR, conf.getNettyAllocator())
228         .childOption(ChannelOption.SO_KEEPALIVE, true)
229         .childOption(ChannelOption.TCP_NODELAY, true)
230         .childOption(ChannelOption.SO_SNDBUF, sendBufferSize)
231         .childOption(ChannelOption.SO_RCVBUF, receiveBufferSize)
232         .childOption(ChannelOption.ALLOCATOR, conf.getNettyAllocator())
233         .childOption(ChannelOption.RCVBUF_ALLOCATOR,
234             new AdaptiveRecvByteBufAllocator(receiveBufferSize / 4,
235                 receiveBufferSize, receiveBufferSize));
236 
237     /**
238      * Pipeline setup: depends on whether configured to use authentication
239      * or not.
240      */
241     bootstrap.childHandler(new ChannelInitializer<SocketChannel>() {
242       @Override
243       protected void initChannel(SocketChannel ch) throws Exception {
244 /*if_not[HADOOP_NON_SECURE]*/
245         if (conf.authenticate()) {
246           LOG.info("start: Will use Netty pipeline with " +
247               "authentication and authorization of clients.");
248           // After a client authenticates, the two authentication-specific
249           // pipeline components SaslServerHandler and ResponseEncoder are
250           // removed, leaving the pipeline the same as in the non-authenticated
251           // configuration except for the presence of the Authorize component.
252           PipelineUtils.addLastWithExecutorCheck("serverInboundByteCounter",
253               inByteCounter, handlerToUseExecutionGroup, executionGroup, ch);
254           if (conf.doCompression()) {
255             PipelineUtils.addLastWithExecutorCheck("compressionDecoder",
256                 conf.getNettyCompressionDecoder(),
257                 handlerToUseExecutionGroup, executionGroup, ch);
258           }
259           PipelineUtils.addLastWithExecutorCheck("serverOutboundByteCounter",
260               outByteCounter, handlerToUseExecutionGroup, executionGroup, ch);
261           if (conf.doCompression()) {
262             PipelineUtils.addLastWithExecutorCheck("compressionEncoder",
263                 conf.getNettyCompressionEncoder(),
264                 handlerToUseExecutionGroup, executionGroup, ch);
265           }
266           PipelineUtils.addLastWithExecutorCheck("requestFrameDecoder",
267               new LengthFieldBasedFrameDecoder(1024 * 1024 * 1024, 0, 4, 0, 4),
268               handlerToUseExecutionGroup, executionGroup, ch);
269           PipelineUtils.addLastWithExecutorCheck("requestDecoder",
270               new RequestDecoder(conf, inByteCounter),
271               handlerToUseExecutionGroup, executionGroup, ch);
272           // Removed after authentication completes:
273           PipelineUtils.addLastWithExecutorCheck("saslServerHandler",
274               saslServerHandlerFactory.newHandler(conf),
275               handlerToUseExecutionGroup, executionGroup, ch);
276           PipelineUtils.addLastWithExecutorCheck("authorizeServerHandler",
277               new AuthorizeServerHandler(), handlerToUseExecutionGroup,
278               executionGroup, ch);
279           PipelineUtils.addLastWithExecutorCheck("requestServerHandler",
280               requestServerHandlerFactory.newHandler(workerRequestReservedMap,
281                   conf, myTaskInfo, exceptionHandler),
282               handlerToUseExecutionGroup, executionGroup, ch);
283           // Removed after authentication completes:
284           PipelineUtils.addLastWithExecutorCheck("responseEncoder",
285               new ResponseEncoder(), handlerToUseExecutionGroup,
286               executionGroup, ch);
287         } else {
288           LOG.info("start: Using Netty without authentication.");
289 /*end[HADOOP_NON_SECURE]*/
290           // Store all connected channels in order to ensure that we can close
291           // them on stop(), or else stop() may hang waiting for the
292           // connections to close on their own
293           ch.pipeline().addLast("connectedChannels",
294               new ChannelInboundHandlerAdapter() {
295                 @Override
296                 public void channelActive(ChannelHandlerContext ctx)
297                   throws Exception {
298                   accepted.add(ctx.channel());
299                   ctx.fireChannelActive();
300                 }
301               });
302           PipelineUtils.addLastWithExecutorCheck("serverInboundByteCounter",
303               inByteCounter, handlerToUseExecutionGroup, executionGroup, ch);
304           if (conf.doCompression()) {
305             PipelineUtils.addLastWithExecutorCheck("compressionDecoder",
306                 conf.getNettyCompressionDecoder(),
307                 handlerToUseExecutionGroup, executionGroup, ch);
308           }
309           PipelineUtils.addLastWithExecutorCheck("serverOutboundByteCounter",
310               outByteCounter, handlerToUseExecutionGroup, executionGroup, ch);
311           if (conf.doCompression()) {
312             PipelineUtils.addLastWithExecutorCheck("compressionEncoder",
313                 conf.getNettyCompressionEncoder(),
314                 handlerToUseExecutionGroup, executionGroup, ch);
315           }
316           PipelineUtils.addLastWithExecutorCheck("requestFrameDecoder",
317               new LengthFieldBasedFrameDecoder(1024 * 1024 * 1024, 0, 4, 0, 4),
318               handlerToUseExecutionGroup, executionGroup, ch);
319           PipelineUtils.addLastWithExecutorCheck("requestDecoder",
320               new RequestDecoder(conf, inByteCounter),
321               handlerToUseExecutionGroup, executionGroup, ch);
322           PipelineUtils.addLastWithExecutorCheck("requestServerHandler",
323               requestServerHandlerFactory.newHandler(
324                   workerRequestReservedMap, conf, myTaskInfo, exceptionHandler),
325               handlerToUseExecutionGroup, executionGroup, ch);
326 /*if_not[HADOOP_NON_SECURE]*/
327         }
328 /*end[HADOOP_NON_SECURE]*/
329       }
330     });
331 
332     int taskId = conf.getTaskPartition();
333     int numTasks = conf.getInt("mapred.map.tasks", 1);
334     // Number of workers + 1 for master
335     int numServers = conf.getInt(GiraphConstants.MAX_WORKERS, numTasks) + 1;
336     int portIncrementConstant =
337         (int) Math.pow(10, Math.ceil(Math.log10(numServers)));
338     int bindPort = GiraphConstants.IPC_INITIAL_PORT.get(conf) + taskId;
339     int bindAttempts = 0;
340     final int maxIpcPortBindAttempts = MAX_IPC_PORT_BIND_ATTEMPTS.get(conf);
341     final boolean failFirstPortBindingAttempt =
342         GiraphConstants.FAIL_FIRST_IPC_PORT_BIND_ATTEMPT.get(conf);
343 
344     // Simple handling of port collisions on the same machine while
345     // preserving debugability from the port number alone.
346     // Round up the max number of workers to the next power of 10 and use
347     // it as a constant to increase the port number with.
348     while (bindAttempts < maxIpcPortBindAttempts) {
349       this.myAddress = new InetSocketAddress(localHostOrIp, bindPort);
350       if (failFirstPortBindingAttempt && bindAttempts == 0) {
351         if (LOG.isInfoEnabled()) {
352           LOG.info("start: Intentionally fail first " +
353               "binding attempt as giraph.failFirstIpcPortBindAttempt " +
354               "is true, port " + bindPort);
355         }
356         ++bindAttempts;
357         bindPort += portIncrementConstant;
358         continue;
359       }
360 
361       try {
362         ChannelFuture f = bootstrap.bind(myAddress).sync();
363         accepted.add(f.channel());
364         break;
365       } catch (InterruptedException e) {
366         throw new IllegalStateException(e);
367         // CHECKSTYLE: stop IllegalCatchCheck
368       } catch (Exception e) {
369         // CHECKSTYLE: resume IllegalCatchCheck
370         LOG.warn("start: Likely failed to bind on attempt " +
371             bindAttempts + " to port " + bindPort, e.getCause());
372         ++bindAttempts;
373         bindPort += portIncrementConstant;
374       }
375     }
376     if (bindAttempts == maxIpcPortBindAttempts || myAddress == null) {
377       throw new IllegalStateException(
378           "start: Failed to start NettyServer with " +
379               bindAttempts + " attempts");
380     }
381 
382     if (LOG.isInfoEnabled()) {
383       LOG.info("start: Started server " +
384           "communication server: " + myAddress + " with up to " +
385           maxPoolSize + " threads on bind attempt " + bindAttempts +
386           " with sendBufferSize = " + sendBufferSize +
387           " receiveBufferSize = " + receiveBufferSize);
388     }
389   }
390 
391   /**
392    * Stop the server.
393    */
394   public void stop() {
395     if (LOG.isInfoEnabled()) {
396       LOG.info("stop: Halting netty server");
397     }
398     ProgressableUtils.awaitChannelGroupFuture(accepted.close(), progressable);
399     if (LOG.isInfoEnabled()) {
400       LOG.info("stop: Start releasing resources");
401     }
402     bossGroup.shutdownGracefully();
403     workerGroup.shutdownGracefully();
404     ProgressableUtils.awaitTerminationFuture(bossGroup, progressable);
405     ProgressableUtils.awaitTerminationFuture(workerGroup, progressable);
406     if (useExecutionGroup) {
407       executionGroup.shutdownGracefully();
408       ProgressableUtils.awaitTerminationFuture(executionGroup, progressable);
409     }
410     if (LOG.isInfoEnabled()) {
411       LOG.info("stop: Netty server halted");
412     }
413   }
414 
415   public InetSocketAddress getMyAddress() {
416     return myAddress;
417   }
418 
419   public String getLocalHostOrIp() {
420     return localHostOrIp;
421   }
422 
423   /**
424    * Inform the server about the flow control policy used in sending requests
425    *
426    * @param flowControl reference to the flow control used
427    */
428   public void setFlowControl(FlowControl flowControl) {
429     checkState(requestServerHandlerFactory != null);
430     requestServerHandlerFactory.setFlowControl(flowControl);
431   }
432 }
433