1/*2 * Licensed to the Apache Software Foundation (ASF) under one3 * or more contributor license agreements. See the NOTICE file4 * distributed with this work for additional information5 * regarding copyright ownership. The ASF licenses this file6 * to you under the Apache License, Version 2.0 (the7 * "License"); you may not use this file except in compliance8 * with the License. You may obtain a copy of the License at9 *10 * http://www.apache.org/licenses/LICENSE-2.011 *12 * Unless required by applicable law or agreed to in writing, software13 * distributed under the License is distributed on an "AS IS" BASIS,14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.15 * See the License for the specific language governing permissions and16 * limitations under the License.17 */1819package org.apache.giraph.examples;
2021import org.apache.giraph.graph.BasicComputation;
22import org.apache.giraph.edge.Edge;
23import org.apache.giraph.graph.Vertex;
24import org.apache.hadoop.io.IntWritable;
25import org.apache.hadoop.io.NullWritable;
2627import java.io.IOException;
2829/**30 * Implementation of the HCC algorithm that identifies connected components and31 * assigns each vertex its "component identifier" (the smallest vertex id32 * in the component)33 *34 * The idea behind the algorithm is very simple: propagate the smallest35 * vertex id along the edges to all vertices of a connected component. The36 * number of supersteps necessary is equal to the length of the maximum37 * diameter of all components + 138 *39 * The original Hadoop-based variant of this algorithm was proposed by Kang,40 * Charalampos, Tsourakakis and Faloutsos in41 * "PEGASUS: Mining Peta-Scale Graphs", 201042 *43 * http://www.cs.cmu.edu/~ukang/papers/PegasusKAIS.pdf44 */45 @Algorithm(
46 name = "Connected components",
47 description = "Finds connected components of the graph"48 )
49publicclassConnectedComponentsComputationextends50 BasicComputation<IntWritable, IntWritable, NullWritable, IntWritable> {
51/**52 * Propagates the smallest vertex id to all neighbors. Will always choose to53 * halt and only reactivate if a smaller id has been sent to it.54 *55 * @param vertex Vertex56 * @param messages Iterator of messages from the previous superstep.57 * @throws IOException58 */59 @Override
60publicvoid compute(
61 Vertex<IntWritable, IntWritable, NullWritable> vertex,
62 Iterable<IntWritable> messages) throws IOException {
63int currentComponent = vertex.getValue().get();
6465// First superstep is special, because we can simply look at the neighbors66if (getSuperstep() == 0) {
67for (Edge<IntWritable, NullWritable> edge : vertex.getEdges()) {
68int neighbor = edge.getTargetVertexId().get();
69if (neighbor < currentComponent) {
70 currentComponent = neighbor;
71 }
72 }
73// Only need to send value if it is not the own id74if (currentComponent != vertex.getValue().get()) {
75 vertex.setValue(new IntWritable(currentComponent));
76for (Edge<IntWritable, NullWritable> edge : vertex.getEdges()) {
77 IntWritable neighbor = edge.getTargetVertexId();
78if (neighbor.get() > currentComponent) {
79 sendMessage(neighbor, vertex.getValue());
80 }
81 }
82 }
8384 vertex.voteToHalt();
85return;
86 }
8788boolean changed = false;
89// did we get a smaller id ?90for (IntWritable message : messages) {
91int candidateComponent = message.get();
92if (candidateComponent < currentComponent) {
93 currentComponent = candidateComponent;
94 changed = true;
95 }
96 }
9798// propagate new component id to the neighbors99if (changed) {
100 vertex.setValue(new IntWritable(currentComponent));
101 sendMessageToAllEdges(vertex, vertex.getValue());
102 }
103 vertex.voteToHalt();
104 }
105 }