1/*2* Licensed to the Apache Software Foundation (ASF) under one3* or more contributor license agreements. See the NOTICE file4* distributed with this work for additional information5* regarding copyright ownership. The ASF licenses this file6* to you under the Apache License, Version 2.0 (the7* "License"); you may not use this file except in compliance8* with the License. You may obtain a copy of the License at9*10* http://www.apache.org/licenses/LICENSE-2.011*12* Unless required by applicable law or agreed to in writing, software13* distributed under the License is distributed on an "AS IS" BASIS,14* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.15* See the License for the specific language governing permissions and16* limitations under the License.17*/18 19packageorg.apache.giraph.examples; 20 21importorg.apache.giraph.graph.BasicComputation; 22importorg.apache.giraph.edge.Edge; 23importorg.apache.giraph.graph.Vertex; 24importorg.apache.hadoop.io.IntWritable; 25importorg.apache.hadoop.io.NullWritable; 26 27importjava.io.IOException; 28 29/**30* Implementation of the HCC algorithm that identifies connected components and31* assigns each vertex its "component identifier" (the smallest vertex id32* in the component)33*34* The idea behind the algorithm is very simple: propagate the smallest35* vertex id along the edges to all vertices of a connected component. The36* number of supersteps necessary is equal to the length of the maximum37* diameter of all components + 138*39* The original Hadoop-based variant of this algorithm was proposed by Kang,40* Charalampos, Tsourakakis and Faloutsos in41* "PEGASUS: Mining Peta-Scale Graphs", 201042*43* http://www.cs.cmu.edu/~ukang/papers/PegasusKAIS.pdf44*/45 @Algorithm( 46 name = "Connected components", 47 description = "Finds connected components of the graph" 48 ) 49publicclassConnectedComponentsComputationextends50 BasicComputation<IntWritable, IntWritable, NullWritable, IntWritable> { 51/**52* Propagates the smallest vertex id to all neighbors. Will always choose to53* halt and only reactivate if a smaller id has been sent to it.54*55* @param vertex Vertex56* @param messages Iterator of messages from the previous superstep.57* @throws IOException58*/59 @Override 60publicvoidcompute( 61 Vertex<IntWritable, IntWritable, NullWritable> vertex, 62 Iterable<IntWritable> messages)throwsIOException { 63intcurrentComponent = vertex.getValue().get(); 64 65// First superstep is special, because we can simply look at the neighbors66if(getSuperstep() == 0) { 67for(Edge<IntWritable, NullWritable> edge : vertex.getEdges()) { 68intneighbor = edge.getTargetVertexId().get(); 69if(neighbor < currentComponent) { 70 currentComponent = neighbor; 71 } 72 } 73// Only need to send value if it is not the own id74if(currentComponent != vertex.getValue().get()) { 75 vertex.setValue(newIntWritable(currentComponent)); 76for(Edge<IntWritable, NullWritable> edge : vertex.getEdges()) { 77 IntWritable neighbor = edge.getTargetVertexId(); 78if(neighbor.get() > currentComponent) { 79 sendMessage(neighbor, vertex.getValue()); 80 } 81 } 82 } 83 84 vertex.voteToHalt(); 85return; 86 } 87 88booleanchanged = false; 89// did we get a smaller id ?90for(IntWritable message : messages) { 91intcandidateComponent = message.get(); 92if(candidateComponent < currentComponent) { 93 currentComponent = candidateComponent; 94 changed =true; 95 } 96 } 97 98// propagate new component id to the neighbors99if(changed) { 100 vertex.setValue(newIntWritable(currentComponent)); 101 sendMessageToAllEdges(vertex, vertex.getValue()); 102 } 103 vertex.voteToHalt(); 104 } 105 }