This project has retired. For details please refer to its Attic page.
IntIntNullTextVertexInputFormat xref
View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.giraph.io.formats;
20  
21  import com.google.common.collect.Lists;
22  import org.apache.giraph.edge.Edge;
23  import org.apache.giraph.edge.EdgeFactory;
24  import org.apache.hadoop.io.IntWritable;
25  import org.apache.hadoop.io.NullWritable;
26  import org.apache.hadoop.io.Text;
27  import org.apache.hadoop.mapreduce.InputSplit;
28  import org.apache.hadoop.mapreduce.TaskAttemptContext;
29  
30  import java.io.IOException;
31  import java.util.List;
32  import java.util.regex.Pattern;
33  
34  /**
35   * Simple text-based {@link org.apache.giraph.io.VertexInputFormat} for
36   * unweighted graphs with int ids.
37   *
38   * Each line consists of: vertex_id vertex_value neighbor1 neighbor2 ...
39   */
40  public class IntIntNullTextVertexInputFormat
41      extends
42      TextVertexInputFormat<IntWritable, IntWritable, NullWritable> {
43    /** Separator of the vertex and neighbors */
44    private static final Pattern SEPARATOR = Pattern.compile("[\t ]");
45  
46    @Override
47    public TextVertexReader createVertexReader(InputSplit split,
48        TaskAttemptContext context)
49      throws IOException {
50      return new IntIntNullVertexReader();
51    }
52  
53    /**
54     * Vertex reader associated with
55     * {@link org.apache.giraph.io.formats.IntIntNullTextVertexInputFormat}.
56     */
57    public class IntIntNullVertexReader extends
58      TextVertexReaderFromEachLineProcessed<String[]> {
59      /** Cached vertex id for the current line */
60      private IntWritable id;
61      /** Cached vertex value for the current line */
62      private IntWritable value;
63  
64      @Override
65      protected String[] preprocessLine(Text line) throws IOException {
66        String[] tokens = SEPARATOR.split(line.toString());
67        id = new IntWritable(Integer.parseInt(tokens[0]));
68        value = new IntWritable(Integer.parseInt(tokens[1]));
69        return tokens;
70      }
71  
72      @Override
73      protected IntWritable getId(String[] tokens) throws IOException {
74        return id;
75      }
76  
77      @Override
78      protected IntWritable getValue(String[] tokens) throws IOException {
79        return value;
80      }
81  
82      @Override
83      protected Iterable<Edge<IntWritable, NullWritable>> getEdges(
84          String[] tokens) throws IOException {
85        List<Edge<IntWritable, NullWritable>> edges =
86            Lists.newArrayListWithCapacity(tokens.length - 2);
87        for (int n = 2; n < tokens.length; n++) {
88          edges.add(EdgeFactory.create(
89              new IntWritable(Integer.parseInt(tokens[n]))));
90        }
91        return edges;
92      }
93    }
94  }