This project has retired. For details please refer to its Attic page.
DistributedCacheUtils xref
View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.giraph.utils;
19  
20  import org.apache.commons.io.FilenameUtils;
21  import org.apache.hadoop.conf.Configuration;
22  import org.apache.hadoop.filecache.DistributedCache;
23  import org.apache.hadoop.fs.FileSystem;
24  import org.apache.hadoop.fs.Path;
25  import org.apache.log4j.Logger;
26  
27  import com.google.common.base.Optional;
28  
29  import java.io.IOException;
30  
31  import static org.apache.commons.io.FilenameUtils.getBaseName;
32  
33  /**
34   * Helpers for dealing with {@link org.apache.hadoop.filecache.DistributedCache}
35   */
36  public class DistributedCacheUtils {
37    /** Logger */
38    private static final Logger LOG = Logger.getLogger(
39        DistributedCacheUtils.class);
40  
41    /** Don't construct */
42    private DistributedCacheUtils() { }
43  
44    /**
45     * Get local path to file from a DistributedCache.
46     *
47     * @param conf Configuration
48     * @param pathToMatch Path that was used to insert into DistributedCache
49     * @return Path matched, or Optional.absent()
50     */
51    public static Optional<Path>
52    getLocalCacheFile(Configuration conf, String pathToMatch) {
53      String nameToPath = FilenameUtils.getName(pathToMatch);
54      Path[] paths;
55      try {
56        paths = DistributedCache.getLocalCacheFiles(conf);
57      } catch (IOException e) {
58        return Optional.absent();
59      }
60      for (Path path : paths) {
61        if (FilenameUtils.getName(path.toString()).equals(nameToPath)) {
62          return Optional.of(path);
63        }
64      }
65      return Optional.absent();
66    }
67  
68    /**
69     * Copy a file to HDFS if it is local. If the path is already in HDFS, this
70     * call does nothing.
71     *
72     * @param path path to file
73     * @param conf Configuration
74     * @return path to file on HDFS.
75     */
76    public static Path copyToHdfs(Path path, Configuration conf) {
77      if (path.toString().startsWith("hdfs://")) {
78        // Already on HDFS
79        return path;
80      }
81  
82      FileSystem fs = null;
83      try {
84        fs = FileSystem.get(conf);
85      } catch (IOException e) {
86        throw new IllegalArgumentException("Failed to get HDFS FileSystem", e);
87      }
88      String name = getBaseName(path.toString()) + "-" + System.nanoTime();
89      Path remotePath = new Path("/tmp/giraph", name);
90      LOG.info("copyToHdfsIfNecessary: Copying " + path + " to " +
91          remotePath + " on hdfs " + fs.getUri());
92      try {
93        fs.copyFromLocalFile(false, true, path, remotePath);
94      } catch (IOException e) {
95        throw new IllegalArgumentException(
96            "Failed to copy jython script from local path " + path +
97            " to hdfs path " + remotePath + " on hdfs " + fs.getUri(), e);
98      }
99      return remotePath;
100   }
101 
102   /**
103    * Copy a file to HDFS if it is local, and adds it to the distributed cache.
104    *
105    * @param path path to file
106    * @param conf Configuration
107    * @return remote path to file
108    */
109   public static Path copyAndAdd(Path path, Configuration conf) {
110     Path remotePath = copyToHdfs(path, conf);
111     DistributedCache.addCacheFile(remotePath.toUri(), conf);
112     return remotePath;
113   }
114 }