1/*2 * Licensed to the Apache Software Foundation (ASF) under one3 * or more contributor license agreements. See the NOTICE file4 * distributed with this work for additional information5 * regarding copyright ownership. The ASF licenses this file6 * to you under the Apache License, Version 2.0 (the7 * "License"); you may not use this file except in compliance8 * with the License. You may obtain a copy of the License at9 *10 * http://www.apache.org/licenses/LICENSE-2.011 *12 * Unless required by applicable law or agreed to in writing, software13 * distributed under the License is distributed on an "AS IS" BASIS,14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.15 * See the License for the specific language governing permissions and16 * limitations under the License.17 */18package org.apache.giraph.utils;
1920import org.apache.commons.io.FilenameUtils;
21import org.apache.hadoop.conf.Configuration;
22import org.apache.hadoop.filecache.DistributedCache;
23import org.apache.hadoop.fs.FileSystem;
24import org.apache.hadoop.fs.Path;
25import org.apache.log4j.Logger;
2627import com.google.common.base.Optional;
2829import java.io.IOException;
3031importstatic org.apache.commons.io.FilenameUtils.getBaseName;
3233/**34 * Helpers for dealing with {@link org.apache.hadoop.filecache.DistributedCache}35 */36publicclassDistributedCacheUtils {
37/** Logger */38privatestaticfinal Logger LOG = Logger.getLogger(
39 DistributedCacheUtils.class);
4041/** Don't construct */42privateDistributedCacheUtils() { }
4344/**45 * Get local path to file from a DistributedCache.46 *47 * @param conf Configuration48 * @param pathToMatch Path that was used to insert into DistributedCache49 * @return Path matched, or Optional.absent()50 */51publicstatic Optional<Path>
52 getLocalCacheFile(Configuration conf, String pathToMatch) {
53 String nameToPath = FilenameUtils.getName(pathToMatch);
54 Path[] paths;
55try {
56 paths = DistributedCache.getLocalCacheFiles(conf);
57 } catch (IOException e) {
58return Optional.absent();
59 }
60for (Path path : paths) {
61if (FilenameUtils.getName(path.toString()).equals(nameToPath)) {
62return Optional.of(path);
63 }
64 }
65return Optional.absent();
66 }
6768/**69 * Copy a file to HDFS if it is local. If the path is already in HDFS, this70 * call does nothing.71 *72 * @param path path to file73 * @param conf Configuration74 * @return path to file on HDFS.75 */76publicstatic Path copyToHdfs(Path path, Configuration conf) {
77if (path.toString().startsWith("hdfs://")) {
78// Already on HDFS79return path;
80 }
8182 FileSystem fs = null;
83try {
84 fs = FileSystem.get(conf);
85 } catch (IOException e) {
86thrownew IllegalArgumentException("Failed to get HDFS FileSystem", e);
87 }
88 String name = getBaseName(path.toString()) + "-" + System.nanoTime();
89 Path remotePath = new Path("/tmp/giraph", name);
90 LOG.info("copyToHdfsIfNecessary: Copying " + path + " to " +
91 remotePath + " on hdfs " + fs.getUri());
92try {
93 fs.copyFromLocalFile(false, true, path, remotePath);
94 } catch (IOException e) {
95thrownew IllegalArgumentException(
96"Failed to copy jython script from local path " + path +
97" to hdfs path " + remotePath + " on hdfs " + fs.getUri(), e);
98 }
99return remotePath;
100 }
101102/**103 * Copy a file to HDFS if it is local, and adds it to the distributed cache.104 *105 * @param path path to file106 * @param conf Configuration107 * @return remote path to file108 */109publicstatic Path copyAndAdd(Path path, Configuration conf) {
110 Path remotePath = copyToHdfs(path, conf);
111 DistributedCache.addCacheFile(remotePath.toUri(), conf);
112return remotePath;
113 }
114 }