package cascading.tap.hadoop;

import cascading.flow.FlowProcess;
import cascading.flow.hadoop.util.HadoopUtil;
import cascading.scheme.Scheme;
import cascading.scheme.hadoop.SequenceFile;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.TapException;
import cascading.tap.hadoop.io.CombineFileRecordReaderWrapper;
import cascading.tap.hadoop.io.HadoopTupleEntrySchemeCollector;
import cascading.tap.hadoop.io.HadoopTupleEntrySchemeIterator;
import cascading.tap.type.FileType;
import cascading.tuple.Fields;
import cascading.tuple.TupleEntryCollector;
import cascading.tuple.TupleEntryIterator;
import cascading.tuple.hadoop.TupleSerialization;
import cascading.util.Util;
import java.beans.ConstructorProperties;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3native.NativeS3FileSystem;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.Utils;
import org.apache.hadoop.mapred.lib.CombineFileInputFormat;
import org.apache.hadoop.mapred.lib.CombineFileRecordReader;
import org.apache.hadoop.mapred.lib.CombineFileSplit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:cascading/tap/hadoop/Hfs.class */
public class Hfs extends Tap<JobConf, RecordReader, OutputCollector> implements FileType<JobConf> {
    private static final Logger LOG = LoggerFactory.getLogger(Hfs.class);

    /** Configuration key {@code cascading.tmp.dir}; marked deprecated. */
    @Deprecated
    public static final String TEMPORARY_DIRECTORY = "cascading.tmp.dir";
    /** Path string for this tap, as stored by {@link #setStringPath(String)}. */
    protected String stringPath;
    /** URI scheme cached by {@link #getURIScheme(JobConf)}; transient, so not part of the serialized form. */
    transient URI uriScheme;
    /** {@link Path} built from {@link #stringPath} on the first {@link #getPath()} call; transient. */
    transient Path path;
    /** Listing of {@link #getPath()} filled in once by makeStatuses; this class never clears it again. */
    private transient FileStatus[] statuses;

    /* loaded from: input_file:cascading/tap/hadoop/Hfs$CombinedInputFormat.class */
    static class CombinedInputFormat extends CombineFileInputFormat implements Configurable {
        private Configuration conf;

        CombinedInputFormat() {
        }

        public RecordReader getRecordReader(InputSplit inputSplit, JobConf jobConf, Reporter reporter) throws IOException {
            return new CombineFileRecordReader(jobConf, (CombineFileSplit) inputSplit, reporter, CombineFileRecordReaderWrapper.class);
        }

        public void setConf(Configuration configuration) {
            this.conf = configuration;
            setMaxSplitSize(configuration.getLong(HfsProps.COMBINE_INPUT_FILES_SIZE_MAX, 0L));
        }

        public Configuration getConf() {
            return this.conf;
        }
    }

    /**
     * Deprecated helper that stores the temporary directory in the given properties
     * under the key {@code cascading.tmp.dir}.
     *
     * @param map the properties to update
     * @param str the temporary directory value to store
     */
    @Deprecated
    public static void setTemporaryDirectory(Map<Object, Object> map, String str) {
        final String key = "cascading.tmp.dir";
        map.put(key, str);
    }

    /**
     * Deprecated helper that reads the value stored under {@code cascading.tmp.dir}.
     *
     * @param map the properties to read
     * @return the stored value cast to {@code String}, or {@code null} when the key is absent
     */
    @Deprecated
    public static String getTemporaryDirectory(Map<Object, Object> map) {
        Object value = map.get("cascading.tmp.dir");
        return (String) value;
    }

    /**
     * Looks up {@code HfsProps.LOCAL_MODE_SCHEME} in the job configuration.
     *
     * @param jobConf the configuration to read
     * @param str     the value to return when the property is not set
     * @return the configured scheme, or {@code str} as the fallback
     */
    protected static String getLocalModeScheme(JobConf jobConf, String str) {
        String configuredScheme = jobConf.get(HfsProps.LOCAL_MODE_SCHEME, str);
        return configuredScheme;
    }

    /**
     * Reads the {@code HfsProps.COMBINE_INPUT_FILES} flag.
     *
     * @param jobConf the configuration to read
     * @return the configured flag; {@code false} when it is not set
     */
    protected static boolean getUseCombinedInput(JobConf jobConf) {
        final boolean whenUnset = false;
        return jobConf.getBoolean(HfsProps.COMBINE_INPUT_FILES, whenUnset);
    }

    /**
     * Reads the {@code HfsProps.COMBINE_INPUT_FILES_SAFE_MODE} flag.
     *
     * @param jobConf the configuration to read
     * @return the configured flag; {@code true} when it is not set
     */
    protected static boolean getCombinedInputSafeMode(JobConf jobConf) {
        final boolean whenUnset = true;
        return jobConf.getBoolean(HfsProps.COMBINE_INPUT_FILES_SAFE_MODE, whenUnset);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Creates an instance with no scheme and no path; nothing is initialized here, so
     * {@link #getPath()} fails until {@link #setStringPath(String)} has been called.
     */
    public Hfs() {
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Creates an instance with the given scheme and no path; the path must be supplied
     * later through {@link #setStringPath(String)}.
     *
     * @param scheme the scheme handed to the superclass constructor
     */
    @ConstructorProperties({"scheme"})
    public Hfs(Scheme<JobConf, RecordReader, OutputCollector, ?, ?> scheme) {
        super(scheme);
    }

    /**
     * Deprecated. Creates a tap that uses a {@link SequenceFile} scheme built from the given fields.
     *
     * @param fields the fields passed to the {@link SequenceFile} scheme
     * @param str    the path string, stored via {@link #setStringPath(String)}
     */
    @ConstructorProperties({"fields", "stringPath"})
    @Deprecated
    public Hfs(Fields fields, String str) {
        super(new SequenceFile(fields));
        setStringPath(str);
    }

    /**
     * Deprecated. Creates a tap that uses a {@link SequenceFile} scheme built from the given fields.
     *
     * @param fields the fields passed to the {@link SequenceFile} scheme
     * @param str    the path string, stored via {@link #setStringPath(String)}
     * @param z      {@code true} selects {@link SinkMode#REPLACE}, {@code false} selects {@link SinkMode#KEEP}
     */
    @ConstructorProperties({"fields", "stringPath", "replace"})
    @Deprecated
    public Hfs(Fields fields, String str, boolean z) {
        super(new SequenceFile(fields), z ? SinkMode.REPLACE : SinkMode.KEEP);
        setStringPath(str);
    }

    /**
     * Deprecated. Creates a tap that uses a {@link SequenceFile} scheme built from the given fields.
     *
     * @param fields   the fields passed to the {@link SequenceFile} scheme
     * @param str      the path string, stored via {@link #setStringPath(String)}
     * @param sinkMode the sink mode handed to the superclass; {@link SinkMode#UPDATE} is rejected
     * @throws IllegalArgumentException if {@code sinkMode} is {@link SinkMode#UPDATE}
     */
    @ConstructorProperties({"fields", "stringPath", "sinkMode"})
    @Deprecated
    public Hfs(Fields fields, String str, SinkMode sinkMode) {
        super(new SequenceFile(fields), sinkMode);
        setStringPath(str);
        // The check runs after the superclass constructor because super(...) must be the first statement.
        if (sinkMode == SinkMode.UPDATE) {
            throw new IllegalArgumentException("updates are not supported");
        }
    }

    /**
     * Creates a tap for the given scheme and path.
     *
     * @param scheme the scheme handed to the superclass constructor
     * @param str    the path string, stored via {@link #setStringPath(String)}
     */
    @ConstructorProperties({"scheme", "stringPath"})
    public Hfs(Scheme<JobConf, RecordReader, OutputCollector, ?, ?> scheme, String str) {
        super(scheme);
        setStringPath(str);
    }

    /**
     * Deprecated. Creates a tap for the given scheme and path with a boolean replace flag.
     *
     * @param scheme the scheme handed to the superclass constructor
     * @param str    the path string, stored via {@link #setStringPath(String)}
     * @param z      {@code true} selects {@link SinkMode#REPLACE}, {@code false} selects {@link SinkMode#KEEP}
     */
    @ConstructorProperties({"scheme", "stringPath", "replace"})
    @Deprecated
    public Hfs(Scheme<JobConf, RecordReader, OutputCollector, ?, ?> scheme, String str, boolean z) {
        super(scheme, z ? SinkMode.REPLACE : SinkMode.KEEP);
        setStringPath(str);
    }

    /**
     * Creates a tap for the given scheme, path and sink mode.
     *
     * <p>Unlike the {@code Fields}-based overload, this constructor performs no check on
     * the sink mode itself.
     *
     * @param scheme   the scheme handed to the superclass constructor
     * @param str      the path string, stored via {@link #setStringPath(String)}
     * @param sinkMode the sink mode handed to the superclass constructor
     */
    @ConstructorProperties({"scheme", "stringPath", "sinkMode"})
    public Hfs(Scheme<JobConf, RecordReader, OutputCollector, ?, ?> scheme, String str, SinkMode sinkMode) {
        super(scheme, sinkMode);
        setStringPath(str);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Stores the path string for this tap after passing it through {@code Util.normalizeUrl}.
     *
     * @param str the path string to normalize and store
     */
    public void setStringPath(String str) {
        final String normalized = Util.normalizeUrl(str);
        this.stringPath = normalized;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Replaces the cached URI scheme that {@link #getURIScheme(JobConf)} returns.
     *
     * @param uri the URI scheme to cache; {@code null} makes the next lookup recompute it
     */
    public void setUriScheme(URI uri) {
        uriScheme = uri;
    }

    /**
     * Returns the URI scheme for this tap, computing it with {@link #makeURIScheme(JobConf)}
     * on first use and reusing the cached value afterwards.
     *
     * @param jobConf the configuration used when the scheme has to be computed
     * @return the cached or freshly computed URI scheme
     */
    public URI getURIScheme(JobConf jobConf) {
        if (this.uriScheme == null) {
            this.uriScheme = makeURIScheme(jobConf);
        }
        return this.uriScheme;
    }

    /**
     * Derives the URI scheme of this tap from its path string.
     *
     * <ul>
     *   <li>no scheme in the path: the default filesystem's URI is used;</li>
     *   <li>scheme but no authority: {@code scheme:///};</li>
     *   <li>scheme and authority: {@code scheme://authority}.</li>
     * </ul>
     *
     * @param jobConf the configuration used to resolve the default filesystem
     * @return the URI identifying the filesystem for this tap
     * @throws TapException if the resulting URI is syntactically invalid
     */
    protected URI makeURIScheme(JobConf jobConf) {
        try {
            LOG.debug("handling path: {}", this.stringPath);
            URI pathUri = new Path(this.stringPath).toUri();
            String scheme = pathUri.getScheme();
            String authority = pathUri.getAuthority();
            LOG.debug("found scheme: {}, authority: {}", scheme, authority);

            URI identifier;
            if (scheme == null) {
                // Without a scheme the authority is irrelevant; fall back to the default filesystem.
                identifier = getDefaultFileSystemURIScheme(jobConf);
            } else if (authority == null) {
                identifier = new URI(scheme + ":///");
            } else {
                identifier = new URI(scheme + "://" + authority);
            }

            LOG.debug("using uri scheme: {}", identifier);
            return identifier;
        } catch (URISyntaxException syntaxFailure) {
            throw new TapException("could not determine scheme from path: " + getPath(), syntaxFailure);
        }
    }

    /**
     * Returns the URI reported by the default filesystem for the given configuration.
     *
     * @param jobConf the configuration used to obtain the default filesystem
     * @return the default filesystem's URI
     */
    public URI getDefaultFileSystemURIScheme(JobConf jobConf) {
        FileSystem defaultFileSystem = getDefaultFileSystem(jobConf);
        return defaultFileSystem.getUri();
    }

    /**
     * Obtains the filesystem that {@code FileSystem.get(jobConf)} yields for the configuration.
     *
     * @param jobConf the configuration to resolve the filesystem from
     * @return the resolved filesystem
     * @throws TapException wrapping any {@link IOException} raised during the lookup
     */
    protected FileSystem getDefaultFileSystem(JobConf jobConf) {
        final FileSystem defaultFileSystem;
        try {
            defaultFileSystem = FileSystem.get(jobConf);
        } catch (IOException ioFailure) {
            throw new TapException("unable to get handle to underlying filesystem", ioFailure);
        }
        return defaultFileSystem;
    }

    /**
     * Obtains the filesystem addressed by this tap's URI scheme.
     *
     * @param jobConf the configuration used for both the scheme lookup and the filesystem lookup
     * @return the filesystem for {@link #getURIScheme(JobConf)}
     * @throws TapException wrapping any {@link IOException} raised during the lookup
     */
    protected FileSystem getFileSystem(JobConf jobConf) {
        URI scheme = getURIScheme(jobConf);
        try {
            return FileSystem.get(scheme, jobConf);
        } catch (IOException ioFailure) {
            String message = "unable to get handle to get filesystem for: " + scheme.getScheme();
            throw new TapException(message, ioFailure);
        }
    }

    /**
     * Returns this tap's identifier, which is the string form of {@link #getPath()}.
     *
     * @return the path as a string, without qualification against a filesystem
     */
    @Override // cascading.tap.Tap
    public String getIdentifier() {
        Path current = getPath();
        return current.toString();
    }

    /**
     * Returns the {@link Path} for this tap, creating and caching it from the stored
     * path string on first use.
     *
     * @return the cached path
     * @throws IllegalStateException if no path string has been set yet
     */
    public Path getPath() {
        if (this.path == null) {
            if (this.stringPath == null) {
                throw new IllegalStateException("path not initialized");
            }
            this.path = new Path(this.stringPath);
        }
        return this.path;
    }

    /**
     * Returns the path of this tap qualified against its filesystem.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @return the qualified path as a string
     */
    @Override // cascading.tap.Tap
    public String getFullIdentifier(JobConf jobConf) {
        Path unqualified = getPath();
        FileSystem fileSystem = getFileSystem(jobConf);
        return unqualified.makeQualified(fileSystem).toString();
    }

    /**
     * Configures the job to read from this tap: registers the fully qualified identifier
     * as an input path and then rejects configurations that list the same path twice.
     *
     * @param flowProcess the current flow process
     * @param jobConf     the job configuration to update
     */
    @Override // cascading.tap.Tap
    public void sourceConfInit(FlowProcess<JobConf> flowProcess, JobConf jobConf) {
        String qualifiedIdentifier = getFullIdentifier(jobConf);
        applySourceConfInitIdentifiers(flowProcess, jobConf, qualifiedIdentifier);
        verifyNoDuplicates(jobConf);
    }

    /**
     * Ensures the input paths registered on the configuration are all distinct.
     *
     * @param jobConf the configuration whose input paths are inspected
     * @throws TapException naming the first path that occurs more than once
     */
    protected static void verifyNoDuplicates(JobConf jobConf) {
        Path[] inputPaths = FileInputFormat.getInputPaths(jobConf);
        // Capacity is sized so the set does not need to grow at the 0.75 load factor.
        Set<Path> seen = new HashSet<Path>((int) (inputPaths.length / 0.75f));
        for (int index = 0; index < inputPaths.length; index++) {
            Path candidate = inputPaths[index];
            boolean firstOccurrence = seen.add(candidate);
            if (!firstOccurrence) {
                throw new TapException("may not add duplicate paths, found: " + candidate);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Registers each identifier as an input path, in the order given, and then completes
     * the source configuration.
     *
     * @param flowProcess the current flow process
     * @param jobConf     the job configuration to update
     * @param strArr      the identifiers to add as input paths
     */
    public void applySourceConfInitIdentifiers(FlowProcess<JobConf> flowProcess, JobConf jobConf, String... strArr) {
        for (int index = 0; index < strArr.length; index++) {
            Path inputPath = new Path(strArr[index]);
            sourceConfInitAddInputPath(jobConf, inputPath);
        }
        sourceConfInitComplete(flowProcess, jobConf);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Adds one input path to the job and switches the job to local mode when the path's
     * scheme calls for it.
     *
     * @param jobConf the job configuration to update
     * @param path    the input path to add
     */
    public void sourceConfInitAddInputPath(JobConf jobConf, Path path) {
        final String localModeMessage = "forcing job to local mode, via source: ";
        FileInputFormat.addInputPath(jobConf, path);
        makeLocal(jobConf, path, localModeMessage);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Finishes source-side configuration after all input paths have been added: delegates
     * to the superclass, registers the tuple serializations, and finally applies the
     * combined-input-format handling.
     *
     * @param flowProcess the current flow process
     * @param jobConf     the job configuration to update
     */
    public void sourceConfInitComplete(FlowProcess<JobConf> flowProcess, JobConf jobConf) {
        // Pass both arguments through as they are. A JobConf is not a FlowProcess, so the
        // casts previously applied here could never succeed.
        super.sourceConfInit(flowProcess, jobConf);
        TupleSerialization.setSerializations(jobConf);
        handleCombineFileInputFormat(jobConf);
    }

    /**
     * Wraps the job's input format in {@link CombinedInputFormat} when combined input is enabled.
     *
     * <p>The input format currently set under {@code mapred.input.format.class} is recorded
     * under {@code CombineFileRecordReaderWrapper.INDIVIDUAL_INPUT_FORMAT} and the job's input
     * format is replaced with {@link CombinedInputFormat}. If the job already uses
     * {@link CombinedInputFormat} and the individual format is recorded, nothing is changed.
     *
     * <p>Input formats that are not {@link FileInputFormat}s cannot be combined: in safe mode
     * (the default) this is an error, otherwise a warning is logged and the job is left as is.
     *
     * @param jobConf the job configuration to inspect and update
     * @throws TapException if no input format is set, if the combined format is set without
     *                      an individual format, or if safe mode rejects the input format
     */
    private void handleCombineFileInputFormat(JobConf jobConf) {
        if (!getUseCombinedInput(jobConf)) {
            return;
        }

        String individualInputFormat = jobConf.get("mapred.input.format.class");
        if (individualInputFormat == null) {
            throw new TapException("input format is missing from the underlying scheme");
        }

        if (individualInputFormat.equals(CombinedInputFormat.class.getName())) {
            if (jobConf.get(CombineFileRecordReaderWrapper.INDIVIDUAL_INPUT_FORMAT) == null) {
                throw new TapException("the input format class is already the combined input format but the underlying input format is missing");
            }
            // Already wrapped. Continuing would store CombinedInputFormat itself as the
            // individual input format and lose the real one.
            return;
        }

        boolean safeMode = getCombinedInputSafeMode(jobConf);
        Class<?> inputFormatClass = jobConf.getClass("mapred.input.format.class", (Class<?>) null);
        if (FileInputFormat.class.isAssignableFrom(inputFormatClass)) {
            jobConf.set(CombineFileRecordReaderWrapper.INDIVIDUAL_INPUT_FORMAT, individualInputFormat);
            jobConf.setInputFormat(CombinedInputFormat.class);
            return;
        }

        if (safeMode) {
            throw new TapException("input format must be of type org.apache.hadoop.mapred.FileInputFormat, got: " + individualInputFormat);
        }
        LOG.warn("not combining input splits with CombineFileInputFormat, {} is not of type org.apache.hadoop.mapred.FileInputFormat.", individualInputFormat);
    }

    /**
     * Configures the job to write to this tap: sets the fully qualified identifier as the
     * output path, delegates to the superclass, switches the job to local mode when the
     * path's scheme calls for it, and registers the tuple serializations.
     *
     * @param flowProcess the current flow process
     * @param jobConf     the job configuration to update
     */
    @Override // cascading.tap.Tap
    public void sinkConfInit(FlowProcess<JobConf> flowProcess, JobConf jobConf) {
        Path qualifiedPath = new Path(getFullIdentifier(jobConf));
        FileOutputFormat.setOutputPath(jobConf, qualifiedPath);
        // Pass both arguments through as they are. A JobConf is not a FlowProcess, so the
        // casts previously applied here could never succeed.
        super.sinkConfInit(flowProcess, jobConf);
        makeLocal(jobConf, qualifiedPath, "forcing job to local mode, via sink: ");
        TupleSerialization.setSerializations(jobConf);
    }

    /**
     * Forces the job into local mode when it is not local already and the given path uses
     * the configured local-mode scheme ({@code file} unless overridden).
     *
     * <p>A path without a scheme is treated as not matching, so it leaves the job untouched
     * instead of failing with a {@link NullPointerException}.
     *
     * @param jobConf the job configuration to inspect and possibly update
     * @param path    the source or sink path whose scheme is compared
     * @param str     the prefix of the info message logged when local mode is forced
     */
    private void makeLocal(JobConf jobConf, Path path, String str) {
        String localModeScheme = getLocalModeScheme(jobConf, "file");
        if (HadoopUtil.isLocal(jobConf)) {
            return;
        }

        // getScheme() is null for unqualified paths such as "/tmp/data".
        String pathScheme = path.toUri().getScheme();
        if (pathScheme == null || !pathScheme.equalsIgnoreCase(localModeScheme)) {
            return;
        }

        if (LOG.isInfoEnabled()) {
            LOG.info(str + toString());
        }
        HadoopUtil.setLocal(jobConf);
    }

    /**
     * Opens this tap for reading by wrapping the given record reader in a
     * {@link HadoopTupleEntrySchemeIterator}.
     *
     * @param flowProcess  the current flow process
     * @param recordReader the record reader handed to the iterator
     * @return an iterator over the tuples of this tap
     * @throws IOException if the iterator cannot be created
     */
    @Override // cascading.tap.Tap
    public TupleEntryIterator openForRead(FlowProcess<JobConf> flowProcess, RecordReader recordReader) throws IOException {
        TupleEntryIterator iterator = new HadoopTupleEntrySchemeIterator(flowProcess, this, recordReader);
        return iterator;
    }

    /**
     * Opens this tap for writing by wrapping the given output collector in a
     * {@link HadoopTupleEntrySchemeCollector}.
     *
     * @param flowProcess     the current flow process
     * @param outputCollector the output collector handed to the tuple collector
     * @return a collector that writes tuples to this tap
     * @throws IOException if the collector cannot be created
     */
    @Override // cascading.tap.Tap
    public TupleEntryCollector openForWrite(FlowProcess<JobConf> flowProcess, OutputCollector outputCollector) throws IOException {
        TupleEntryCollector collector = new HadoopTupleEntrySchemeCollector(flowProcess, this, outputCollector);
        return collector;
    }

    /**
     * Creates the directories for this tap's path on its filesystem.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @return the result of {@code FileSystem.mkdirs} for {@link #getPath()}
     * @throws IOException if the filesystem operation fails
     */
    @Override // cascading.tap.Tap
    public boolean createResource(JobConf jobConf) throws IOException {
        // The guard avoids computing the full identifier when debug logging is off.
        if (LOG.isDebugEnabled()) {
            LOG.debug("making dirs: {}", getFullIdentifier(jobConf));
        }
        FileSystem fileSystem = getFileSystem(jobConf);
        return fileSystem.mkdirs(getPath());
    }

    /**
     * Deletes the resource identified by this tap's fully qualified identifier.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @return the outcome reported by the underlying delete helper
     * @throws IOException if the filesystem operation fails
     */
    @Override // cascading.tap.Tap
    public boolean deleteResource(JobConf jobConf) throws IOException {
        String target = getFullIdentifier(jobConf);
        return deleteFullIdentifier(jobConf, target);
    }

    /**
     * Recursively deletes the given path on this tap's filesystem.
     *
     * <p>A path of depth zero is never deleted; the method reports {@code true} for it without
     * touching the filesystem. A {@link NullPointerException} raised by the delete call is
     * treated as success when the filesystem is a {@link NativeS3FileSystem} and is rethrown
     * otherwise.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @param str     the identifier of the path to delete
     * @return {@code true} in the two cases above, otherwise the result of {@code FileSystem.delete}
     * @throws IOException if the filesystem operation fails
     */
    private boolean deleteFullIdentifier(JobConf jobConf, String str) throws IOException {
        if (LOG.isDebugEnabled()) {
            LOG.debug("deleting: {}", str);
        }

        Path target = new Path(str);
        if (target.depth() == 0) {
            return true;
        }

        FileSystem fileSystem = getFileSystem(jobConf);
        try {
            return fileSystem.delete(target, true);
        } catch (NullPointerException deleteFailure) {
            if (!(fileSystem instanceof NativeS3FileSystem)) {
                throw deleteFailure;
            }
            return true;
        }
    }

    /**
     * Deletes a resource located at or below this tap's path.
     *
     * <p>The identifier is qualified against this tap's filesystem and is only deleted when
     * it equals this tap's full identifier or lies beneath it. The comparison respects path
     * boundaries, so a sibling that merely shares a name prefix (for example {@code /data/out2}
     * for a tap at {@code /data/out}) is rejected.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @param str     the identifier of the child resource to delete
     * @return {@code false} if the identifier is not at or below this tap's path, otherwise
     *         the outcome of the delete
     * @throws IOException if the filesystem operation fails
     */
    public boolean deleteChildResource(JobConf jobConf, String str) throws IOException {
        Path childPath = new Path(str).makeQualified(getFileSystem(jobConf));
        String child = childPath.toString();
        String parent = getFullIdentifier(jobConf);

        // A filesystem root already ends with the separator; otherwise append one so the
        // prefix test cannot match in the middle of a path element.
        String parentPrefix = parent.endsWith(Path.SEPARATOR) ? parent : parent + Path.SEPARATOR;
        if (!child.equals(parent) && !child.startsWith(parentPrefix)) {
            return false;
        }
        return deleteFullIdentifier(jobConf, child);
    }

    /**
     * Reports whether this tap's path matches at least one entry on its filesystem,
     * using {@code FileSystem.globStatus}.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @return {@code true} if the glob lookup returns a non-empty result
     * @throws IOException if the filesystem operation fails
     */
    @Override // cascading.tap.Tap
    public boolean resourceExists(JobConf jobConf) throws IOException {
        FileStatus[] matches = getFileSystem(jobConf).globStatus(getPath());
        if (matches == null) {
            return false;
        }
        return matches.length > 0;
    }

    /**
     * Reports whether this tap's path exists and is a directory.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @return {@code false} if the resource does not exist, otherwise its directory flag
     * @throws IOException if the filesystem operation fails
     */
    @Override // cascading.tap.type.FileType
    public boolean isDirectory(JobConf jobConf) throws IOException {
        if (!resourceExists(jobConf)) {
            return false;
        }
        FileStatus status = getFileSystem(jobConf).getFileStatus(getPath());
        return status.isDir();
    }

    /**
     * Returns the length of the file this tap points to.
     *
     * <p>The file status is fetched once, so the directory check and the reported length
     * come from the same snapshot.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @return the file length, or {@code 0} if the resource does not exist or is a directory
     * @throws IOException if the filesystem operation fails
     */
    @Override // cascading.tap.type.FileType
    public long getSize(JobConf jobConf) throws IOException {
        if (!resourceExists(jobConf)) {
            return 0L;
        }
        FileStatus fileStatus = getFileSystem(jobConf).getFileStatus(getPath());
        if (fileStatus.isDir()) {
            return 0L;
        }
        return fileStatus.getLen();
    }

    /**
     * Returns the block size of the file this tap points to.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @return the block size, or {@code 0} if the resource does not exist or is a directory
     * @throws IOException if the filesystem operation fails
     */
    public long getBlockSize(JobConf jobConf) throws IOException {
        if (resourceExists(jobConf)) {
            FileStatus status = getFileSystem(jobConf).getFileStatus(getPath());
            if (!status.isDir()) {
                return status.getBlockSize();
            }
        }
        return 0L;
    }

    /**
     * Returns the replication value of the file this tap points to.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @return the replication value, or {@code 0} if the resource does not exist or is a directory
     * @throws IOException if the filesystem operation fails
     */
    public int getReplication(JobConf jobConf) throws IOException {
        if (resourceExists(jobConf)) {
            FileStatus status = getFileSystem(jobConf).getFileStatus(getPath());
            if (!status.isDir()) {
                return status.getReplication();
            }
        }
        return 0;
    }

    /**
     * Returns the identifiers one level below this tap's path, not fully qualified.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @return the child identifiers at depth {@code 1}
     * @throws IOException if the filesystem operation fails
     */
    @Override // cascading.tap.type.FileType
    public String[] getChildIdentifiers(JobConf jobConf) throws IOException {
        final int depth = 1;
        final boolean fullyQualified = false;
        return getChildIdentifiers(jobConf, depth, fullyQualified);
    }

    /**
     * Returns the identifiers found exactly {@code i} levels below this tap's path.
     *
     * <p>Results keep the order in which the filesystem listing produced them, without
     * duplicates.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @param i       the depth to descend; {@code 0} refers to this tap's own path
     * @param z       {@code true} to keep the full qualified path of each child, {@code false}
     *                to strip the parent's full identifier before re-rooting at {@link #getIdentifier()}
     * @return the child identifiers; empty if the resource does not exist
     * @throws IOException if the filesystem operation fails
     */
    @Override // cascading.tap.type.FileType
    public String[] getChildIdentifiers(JobConf jobConf, int i, boolean z) throws IOException {
        if (!resourceExists(jobConf)) {
            return new String[0];
        }
        if (i == 0 && !z) {
            return new String[]{getIdentifier()};
        }

        String fullIdentifier = getFullIdentifier(jobConf);
        // When not fully qualified, drop the parent identifier plus the separator that follows it.
        int trimLength;
        if (z) {
            trimLength = 0;
        } else {
            trimLength = fullIdentifier.length() + 1;
        }

        Set<String> results = new LinkedHashSet<String>();
        getChildPaths(jobConf, results, trimLength, new Path(fullIdentifier), i);
        return results.toArray(new String[results.size()]);
    }

    /**
     * Recursively collects the paths found {@code i2} levels below {@code path}.
     *
     * <p>While levels remain, the children of {@code path} are listed through an
     * {@code OutputFilesFilter} and visited with the depth reduced by one. At depth zero the
     * first {@code i} characters of the path string are dropped and the remainder is added,
     * resolved against {@link #getIdentifier()} when that identifier is non-empty.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @param set     the collection receiving the results
     * @param i       the number of leading characters to strip from each path string
     * @param path    the path currently being visited
     * @param i2      the number of levels still to descend
     * @throws IOException if the filesystem operation fails
     */
    private void getChildPaths(JobConf jobConf, Set<String> set, int i, Path path, int i2) throws IOException {
        if (i2 != 0) {
            FileStatus[] children = getFileSystem(jobConf).listStatus(path, new Utils.OutputFileUtils.OutputFilesFilter());
            if (children != null) {
                for (FileStatus child : children) {
                    getChildPaths(jobConf, set, i, child.getPath(), i2 - 1);
                }
            }
            return;
        }

        String relative = path.toString().substring(i);
        String identifier = getIdentifier();
        boolean noIdentifier = identifier == null || identifier.isEmpty();
        Path result = noIdentifier ? new Path(relative) : new Path(identifier, relative);
        set.add(result.toString());
    }

    /**
     * Returns the modification time of this tap's resource.
     *
     * <p>For a file this is the file's own modification time. For a directory it is the
     * newest modification time among the directory's immediate non-directory children, taken
     * from the listing cached by {@code makeStatuses}.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @return the modification time, or {@code 0} if the resource does not exist or the
     *         directory has no file children
     * @throws IOException if the filesystem operation fails
     */
    @Override // cascading.tap.Tap
    public long getModifiedTime(JobConf jobConf) throws IOException {
        if (!resourceExists(jobConf)) {
            return 0L;
        }

        FileStatus rootStatus = getFileSystem(jobConf).getFileStatus(getPath());
        if (!rootStatus.isDir()) {
            return rootStatus.getModificationTime();
        }

        makeStatuses(jobConf);

        long newest = 0L;
        if (this.statuses != null) {
            for (FileStatus child : this.statuses) {
                // Sub-directories are skipped; there is no recursion into them.
                if (child.isDir()) {
                    continue;
                }
                newest = Math.max(newest, child.getModificationTime());
            }
        }
        return newest;
    }

    /**
     * Returns the temporary directory configured for the job: the value of
     * {@code cascading.tmp.dir} if set, otherwise the value of {@code hadoop.tmp.dir}.
     *
     * @param jobConf the configuration to read
     * @return the temporary directory as a {@link Path}
     */
    public static Path getTempPath(JobConf jobConf) {
        String cascadingTmpDir = jobConf.get("cascading.tmp.dir");
        return new Path(cascadingTmpDir != null ? cascadingTmpDir : jobConf.get("hadoop.tmp.dir"));
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds a directory name for a temporary path from the given name.
     *
     * <p>Leading underscores, non-word characters and whitespace are removed; an empty
     * remainder is replaced by {@code temp-path}; every remaining run of non-word characters
     * or whitespace becomes a single underscore; and the value of {@code Util.createUniqueID()}
     * is appended.
     *
     * @param str the name to derive the directory name from
     * @return the sanitized name followed by the generated id
     */
    public String makeTemporaryPathDirString(String str) {
        String withoutLeadingJunk = str.replaceAll("^[_\\W\\s]+", "");
        String base = withoutLeadingJunk.isEmpty() ? "temp-path" : withoutLeadingJunk;
        String sanitized = base.replaceAll("[\\W\\s]+", "_");
        return sanitized + Util.createUniqueID();
    }

    /**
     * Lists the entries under this tap's path and caches the result; later calls reuse the
     * cached listing instead of querying the filesystem again.
     *
     * @param jobConf the configuration used to resolve the filesystem
     * @throws IOException if the filesystem operation fails
     */
    private void makeStatuses(JobConf jobConf) throws IOException {
        if (this.statuses == null) {
            this.statuses = getFileSystem(jobConf).listStatus(getPath());
        }
    }
}
