/*
 * Decompiled with CFR 0.152.
 */
package org.apache.gobblin.source;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.gobblin.configuration.SourceState;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.source.PartitionAwareFileRetriever;
import org.apache.gobblin.source.PartitionAwareFileRetrieverUtils;
import org.apache.gobblin.source.extractor.filebased.FileBasedHelperException;
import org.apache.gobblin.source.extractor.hadoop.HadoopFsHelper;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.joda.time.ReadableDuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link PartitionAwareFileRetriever} that discovers partitions by matching the names of the
 * directories directly under the configured source directory against a regular expression.
 *
 * <p>The first capture group of the pattern is parsed as a {@code long} watermark. A directory is
 * selected when its watermark is strictly greater than the requested minimum and strictly less
 * than "now minus the configured lead time". Files inside selected directories are returned if
 * their name ends with the expected extension (and they are not the schema file, when the schema
 * is configured to live in the source directory).
 *
 * <p>{@link #init(SourceState)} must be called before any other method is used; the fields it
 * populates are otherwise {@code null}.
 */
public class RegexBasedPartitionedRetriever implements PartitionAwareFileRetriever {
    private static final Logger LOGGER = LoggerFactory.getLogger(RegexBasedPartitionedRetriever.class);

    private static final String PARTITION_PATTERN_KEY = "date.partitioned.source.partition.pattern";
    private static final String SOURCE_DIRECTORY_KEY = "source.filebased.data.directory";
    private static final String SCHEMA_IN_SOURCE_DIR_KEY = "schema.in.source.dir";
    private static final String SCHEMA_FILENAME_KEY = "schema.filename";
    private static final String DEFAULT_SCHEMA_FILENAME = "metadata.json";

    private Pattern pattern;
    private HadoopFsHelper helper;
    private Path sourceDir;
    private final String expectedExtension;
    private Duration leadTime;
    private boolean schemaInSourceDir;
    private String schemaFile;

    /**
     * @param expectedExtension file-name suffix that files must end with, with or without a
     *        leading dot; must not be {@code null}
     */
    public RegexBasedPartitionedRetriever(String expectedExtension) {
        this.expectedExtension = expectedExtension;
    }

    /**
     * Reads the retriever configuration from {@code state}.
     *
     * @param state job state holding the partition pattern, source directory, lead time and
     *        schema-file settings
     * @throws NullPointerException if the partition pattern property is not set
     * @throws IllegalArgumentException if the source directory property is not set
     */
    @Override
    public void init(SourceState state) {
        String regexPattern = state.getProp(PARTITION_PATTERN_KEY);
        Preconditions.checkNotNull(regexPattern, "Must specify a regex pattern in " + PARTITION_PATTERN_KEY);

        this.leadTime = PartitionAwareFileRetrieverUtils.getLeadTimeDurationFromConfig(state);
        this.pattern = Pattern.compile(regexPattern);
        this.helper = new HadoopFsHelper(state);

        // Fail with a message naming the missing key instead of an opaque error from Path.
        String dataDirectory = state.getProp(SOURCE_DIRECTORY_KEY);
        Preconditions.checkArgument(dataDirectory != null, "Must specify a source directory in " + SOURCE_DIRECTORY_KEY);
        this.sourceDir = new Path(dataDirectory);

        this.schemaInSourceDir = state.getPropAsBoolean(SCHEMA_IN_SOURCE_DIR_KEY, false);
        this.schemaFile = this.schemaInSourceDir ? state.getProp(SCHEMA_FILENAME_KEY, DEFAULT_SCHEMA_FILENAME) : "";
    }

    /**
     * Parses a watermark string as a decimal {@code long}.
     *
     * @throws NumberFormatException if {@code watermark} is not a parsable long
     */
    @Override
    public long getWatermarkFromString(String watermark) {
        return Long.parseLong(watermark);
    }

    /**
     * Extracts the watermark text (first capture group) from a directory name.
     *
     * @param directoryName simple name of the directory, without parent path
     * @return the text captured by group 1 of the configured pattern
     * @throws IllegalArgumentException if the whole name does not match the pattern or the
     *         pattern declares no capture group
     */
    protected String extractWatermarkFromDirectory(String directoryName) {
        Matcher matcher = this.pattern.matcher(directoryName);
        boolean hasWatermarkGroup = matcher.matches() && matcher.groupCount() >= 1;
        if (!hasWatermarkGroup) {
            throw new IllegalArgumentException(directoryName + " does not match regex " + this.pattern.toString());
        }
        return matcher.group(1);
    }

    /** Returns the watermark increment used by this retriever, which is always 1. */
    @Override
    public long getWatermarkIncrementMs() {
        return 1L;
    }

    /**
     * Lists the files of every qualifying partition directory, in sorted directory order.
     *
     * <p>{@code maxFilesToReturn} is a soft limit: all matching files of a directory are always
     * added together, and no further directory is listed once the number of collected files has
     * reached the limit. The result can therefore be larger than {@code maxFilesToReturn}.
     *
     * @param minWatermark exclusive lower bound for directory watermarks
     * @param maxFilesToReturn number of files after which no further directory is listed
     * @return the files found, each carrying the watermark of its directory
     * @throws IOException if the file system cannot be reached or listed
     */
    @Override
    public List<PartitionAwareFileRetriever.FileInfo> getFilesToProcess(long minWatermark, int maxFilesToReturn) throws IOException {
        long maxAllowedWatermark = new DateTime().minus(this.leadTime).getMillis();
        try {
            this.helper.connect();
            FileSystem fs = this.helper.getFileSystem();
            // The filter does not depend on the directory, so build it once.
            PathFilter fileFilter = this.getFileFilter();

            List<PartitionAwareFileRetriever.FileInfo> filesToProcess = new ArrayList<>();
            for (PartitionAwareFileRetriever.FileInfo outerDirectory : this.getOuterDirectories(fs, minWatermark, maxAllowedWatermark)) {
                Path directoryPath = new Path(outerDirectory.getFilePath());
                for (FileStatus file : fs.listStatus(directoryPath, fileFilter)) {
                    filesToProcess.add(new PartitionAwareFileRetriever.FileInfo(
                            file.getPath().toString(), file.getLen(), outerDirectory.getWatermarkMsSinceEpoch()));
                }
                // Stop as soon as the limit is reached; listing another directory when we already
                // hold exactly maxFilesToReturn files would needlessly exceed it.
                if (filesToProcess.size() >= maxFilesToReturn) {
                    break;
                }
            }
            return filesToProcess;
        } catch (FileBasedHelperException e) {
            throw new IOException("Error initializing Hadoop connection", e);
        }
    }

    /**
     * Finds the directories directly under the source directory whose watermark lies strictly
     * between {@code minWatermark} and {@code maxAllowedWatermark}.
     *
     * <p>Non-directories and directories whose name does not yield a parsable watermark are
     * skipped. Each returned entry has length 0 and carries the directory watermark.
     *
     * @return the selected directories, sorted by their natural ordering
     */
    private List<PartitionAwareFileRetriever.FileInfo> getOuterDirectories(FileSystem fs, long minWatermark, long maxAllowedWatermark) throws IOException {
        LOGGER.debug("Listing contents of {}", this.sourceDir);
        List<PartitionAwareFileRetriever.FileInfo> outerDirectories = new ArrayList<>();
        for (FileStatus file : fs.listStatus(this.sourceDir)) {
            Path path = file.getPath();
            if (!file.isDirectory()) {
                LOGGER.debug("Skipping non-directory {}", path.toUri());
                continue;
            }

            long watermark;
            try {
                // NumberFormatException is an IllegalArgumentException, so an unparsable capture
                // group is handled the same way as a non-matching name.
                watermark = this.getWatermarkFromString(this.extractWatermarkFromDirectory(path.getName()));
            } catch (IllegalArgumentException e) {
                LOGGER.info("Directory {} ({}) does not match pattern {}; skipping", path.getName(), path, this.pattern.toString());
                continue;
            }

            if (watermark > minWatermark && watermark < maxAllowedWatermark) {
                LOGGER.info("Processing directory {} with watermark {}", path, watermark);
                outerDirectories.add(new PartitionAwareFileRetriever.FileInfo(path.toString(), 0L, watermark));
            } else {
                LOGGER.info("Ignoring directory {} - watermark {} is not between minWatermark {} and (now-leadTime) {}", path, watermark, minWatermark, maxAllowedWatermark);
            }
        }
        Collections.sort(outerDirectories);
        return outerDirectories;
    }

    /**
     * Builds the filter applied to the contents of each partition directory: the file name must
     * end with the expected extension (a leading dot is added when missing) and, when the schema
     * lives in the source directory, must not be the schema file itself.
     */
    private PathFilter getFileFilter() {
        // NOTE(review): an empty expectedExtension yields the suffix "." here, so only names
        // ending in a dot are accepted - confirm whether callers ever pass an empty extension.
        final String extension = this.expectedExtension.startsWith(".") ? this.expectedExtension : "." + this.expectedExtension;
        return new PathFilter() {
            @Override
            public boolean accept(Path path) {
                String name = path.getName();
                if (!name.endsWith(extension)) {
                    return false;
                }
                boolean isSchemaFile = RegexBasedPartitionedRetriever.this.schemaInSourceDir
                        && name.equals(RegexBasedPartitionedRetriever.this.schemaFile);
                return !isSchemaFile;
            }
        };
    }
}

