package org.apache.nutch.searcher;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Closeable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.lib.HashPartitioner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseText;
import org.apache.nutch.protocol.Content;

/* loaded from: input_file:org/apache/nutch/searcher/FetchedSegments.class */
/**
 * Gives the searcher access to the data stored in fetched segments: raw
 * content, parse data, parse text, fetch time and hit summaries.
 *
 * <p>Each child path of the segments directory handed to the constructor is
 * wrapped in a {@link Segment}, registered under the path's name. A hit is
 * resolved through its {@code "segment"} and {@code "url"} detail values.
 */
public class FetchedSegments implements HitSummarizer, HitContent {
    /** Known segments, keyed by the name of their directory. */
    private final HashMap<String, Segment> segments = new HashMap<String, Segment>();

    /** Summarizer obtained from {@link SummarizerFactory}; checked for null before use. */
    private final Summarizer summarizer;

    /**
     * One segment directory. The map-file readers for each kind of data are
     * opened on first use and kept until {@link #close()}.
     *
     * <p>A decompiler note on this class said its access modifier was changed
     * from private; it is kept public here so existing references still compile.
     */
    public static class Segment implements Closeable {
        /** Partitioner handed to {@code MapFileOutputFormat.getEntry} together with the readers. */
        private static final Partitioner PARTITIONER = new HashPartitioner();

        private final FileSystem fs;
        private final Path segmentDir;
        private final Configuration conf;

        // Reader arrays stay null until the matching getter is first called.
        private MapFile.Reader[] content;
        private MapFile.Reader[] parseText;
        private MapFile.Reader[] parseData;
        private MapFile.Reader[] crawl;

        /**
         * Records where the segment lives; no readers are opened here.
         *
         * @param fileSystem    file system holding the segment
         * @param path          the segment directory
         * @param configuration configuration passed on when readers are opened
         * @throws IOException declared for callers; this constructor performs no I/O itself
         */
        public Segment(FileSystem fileSystem, Path path, Configuration configuration) throws IOException {
            this.fs = fileSystem;
            this.segmentDir = path;
            this.conf = configuration;
        }

        /**
         * Looks up the fetch-time crawl datum stored for a URL.
         *
         * @param text the URL key
         * @return the entry produced by the lookup
         * @throws IOException if opening the readers or reading the entry fails
         */
        public CrawlDatum getCrawlDatum(Text text) throws IOException {
            // Open the readers at most once, guarded by this segment's monitor.
            synchronized (this) {
                if (this.crawl == null) {
                    this.crawl = getReaders(CrawlDatum.FETCH_DIR_NAME);
                }
            }
            return (CrawlDatum) getEntry(this.crawl, text, new CrawlDatum());
        }

        /**
         * Looks up the raw content bytes stored for a URL.
         *
         * @param text the URL key
         * @return the bytes of the stored {@link Content} entry
         * @throws IOException if opening the readers or reading the entry fails
         */
        public byte[] getContent(Text text) throws IOException {
            synchronized (this) {
                if (this.content == null) {
                    this.content = getReaders("content");
                }
            }
            Content entry = (Content) getEntry(this.content, text, new Content());
            return entry.getContent();
        }

        /**
         * Looks up the parse data stored for a URL.
         *
         * @param text the URL key
         * @return the entry produced by the lookup
         * @throws IOException if opening the readers or reading the entry fails
         */
        public ParseData getParseData(Text text) throws IOException {
            synchronized (this) {
                if (this.parseData == null) {
                    this.parseData = getReaders(ParseData.DIR_NAME);
                }
            }
            return (ParseData) getEntry(this.parseData, text, new ParseData());
        }

        /**
         * Looks up the parse text stored for a URL.
         *
         * @param text the URL key
         * @return the entry produced by the lookup
         * @throws IOException if opening the readers or reading the entry fails
         */
        public ParseText getParseText(Text text) throws IOException {
            synchronized (this) {
                if (this.parseText == null) {
                    this.parseText = getReaders(ParseText.DIR_NAME);
                }
            }
            return (ParseText) getEntry(this.parseText, text, new ParseText());
        }

        /** Opens the map-file readers found in the named sub-directory of this segment. */
        private MapFile.Reader[] getReaders(String str) throws IOException {
            return MapFileOutputFormat.getReaders(this.fs, new Path(this.segmentDir, str), this.conf);
        }

        /** Reads the entry for a key from the given readers into the supplied writable. */
        private Writable getEntry(MapFile.Reader[] readerArr, Text text, Writable writable) throws IOException {
            return MapFileOutputFormat.getEntry(readerArr, PARTITIONER, text, writable);
        }

        /**
         * Closes every reader this segment has opened. All readers are attempted
         * even when one of them fails, so a single failure does not leave the
         * remaining readers open.
         *
         * @throws IOException the first failure encountered, rethrown after all
         *                     readers were attempted; later failures are dropped
         */
        public void close() throws IOException {
            IOException firstFailure = null;
            firstFailure = closeReaders(this.content, firstFailure);
            firstFailure = closeReaders(this.parseText, firstFailure);
            firstFailure = closeReaders(this.parseData, firstFailure);
            firstFailure = closeReaders(this.crawl, firstFailure);
            if (firstFailure != null) {
                throw firstFailure;
            }
        }

        /**
         * Closes each reader of an array that may never have been opened.
         *
         * @param readerArr    readers to close, or null when they were never opened
         * @param firstFailure failure remembered from earlier arrays, or null
         * @return the failure to report: {@code firstFailure} if it was set,
         *         otherwise the first one raised here, otherwise null
         */
        private IOException closeReaders(MapFile.Reader[] readerArr, IOException firstFailure) {
            if (readerArr == null) {
                return firstFailure;
            }
            for (MapFile.Reader reader : readerArr) {
                try {
                    reader.close();
                } catch (IOException e) {
                    if (firstFailure == null) {
                        firstFailure = e;
                    }
                }
            }
            return firstFailure;
        }
    }

    /**
     * Computes the summary of a single hit on its own thread. The outcome is
     * left in {@code summary}, or in {@code throwable} when the computation
     * failed; the starter reads both after joining the thread.
     */
    private class SummaryThread extends Thread {
        private final HitDetails details;
        private final Query query;
        private Summary summary;
        private Throwable throwable;

        public SummaryThread(HitDetails hitDetails, Query query) {
            this.details = hitDetails;
            this.query = query;
        }

        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            try {
                this.summary = FetchedSegments.this.getSummary(this.details, this.query);
            } catch (Throwable th) {
                // Captured rather than lost on this thread; the joining thread rethrows it.
                this.throwable = th;
            }
        }
    }

    /**
     * Registers one {@link Segment} per path listed under the segments directory.
     *
     * @param fileSystem    file system holding the segments
     * @param str           path of the directory whose children are the segments
     * @param configuration configuration used for the summarizer and the segments
     * @throws IOException if listing the directory or creating a segment fails
     */
    public FetchedSegments(FileSystem fileSystem, String str, Configuration configuration) throws IOException {
        Path[] listPaths = fileSystem.listPaths(new Path(str));
        this.summarizer = new SummarizerFactory(configuration).getSummarizer();
        // A null listing simply leaves the segment map empty.
        if (listPaths != null) {
            for (Path path : listPaths) {
                this.segments.put(path.getName(), new Segment(fileSystem, path, configuration));
            }
        }
    }

    /**
     * @return the names of all registered segments, in no particular order
     */
    public String[] getSegmentNames() {
        return this.segments.keySet().toArray(new String[this.segments.size()]);
    }

    /**
     * Returns the raw content bytes of a hit.
     *
     * @throws IOException if the hit's segment is unknown or the lookup fails
     */
    @Override // org.apache.nutch.searcher.HitContent
    public byte[] getContent(HitDetails hitDetails) throws IOException {
        return getSegment(hitDetails).getContent(getUrl(hitDetails));
    }

    /**
     * Returns the parse data of a hit.
     *
     * @throws IOException if the hit's segment is unknown or the lookup fails
     */
    @Override // org.apache.nutch.searcher.HitContent
    public ParseData getParseData(HitDetails hitDetails) throws IOException {
        return getSegment(hitDetails).getParseData(getUrl(hitDetails));
    }

    /**
     * Returns the fetch time recorded in the crawl datum of a hit.
     *
     * @throws IOException if the hit's segment is unknown or the lookup fails
     */
    @Override // org.apache.nutch.searcher.HitContent
    public long getFetchDate(HitDetails hitDetails) throws IOException {
        return getSegment(hitDetails).getCrawlDatum(getUrl(hitDetails)).getFetchTime();
    }

    /**
     * Returns the parse text of a hit.
     *
     * @throws IOException if the hit's segment is unknown or the lookup fails
     */
    @Override // org.apache.nutch.searcher.HitContent
    public ParseText getParseText(HitDetails hitDetails) throws IOException {
        return getSegment(hitDetails).getParseText(getUrl(hitDetails));
    }

    /**
     * Summarizes the parse text of one hit for the given query.
     *
     * @return the summarizer's result, or an empty {@link Summary} when no
     *         summarizer is available
     * @throws IOException if the hit's segment is unknown or the lookup fails
     */
    @Override // org.apache.nutch.searcher.HitSummarizer
    public Summary getSummary(HitDetails hitDetails, Query query) throws IOException {
        if (this.summarizer == null) {
            return new Summary();
        }
        String parsedText = getSegment(hitDetails).getParseText(getUrl(hitDetails)).getText();
        return this.summarizer.getSummary(parsedText, query);
    }

    /**
     * Summarizes several hits concurrently, one thread per hit, and returns
     * the summaries in the order of the input array.
     *
     * @throws IOException      if any summary thread failed with an IOException
     * @throws RuntimeException wrapping any other failure of a summary thread,
     *                          or the InterruptedException raised while waiting
     *                          (the caller's interrupt status is restored first)
     */
    @Override // org.apache.nutch.searcher.HitSummarizer
    public Summary[] getSummary(HitDetails[] hitDetailsArr, Query query) throws IOException {
        SummaryThread[] workers = new SummaryThread[hitDetailsArr.length];
        for (int i = 0; i < workers.length; i++) {
            workers[i] = new SummaryThread(hitDetailsArr[i], query);
            workers[i].start();
        }
        Summary[] summaryArr = new Summary[hitDetailsArr.length];
        for (int i = 0; i < workers.length; i++) {
            SummaryThread worker = workers[i];
            try {
                worker.join();
            } catch (InterruptedException e) {
                // Re-assert the interrupt so code further up the stack can still observe it.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
            if (worker.throwable instanceof IOException) {
                throw (IOException) worker.throwable;
            }
            if (worker.throwable != null) {
                throw new RuntimeException(worker.throwable);
            }
            summaryArr[i] = worker.summary;
        }
        return summaryArr;
    }

    /**
     * Resolves the segment named by the hit's {@code "segment"} value.
     *
     * @throws IOException if no segment is registered under that name, so the
     *                     caller gets a descriptive error rather than a
     *                     NullPointerException
     */
    private Segment getSegment(HitDetails hitDetails) throws IOException {
        String segmentName = hitDetails.getValue("segment");
        Segment segment = this.segments.get(segmentName);
        if (segment == null) {
            throw new IOException("Unknown segment: " + segmentName);
        }
        return segment;
    }

    /** Wraps the hit's {@code "url"} value as the key used for segment lookups. */
    private Text getUrl(HitDetails hitDetails) {
        return new Text(hitDetails.getValue("url"));
    }

    /**
     * Closes every registered segment. All segments are attempted even when
     * one of them fails.
     *
     * @throws IOException the first failure encountered, rethrown after all
     *                     segments were attempted; later failures are dropped
     */
    public void close() throws IOException {
        IOException firstFailure = null;
        for (Segment segment : this.segments.values()) {
            try {
                segment.close();
            } catch (IOException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }
}
