package de.dfki.km.leech.parser.incremental;

import de.dfki.inquisition.text.StringUtils;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;

/* loaded from: input_file:de/dfki/km/leech/parser/incremental/IncrementalCrawlingHistory.class */
/**
 * Crawling history kept in a Lucene index located at the path given to the constructor.
 * Each index document describes one data entity: its ID, a content fingerprint, the time
 * it was last crawled and, optionally, the ID of a master data entity it belongs to.
 * <p>
 * The Lucene handles are opened by {@link #openLuceneStuff()} (also invoked by
 * {@link #crawlStarted()}) and released by {@link #closeLuceneStuff()}.
 */
public class IncrementalCrawlingHistory {
    /** Name of the index field holding the content fingerprint of a data entity. */
    public static final String dataEntityContentFingerprint = "dataEntityContentFingerprint";
    /** Name of the index field holding the ID of a data entity; used as the lookup key. */
    public static final String dataEntityExistsID = "dataEntityExistsID";
    /** Name of the optional index field holding the ID of the master data entity of an entry. */
    public static final String masterDataEntityExistsID = "masterDataEntityExistsID";
    /** Name of the index field holding the time (milliseconds) an entry was last written. */
    public static final String lastCrawledTime = "lastCrawledTime";
    /** Reader on the history index; {@code null} while the Lucene handles are closed. */
    protected DirectoryReader m_indexReader = null;
    /** Searcher on top of {@link #m_indexReader}; {@code null} while the Lucene handles are closed. */
    protected IndexSearcher m_indexSearcher = null;
    /** Writer on the history index; {@code null} while the Lucene handles are closed. */
    protected IndexWriter m_indexWriter = null;
    /** Time in milliseconds recorded by {@link #crawlStarted()}; {@code null} until it was invoked. */
    protected Long m_lCrawlStartingTime = null;
    /** File system path of the history index, as passed to the constructor. */
    protected final String m_strHistoryPath;

    /* loaded from: input_file:de/dfki/km/leech/parser/incremental/IncrementalCrawlingHistory$CrawlFinishedIterator.class */
    /**
     * Iterator over the IDs of all history entries whose {@code lastCrawledTime} lies before
     * the crawl starting time, i.e. entries that were not written during the current crawl.
     * <p>
     * IDs are fetched in batches of up to 5000. Every ID handed out by {@link #next()} is
     * deleted from the history index. When no outdated entry is left, {@link #hasNext()}
     * closes the Lucene handles of the enclosing history.
     */
    protected class CrawlFinishedIterator implements Iterator<String> {
        /** IDs of the current batch that were not handed out yet. */
        protected LinkedList<String> m_llQueuedOutdatedIDs = new LinkedList<>();
        /** Query matching the outdated entries; {@code null} once the iterator is exhausted. */
        protected NumericRangeQuery<Long> m_query;

        /**
         * @throws IllegalStateException if no crawl starting time was recorded yet
         * @throws IOException declared for callers; not raised by the visible code
         */
        protected CrawlFinishedIterator() throws IOException {
            if (m_lCrawlStartingTime == null) {
                throw new IllegalStateException("No crawl starting time found. Did you invoke crawlStarted?");
            }
            // range [0, crawlStart): lower bound inclusive, upper bound exclusive
            this.m_query = NumericRangeQuery.newLongRange(lastCrawledTime, 0L, m_lCrawlStartingTime, true, false);
        }

        /**
         * Reports whether another outdated ID is available, loading the next batch from the
         * index when the queue ran empty. I/O errors are logged and reported as {@code false}.
         *
         * @return {@code true} if {@link #next()} can hand out another ID
         */
        @Override // java.util.Iterator
        public boolean hasNext() {
            try {
                if (this.m_query == null) {
                    return false;
                }
                if (!this.m_llQueuedOutdatedIDs.isEmpty()) {
                    return true;
                }
                // queue is empty: make the deletions done by next() visible, then fetch the next batch
                refreshIndexReaderz();
                TopDocs outdatedHits = m_indexSearcher.search(this.m_query, 5000);
                Bits liveDocs = MultiFields.getLiveDocs(m_indexReader);
                for (ScoreDoc hit : outdatedHits.scoreDocs) {
                    // a null liveDocs means the reader has no deletions at all
                    if (liveDocs == null || liveDocs.get(hit.doc)) {
                        Document stored = m_indexReader.document(hit.doc, Collections.singleton(dataEntityExistsID));
                        this.m_llQueuedOutdatedIDs.add(stored.get(dataEntityExistsID));
                    }
                }
                if (!this.m_llQueuedOutdatedIDs.isEmpty()) {
                    return true;
                }
                // Exhausted. Drop the query BEFORE closing so that any later hasNext() call
                // returns false right away instead of touching the closed (null) reader/searcher.
                this.m_query = null;
                closeLuceneStuff();
                return false;
            } catch (IOException e) {
                Logger.getLogger(CrawlFinishedIterator.class.getName()).log(Level.SEVERE, "Error", e);
                return false;
            }
        }

        /**
         * Deletes the next queued outdated entry from the index and returns its ID.
         * <p>
         * NOTE: the queue is only filled by {@link #hasNext()}; with an empty queue this
         * method returns {@code null} rather than throwing. If the deletion fails, the error
         * is logged, {@code null} is returned and the ID stays queued.
         *
         * @return the ID of the deleted entry, or {@code null} (see note)
         */
        @Override // java.util.Iterator
        public String next() {
            try {
                if (this.m_llQueuedOutdatedIDs.isEmpty()) {
                    return null;
                }
                String outdatedId = this.m_llQueuedOutdatedIDs.getFirst();
                m_indexWriter.deleteDocuments(new Term(dataEntityExistsID, outdatedId));
                // only dequeue after the deletion went through
                return this.m_llQueuedOutdatedIDs.poll();
            } catch (Exception e) {
                Logger.getLogger(CrawlFinishedIterator.class.getName()).log(Level.SEVERE, "Error", e);
                return null;
            }
        }

        /** Not supported; entries are removed from the index by {@link #next()} itself. */
        @Override // java.util.Iterator
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /* loaded from: input_file:de/dfki/km/leech/parser/incremental/IncrementalCrawlingHistory$Exist.class */
    /** Result of {@link IncrementalCrawlingHistory#exists(String)}. */
    public enum Exist {
        /** No history entry was found for the ID. */
        NOT,
        /** An entry exists and its last crawled time is not before the crawl starting time. */
        YES_PROCESSED,
        /** An entry exists but its last crawled time lies before the crawl starting time. */
        YES_UNPROCESSED
    }

    /**
     * Creates a history bound to the given index path. The index is not opened here;
     * a JVM shutdown hook is registered that calls {@link #closeLuceneStuff()} and logs
     * any {@link IOException} it raises.
     *
     * @param str path of the history index
     */
    public IncrementalCrawlingHistory(String str) {
        this.m_strHistoryPath = str;
        final String hookName = "IncrementalCrawlingHistory shutdown hook for " + str;
        Runnable closeOnExit = new Runnable() {
            @Override
            public void run() {
                try {
                    closeLuceneStuff();
                } catch (IOException e) {
                    Logger.getLogger(IncrementalCrawlingHistory.class.getName()).log(Level.SEVERE, "Error", e);
                }
            }
        };
        Runtime.getRuntime().addShutdownHook(new Thread(closeOnExit, hookName));
    }

    /**
     * Adds a history entry without a master data entity.
     *
     * @param str  ID of the data entity
     * @param str2 content fingerprint of the data entity
     */
    public void addDataEntity(String str, String str2) throws CorruptIndexException, IOException {
        this.addDataEntity(str, str2, (String) null);
    }

    /**
     * Adds a new history entry stamped with the current time as its last crawled time.
     * The writer must have been opened beforehand.
     *
     * @param str  ID of the data entity
     * @param str2 content fingerprint of the data entity
     * @param str3 ID of the master data entity; the field is left out when
     *             {@code StringUtils.nullOrWhitespace} reports the value as empty
     */
    public void addDataEntity(String str, String str2, String str3) throws CorruptIndexException, IOException {
        Document entry = new Document();
        entry.add(new StringField(dataEntityExistsID, str, Field.Store.YES));
        entry.add(new StringField(dataEntityContentFingerprint, str2, Field.Store.YES));
        long now = System.currentTimeMillis();
        entry.add(new LongField(lastCrawledTime, now, Field.Store.YES));
        boolean masterMissing = StringUtils.nullOrWhitespace(str3);
        if (!masterMissing) {
            entry.add(new StringField(masterDataEntityExistsID, str3, Field.Store.YES));
        }
        m_indexWriter.addDocument(entry);
    }

    /**
     * Releases the Lucene handles: drops the searcher, closes the reader and commits and
     * closes the writer. Handles that are already {@code null} are skipped, so the method
     * may be called repeatedly.
     * <p>
     * The writer is committed and closed even when closing the reader fails, so pending
     * changes are not lost and the index write lock is not left behind. If both steps
     * fail, the writer's exception is the one that propagates.
     *
     * @throws IOException if closing the reader or committing/closing the writer fails
     */
    public void closeLuceneStuff() throws IOException {
        // the searcher is only a view on the reader; dropping the reference is all there is to do
        this.m_indexSearcher = null;
        try {
            if (this.m_indexReader != null) {
                this.m_indexReader.close();
                this.m_indexReader = null;
            }
        } finally {
            if (this.m_indexWriter != null) {
                this.m_indexWriter.commit();
                this.m_indexWriter.close();
                // only forget the writer once it is really closed, so a failed close can be retried
                this.m_indexWriter = null;
            }
        }
    }

    /**
     * Creates the iterator over the IDs of all entries that were not written during the
     * current crawl (see {@link CrawlFinishedIterator}).
     *
     * @return the iterator, or {@code null} if its creation raised an {@link IOException}
     *         (the error is logged)
     * @throws IllegalStateException if no crawl starting time was recorded yet
     */
    public Iterator<String> crawlFinished() {
        Iterator<String> outdatedIds = null;
        try {
            outdatedIds = new CrawlFinishedIterator();
        } catch (IOException e) {
            Logger.getLogger(IncrementalCrawlingHistory.class.getName()).log(Level.SEVERE, "Error", e);
        }
        return outdatedIds;
    }

    /**
     * Marks the beginning of a crawl: opens the Lucene handles and then records the
     * current time as the crawl starting time.
     */
    public void crawlStarted() throws CorruptIndexException, LockObtainFailedException, IOException {
        this.openLuceneStuff();
        // taken after opening, exactly once per call
        long startedAt = System.currentTimeMillis();
        this.m_lCrawlStartingTime = startedAt;
    }

    /**
     * Tells whether a history entry exists for the given ID and, if so, whether it was
     * already written during the current crawl.
     *
     * @param str ID of the data entity
     * @return {@link Exist#NOT} if there is no entry, {@link Exist#YES_PROCESSED} if the
     *         entry's last crawled time is not before the crawl starting time, otherwise
     *         {@link Exist#YES_UNPROCESSED}
     * @throws IOException if the index lookup fails
     * @throws IllegalStateException if an entry exists but no crawl starting time was recorded
     */
    public Exist exists(String str) throws IOException {
        Long lastCrawled = getDataEntityLastCrawledTime(str);
        if (lastCrawled == null) {
            return Exist.NOT;
        }
        Long crawlStart = this.m_lCrawlStartingTime;
        if (crawlStart == null) {
            // fail with a meaningful message instead of a NullPointerException on unboxing
            throw new IllegalStateException("No crawl starting time found. Did you invoke crawlStarted?");
        }
        if (lastCrawled.longValue() >= crawlStart.longValue()) {
            return Exist.YES_PROCESSED;
        }
        return Exist.YES_UNPROCESSED;
    }

    /**
     * Checks whether an entry with the given ID and the given content fingerprint exists.
     *
     * @param str  ID of the data entity
     * @param str2 content fingerprint that must match
     * @return {@code true} if at least one entry matches both values
     * @throws IOException if the search fails
     */
    public boolean existsWithContent(String str, String str2) throws IOException {
        BooleanQuery idAndFingerprint = new BooleanQuery();
        TermQuery idClause = new TermQuery(new Term(dataEntityExistsID, str));
        idAndFingerprint.add(idClause, BooleanClause.Occur.MUST);
        TermQuery fingerprintClause = new TermQuery(new Term(dataEntityContentFingerprint, str2));
        idAndFingerprint.add(fingerprintClause, BooleanClause.Occur.MUST);

        // only the number of matches is needed, not the matches themselves
        TotalHitCountCollector hitCounter = new TotalHitCountCollector();
        refreshIndexReaderz();
        m_indexSearcher.search(idAndFingerprint, hitCounter);
        int matches = hitCounter.getTotalHits();
        return matches > 0;
    }

    /**
     * Looks up the stored content fingerprint of a data entity.
     *
     * @param str ID of the data entity
     * @return the fingerprint of the first matching entry, or {@code null} if no entry exists
     * @throws IOException if the index lookup fails
     */
    public String getDataEntityContentFingerprint(String str) throws IOException {
        TermQuery byId = new TermQuery(new Term(dataEntityExistsID, str));
        refreshIndexReaderz();
        TopDocs hits = m_indexSearcher.search(byId, 1);
        if (hits.totalHits != 0) {
            int docId = hits.scoreDocs[0].doc;
            // load only the one field that is needed
            Document stored = m_indexReader.document(docId, Collections.singleton(dataEntityContentFingerprint));
            return stored.get(dataEntityContentFingerprint);
        }
        return null;
    }

    /**
     * Looks up the stored last crawled time of a data entity.
     *
     * @param str ID of the data entity
     * @return the last crawled time in milliseconds of the first matching entry, or
     *         {@code null} if no entry exists
     * @throws IOException if the index lookup fails
     */
    public Long getDataEntityLastCrawledTime(String str) throws IOException {
        TermQuery byId = new TermQuery(new Term(dataEntityExistsID, str));
        refreshIndexReaderz();
        TopDocs hits = m_indexSearcher.search(byId, 1);
        if (hits.totalHits != 0) {
            int docId = hits.scoreDocs[0].doc;
            // load only the one field that is needed; it is stored in its string form
            Document stored = m_indexReader.document(docId, Collections.singleton(lastCrawledTime));
            String storedTime = stored.get(lastCrawledTime);
            return Long.valueOf(storedTime);
        }
        return null;
    }

    /**
     * @return the path of the history index as passed to the constructor
     */
    public String getHistoryPath() {
        return m_strHistoryPath;
    }

    /**
     * Opens whichever Lucene handles are not open yet: the writer on the history path
     * (creating the index if necessary, appending otherwise), a reader obtained from
     * that writer, and a searcher on top of the reader. Handles that are already set
     * are left untouched.
     */
    public void openLuceneStuff() throws CorruptIndexException, LockObtainFailedException, IOException {
        if (m_indexWriter == null) {
            IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_4_9, new KeywordAnalyzer());
            writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            SimpleFSDirectory historyDirectory = new SimpleFSDirectory(new File(m_strHistoryPath));
            m_indexWriter = new IndexWriter(historyDirectory, writerConfig);
        }

        if (m_indexReader == null) {
            // reader is opened from the writer, with deletes applied
            m_indexReader = DirectoryReader.open(m_indexWriter, true);
        }

        if (m_indexSearcher == null) {
            m_indexSearcher = new IndexSearcher(m_indexReader);
        }
    }

    /**
     * Brings the reader and searcher up to date with the index. When
     * {@code DirectoryReader.openIfChanged} yields a new reader, it replaces the current
     * one together with a new searcher, and the previous reader is closed afterwards.
     * <p>
     * The new reader is installed before the old one is closed, so a failure while closing
     * neither leaks the new reader nor leaves the fields pointing at a reader whose close
     * was attempted. An {@link IOException} is logged, not propagated (best effort).
     * The reader must have been opened beforehand.
     */
    protected void refreshIndexReaderz() {
        try {
            DirectoryReader previous = this.m_indexReader;
            DirectoryReader refreshed = DirectoryReader.openIfChanged(previous);
            if (refreshed == null) {
                // nothing changed, keep using the current reader/searcher
                return;
            }
            this.m_indexReader = refreshed;
            this.m_indexSearcher = new IndexSearcher(refreshed);
            previous.close();
        } catch (IOException e) {
            Logger.getLogger(IncrementalCrawlingHistory.class.getName()).log(Level.SEVERE, "Error", e);
        }
    }

    /**
     * Replaces the history entry of a data entity, without a master data entity.
     *
     * @param str  ID of the data entity
     * @param str2 new content fingerprint of the data entity
     */
    public void updateDataEntity(String str, String str2) throws CorruptIndexException, IOException {
        this.updateDataEntity(str, str2, (String) null);
    }

    /**
     * Replaces the history entry of a data entity with a freshly built one that is stamped
     * with the current time as its last crawled time. The writer must have been opened
     * beforehand.
     *
     * @param str  ID of the data entity; entries carrying this ID are replaced
     * @param str2 new content fingerprint of the data entity
     * @param str3 ID of the master data entity; the field is left out when
     *             {@code StringUtils.nullOrWhitespace} reports the value as empty
     */
    public void updateDataEntity(String str, String str2, String str3) throws CorruptIndexException, IOException {
        Term idTerm = new Term(dataEntityExistsID, str);
        Document replacement = new Document();
        replacement.add(new StringField(dataEntityExistsID, str, Field.Store.YES));
        replacement.add(new StringField(dataEntityContentFingerprint, str2, Field.Store.YES));
        long now = System.currentTimeMillis();
        replacement.add(new LongField(lastCrawledTime, now, Field.Store.YES));
        boolean masterMissing = StringUtils.nullOrWhitespace(str3);
        if (!masterMissing) {
            replacement.add(new StringField(masterDataEntityExistsID, str3, Field.Store.YES));
        }
        m_indexWriter.updateDocument(idTerm, replacement);
    }

    /**
     * Sets the last crawled time of an existing entry to the current time, leaving its other
     * stored fields as they are. Every entry that names this entity as its master data
     * entity receives the same timestamp.
     *
     * @param str ID of the data entity to touch
     * @throws IllegalStateException if no entry with the given ID exists
     * @throws IOException if reading from or writing to the index fails
     */
    public void updateDataEntityLastCrawledTime(String str) throws CorruptIndexException, IOException {
        Term idTerm = new Term(dataEntityExistsID, str);
        refreshIndexReaderz();
        TopDocs entityHits = this.m_indexSearcher.search(new TermQuery(idTerm), 1);
        if (entityHits.totalHits == 0) {
            throw new IllegalStateException("there has to be an data entry with Id " + str + " for updating. Nothing was found.");
        }
        long now = System.currentTimeMillis();
        Document entity = this.m_indexReader.document(entityHits.scoreDocs[0].doc);
        entity.removeFields(lastCrawledTime);
        entity.add(new LongField(lastCrawledTime, now, Field.Store.YES));
        this.m_indexWriter.updateDocument(idTerm, entity);

        // if the entity is a master, all entries depending on it have to be touched as well
        Term masterTerm = new Term(masterDataEntityExistsID, str);
        TopDocs dependentHits = this.m_indexSearcher.search(new TermQuery(masterTerm), Integer.MAX_VALUE);
        for (ScoreDoc hit : dependentHits.scoreDocs) {
            Document dependent = this.m_indexReader.document(hit.doc);
            dependent.removeFields(lastCrawledTime);
            dependent.add(new LongField(lastCrawledTime, now, Field.Store.YES));
            // updateDocument deletes every document matching the term, so each dependent entry
            // must be replaced under its OWN id term and with its OWN document; using the master
            // term would wipe all dependents at once
            Term dependentIdTerm = new Term(dataEntityExistsID, dependent.get(dataEntityExistsID));
            this.m_indexWriter.updateDocument(dependentIdTerm, dependent);
        }
    }
}
