package de.dfki.catwiesel.index.lucene;

import de.dfki.catwiesel.document.DocumentFactory;
import de.dfki.catwiesel.index.IndexLightweightAccess;
import de.dfki.catwiesel.index.IndexManager;
import de.dfki.catwiesel.index.IndexManagerException;
import de.dfki.catwiesel.index.IndexSynchronizationException;
import de.dfki.catwiesel.index.MultiValueMap;
import de.dfki.catwiesel.search.query.ANDQuery;
import de.dfki.catwiesel.search.query.ElementaryQuery;
import de.dfki.catwiesel.search.query.ORQuery;
import de.dfki.catwiesel.search.query.Query;
import de.dfki.catwiesel.search.query.SimilarityQuery;
import de.dfki.catwiesel.search.query.StringQuery;
import de.dfki.catwiesel.search.query.StringSimilarityQuery;
import de.dfki.catwiesel.similarity.SimilarityMeasure;
import de.dfki.catwiesel.util.FileHandling;
import de.dfki.catwiesel.util.Pair;
import de.dfki.catwiesel.util.RankedItem;
import de.dfki.catwiesel.util.SimpleMultiValueMap;
import de.dfki.catwiesel.vocabulary.AttributeURIs;
import de.dfki.catwiesel.vocabulary.StringConstants;
import de.dfki.inquisition.collections.ConfigurationException;
import de.dfki.inquisition.collections.ConfigurationValue;
import de.dfki.inquisition.collections.MultiValueConfiguration;
import de.dfki.inquisition.lucene.IndexAccessor;
import java.io.IOException;
import java.io.StringReader;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.ontoware.rdf2go.model.node.URI;
import org.ontoware.rdf2go.model.node.impl.URIImpl;

/* loaded from: input_file:de/dfki/catwiesel/index/lucene/LuceneIndexManager.class */
public class LuceneIndexManager implements IndexManager, IndexLightweightAccess {
    // Factory used by setAnalyzers() to build the default and the per-attribute analyzers.
    private LuceneAnalyzerFactory m_analyzerFactory;
    // Per-field analyzer wrapper assembled by setAnalyzers(); handed to the query parser,
    // the read/write coordinator and every index write.
    private Analyzer m_analyzer;
    // Query parser created in init() with AttributeURIs.CONTENT as default field; used for StringQuery parsing.
    private QueryParser m_queryParser;
    // Factory that turns document URIs read from the index into Catwiesel documents.
    private DocumentFactory m_documentFactory;
    // Similarity measures instantiated reflectively by loadSimilarityMeasures().
    private Set<SimilarityMeasure> m_similarityMeasures;
    // Configuration key whose values are the class names of the similarity measures to load.
    private static final String SIMILARITY_MEASURE_KEY = "similarityMeasureClass";
    // Value of the INFO_DOCUMENT_ATTRIBUTE_NAME field that identifies the index's info document.
    static final String INFO_DOCUMENT_NAME = "CatwieselInfoDocument";
    // Field name under which the info document is looked up (see getIndexSpecialValues()).
    static final String INFO_DOCUMENT_ATTRIBUTE_NAME = "CatwieselInfoDocumentName";
    // Info document field that getIndexSpecialValues() parses as a long (the "max URI number").
    static final String INFO_DOCUMENT_ATTRIBUTE_MAX_NUMBER = "CatwieselInfoDocumentMaxNumber";
    // Info document field holding the unique id of the index.
    static final String INFO_DOCUMENT_ATTRIBUTE_UNIQUE_ID = "CatwieselInfoDocumentUniqueId";
    // Base name of copy rules; configuration keys starting with "copyAttribute." are handled in setAnalyzers().
    public static final String COPY_ATTRIBUTE_BASENAME = "copyAttribute";
    // Index processing types; completeMyConfiguration() registers the same value ("keyword")
    // for AttributeURIs.MY_URI, CHECKSUM and SOURCE.
    private static final String INDEX_PROCESSING_TYPE_URI_ATTRIBUTE = "keyword";
    private static final String INDEX_PROCESSING_TYPE_CHECKSUM_ATTRIBUTE = "keyword";
    private static final String INDEX_PROCESSING_TYPE_SOURCE_ATTRIBUTE = "keyword";
    // Configuration key of the processing type addField() falls back to for attributes without an own entry.
    public static final String DEFAULT_ATTRIBUTE_KEY = "attribute_default";
    // Configuration keys of the default analyzer and its stopwords (both must be unique, see checkConfigurationIntegrity()).
    public static final String DEFAULT_ANALYZER_KEY = "default_analyzer";
    public static final String DEFAULT_ANALYZER_STOPWORDS_KEY = "default_analyzer_stopwords";
    // Key suffixes: "<attribute>:analyzer" selects an analyzer, "<attribute>:stopwords" its stopwords.
    public static final String ANALYZER_KEY_SUFFIX = ":analyzer";
    public static final String STOPWORD_KEY_SUFFIX = ":stopwords";
    // Hands out and takes back index readers, writers and searchers.
    private ReadWriteCoordinator m_coordinator;
    // Configuration passed to the constructor, extended by completeMyConfiguration().
    private MultiValueConfiguration m_myConfiguration;
    // Configuration value that makes setAnalyzers() register a KeywordAnalyzer for the key's attribute.
    private static final Object KEYWORD_VALUE = "keyword";
    // Class-wide logger, exposed through getLogger().
    private static Logger m_logger = Logger.getLogger(LuceneIndexManager.class.getName());
    // Copy rules: source attribute URI -> target attribute URIs, filled by markAsToBeCopied().
    private SimpleMultiValueMap<String, String> m_attributesToBeCopied = new SimpleMultiValueMap<>();
    // NOTE(review): the following limits are not referenced in the visible part of the file;
    // presumably they tune term selection for similarity queries - confirm against their users.
    private final int MIN_TERM_FREQUENCY = 2;
    private final double MIN_RELATIVE_DOCUMENT_FREQUENCY = 0.01d;
    private final double MAX_RELATIVE_DOCUMENT_FREQUENCY = 0.5d;
    private final int MAX_TYPICAL_TERMS = 20;

    /**
     * Creates an index manager and initialises it from the given configuration.
     *
     * @param multiValueConfiguration configuration holding analyzer, attribute and similarity settings
     * @param documentFactory factory used to create Catwiesel documents for index entries
     * @throws IndexManagerException if the configuration is invalid or read/write coordination cannot be set up
     */
    public LuceneIndexManager(MultiValueConfiguration multiValueConfiguration, DocumentFactory documentFactory) {
        // All set-up work lives in init(); the constructor only delegates.
        this.init(multiValueConfiguration, documentFactory);
    }

    /**
     * Validates the configuration and builds all collaborators: analyzers, query parser,
     * read/write coordinator and similarity measures.
     *
     * @param multiValueConfiguration configuration to validate and use
     * @param documentFactory factory used to create Catwiesel documents for index entries
     * @throws IndexManagerException if the configuration is invalid or the coordinator cannot be created;
     *         the triggering exception is always attached as cause
     */
    private void init(MultiValueConfiguration multiValueConfiguration, DocumentFactory documentFactory) {
        try {
            checkConfigurationIntegrity(multiValueConfiguration);
            this.m_myConfiguration = multiValueConfiguration;
            completeMyConfiguration();
            this.m_analyzerFactory = new LuceneAnalyzerFactory();
            setAnalyzers();
            this.m_queryParser = new QueryParser(AttributeURIs.CONTENT.toString(), this.m_analyzer);
            this.m_documentFactory = documentFactory;
            // The coordinator must exist before the similarity measures are loaded:
            // it is passed to each measure's constructor.
            this.m_coordinator = new ReadWriteCoordinator(multiValueConfiguration, this.m_analyzer);
            loadSimilarityMeasures(multiValueConfiguration);
        } catch (ConfigurationException e) {
            getLogger().log(Level.SEVERE, "Configuration error", e);
            throw new IndexManagerException("Configuration error", e);
        } catch (IndexSynchronizationException e) {
            String message = "Coordination of Read/Write processing not possible (perhaps synchronization server is not started?)";
            // Log and rethrow with the cause so the underlying failure is not lost.
            getLogger().log(Level.WARNING, message, e);
            throw new IndexManagerException(message, e);
        } catch (IllegalArgumentException e) {
            String message = "Coordination of Read/Write processing not possible due to configuration error";
            getLogger().log(Level.WARNING, message, e);
            throw new IndexManagerException(message, e);
        }
    }

    /**
     * Instantiates every similarity measure class named under {@code similarityMeasureClass}.
     * Each class is loaded by name and constructed with the read/write coordinator as its
     * single constructor argument.
     *
     * @param multiValueConfiguration configuration listing the similarity measure class names
     * @throws IndexManagerException if a class cannot be loaded or instantiated
     */
    private void loadSimilarityMeasures(MultiValueConfiguration multiValueConfiguration) {
        this.m_similarityMeasures = new HashSet<>();
        Iterator<?> classNames = multiValueConfiguration.get(SIMILARITY_MEASURE_KEY).iterator();
        while (classNames.hasNext()) {
            try {
                ConfigurationValue className = (ConfigurationValue) classNames.next();
                Class<?> measureClass = Class.forName(className.getValueAsString());
                Object measure = measureClass.getConstructor(ReadWriteCoordinator.class).newInstance(this.m_coordinator);
                this.m_similarityMeasures.add((SimilarityMeasure) measure);
            } catch (Exception e) {
                // The first failing class aborts loading altogether.
                getLogger().log(Level.WARNING, "Error when trying to load similarity measures", (Throwable) e);
                throw new IndexManagerException("Error when trying to load similarity measures", e);
            }
        }
    }

    /**
     * Registers the copy rules of one {@code copyAttribute.<sourceUri>} configuration key:
     * every value configured under the key becomes a copy target of the source attribute.
     *
     * @param str the complete configuration key
     * @throws ConfigurationException propagated from reading the configuration values
     */
    private void markAsToBeCopied(String str) throws ConfigurationException {
        // replaceFirst takes a regex, so the '.' matches any character; setAnalyzers() only
        // passes keys that start with the literal prefix, so the prefix is what gets stripped.
        String sourceAttribute = new URIImpl(str.replaceFirst("copyAttribute.", "")).toString();
        for (Iterator<?> targets = this.m_myConfiguration.get(str).iterator(); targets.hasNext(); ) {
            ConfigurationValue target = (ConfigurationValue) targets.next();
            this.m_attributesToBeCopied.add(sourceAttribute, target.getValueAsString());
        }
    }

    /**
     * Adds the given value to every attribute registered as a copy target of {@code uri},
     * then repeats the same for each target so that chained copy rules are followed.
     *
     * @param document Lucene document being built
     * @param uri source attribute whose copy targets are looked up
     * @param obj value to store under each target attribute
     * @throws URISyntaxException declared by the original signature
     */
    private void copyAttribute(Document document, URI uri, Object obj) throws URISyntaxException {
        for (String targetName : this.m_attributesToBeCopied.get(uri.toString())) {
            URIImpl target = new URIImpl(targetName);
            addField(document, target, obj);
            // Recurse: the target may itself be the source of further copy rules.
            // There is no cycle detection here.
            copyAttribute(document, target, obj);
        }
    }

    /**
     * Builds the per-field analyzer stored in {@code m_analyzer}.
     * <p>
     * The configured default analyzer is the fallback. Keys ending in {@code :analyzer} assign
     * a dedicated analyzer (with optional {@code :stopwords}) to the attribute named by the key
     * prefix. Every key whose first value is {@code "keyword"} gets a {@link KeywordAnalyzer}.
     * Keys starting with {@code copyAttribute.} are registered as copy rules on the way.
     *
     * @throws ConfigurationException if a required unique configuration value cannot be read
     */
    private void setAnalyzers() throws ConfigurationException {
        getLogger().finer("setting Analyzers");
        Analyzer defaultAnalyzer = this.m_analyzerFactory.createAnalyzer(
                this.m_myConfiguration.getUniqueAsString(DEFAULT_ANALYZER_KEY),
                this.m_myConfiguration.getUniqueAsString(DEFAULT_ANALYZER_STOPWORDS_KEY));
        PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(defaultAnalyzer);
        Set<String> configurationKeys = this.m_myConfiguration.keySet();

        // First pass: copy rules and explicitly configured analyzers.
        for (String key : configurationKeys) {
            if (key.startsWith("copyAttribute.")) {
                markAsToBeCopied(key);
            }
            if (!key.endsWith(ANALYZER_KEY_SUFFIX)) {
                continue;
            }
            String analyzerName = this.m_myConfiguration.getUniqueAsString(key);
            String attribute = key.substring(0, key.lastIndexOf(ANALYZER_KEY_SUFFIX));
            Analyzer attributeAnalyzer = this.m_analyzerFactory.createAnalyzer(analyzerName, this.m_myConfiguration.getFirstAsString(attribute + STOPWORD_KEY_SUFFIX));
            getLogger().finest("setting Analyzer " + analyzerName + " for attribute " + attribute);
            wrapper.addAnalyzer(attribute, attributeAnalyzer);
        }

        // Second pass: keyword attributes are indexed verbatim and share one analyzer instance.
        KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
        for (String key : configurationKeys) {
            ConfigurationValue firstValue = (ConfigurationValue) this.m_myConfiguration.getFirst(key);
            if (firstValue == null || !firstValue.isStringValue()) {
                continue;
            }
            if (KEYWORD_VALUE.equals(firstValue.getValueAsString())) {
                wrapper.addAnalyzer(key, keywordAnalyzer);
            }
        }
        this.m_analyzer = wrapper;
    }

    /**
     * Adds the built-in attributes to the configuration: document URI, checksum and source
     * are all registered with processing type {@code "keyword"}.
     */
    private void completeMyConfiguration() {
        MultiValueConfiguration configuration = this.m_myConfiguration;
        configuration.add(AttributeURIs.MY_URI.toString(), INDEX_PROCESSING_TYPE_URI_ATTRIBUTE);
        configuration.add(AttributeURIs.CHECKSUM.toString(), INDEX_PROCESSING_TYPE_CHECKSUM_ATTRIBUTE);
        configuration.add(AttributeURIs.SOURCE.toString(), INDEX_PROCESSING_TYPE_SOURCE_ATTRIBUTE);
    }

    /**
     * Reads the Lucene document of the given URI using a reader borrowed from the coordinator.
     * The reader is handed back whether or not the lookup succeeds.
     *
     * @param uri URI of the document to read
     * @return the stored Lucene document, or {@code null} if the URI is not indexed
     * @throws IndexManagerException if reading the index fails
     */
    private Document getLuceneDocument(URI uri) {
        getLogger().finest("reading document " + uri + " from index");
        IndexReader reader = null;
        try {
            reader = this.m_coordinator.getIndexReader();
            return getLuceneDocument(uri, reader);
        } finally {
            // reader is still null here if acquiring it failed; release is called regardless.
            this.m_coordinator.releaseIndexReader(reader);
        }
    }

    /**
     * Reads the Lucene document of the given URI through the supplied reader.
     * Only the first posting for the URI term is considered.
     *
     * @param uri URI of the document to read
     * @param indexReader open reader to use; it is neither released nor closed here
     * @return the stored Lucene document, or {@code null} if the URI is not indexed
     * @throws IndexManagerException if reading the index fails
     */
    private Document getLuceneDocument(URI uri, IndexReader indexReader) {
        getLogger().finest("reading document " + uri + " from index");
        try {
            TermDocs termDocs = indexReader.termDocs(new Term(AttributeURIs.MY_URI.toString(), uri.toString()));
            try {
                return termDocs.next() ? indexReader.document(termDocs.doc()) : null;
            } finally {
                // Free the resources held by the postings enumeration.
                termDocs.close();
            }
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "IOException when trying to read from Lucene index", (Throwable) e);
            throw new IndexManagerException("Error when trying to read from Lucene index", e);
        }
    }

    /**
     * Inserts or updates the index entry described by the given map.
     * <p>
     * Unless the map is flagged as "no index read", an existing document with the same URI is
     * loaded first and its fields are merged: every attribute in the map replaces the stored
     * fields of the same name, other stored fields are kept. Attributes with copy rules are
     * also written to their copy targets. An existing entry is deleted and rewritten, a new
     * one is simply added.
     *
     * @param multiValueMap attribute values of the document, including its URI
     * @return always {@code true}; failures are reported by exception
     * @throws IndexManagerException if reading or writing the index fails or a value has an unsupported type
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public boolean insert(MultiValueMap multiValueMap) {
        getLogger().finer("inserting data " + multiValueMap + " into index");
        try {
            Document document = null;
            if (!multiValueMap.isNoIndexRead()) {
                IndexReader indexReader = this.m_coordinator.getIndexReader();
                try {
                    document = getLuceneDocument(multiValueMap.getUri(), indexReader);
                } finally {
                    // Hand the reader back even when the lookup throws.
                    this.m_coordinator.releaseIndexReader(indexReader);
                }
            }
            boolean alreadyIndexed = document != null;
            if (!alreadyIndexed) {
                document = new Document();
            }
            Set<URI> attributeSet = multiValueMap.attributeSet();
            Term term = new Term(AttributeURIs.MY_URI.toString(), multiValueMap.getUri().toString());
            for (URI uri : attributeSet) {
                Set<Object> set = multiValueMap.get(uri);
                // New values replace whatever was stored for this attribute.
                document.removeFields(uri.toString());
                for (Object obj : set) {
                    addField(document, uri, obj);
                    if (this.m_attributesToBeCopied.containsKey(uri.toString())) {
                        copyAttribute(document, uri, obj);
                    }
                }
            }
            try {
                if (alreadyIndexed) {
                    deleteTermAndRewrite(term, this.m_analyzer, document);
                } else {
                    writeDocument(this.m_analyzer, document);
                }
                getLogger().info("Indexed document '" + multiValueMap.get(AttributeURIs.MY_URI).iterator().next() + "'.");
                getLogger().finest("index version: " + IndexReader.getCurrentVersion(this.m_coordinator.getIndexDirectory()));
                return true;
            } catch (Exception e) {
                getLogger().log(Level.WARNING, "Exception while inserting data into Lucene index", (Throwable) e);
                throw new IndexManagerException(e);
            }
        } catch (Exception e2) {
            // Also wraps the IndexManagerException thrown by the inner handler, as before.
            getLogger().log(Level.WARNING, "Exception while inserting data into Lucene index", (Throwable) e2);
            throw new IndexManagerException("Exception while inserting data into Lucene index", e2);
        }
    }

    /**
     * Replaces an index entry: deletes all documents matching the term and adds the new
     * document through the same writer.
     *
     * @param term term identifying the documents to delete
     * @param analyzer analyzer the writer is requested with
     * @param document replacement document
     * @throws IOException if the index cannot be written
     */
    private void deleteTermAndRewrite(Term term, Analyzer analyzer, Document document) throws IOException {
        IndexWriter writer = null;
        try {
            writer = this.m_coordinator.getIndexWriter(analyzer);
            writer.deleteDocuments(term);
            writer.addDocument(document);
        } finally {
            // writer is still null here if acquiring it failed; release is called regardless.
            this.m_coordinator.releaseIndexWriter(writer);
        }
    }

    /**
     * Adds a new document to the index.
     *
     * @param analyzer analyzer the writer is requested with
     * @param document document to add
     * @throws IOException if the writer cannot be obtained or the document cannot be written
     */
    private void writeDocument(Analyzer analyzer, Document document) throws IOException {
        // Acquired outside the try: if this fails there is no writer to release.
        final IndexWriter writer = this.m_coordinator.getIndexWriter(analyzer);
        try {
            writer.addDocument(document);
        } finally {
            this.m_coordinator.releaseIndexWriter(writer);
        }
    }

    /**
     * Verifies that the mandatory settings (default analyzer, default analyzer stopwords and
     * default attribute type) can each be read as a unique string value.
     *
     * @param multiValueConfiguration configuration to check
     * @throws IndexManagerException if one of the settings cannot be read
     */
    private void checkConfigurationIntegrity(MultiValueConfiguration multiValueConfiguration) throws IndexManagerException {
        String[] mandatoryKeys = {DEFAULT_ANALYZER_KEY, DEFAULT_ANALYZER_STOPWORDS_KEY, DEFAULT_ATTRIBUTE_KEY};
        try {
            for (String key : mandatoryKeys) {
                // Only the lookup matters; the value itself is not needed here.
                multiValueConfiguration.getUniqueAsString(key);
            }
        } catch (ConfigurationException e) {
            getLogger().log(Level.SEVERE, "Configuration error", e);
            throw new IndexManagerException("Configuration error", e);
        }
    }

    /**
     * Adds one value of an attribute to a Lucene document, choosing storage and indexing
     * options from the attribute's configured processing type (or the configured default
     * type when the attribute has no own entry):
     * <ul>
     *   <li>{@code "keyword"}: stored, indexed without analysis; accepts String and Date
     *       (dates are written with minute resolution)</li>
     *   <li>text ({@code StringConstants.INDEX_TYPE_TEXT}): stored compressed, analyzed; String only</li>
     *   <li>{@code "unindexed"}: stored only; String only</li>
     * </ul>
     *
     * @param document Lucene document to extend
     * @param uri attribute the value belongs to; its string form is the field name
     * @param obj value to add
     * @throws IndexManagerException if the processing type is unknown, the value type is not
     *         allowed for the type, or no default type is configured
     */
    private void addField(Document document, URI uri, Object obj) {
        ConfigurationValue configurationValue = (ConfigurationValue) this.m_myConfiguration.getFirst(uri.toString());
        String processingType;
        try {
            processingType = configurationValue == null ? this.m_myConfiguration.getUniqueAsString(DEFAULT_ATTRIBUTE_KEY) : configurationValue.getValueAsString();
        } catch (ConfigurationException e) {
            getLogger().log(Level.SEVERE, "Configuration error: no default attribute specified", e);
            throw new IndexManagerException("Configuration error: no default attribute specified", e);
        }
        String fieldName = uri.toString();
        if (processingType.equals("keyword")) {
            if (obj instanceof String) {
                document.add(new Field(fieldName, (String) obj, Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            } else if (obj instanceof Date) {
                document.add(new Field(fieldName, DateTools.dateToString((Date) obj, DateTools.Resolution.MINUTE), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            } else {
                wrongType(fieldName, obj, processingType);
            }
        } else if (processingType.equals(StringConstants.INDEX_TYPE_TEXT)) {
            if (obj instanceof String) {
                document.add(new Field(fieldName, (String) obj, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            } else {
                wrongType(fieldName, obj, processingType);
            }
        } else if (processingType.equals("unindexed")) {
            if (obj instanceof String) {
                // A field that is not indexed cannot carry term vectors: Lucene's Field
                // constructor rejects Index.NO combined with any TermVector other than NO.
                document.add(new Field(fieldName, (String) obj, Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
            } else {
                wrongType(fieldName, obj, processingType);
            }
        } else {
            getLogger().warning("Undefined attribute " + processingType);
            throw new IndexManagerException("Undefined attribute " + processingType);
        }
    }

    /**
     * Reports a value whose Java type is not allowed for the attribute's processing type.
     * Logs a warning and then always throws.
     *
     * @param str name of the attribute
     * @param obj offending value; its class appears in the message
     * @param str2 processing type of the attribute
     * @throws IndexManagerException always
     */
    private void wrongType(String str, Object obj, String str2) {
        String message = "Wrong attribute value type for " + str + "! MultiValueMaps of type text do not allow values of type " + obj.getClass() + " in " + str2 + " attributes";
        getLogger().warning(message);
        throw new IndexManagerException(message);
    }

    /**
     * Returns the stored values of one attribute of an indexed document, in stored order.
     *
     * @param uri URI of the document
     * @param uri2 attribute whose values are requested
     * @return the values as strings; empty if the document has no such attribute
     * @throws IndexManagerException if the document is not in the index or the index cannot be read
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public Set<Object> getValues(URI uri, URI uri2) {
        getLogger().finest("returning values of attribute " + uri2 + " from document " + uri);
        Document stored = getLuceneDocument(uri);
        if (stored == null) {
            String message = "URI " + uri.toString() + " does not exist in Lucene index.";
            getLogger().warning(message);
            throw new IndexManagerException(message);
        }
        Set<Object> result = new LinkedHashSet<>();
        String[] fieldValues = stored.getValues(uri2.toString());
        if (fieldValues == null) {
            return result;
        }
        for (int i = 0; i < fieldValues.length; i++) {
            result.add(fieldValues[i]);
        }
        return result;
    }

    /**
     * Returns the attributes of an indexed document: one URI per distinct field name of the
     * stored Lucene document, in field order.
     *
     * @param uri URI of the document
     * @return attribute URIs of the document
     * @throws IndexManagerException if the document is not in the index or the index cannot be read
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public Set<URI> getAttributes(URI uri) {
        getLogger().finest("returning all attributes of document " + uri);
        Document stored = getLuceneDocument(uri);
        if (stored == null) {
            String message = "URI " + uri.toString() + " does not exist in Lucene index.";
            getLogger().warning(message);
            throw new IndexManagerException(message);
        }
        Set<URI> attributes = new LinkedHashSet<>();
        for (Object field : stored.getFields()) {
            attributes.add(new URIImpl(((Field) field).name()));
        }
        return attributes;
    }

    /**
     * Tells whether a document with the given URI is present in the index.
     *
     * @param uri URI to look up
     * @return {@code true} if at least one document carries this URI
     * @throws IndexManagerException if the index cannot be read
     */
    public boolean exists(URI uri) {
        IndexReader indexReader = null;
        try {
            indexReader = this.m_coordinator.getIndexReader();
            TermDocs termDocs = indexReader.termDocs(new Term(AttributeURIs.MY_URI.toString(), uri.toString()));
            try {
                return termDocs.next();
            } finally {
                // Free the resources held by the postings enumeration.
                termDocs.close();
            }
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to read from Lucene index", (Throwable) e);
            throw new IndexManagerException("Error when trying to read from Lucene index", e);
        } finally {
            this.m_coordinator.releaseIndexReader(indexReader);
        }
    }

    /**
     * Tells whether the entry with the given URI passes the {@code testEntry} check for the
     * category entry type.
     *
     * @param uri URI of the entry to test
     * @return the result of {@code testEntry(uri, StringConstants.ENTRY_TYPE_CATEGORY)}
     */
    public boolean isCategory(URI uri) {
        final boolean category = testEntry(uri, StringConstants.ENTRY_TYPE_CATEGORY);
        return category;
    }

    /**
     * Looks up the URI of the document stored with the given checksum.
     * If several documents share the checksum, the first one wins and the duplicate is
     * logged at INFO level.
     *
     * @param str checksum to look up
     * @return URI of the first matching document, or {@code null} if there is none
     * @throws IndexManagerException if the index cannot be read
     */
    @Override // de.dfki.catwiesel.index.IndexLightweightAccess
    public URI getUriByChecksum(String str) {
        IndexReader indexReader = null;
        try {
            indexReader = this.m_coordinator.getIndexReader();
            TermDocs termDocs = indexReader.termDocs(new Term(AttributeURIs.CHECKSUM.toString(), str));
            try {
                if (!termDocs.next()) {
                    return null;
                }
                Document document = indexReader.document(termDocs.doc());
                if (termDocs.next()) {
                    getLogger().info("More than one Document with checksum " + str + " in Lucene index!");
                }
                return new URIImpl(document.get(AttributeURIs.MY_URI.toString()));
            } finally {
                // Free the resources held by the postings enumeration.
                termDocs.close();
            }
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to read from Lucene index", (Throwable) e);
            throw new IndexManagerException("Error when trying to read from Lucene index", e);
        } finally {
            this.m_coordinator.releaseIndexReader(indexReader);
        }
    }

    /**
     * Looks up the URI of the document stored with the given source.
     * If several documents share the source, the first one wins and the duplicate is
     * logged at SEVERE level.
     *
     * @param str source value to look up
     * @return URI of the first matching document, or {@code null} if there is none
     * @throws IndexManagerException if the index cannot be read
     */
    @Override // de.dfki.catwiesel.index.IndexLightweightAccess
    public URI getUriBySource(String str) {
        IndexReader indexReader = null;
        try {
            indexReader = this.m_coordinator.getIndexReader();
            TermDocs termDocs = indexReader.termDocs(new Term(AttributeURIs.SOURCE.toString(), str));
            try {
                if (!termDocs.next()) {
                    return null;
                }
                Document document = indexReader.document(termDocs.doc());
                if (termDocs.next()) {
                    getLogger().severe("More than one Document with source " + str + " in Lucene index!");
                }
                return new URIImpl(document.get(AttributeURIs.MY_URI.toString()));
            } finally {
                // Free the resources held by the postings enumeration.
                termDocs.close();
            }
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to read from Lucene index", (Throwable) e);
            throw new IndexManagerException("Error when trying to read from Lucene index", e);
        } finally {
            this.m_coordinator.releaseIndexReader(indexReader);
        }
    }

    /**
     * Returns the max URI number recorded in the index's info document.
     *
     * @return first component of {@link #getIndexSpecialValues()}
     * @throws IndexManagerException if the info document is missing or the index cannot be read
     * @deprecated marked deprecated in the original; {@link #getIndexSpecialValues()} returns
     *             this value together with the unique id in one read
     */
    @Override // de.dfki.catwiesel.index.IndexLightweightAccess
    @Deprecated
    public long getMaxURINumber() {
        Pair<Long, String> specialValues = getIndexSpecialValues();
        return specialValues.getFirst().longValue();
    }

    /**
     * Returns the unique id recorded in the index's info document.
     *
     * @return second component of {@link #getIndexSpecialValues()}
     * @throws IndexManagerException if the info document is missing or the index cannot be read
     * @deprecated marked deprecated in the original; {@link #getIndexSpecialValues()} returns
     *             this value together with the max URI number in one read
     */
    @Override // de.dfki.catwiesel.index.IndexLightweightAccess
    @Deprecated
    public String getUniqueIdOfIndex() {
        Pair<Long, String> specialValues = getIndexSpecialValues();
        return specialValues.getSecond();
    }

    /**
     * Reads the index's info document and returns its two special values.
     *
     * @return pair of (max URI number, unique id of the index)
     * @throws IndexManagerException if the info document is missing or the index cannot be read
     */
    public Pair<Long, String> getIndexSpecialValues() {
        // Declared outside the try so the finally block releases the reader that was
        // actually acquired, on every exit path.
        IndexReader indexReader = null;
        try {
            indexReader = this.m_coordinator.getIndexReader();
            TermDocs termDocs = indexReader.termDocs(new Term(INFO_DOCUMENT_ATTRIBUTE_NAME, INFO_DOCUMENT_NAME));
            try {
                if (!termDocs.next()) {
                    getLogger().severe("Corrupt index! Could not find info document.");
                    throw new IndexManagerException("Corrupt index! Could not find info document.");
                }
                Document document = indexReader.document(termDocs.doc());
                long maxNumber = Long.parseLong(document.get(INFO_DOCUMENT_ATTRIBUTE_MAX_NUMBER));
                String uniqueId = document.get(INFO_DOCUMENT_ATTRIBUTE_UNIQUE_ID);
                getLogger().finer("Returning max URI number " + maxNumber);
                return new Pair<>(Long.valueOf(maxNumber), uniqueId);
            } finally {
                // Free the resources held by the postings enumeration.
                termDocs.close();
            }
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to read from Lucene index", (Throwable) e);
            throw new IndexManagerException("Error when trying to read from Lucene index", e);
        } finally {
            this.m_coordinator.releaseIndexReader(indexReader);
        }
    }

    /**
     * Returns the logger shared by all instances of this class.
     *
     * @return the class-wide logger
     */
    public static Logger getLogger() {
        return LuceneIndexManager.m_logger;
    }

    /**
     * Returns all documents whose given attribute contains exactly the given value as a term.
     * Only String values are looked up; any other value type yields an empty result.
     *
     * @param uri attribute to search in
     * @param obj value to search for
     * @return matching documents in index order; empty if there are none
     * @throws IndexManagerException if the index cannot be read
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public Set<de.dfki.catwiesel.document.Document> getDocuments(URI uri, Object obj) {
        getLogger().finest("returning all documents with value " + obj + " in attribute " + uri);
        Set<de.dfki.catwiesel.document.Document> documents = new LinkedHashSet<>();
        if (!(obj instanceof String)) {
            return documents;
        }
        IndexReader indexReader = null;
        try {
            indexReader = this.m_coordinator.getIndexReader();
            TermDocs termDocs = indexReader.termDocs(new Term(uri.toString(), (String) obj));
            try {
                while (termDocs.next()) {
                    String documentUri = indexReader.document(termDocs.doc()).get(AttributeURIs.MY_URI.toString());
                    documents.add(this.m_documentFactory.getDocument(new URIImpl(documentUri)));
                }
            } finally {
                // Free the resources held by the postings enumeration.
                termDocs.close();
            }
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to read from Lucene index", (Throwable) e);
            throw new IndexManagerException("Error when trying to read from Lucene index", e);
        } finally {
            this.m_coordinator.releaseIndexReader(indexReader);
        }
        return documents;
    }

    /**
     * Executes a query against the index and stores the result list in the query object.
     * The query is first translated into a Lucene query via {@code letParse}; at most
     * 10000 hits are requested.
     *
     * @param query query to execute; receives its results through {@code setResults}
     * @throws IndexManagerException if the index cannot be searched
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public void search(Query query) {
        getLogger().fine("searching for " + query);
        org.apache.lucene.search.Query luceneQuery = (org.apache.lucene.search.Query) query.letParse(this);
        IndexSearcher searcher = null;
        try {
            searcher = this.m_coordinator.getIndexSearcher();
            query.setResults(new LuceneResultList(searcher.search(luceneQuery, (Filter) null, 10000), this.m_documentFactory, searcher, this.m_coordinator));
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to search Lucene index", (Throwable) e);
            throw new IndexManagerException("Error when trying to search Lucene index", e);
        } finally {
            // searcher is still null here if acquiring it failed; release is called regardless.
            this.m_coordinator.releaseIndexSearcher(searcher);
        }
    }

    /**
     * Fallback for query types without a dedicated {@code parse} overload: logs a warning
     * and always throws.
     *
     * @param query query of an unsupported type
     * @return never returns normally
     * @throws IndexManagerException always
     */
    public Object parse(Query query) {
        String message = "Unknown Query type: " + query.getClass().getName() + ". Unable to parse.";
        getLogger().warning(message);
        throw new IndexManagerException(message);
    }

    /**
     * Translates an AND query into a Lucene {@link BooleanQuery} in which every sub-query
     * is a MUST clause.
     *
     * @param aNDQuery query to translate
     * @return the resulting {@link BooleanQuery}
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public Object parse(ANDQuery aNDQuery) {
        getLogger().finest("parsing ANDQuery: " + aNDQuery.toString());
        BooleanQuery conjunction = new BooleanQuery();
        for (Query subQuery : aNDQuery.getSubQueries()) {
            org.apache.lucene.search.Query parsed = (org.apache.lucene.search.Query) subQuery.letParse(this);
            conjunction.add(parsed, BooleanClause.Occur.MUST);
        }
        return conjunction;
    }

    /**
     * Translates an OR query into a Lucene {@link BooleanQuery} in which every sub-query
     * is a SHOULD clause.
     *
     * @param oRQuery query to translate
     * @return the resulting {@link BooleanQuery}
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public Object parse(ORQuery oRQuery) {
        getLogger().finest("parsing ORQuery: " + oRQuery.toString());
        BooleanQuery disjunction = new BooleanQuery();
        for (Query subQuery : oRQuery.getSubQueries()) {
            org.apache.lucene.search.Query parsed = (org.apache.lucene.search.Query) subQuery.letParse(this);
            disjunction.add(parsed, BooleanClause.Occur.SHOULD);
        }
        return disjunction;
    }

    /**
     * Translates a string query into a Lucene query: the search string is parsed as a phrase
     * against every attribute of the query, the per-attribute queries are OR-combined, and
     * the result is completed (inversion, category exclusion) and boosted.
     * <p>
     * Quotation marks in the search string are stripped, because the string itself is wrapped
     * in quotes to form the phrase.
     *
     * @param stringQuery query to translate
     * @return the resulting {@link BooleanQuery}
     * @throws IndexManagerException if the Lucene query parser rejects the generated query string
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public Object parse(StringQuery stringQuery) {
        getLogger().finest("parsing StringQuery: " + stringQuery.toString());
        BooleanQuery booleanQuery = new BooleanQuery();
        String searchString = stringQuery.getSearchString();
        if (searchString.indexOf('"') >= 0) {
            getLogger().warning("Use of quotation marks is not permitted, quotation marks will be removed from search string");
            // Strings are immutable: the stripped result has to be assigned back.
            searchString = searchString.replace("\"", "");
        }
        try {
            for (URI attribute : stringQuery.getAttributes()) {
                String phraseQuery = QueryParser.escape(attribute.toString()) + ":\"" + searchString + "\"";
                booleanQuery.add(this.m_queryParser.parse(phraseQuery), BooleanClause.Occur.SHOULD);
            }
            BooleanQuery completeQuery = completeQuery(stringQuery, booleanQuery);
            completeQuery.setBoost((float) stringQuery.getBoost());
            return completeQuery;
        } catch (ParseException e) {
            getLogger().log(Level.WARNING, "Error when trying to parse StringQuery " + stringQuery, e);
            throw new IndexManagerException("Error when trying to parse StringQuery " + stringQuery, e);
        }
    }

    /**
     * Wraps a translated elementary query so that inversion and category exclusion apply.
     * <ul>
     *   <li>Not inverted: the query becomes a MUST clause.</li>
     *   <li>Inverted: the query becomes a MUST_NOT clause; entry type "document" is added as
     *       SHOULD, and entry type "category" too unless categories are excluded.</li>
     *   <li>Categories excluded: entry type "category" is added as MUST_NOT.</li>
     * </ul>
     *
     * @param elementaryQuery source query providing the inversion and exclusion flags
     * @param query translated Lucene query to wrap
     * @return the wrapping {@link BooleanQuery}
     */
    private BooleanQuery completeQuery(ElementaryQuery elementaryQuery, org.apache.lucene.search.Query query) {
        BooleanQuery wrapped = new BooleanQuery();
        String entryTypeField = AttributeURIs.ENTRY_TYPE.toString();
        if (!elementaryQuery.isInverted()) {
            wrapped.add(query, BooleanClause.Occur.MUST);
        } else {
            wrapped.add(query, BooleanClause.Occur.MUST_NOT);
            // A purely negative query needs positive clauses that select the candidates.
            TermQuery documents = new TermQuery(new Term(entryTypeField, StringConstants.ENTRY_TYPE_DOCUMENT));
            wrapped.add(documents, BooleanClause.Occur.SHOULD);
            if (!elementaryQuery.categoriesExcluded()) {
                TermQuery categories = new TermQuery(new Term(entryTypeField, StringConstants.ENTRY_TYPE_CATEGORY));
                wrapped.add(categories, BooleanClause.Occur.SHOULD);
            }
        }
        if (elementaryQuery.categoriesExcluded()) {
            TermQuery categories = new TermQuery(new Term(entryTypeField, StringConstants.ENTRY_TYPE_CATEGORY));
            wrapped.add(categories, BooleanClause.Occur.MUST_NOT);
        }
        return wrapped;
    }

    /**
     * Translates a similarity query into a Lucene query: for every reference document found
     * in the index a "more like this" query is generated, and these are OR-combined,
     * completed (inversion, category exclusion) and boosted. Reference documents missing from
     * the index are skipped with a warning.
     *
     * @param similarityQuery query to translate
     * @return the resulting {@link BooleanQuery}; an empty one if no reference document is indexed
     * @throws IndexManagerException if the index cannot be read
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public Object parse(SimilarityQuery similarityQuery) {
        getLogger().finest("parsing SimilarityQuery: " + similarityQuery.toString());
        IndexReader indexReader = null;
        try {
            indexReader = this.m_coordinator.getIndexReader();
            MoreLikeThis createMoreLikeThis = createMoreLikeThis(similarityQuery, indexReader);
            BooleanQuery booleanQuery = null;
            for (de.dfki.catwiesel.document.Document document : similarityQuery.getDocuments()) {
                TermDocs termDocs = indexReader.termDocs(new Term(AttributeURIs.MY_URI.toString(), document.getURI().toString()));
                try {
                    if (termDocs.next()) {
                        org.apache.lucene.search.Query like = createMoreLikeThis.like(termDocs.doc());
                        if (booleanQuery == null) {
                            booleanQuery = new BooleanQuery();
                        }
                        booleanQuery.add(like, BooleanClause.Occur.SHOULD);
                    } else {
                        getLogger().warning("Document URI '" + document.getURI() + "' does not exist in Lucene index");
                    }
                } finally {
                    // One enumeration is opened per reference document; close each of them.
                    termDocs.close();
                }
            }
            if (booleanQuery == null) {
                // No reference document was found: return an empty query without completion or boost.
                return new BooleanQuery();
            }
            BooleanQuery completeQuery = completeQuery(similarityQuery, booleanQuery);
            completeQuery.setBoost((float) similarityQuery.getBoost());
            return completeQuery;
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to read from Lucene index.", (Throwable) e);
            throw new IndexManagerException("Error when trying to read from Lucene index.", e);
        } finally {
            this.m_coordinator.releaseIndexReader(indexReader);
        }
    }

    /**
     * Translates a {@link StringSimilarityQuery} into a Lucene "more like this"
     * query built from the query's text, then applies the inversion/category
     * flags and the boost of the query.
     *
     * @param stringSimilarityQuery the query to translate
     * @return the resulting Lucene {@link BooleanQuery}
     * @throws IndexManagerException if reading from the index fails
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public Object parse(StringSimilarityQuery stringSimilarityQuery) {
        getLogger().finest("parsing StringSimilarityQuery: " + stringSimilarityQuery.toString());
        IndexReader indexReader = null;
        try {
            indexReader = this.m_coordinator.getIndexReader();
            MoreLikeThis moreLikeThis = createMoreLikeThis(stringSimilarityQuery, indexReader);
            org.apache.lucene.search.Query like = moreLikeThis.like(new StringReader(stringSimilarityQuery.getString()));
            BooleanQuery completed = completeQuery(stringSimilarityQuery, like);
            completed.setBoost((float) stringSimilarityQuery.getBoost());
            return completed;
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to read from Lucene index.", e);
            throw new IndexManagerException("Error when trying to read from Lucene index.", e);
        } finally {
            // Single release point: runs exactly once on success and on failure.
            this.m_coordinator.releaseIndexReader(indexReader);
        }
    }

    /**
     * Builds a {@link MoreLikeThis} instance restricted to the attributes of
     * the given query.
     *
     * @param elementaryQuery supplies the attribute URIs used as field names
     * @param indexReader     the reader the similarity helper operates on
     * @return a helper configured with minimum word length 5, minimum term
     *         frequency 0 and minimum document frequency 1
     */
    private MoreLikeThis createMoreLikeThis(ElementaryQuery elementaryQuery, IndexReader indexReader) {
        Collection<URI> attributes = elementaryQuery.getAttributes();

        // Field names are the string forms of the attribute URIs, in iteration order.
        String[] fieldNames = new String[attributes.size()];
        int next = 0;
        for (URI attribute : attributes) {
            fieldNames[next++] = attribute.toString();
        }

        MoreLikeThis similarity = new MoreLikeThis(indexReader);
        similarity.setFieldNames(fieldNames);
        similarity.setMinWordLen(5);
        similarity.setMinTermFreq(0);
        similarity.setMinDocFreq(1);
        return similarity;
    }

    @Override // de.dfki.catwiesel.index.IndexManager
    public void delete(URI uri) {
        try {
            Term term = new Term(AttributeURIs.MY_URI.toString(), uri.toString());
            IndexWriter indexWriter = null;
            try {
                indexWriter = this.m_coordinator.getIndexWriter(this.m_analyzer);
                indexWriter.deleteDocuments(term);
                this.m_coordinator.releaseIndexWriter(indexWriter);
                getLogger().fine("Document '" + uri + "' deleted.");
            } catch (Throwable th) {
                this.m_coordinator.releaseIndexWriter(indexWriter);
                throw th;
            }
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to read from Lucene index", (Throwable) e);
            throw new IndexManagerException("Error when trying to read from Lucene index", e);
        }
    }

    @Override // de.dfki.catwiesel.index.IndexManager
    public boolean addAttribute(URI uri, URI uri2, Object obj) {
        Document luceneDocument = getLuceneDocument(uri);
        if (luceneDocument == null) {
            getLogger().warning("No entry with URI '" + uri + "' in Lucene index.");
            throw new IndexManagerException("No entry with URI '" + uri + "' in Lucene index.");
        }
        try {
            addField(luceneDocument, uri2, obj);
            deleteTermAndRewrite(new Term(AttributeURIs.MY_URI.toString(), uri.toString()), this.m_analyzer, luceneDocument);
            getLogger().fine("added value " + obj + " to attribute " + uri2 + " in document " + uri);
            return true;
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to write to Lucene index", (Throwable) e);
            throw new IndexManagerException("Error when trying to write to Lucene index", e);
        }
    }

    /**
     * Checks whether the index entry with the given URI has the given entry type.
     *
     * @param uri the URI of the entry to look up
     * @param str the expected value of the entry-type field
     * @return {@code true} if the entry exists and its stored entry type equals
     *         {@code str}; {@code false} if the entry is missing (a warning is
     *         logged) or has no or a different entry type
     * @throws IndexManagerException if reading from the index fails
     */
    private boolean testEntry(URI uri, String str) {
        Term term = new Term(AttributeURIs.MY_URI.toString(), uri.toString());
        IndexReader indexReader = null;
        try {
            indexReader = this.m_coordinator.getIndexReader();
            TermDocs termDocs = indexReader.termDocs(term);
            try {
                if (!termDocs.next()) {
                    getLogger().warning("No entry with URI '" + uri + "' in Lucene index.");
                    return false;
                }
                String entryType = indexReader.document(termDocs.doc()).get(AttributeURIs.ENTRY_TYPE.toString());
                // An entry without a stored entry type simply does not match.
                return entryType != null && entryType.equals(str);
            } finally {
                termDocs.close();
            }
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to read from Lucene index", e);
            throw new IndexManagerException("Error when trying to read from Lucene index", e);
        } finally {
            // Release the reader that was actually acquired; the failure path
            // previously passed null and left the reader unreleased.
            this.m_coordinator.releaseIndexReader(indexReader);
        }
    }

    /**
     * Closes the underlying read/write coordinator and logs the shutdown.
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public void close() {
        m_coordinator.close();
        getLogger().fine("LuceneIndexManager was closed");
    }

    /**
     * Removes one value of an attribute from an existing index entry. All
     * other values of that attribute are kept; the entry is rewritten only
     * when a matching value was found.
     *
     * @param uri  the URI of the entry to modify
     * @param uri2 the attribute whose value should be removed
     * @param obj  the value to remove
     * @return {@code true} if a matching value was found and the entry was
     *         rewritten, {@code false} otherwise
     * @throws IndexManagerException if the entry does not exist or the index
     *                               cannot be written
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public boolean removeAttributeValue(URI uri, URI uri2, Object obj) {
        Document entry = getLuceneDocument(uri);
        if (entry == null) {
            String missing = "No entry with URI '" + uri + "' in Lucene index.";
            getLogger().warning(missing);
            throw new IndexManagerException(missing);
        }
        try {
            String[] storedValues = entry.getValues(uri2.toString());
            if (storedValues == null) {
                return false;
            }
            // Drop the whole attribute, then re-add every value except the one to remove.
            entry.removeFields(uri2.toString());
            boolean matched = false;
            for (String storedValue : storedValues) {
                // NOTE(review): String.equals(obj) only matches when obj is a String - confirm callers.
                if (storedValue.equals(obj)) {
                    matched = true;
                } else {
                    addField(entry, uri2, storedValue);
                }
            }
            if (matched) {
                Term entryKey = new Term(AttributeURIs.MY_URI.toString(), uri.toString());
                deleteTermAndRewrite(entryKey, this.m_analyzer, entry);
                getLogger().fine("removed attribute '" + uri2 + " from document '" + uri + "'.");
            }
            return matched;
        } catch (IOException e) {
            String failure = "Error when trying to write to Lucene index";
            getLogger().log(Level.WARNING, failure, e);
            throw new IndexManagerException(failure, e);
        }
    }

    /**
     * Removes all values of an attribute from an existing index entry and
     * rewrites that entry in the index.
     *
     * @param uri  the URI of the entry to modify
     * @param uri2 the attribute to remove
     * @return {@code true} if the entry had the attribute and was rewritten,
     *         {@code false} if the entry has no such attribute
     * @throws IndexManagerException if the entry does not exist or the index
     *                               cannot be written
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public boolean removeAttribute(URI uri, URI uri2) {
        Document luceneDocument = getLuceneDocument(uri);
        if (luceneDocument == null) {
            getLogger().warning("No entry with URI '" + uri + "' in Lucene index.");
            throw new IndexManagerException("No entry with URI '" + uri + "' in Lucene index.");
        }
        try {
            String fieldName = uri2.toString();
            Field[] existing = luceneDocument.getFields(fieldName);
            // Depending on the Lucene version, "no such field" is reported as null
            // or as an empty array; both mean there is nothing to remove.
            if (existing == null || existing.length == 0) {
                return false;
            }
            luceneDocument.removeFields(fieldName);
            deleteTermAndRewrite(new Term(AttributeURIs.MY_URI.toString(), uri.toString()), this.m_analyzer, luceneDocument);
            getLogger().fine("removed attribute '" + uri2 + " from document '" + uri + "'.");
            return true;
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to write to Lucene index", e);
            throw new IndexManagerException("Error when trying to write to Lucene index", e);
        }
    }

    /**
     * Returns the ranked "typical" terms of one attribute of an index entry,
     * based on the term frequency vector stored for that attribute.
     *
     * @param uri  the URI of the entry
     * @param uri2 the attribute whose terms are ranked
     * @return the ranked terms; an empty list if the entry does not exist or
     *         no term frequency vector is available for the attribute
     * @throws IndexManagerException if reading from the index fails
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public List<RankedItem> getTypicalItems(URI uri, URI uri2) {
        Term term = new Term(AttributeURIs.MY_URI.toString(), uri.toString());
        IndexReader indexReader = null;
        try {
            indexReader = this.m_coordinator.getIndexReader();
            TermDocs termDocs = indexReader.termDocs(term);
            try {
                if (!termDocs.next()) {
                    return new LinkedList<RankedItem>();
                }
                TermFreqVector termFreqVector = indexReader.getTermFreqVector(termDocs.doc(), uri2.toString());
                if (termFreqVector == null) {
                    return new LinkedList<RankedItem>();
                }
                return getTypicalTerms(termFreqVector, uri2, indexReader);
            } finally {
                termDocs.close();
            }
        } catch (IOException e) {
            getLogger().log(Level.WARNING, "Error when trying to read from lucene index", e);
            throw new IndexManagerException("Error when trying to read from lucene index", e);
        } finally {
            // Release the reader that was actually acquired; the failure path
            // previously passed null and left the reader unreleased.
            this.m_coordinator.releaseIndexReader(indexReader);
        }
    }

    /**
     * Ranks the terms of a term frequency vector and returns at most the 20
     * best ones, best first.
     *
     * <p>A term is considered only if it occurs at least twice in the vector
     * and its relative document frequency (documents containing the term
     * divided by all documents) lies in {@code [0.01, 0.5)}.
     *
     * @param termFreqVector the terms and frequencies of one field of one document
     * @param uri            the attribute (field) the vector belongs to
     * @param indexReader    the reader used to look up document frequencies
     * @return the ranked items, ordered by descending {@code compareTo} rank
     * @throws IOException if the document frequency lookup fails
     */
    private List<RankedItem> getTypicalTerms(TermFreqVector termFreqVector, URI uri, IndexReader indexReader) throws IOException {
        final int maxItems = 20;
        String[] terms = termFreqVector.getTerms();
        int[] termFrequencies = termFreqVector.getTermFrequencies();
        int numDocs = indexReader.numDocs();
        String fieldName = uri.toString();

        long totalFrequency = 0;
        for (int frequency : termFrequencies) {
            totalFrequency += frequency;
        }

        List<RankedItem> ranked = new ArrayList<RankedItem>(maxItems + 1);
        for (int t = 0; t < terms.length; t++) {
            int termFrequency = termFrequencies[t];
            int docFreq = indexReader.docFreq(new Term(fieldName, terms[t]));
            // Divide in floating point: integer division only ever produced 0 or 1,
            // so the range filter below could never pass.
            double relativeDocFreq = (double) docFreq / numDocs;
            if (relativeDocFreq < 0.5d && relativeDocFreq >= 0.01d && termFrequency >= 2) {
                // NOTE(review): the previous code referenced an undeclared variable in the
                // denominator; the raw document frequency is assumed here - confirm the formula.
                double score = ((double) termFrequency * numDocs)
                        / ((double) totalFrequency * ((docFreq + numDocs) - 1));
                RankedItem candidate = new RankedItem(terms[t], score);

                // Insert before the first item the candidate outranks, else append.
                int position = 0;
                while (position < ranked.size() && candidate.compareTo(ranked.get(position)) <= 0) {
                    position++;
                }
                ranked.add(position, candidate);

                // Keep only the best maxItems entries.
                if (ranked.size() > maxItems) {
                    ranked.remove(maxItems);
                }
            }
        }
        return ranked;
    }

    /**
     * Returns the string form of the term frequency vector stored for one
     * attribute of an index entry.
     *
     * @param uri  the URI of the entry
     * @param uri2 the attribute whose term frequency vector is requested
     * @return the vector's {@code toString()} value, or {@code null} if the
     *         entry does not exist or no vector is available for the attribute
     * @throws IOException if reading from the index fails
     */
    public String getTermFrequencyVector(URI uri, URI uri2) throws IOException {
        IndexReader indexReader = null;
        try {
            indexReader = this.m_coordinator.getIndexReader();
            TermDocs termDocs = indexReader.termDocs(new Term(AttributeURIs.MY_URI.toString(), uri.toString()));
            try {
                if (!termDocs.next()) {
                    return null;
                }
                TermFreqVector termFreqVector = indexReader.getTermFreqVector(termDocs.doc(), uri2.toString());
                // The reader may have no vector for this field; avoid the NPE on toString().
                return termFreqVector == null ? null : termFreqVector.toString();
            } finally {
                termDocs.close();
            }
        } finally {
            // Single release point: runs exactly once on success and on failure.
            this.m_coordinator.releaseIndexReader(indexReader);
        }
    }

    /**
     * Returns the document for a URI, obtained from the document factory.
     *
     * @param uri the URI of the requested document
     * @return the document produced by the document factory for {@code uri}
     * @throws IndexManagerException if {@code exists(uri)} reports that the
     *                               URI is not in the index
     */
    @Override // de.dfki.catwiesel.index.IndexLightweightAccess
    public de.dfki.catwiesel.document.Document getDocument(URI uri) throws IndexManagerException {
        getLogger().finer("returning document " + uri);
        if (!exists(uri)) {
            getLogger().warning("URI " + uri + " does not exist!");
            throw new IndexManagerException("URI " + uri + " does not exist in the index!");
        }
        return this.m_documentFactory.getDocument(uri);
    }

    /**
     * Identifies this index manager by its index directory.
     *
     * @return the string form of the coordinator's index directory
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public String getIdentificationString() {
        return m_coordinator.getIndexDirectory().toString();
    }

    /**
     * Empties the index: closes this manager, drops the cached reader for the
     * index directory, deletes the plain files below that directory and
     * initializes the manager again with its current configuration.
     *
     * @throws IndexManagerException if the index files cannot be deleted or
     *                               the configuration is invalid
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public void deleteAllDocumentsAndReinitialize() {
        try {
            // Read the directory before closing; it is needed for the clean-up below.
            String indexDirectory = this.m_myConfiguration.getUniqueAsString(ReadWriteCoordinator.DIRECTORY_KEY);
            close();

            IndexAccessor.removeReaderFromCache(FileHandling.getNormalizedPath(indexDirectory));
            FileHandling.deletePlainFilesBelowDirectory(FileHandling.getNormalizedPath(indexDirectory));

            init(this.m_myConfiguration, this.m_documentFactory);
        } catch (IOException ioFailure) {
            getLogger().log(Level.WARNING, "Error while trying to delete index files", ioFailure);
            throw new IndexManagerException("Error while trying to delete index files", ioFailure);
        } catch (ConfigurationException configFailure) {
            getLogger().log(Level.WARNING, "Error in IndexManager configuration", configFailure);
            throw new IndexManagerException("Error in IndexManager configuration", configFailure);
        }
    }

    /**
     * Returns the similarity measures held by this index manager.
     *
     * @return the internal set itself, not a copy
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public Set<SimilarityMeasure> getSimilarityMeasures() {
        return m_similarityMeasures;
    }

    /**
     * Forwards the write-only mode switch to the read/write coordinator.
     *
     * @param z the flag value passed on to the coordinator
     */
    @Override // de.dfki.catwiesel.index.IndexManager
    public void setWriteonlyMode(boolean z) {
        m_coordinator.setWriteonlyMode(z);
    }
}
