package dfki.km.tweekreco.arte;

import de.dfki.delight.common.Documentation;
import de.dfki.inquisition.collections.CollectionUtilz;
import de.dfki.inquisition.processes.StopWatch;
import de.dfki.inquisition.text.StringUtils;
import dfki.km.tweekreco.GlobalConstants;
import dfki.km.tweekreco.lucene.LuceneUtilz;
import dfki.km.tweekreco.ner.MultiNamedEntityRecognizer;
import dfki.km.tweekreco.ner.NerEntity;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.annotation.PreDestroy;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Content-based recommender working on the Lucene index of arte documents.
 *
 * <p>For one or more seed documents a combined {@link BooleanQuery} is built from several
 * independently boosted parts (editorial metadata, theme, named entities of the description,
 * and "interesting terms" of the short and the long description). Optional rule queries in
 * Lucene syntax can filter or boost the result further. The combined query is then executed
 * against the index and the hits are converted into result objects.
 *
 * <p>The shared {@link MoreLikeThis} instance is reconfigured on every call of
 * {@link #extractInterestingTerms(String, String)}; nothing in this class synchronizes access
 * to it.
 */
public class ArteRecommender {
    private static final Logger LOGGER = Logger.getLogger(ArteRecommender.class.getName());

    /** When {@code true}, queries and results are dumped to stdout and match explanations are attached to results. */
    protected boolean m_bVerbose = false;
    protected IndexReader m_indexReader;
    protected IndexSearcher m_indexSearcher;
    /** Parser for the rule queries; its default field is the short description. */
    protected QueryParser m_queryParser;
    protected MoreLikeThis m_moreLikeThis;
    public static MultiNamedEntityRecognizer m_ner = new MultiNamedEntityRecognizer().init();

    /** Manual smoke test: asks for 13 recommendations for one hard-coded seed id. */
    @Documentation(hide = true)
    public static void main(String[] strArr) throws Exception {
        new ArteRecommender().recommend(Collections.singleton("extrait_050183-003"), 0.01f, 1.0f, 1.0f, 1.0f, 0.0f, "", "", "", 13);
    }

    /**
     * Opens the index located in the {@code index} directory below the application base path.
     *
     * <p>An {@link IOException} during initialization is logged and not rethrown, so the
     * instance may be left without an open index; later calls will then fail.
     */
    public ArteRecommender() {
        try {
            init(GlobalConstants.strAppBasePath + "index");
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Error during initializing the arte recommender", e);
        }
    }

    /**
     * Adds one SHOULD clause per (stored value of {@code str}, target field) combination.
     *
     * @param booleanQuery the query that receives the clauses
     * @param document     the document whose stored values are read
     * @param str          name of the stored field to read the values from
     * @param collection   names of the index fields the values are searched in
     */
    @Documentation(hide = true)
    protected void addValues2Query(BooleanQuery booleanQuery, Document document, String str, Collection<String> collection) {
        addValues2Query(booleanQuery, document.getValues(str), collection);
    }

    /**
     * Adds one SHOULD clause per (value, target field) combination. Null or blank values are
     * skipped, as are combinations for which no query could be created.
     *
     * @param booleanQuery the query that receives the clauses
     * @param strArr       the values to search for; {@code null} adds nothing
     * @param collection   names of the index fields the values are searched in
     */
    @Documentation(hide = true)
    protected void addValues2Query(BooleanQuery booleanQuery, String[] strArr, Collection<String> collection) {
        if (strArr == null) {
            return;
        }
        for (String value : strArr) {
            if (StringUtils.nullOrWhitespace(value)) {
                continue;
            }
            for (String fieldName : collection) {
                Query fieldQuery = LuceneUtilz.createQuery(fieldName, value, GlobalConstants.indexAnalyzer);
                if (fieldQuery != null) {
                    booleanQuery.add(fieldQuery, BooleanClause.Occur.SHOULD);
                }
            }
        }
    }

    /**
     * Closes the index reader, if one is open. Calling it again is a no-op.
     *
     * @throws IOException if closing the reader fails
     */
    @PreDestroy
    @Documentation(hide = true)
    public void close() throws IOException {
        if (this.m_indexReader != null) {
            this.m_indexReader.close();
            this.m_indexReader = null;
        }
    }

    /**
     * Extracts the most interesting terms of a text via {@link MoreLikeThis}.
     *
     * <p>The number of requested terms grows with the text length and is capped at 7. Terms
     * occurring in at least two documents are preferred; if they do not fill the quota, the
     * extraction is repeated with a minimum document frequency of one and the result is topped up.
     *
     * @param str  name of the index field the text is analyzed for
     * @param str2 the text to analyze; null or blank text yields an empty set
     * @return the extracted terms in extraction order, never {@code null}
     * @throws IOException if reading the index fails
     */
    @Documentation(hide = true)
    public Set<String> extractInterestingTerms(String str, String str2) throws IOException {
        Set<String> terms = new LinkedHashSet<String>();
        if (StringUtils.nullOrWhitespace(str2)) {
            return terms;
        }
        int maxTerms = (int) Math.round(Math.min(7, Math.round(str2.length() / 4.0f) * 0.65d));
        this.m_moreLikeThis.setFieldNames((String[]) CollectionUtilz.createArray(new String[]{str}));
        this.m_moreLikeThis.setMinDocFreq(2);
        this.m_moreLikeThis.setMinTermFreq(1);
        this.m_moreLikeThis.setMinWordLen(2);
        this.m_moreLikeThis.setMaxQueryTerms(maxTerms);
        terms.addAll(Arrays.asList(this.m_moreLikeThis.retrieveInterestingTerms(new StringReader(str2), str)));
        if (terms.size() < maxTerms) {
            // Not enough terms shared with other documents: relax the frequency limit and top up.
            this.m_moreLikeThis.setMinDocFreq(1);
            String[] relaxedTerms = this.m_moreLikeThis.retrieveInterestingTerms(new StringReader(str2), str);
            for (int idx = 0; terms.size() < maxTerms && idx < relaxedTerms.length; idx++) {
                terms.add(relaxedTerms[idx]);
            }
        }
        return terms;
    }

    /**
     * Runs the shared named entity recognizer on a text and collects, for every recognized
     * entity, its label, its text trigger and all of its synonyms.
     *
     * @param str  first argument handed to the recognizer (callers in this class pass {@code "arte_de"})
     * @param str2 the text to analyze
     * @return the collected strings, unordered
     * @throws Exception if the recognizer fails
     */
    @Documentation(hide = true)
    public Set<String> extractNamedEntities(String str, String str2) throws Exception {
        Set<String> entityNames = new HashSet<String>();
        for (NerEntity entity : m_ner.recognizeNamedEntities(str, str2, 23)) {
            entityNames.add(entity.label);
            entityNames.add(entity.textTrigger);
            entityNames.addAll(entity.synonyms);
        }
        return entityNames;
    }

    /**
     * (Re-)opens the index and recreates searcher, MoreLikeThis and rule query parser on it.
     *
     * <p>The new reader is opened before a previously opened one is closed, so a failing
     * re-initialization leaves the former state untouched.
     *
     * @param str path of the index directory
     * @throws IOException if the index cannot be opened or the former reader cannot be closed
     */
    @Documentation(hide = true)
    protected void init(String str) throws IOException {
        IndexReader freshReader = DirectoryReader.open(new SimpleFSDirectory(new File(str)));
        IndexReader formerReader = this.m_indexReader;
        this.m_indexReader = freshReader;
        this.m_indexSearcher = new IndexSearcher(freshReader);
        this.m_moreLikeThis = new MoreLikeThis(freshReader);
        this.m_moreLikeThis.setAnalyzer(GlobalConstants.indexAnalyzer);
        this.m_queryParser = new QueryParser(Version.LUCENE_CURRENT, ArteIndexAtts.description, GlobalConstants.indexAnalyzer);
        if (formerReader != null) {
            formerReader.close();
        }
    }

    /**
     * Recommends documents similar to the given seed documents, in the slim result format.
     *
     * @param set  ids of the seed documents; each is looked up by id first, then by arte id
     * @param f    boost of the editorial metadata part; zero leaves the part out
     * @param f2   boost of the theme part; zero leaves the part out
     * @param f3   boost of the named entity part; zero leaves the part out
     * @param f4   boost of the short description buzzword part; zero leaves the part out
     * @param f5   boost of the long description buzzword part; zero leaves the part out
     * @param str  rule query every result must match; ignored when null or blank
     * @param str2 rule query no result may match; ignored when null or blank
     * @param str3 optional rule query added as a SHOULD clause; ignored when null or blank
     * @param i    maximum number of results
     * @return the recommendations in the order delivered by the search; empty if one of the
     *         seed documents is not in the index
     * @throws Exception if entity extraction, rule parsing or the index access fails
     */
    @Documentation("the rulesQueries are in standard Lucene syntax - you can specify whatever you want: filters, boosters, etc. Example: 'category:Fernsehfilm^30'. The three parameters define how the query should influence the results. If empty, the parameter will be ignored.\nIf a boost factor is zero, the according data will also be ignored entirely. Thus, you can send arbitrary Lucene queries to the index, creating own, totally new similarity+rules criterias.")
    public List<ArteResult> recommend(Set<String> set, float f, float f2, float f3, float f4, float f5, String str, String str2, String str3, int i) throws Exception {
        List<ArteResult> slimResults = new LinkedList<ArteResult>();
        List<ArteResultFull> fullResults = recommendRaw(set, f, f2, f3, f4, f5, str, str2, str3, i);
        if (fullResults == null) {
            // recommendRaw signals an unknown seed document with null; report "no recommendations".
            return slimResults;
        }
        for (ArteResultFull full : fullResults) {
            ArteResult slim = new ArteResult();
            slim.arte_id = full.arteId;
            slim.score = full.score;
            slim.title_de = full.title_de;
            slim.title_fr = full.title_fr;
            slim.url_de = full.url_de;
            slim.url_fr = full.url_fr;
            slimResults.add(slim);
        }
        return slimResults;
    }

    /**
     * Recommends documents similar to the given seed documents, with all available metadata.
     *
     * <p>Parameters are the same as for
     * {@link #recommend(Set, float, float, float, float, float, String, String, String, int)}.
     * In verbose mode the query parts and the results are printed to stdout and every result
     * carries the matching lines of its score explanation; otherwise a single log line with
     * the elapsed time and the result ids is written.
     *
     * @return the recommendations in the order delivered by the search, or {@code null} if one
     *         of the seed documents is found neither by id nor by arte id
     * @throws Exception if entity extraction, rule parsing or the index access fails
     */
    @Documentation(hide = true)
    public List<ArteResultFull> recommendRaw(Set<String> set, float f, float f2, float f3, float f4, float f5, String str, String str2, String str3, int i) throws Exception {
        long startMillis = System.currentTimeMillis();
        BooleanQuery combinedQuery = new BooleanQuery();
        for (String seedId : set) {
            Document seedDoc = findSeedDocument(seedId);
            if (seedDoc == null) {
                return null;
            }
            if (this.m_bVerbose) {
                System.out.print("\n### Query doc: ");
                printResult(doc2result(seedDoc));
                System.out.println("~~~~~~~~~~~~~~~~");
            }
            addWeightedPart(combinedQuery, buildMetadataQuery(seedDoc), f);
            addWeightedPart(combinedQuery, buildThemeQuery(seedDoc), f2);
            addWeightedPart(combinedQuery, buildEntityQuery(seedDoc), f3);
            addWeightedPart(combinedQuery, buildBuzzwordQuery(seedDoc, ArteIndexAtts.description, ArteIndexAtts.description_long, "short"), f4);
            addWeightedPart(combinedQuery, buildBuzzwordQuery(seedDoc, ArteIndexAtts.description_long, ArteIndexAtts.description, "long"), f5);
            // NOTE(review): the rule clauses are added once per seed document, so a request with
            // several seeds carries duplicated rule clauses - confirm whether this is intended.
            addRuleClause(combinedQuery, str, BooleanClause.Occur.MUST);
            addRuleClause(combinedQuery, str2, BooleanClause.Occur.MUST_NOT);
            addRuleClause(combinedQuery, str3, BooleanClause.Occur.SHOULD);
        }
        if (this.m_bVerbose) {
            System.out.println("final query: " + combinedQuery);
        }

        List<ArteResultFull> results = new LinkedList<ArteResultFull>();
        for (ScoreDoc hit : this.m_indexSearcher.search(combinedQuery, i).scoreDocs) {
            ArteResultFull result = doc2result(this.m_indexSearcher.doc(hit.doc));
            result.score = hit.score;
            if (this.m_bVerbose) {
                result.explanationString = matchingExplanationLines(this.m_indexSearcher.explain(combinedQuery, hit.doc));
            }
            results.add(result);
        }
        long endMillis = System.currentTimeMillis();

        if (this.m_bVerbose) {
            printRecommendations(results);
        } else {
            List<String> resultIds = new LinkedList<String>();
            for (ArteResultFull result : results) {
                resultIds.add(result.id);
            }
            LOGGER.info("performed query for " + set + "(" + StopWatch.formatTimeDistance(endMillis - startMillis) + ") result Ids: " + resultIds + ".");
        }
        return results;
    }

    /** Looks a seed document up by its id and, if that fails, by its arte id; null if unknown. */
    private Document findSeedDocument(String seedId) throws Exception {
        Document seedDoc = LuceneUtilz.getUniqueDocWithTerm(new Term(ArteIndexAtts.id, seedId), this.m_indexSearcher);
        if (seedDoc == null) {
            seedDoc = LuceneUtilz.getUniqueDocWithTerm(new Term(ArteIndexAtts.arteId, seedId), this.m_indexSearcher);
        }
        return seedDoc;
    }

    /** Sets the boost of a query part and adds it as SHOULD clause unless the boost is zero. */
    private void addWeightedPart(BooleanQuery combinedQuery, BooleanQuery part, float boost) {
        part.setBoost(boost);
        if (boost != 0.0f) {
            combinedQuery.add(part, BooleanClause.Occur.SHOULD);
        }
    }

    /** Parses a rule query and adds it with the given occurrence; null or blank rules are ignored. */
    private void addRuleClause(BooleanQuery combinedQuery, String ruleQuery, BooleanClause.Occur occur) throws Exception {
        if (!StringUtils.nullOrWhitespace(ruleQuery)) {
            combinedQuery.add(new BooleanClause(this.m_queryParser.parse(ruleQuery), occur));
        }
    }

    /** Builds the query part matching the editorial metadata (category, people, genres, keywords) of the seed. */
    private BooleanQuery buildMetadataQuery(Document seedDoc) {
        BooleanQuery metadataQuery = new BooleanQuery();
        String[] sameFieldAtts = {
            ArteIndexAtts.category, ArteIndexAtts.meta_categories, ArteIndexAtts.authors, ArteIndexAtts.music,
            ArteIndexAtts.directions, ArteIndexAtts.cast, ArteIndexAtts.artist_name, ArteIndexAtts.concertGenres
        };
        for (String att : sameFieldAtts) {
            addValues2Query(metadataQuery, seedDoc, att, Collections.singleton(att));
        }
        // Concert keywords are additionally searched in both description fields.
        addValues2Query(metadataQuery, seedDoc, ArteIndexAtts.concertKeywords, CollectionUtilz.createArrayList(new String[]{ArteIndexAtts.concertKeywords, ArteIndexAtts.description, ArteIndexAtts.description_long}));
        if (this.m_bVerbose) {
            System.out.println("arte data query: " + metadataQuery);
        }
        return metadataQuery;
    }

    /** Builds the query part matching the themes of the seed. */
    private BooleanQuery buildThemeQuery(Document seedDoc) {
        BooleanQuery themeQuery = new BooleanQuery();
        addValues2Query(themeQuery, seedDoc, ArteIndexAtts.theme, Collections.singleton(ArteIndexAtts.theme));
        if (this.m_bVerbose) {
            System.out.println("theme data query: " + themeQuery);
        }
        return themeQuery;
    }

    /** Builds the query part searching the named entities of the seed's description in both description fields. */
    private BooleanQuery buildEntityQuery(Document seedDoc) throws Exception {
        BooleanQuery entityQuery = new BooleanQuery();
        // The long description is preferred; the short one is only the fallback.
        String text = seedDoc.get(ArteIndexAtts.description_long);
        if (StringUtils.nullOrWhitespace(text)) {
            text = seedDoc.get(ArteIndexAtts.description);
        }
        if (!StringUtils.nullOrWhitespace(text)) {
            Set<String> entities = extractNamedEntities("arte_de", text);
            addValues2Query(entityQuery, entities.toArray(new String[0]), CollectionUtilz.createHashSet(new String[]{ArteIndexAtts.description, ArteIndexAtts.description_long}));
            if (this.m_bVerbose) {
                System.out.println("extracted entities: " + entities);
            }
        }
        return entityQuery;
    }

    /**
     * Builds the query part searching the interesting terms of one description field of the
     * seed in that field and in the other description field.
     */
    private BooleanQuery buildBuzzwordQuery(Document seedDoc, String sourceField, String otherField, String verboseLabel) throws IOException {
        BooleanQuery buzzwordQuery = new BooleanQuery();
        String text = seedDoc.get(sourceField);
        if (!StringUtils.nullOrWhitespace(text)) {
            Set<String> buzzwords = extractInterestingTerms(sourceField, text);
            for (String buzzword : buzzwords) {
                if (StringUtils.nullOrWhitespace(buzzword)) {
                    continue;
                }
                buzzwordQuery.add(new TermQuery(new Term(sourceField, buzzword)), BooleanClause.Occur.SHOULD);
                buzzwordQuery.add(new TermQuery(new Term(otherField, buzzword)), BooleanClause.Occur.SHOULD);
            }
            if (this.m_bVerbose) {
                System.out.println("extracted buzzwords (" + verboseLabel + "): " + buzzwords);
            }
        }
        return buzzwordQuery;
    }

    /** Reduces a score explanation to its lines containing "MATCH", each preceded by a line break. */
    private static String matchingExplanationLines(Explanation explanation) {
        StringBuilder matches = new StringBuilder();
        for (String line : explanation.toString().split("\\n")) {
            if (line.contains("MATCH")) {
                matches.append('\n').append(line);
            }
        }
        return matches.toString();
    }

    /** Verbose dump of the recommendations: first with all metadata, then as compact overview. */
    private void printRecommendations(List<ArteResultFull> results) throws Exception {
        System.out.println("\n~~~~ recos ~~~~");
        int rank = 0;
        for (ArteResultFull result : results) {
            System.out.print(rank + ": ");
            rank++;
            printResult(result);
            System.out.println("#########################");
        }
        System.out.println("****************************************************************************************");
        rank = 0;
        for (ArteResultFull result : results) {
            System.out.print(rank + ": " + result.id + ":     ");
            rank++;
            System.out.print(result.title + "  /  ");
            System.out.println(result.score);
            // Documents without a short description must not break the dump.
            System.out.println(result.shortDesc == null ? "" : result.shortDesc.replace('\n', ' '));
            System.out.println(result.explanationString);
            System.out.println("#########################");
        }
    }

    /**
     * Prints the metadata of the document(s) with the given id or arte id to stdout.
     *
     * <p>Lookup order: {@code str} as id, then {@code str2} as arte id, then {@code str2} as
     * id. A message is written to stderr when nothing is found.
     *
     * @param str  document id; may be null or blank
     * @param str2 arte id; may be null or blank
     * @throws Exception if the index access or the entity extraction fails
     */
    @Documentation(hide = true)
    public void printDocMetadata(String str, String str2) throws Exception {
        List<Document> docs = null;
        if (!StringUtils.nullOrWhitespace(str)) {
            Document docById = LuceneUtilz.getUniqueDocWithTerm(new Term(ArteIndexAtts.id, str), this.m_indexSearcher);
            // Wrap only an existing document, otherwise the fallbacks below would be skipped.
            if (docById != null) {
                docs = Collections.singletonList(docById);
            }
        }
        if ((docs == null || docs.isEmpty()) && !StringUtils.nullOrWhitespace(str2)) {
            docs = LuceneUtilz.getDocsWithTerm(new Term(ArteIndexAtts.arteId, str2), 3, this.m_indexSearcher, null);
        }
        if ((docs == null || docs.isEmpty()) && !StringUtils.nullOrWhitespace(str2)) {
            docs = LuceneUtilz.getDocsWithTerm(new Term(ArteIndexAtts.id, str2), 3, this.m_indexSearcher, null);
        }
        if (docs == null || docs.isEmpty()) {
            System.err.println("didn't found doc for id " + str + " / arteId " + str2);
            return;
        }
        for (Document doc : docs) {
            printResult(doc2result(doc));
        }
    }

    /** Debugging aid: prints the total hit count of a fixed query ("Drama" in the short description). */
    protected void test() {
        try {
            System.out.println("TOTAL: " + this.m_indexSearcher.search(LuceneUtilz.createQuery(ArteIndexAtts.description, "Drama", GlobalConstants.indexAnalyzer), 1).totalHits);
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "test query failed", e);
        }
    }

    /**
     * Prints all non-empty attributes of a result to stdout. For the short description the
     * extracted named entities and buzzwords are printed as well.
     */
    private void printResult(ArteResultFull arteResultFull) throws Exception {
        if (hasValue(arteResultFull.id)) {
            System.out.print("§§ id:" + arteResultFull.id);
        }
        if (hasValue(arteResultFull.arteId)) {
            System.out.print("  arteId:" + arteResultFull.arteId);
        }
        System.out.println("  score :" + arteResultFull.score);
        printAttribute("type            :", arteResultFull.type);
        printAttribute("title           :", arteResultFull.title);
        printAttribute("category        :", arteResultFull.category);
        if (hasValue(arteResultFull.shortDesc)) {
            System.out.println("shortDescEntities:" + extractNamedEntities("arte_de", arteResultFull.shortDesc));
            System.out.println("shortDescBuzzwords:" + extractInterestingTerms(ArteIndexAtts.description, arteResultFull.shortDesc));
            System.out.println("shortDesc       :" + arteResultFull.shortDesc.replace('\n', ' '));
        }
        printAttribute("longDesc        :", arteResultFull.longDesc);
        printAttribute("author          :", arteResultFull.author);
        printAttribute("director        :", arteResultFull.director);
        printAttribute("theme           :", arteResultFull.theme);
        printAttribute("collectionName  :", arteResultFull.collectionName);
        printAttribute("episodeTitle    :", arteResultFull.episodeTitle);
        printAttribute("episodeNumber   :", arteResultFull.episodeNumber);
        printAttribute("screenplay      :", arteResultFull.screenplay);
        printAttribute("music           :", arteResultFull.music);
        printAttribute("cast            :", arteResultFull.cast);
        printAttribute("concertKeywords :", arteResultFull.concertKeywords);
        printAttribute("concertGenres   :", arteResultFull.concertGenres);
    }

    /** True unless the value is null or the string form of an empty value list ("[]"). */
    private static boolean hasValue(String value) {
        return value != null && !"[]".equals(value);
    }

    /** Prints label and value on one stdout line if the value is present. */
    private static void printAttribute(String label, String value) {
        if (hasValue(value)) {
            System.out.println(label + value);
        }
    }

    /**
     * Copies the stored fields of an index document into a result object. Multi-valued fields
     * are rendered in list notation ({@code "[a, b]"}, {@code "[]"} when empty). A missing
     * German title falls back to the generic title. The score is not set here.
     */
    protected ArteResultFull doc2result(Document document) {
        ArteResultFull result = new ArteResultFull();
        result.id = document.get(ArteIndexAtts.id);
        result.arteId = document.get(ArteIndexAtts.arteId);
        result.type = valuesAsListString(document, ArteIndexAtts.tag);
        result.title = document.get(ArteIndexAtts.title);
        result.category = valuesAsListString(document, ArteIndexAtts.category);
        result.shortDesc = document.get(ArteIndexAtts.description);
        result.longDesc = document.get(ArteIndexAtts.description_long);
        result.author = valuesAsListString(document, ArteIndexAtts.authors);
        result.director = valuesAsListString(document, ArteIndexAtts.directions);
        result.theme = valuesAsListString(document, ArteIndexAtts.theme);
        result.collectionName = document.get(ArteIndexAtts.collection_name);
        result.episodeTitle = document.get(ArteIndexAtts.episode_name);
        result.episodeNumber = document.get(ArteIndexAtts.episode_number);
        result.screenplay = valuesAsListString(document, ArteIndexAtts.screenplay);
        result.music = valuesAsListString(document, ArteIndexAtts.music);
        result.cast = valuesAsListString(document, ArteIndexAtts.cast);
        result.concertKeywords = valuesAsListString(document, ArteIndexAtts.concertKeywords);
        result.concertGenres = valuesAsListString(document, ArteIndexAtts.concertGenres);
        result.title_de = document.get(ArteIndexAtts.title_de);
        if (StringUtils.nullOrWhitespace(result.title_de)) {
            result.title_de = result.title;
        }
        result.title_fr = document.get(ArteIndexAtts.title_fr);
        result.url_de = document.get(ArteIndexAtts.url_de);
        result.url_fr = document.get(ArteIndexAtts.url_fr);
        return result;
    }

    /** Renders all stored values of a field in list notation. */
    private static String valuesAsListString(Document document, String fieldName) {
        return Arrays.asList(document.getValues(fieldName)).toString();
    }
}
