package dfki.km.tweekreco.arte;

import au.com.bytecode.opencsv.CSVReader;
import de.dfki.inquisition.text.StringUtils;
import dfki.km.tweekreco.GlobalConstants;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/* loaded from: input_file:dfki/km/tweekreco/arte/ArteIndexCreator.class */
/**
 * Builds a Lucene index from a semicolon-separated arte CSV dump.
 *
 * <p>Each CSV record becomes one {@link Document}; each of the first 28 columns is mapped to the
 * field named by the corresponding {@code ArteIndexAtts} constant (see {@link #COLUMNS}).
 * Column values that look like serialized lists ({@code ["a", "b"]}) or hashes
 * ({@code {"k"=>"v"}}) are exploded into one field instance per element / per key.
 *
 * <p>Instances hold at most one open {@link IndexWriter} at a time; this class does no
 * synchronization of its own.
 */
public class ArteIndexCreator {
    private static final Logger LOG = Logger.getLogger(ArteIndexCreator.class.getName());

    /**
     * Delimiters of the serialized list/hash syntax: {@code ["}, {@code ", "}, {@code "]},
     * {@code {"} and {@code "}}. Compiled once because it is applied to every column of every row.
     */
    private static final Pattern VALUE_DELIMITER = Pattern.compile("\\[\"|\",\\s*\"|\"\\]|\\{\"|\"\\}");

    /** Separates key and value inside a serialized hash entry ({@code key"=>"value}). */
    private static final String KEY_VALUE_SEPARATOR = "\"=>\"";

    /** A progress message is logged every time this many documents have been indexed. */
    private static final int PROGRESS_LOG_INTERVAL = 300;

    /** CSV column layout, in file order: target field name and how the value is indexed. */
    private static final Column[] COLUMNS = {
        keyword(ArteIndexAtts.id),
        keyword(ArteIndexAtts.arteId),
        keyword(ArteIndexAtts.tag),
        text(ArteIndexAtts.title),
        keyword(ArteIndexAtts.category),
        keyword(ArteIndexAtts.meta_categories),
        text(ArteIndexAtts.description),
        text(ArteIndexAtts.description_long),
        keyword(ArteIndexAtts.published_at),
        keyword(ArteIndexAtts.expired_at),
        keyword(ArteIndexAtts.allowed_countries),
        keyword(ArteIndexAtts.allowed_platforms),
        keyword(ArteIndexAtts.thumbnail_loc),
        keyword(ArteIndexAtts.player_loc),
        keyword(ArteIndexAtts.view_count),
        keyword(ArteIndexAtts.authors),
        keyword(ArteIndexAtts.directions),
        keyword(ArteIndexAtts.video_rank),
        keyword(ArteIndexAtts.theme),
        keyword(ArteIndexAtts.collection_name),
        text(ArteIndexAtts.episode_name),
        keyword(ArteIndexAtts.episode_number),
        keyword(ArteIndexAtts.screenplay),
        keyword(ArteIndexAtts.music),
        keyword(ArteIndexAtts.cast),
        keyword(ArteIndexAtts.concertKeywords),
        keyword(ArteIndexAtts.concertGenres),
        keyword(ArteIndexAtts.artist_name),
    };

    /** The writer opened by {@link #startWriting}; {@code null} while no index is being written. */
    protected IndexWriter m_indexWriter;

    /** Neither read nor written by this class; kept for subclasses that may rely on it. */
    protected boolean m_bInitialized;

    /**
     * Adds the value(s) contained in one CSV cell to a document.
     *
     * <p>Blank cells and the empty-collection markers {@code []} and {@code {}} add nothing.
     * Otherwise the cell is split on the list/hash delimiters and every non-blank token is added
     * as its own field instance after normalization: for hash entries only the key (the part
     * before {@code "=>"}) is kept, and a single leading {@code #} is stripped.
     *
     * @param document the document to add the field(s) to
     * @param str the name of the field to create
     * @param str2 the raw CSV cell content; may be {@code null} or blank
     * @param z {@code true} to add the values as {@link TextField}, {@code false} to add them as
     *     {@link StringField}; both are stored
     */
    protected void addValue2Doc(Document document, String str, String str2, boolean z) {
        if (StringUtils.nullOrWhitespace(str2) || "[]".equals(str2) || "{}".equals(str2)) {
            return;
        }
        for (String token : VALUE_DELIMITER.split(str2)) {
            if (StringUtils.nullOrWhitespace(token)) {
                continue;
            }
            String value = token;
            // indexOf/substring instead of split(...)[0]: split drops trailing empty strings, so a
            // token that consists only of the separator would yield an empty array and throw.
            int separatorPos = value.indexOf(KEY_VALUE_SEPARATOR);
            if (separatorPos >= 0) {
                value = value.substring(0, separatorPos);
            }
            if (value.startsWith("#")) {
                value = value.substring(1);
            }
            if (z) {
                document.add(new TextField(str, value, Field.Store.YES));
            } else {
                document.add(new StringField(str, value, Field.Store.YES));
            }
        }
    }

    /**
     * Reads the CSV dump and writes one index document per usable record.
     *
     * <p>The file is read as UTF-8 with {@code ;} as separator; the first record is treated as a
     * header and skipped. Records with a blank first column are skipped silently; records that
     * are too short or that fail while being indexed are logged and skipped, and the import
     * continues with the next record.
     *
     * <p>The CSV file is opened before the index writer so that a missing or unreadable input
     * file does not touch the index directory. If reading fails part-way, the writer is rolled
     * back instead of committed, and the reader is always closed.
     *
     * @param str path of the CSV file to import
     * @param str2 path of the index directory; the writer is opened in create mode
     * @throws IOException if the CSV file cannot be read or the index cannot be opened or committed
     * @throws IllegalStateException if this instance already has an open index writer
     */
    public void createArteIndex(String str, String str2) throws IOException {
        LOG.info("start loading arte data");
        CSVReader csvReader = new CSVReader(new InputStreamReader(new FileInputStream(str), "UTF-8"), ';');
        try {
            startWriting(str2, true);
            boolean completed = false;
            try {
                csvReader.readNext(); // header record
                int indexedCount = 0;
                int recordNumber = 0;
                String[] row;
                while ((row = csvReader.readNext()) != null) {
                    recordNumber++;
                    if (indexRow(row, recordNumber)) {
                        indexedCount++;
                        if (indexedCount % PROGRESS_LOG_INTERVAL == 0) {
                            LOG.info("loaded " + StringUtils.beautifyNumber(Integer.valueOf(indexedCount)) + " arte data entries");
                        }
                    }
                }
                LOG.info("loaded " + StringUtils.beautifyNumber(Integer.valueOf(indexedCount)) + " arte data entries");
                completed = true;
            } finally {
                if (!completed) {
                    abortWriting();
                }
            }
            stopWriting();
        } finally {
            csvReader.close();
        }
        LOG.info("...finished");
    }

    /**
     * Converts one CSV record into a document and adds it to the open index writer.
     *
     * @param row the parsed CSV record
     * @param recordNumber 1-based number of the data record (header not counted), used in log output
     * @return {@code true} if a document was added, {@code false} if the record was skipped
     */
    private boolean indexRow(String[] row, int recordNumber) {
        if (row.length > 0 && StringUtils.nullOrWhitespace(row[0])) {
            // No id: nothing to index (this also covers empty lines).
            return false;
        }
        if (row.length < COLUMNS.length) {
            LOG.log(Level.WARNING, ("Skipping csv line " + recordNumber + ": expected at least " + COLUMNS.length
                    + " columns but found " + row.length + ": " + Arrays.asList(row)).replace('\n', ' '));
            return false;
        }
        Document document = new Document();
        try {
            for (int col = 0; col < COLUMNS.length; col++) {
                addValue2Doc(document, COLUMNS[col].fieldName, row[col], COLUMNS[col].analyzed);
            }
            this.m_indexWriter.addDocument(document);
            return true;
        } catch (Exception e) {
            // Deliberately broad and best-effort: one bad record must not abort the whole import.
            LOG.log(Level.WARNING, ("Error while adding a document to the index. " + document + " Current csv line: " + recordNumber + " parsed csv line: " + Arrays.asList(row)).replace('\n', ' '), e);
            return false;
        }
    }

    /**
     * Imports the hard-coded dump {@code arte_dump_de_14-04-01.csv} from the working directory
     * into the {@code index} directory below {@code GlobalConstants.strAppBasePath}.
     *
     * @param strArr ignored
     * @throws IOException if the import fails
     */
    public static void main(String[] strArr) throws IOException {
        new ArteIndexCreator().createArteIndex("arte_dump_de_14-04-01.csv", GlobalConstants.strAppBasePath + "index");
    }

    /**
     * Opens the index writer on the given directory in create mode.
     *
     * @param str path of the index directory
     * @param z {@code true} to allow replacing an index that already exists under {@code str};
     *     {@code false} to fail if one exists
     * @throws IllegalStateException if a writer is already open, or if {@code z} is {@code false}
     *     and an index already exists under {@code str}
     * @throws IOException if the directory or the writer cannot be opened
     */
    public void startWriting(String str, boolean z) throws IOException {
        if (this.m_indexWriter != null) {
            throw new IllegalStateException("An open IndexWriter exists yet.");
        }
        // One directory instance serves both the existence check and the writer; it is closed
        // again if the writer could not be created.
        SimpleFSDirectory directory = new SimpleFSDirectory(new File(str));
        boolean opened = false;
        try {
            if (!z && DirectoryReader.indexExists(directory)) {
                throw new IllegalStateException("Found existing index under " + str);
            }
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, GlobalConstants.indexAnalyzer);
            indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            this.m_indexWriter = new IndexWriter(directory, indexWriterConfig);
            opened = true;
            LOG.info("initialized arte index creator");
        } finally {
            if (!opened) {
                directory.close();
            }
        }
    }

    /**
     * Commits pending changes and closes the index writer, waiting for running merges.
     * Does nothing if no writer is open.
     *
     * <p>If committing or closing fails, the writer reference is kept so the call can be retried.
     *
     * @throws IOException if committing or closing the writer fails
     */
    public void stopWriting() throws IOException {
        if (this.m_indexWriter != null) {
            this.m_indexWriter.commit();
            this.m_indexWriter.close(true);
            this.m_indexWriter = null;
        }
    }

    /**
     * Discards uncommitted changes and closes the writer after a failed import. A failure of the
     * rollback itself is only logged so that it does not hide the exception that caused the abort.
     */
    private void abortWriting() {
        IndexWriter writer = this.m_indexWriter;
        this.m_indexWriter = null;
        if (writer == null) {
            return;
        }
        try {
            writer.rollback();
        } catch (IOException e) {
            LOG.log(Level.WARNING, "Could not roll back the index writer after a failed import.", e);
        }
    }

    private static Column keyword(String fieldName) {
        return new Column(fieldName, false);
    }

    private static Column text(String fieldName) {
        return new Column(fieldName, true);
    }

    /** Describes how one CSV column is mapped to an index field. */
    private static final class Column {
        final String fieldName;
        /** {@code true}: value is added as {@link TextField}; {@code false}: as {@link StringField}. */
        final boolean analyzed;

        Column(String fieldName, boolean analyzed) {
            this.fieldName = fieldName;
            this.analyzed = analyzed;
        }
    }
}
