package de.dfki.leech.movielens;

import au.com.bytecode.opencsv.CSVReader;
import de.dfki.inquisitor.collections.MultiValueHashMap;
import de.dfki.inquisitor.text.DateUtils;
import de.dfki.km.leech.Leech;
import de.dfki.km.leech.parser.NonRecursiveCrawlerParser;
import de.dfki.km.leech.sax.CrawlReportContentHandler;
import de.dfki.km.leech.sax.PrintlnContentHandler;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.ContentHandler;

/* loaded from: input_file:de/dfki/leech/movielens/MovieLensParser.class */
public class MovieLensParser extends NonRecursiveCrawlerParser {
    private static final long serialVersionUID = -5502554684474178795L;

    /**
     * Immutable set of the media types returned by {@link #getSupportedTypes(ParseContext)}:
     * the plain, zipped and gzipped MovieLens application types.
     */
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(MediaType.application("vnd.etr.movielens"), MediaType.application("vnd.etr.movielens_zip"), MediaType.application("vnd.etr.movielens_gzip"))));

    /** Debug switch. It is not read anywhere in the visible part of this class. */
    protected static boolean debug = true;

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:de/dfki/leech/movielens/MovieLensParser$Movie.class */
    /**
     * Mutable holder for everything collected about a single movie while the archive is read.
     *
     * <p>The {@code hsNNStarUsers} sets are filled from the ratings file: each one holds the ids of
     * the users that gave this movie the rating {@code N.N} (for example {@code hs35StarUsers}
     * holds the users that rated it 3.5).
     */
    public class Movie implements Serializable {
        private static final long serialVersionUID = 6557371772785769920L;

        /** Not assigned anywhere in the visible code. */
        public String id;
        /** Movie id as used in the emitted entries; rating rows store it as {@code "m" + <csv id>}. */
        public String movieId;
        /** Emitted as {@code title}; where it is populated is not visible in this file. */
        public String title;
        /** Emitted as {@code imdbId_s}; where it is populated is not visible in this file. */
        public String imdbId;
        /** Emitted as {@code tmdbId_s}; where it is populated is not visible in this file. */
        public String tmdbId;

        /**
         * Genres of the movie, emitted as {@code genre_ss}. Starts out empty rather than
         * {@code null}, so a movie that is only known from a rating row can still be iterated
         * safely when its entry is emitted.
         */
        public HashSet<String> genres = new HashSet<>();

        public HashSet<String> hs00StarUsers = new HashSet<>();
        public HashSet<String> hs05StarUsers = new HashSet<>();
        public HashSet<String> hs10StarUsers = new HashSet<>();
        public HashSet<String> hs15StarUsers = new HashSet<>();
        public HashSet<String> hs20StarUsers = new HashSet<>();
        public HashSet<String> hs25StarUsers = new HashSet<>();
        public HashSet<String> hs30StarUsers = new HashSet<>();
        public HashSet<String> hs35StarUsers = new HashSet<>();
        public HashSet<String> hs40StarUsers = new HashSet<>();
        public HashSet<String> hs45StarUsers = new HashSet<>();
        public HashSet<String> hs50StarUsers = new HashSet<>();

        protected Movie() {
        }
    }

    /* loaded from: input_file:de/dfki/leech/movielens/MovieLensParser$MovieLensParsingInterator.class */
    protected class MovieLensParsingInterator implements Iterator<MultiValueHashMap<String, Object>> {
        // --- collaborators handed in through the constructor ---
        protected ContentHandler m_contentHandler;
        protected InputStream m_inputStream;
        protected Metadata m_metadata;
        protected ParseContext m_parseContext;

        /** ZIP view on {@link #m_inputStream}; closed and set to {@code null} once the archive has been read. */
        protected ZipInputStream m_zipInputStream;
        /** Reader for the CSV file that is currently streamed row by row (ratings or tags). */
        protected CSVReader m_csvReader;

        /** Entry prepared by {@code hasNext()} and handed out (then cleared) by {@code next()}. */
        protected MultiValueHashMap<String, Object> m_nextEntry;

        /** Created lazily once the archive is exhausted; walks {@link #m_hsMovies}. */
        protected Iterator<Map.Entry<String, Movie>> m_itMovies;
        /** Created lazily once all movies have been emitted; walks {@link #m_hsUser}. */
        protected Iterator<Map.Entry<String, User>> m_itUser;

        /** {@code true} while rating rows are streamed from {@link #m_csvReader}. */
        protected boolean m_bInRatings = false;
        /** {@code true} while tag rows are streamed from {@link #m_csvReader}. */
        protected boolean m_bInTags = false;
        // NOTE(review): not read in the visible code; presumably used by processNonStreamingPartsIfNext().
        protected boolean m_bNewZipEntry = true;

        /** Movies seen so far, keyed by movie id. */
        protected Map<String, Movie> m_hsMovies = new HashMap<>();
        /** Users seen so far, keyed by user id. */
        protected Map<String, User> m_hsUser = new HashMap<>();

        // NOTE(review): the two file-name lists are only referenced from the method that failed to
        // decompile; presumably they separate the files that are loaded completely from the files
        // that are streamed row by row. Confirm against the original source.
        protected List<String> m_lNonStreamingFileNames = Arrays.asList("links.csv", "movies.csv");
        protected List<String> m_lStreamingFileNames = Arrays.asList("ratings.csv", "tags.csv");

        public MovieLensParsingInterator(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) {
            this.m_inputStream = inputStream;
            this.m_contentHandler = contentHandler;
            this.m_metadata = metadata;
            this.m_parseContext = parseContext;
            this.m_zipInputStream = new ZipInputStream(inputStream);
        }

        /**
         * Field-name prefixes of the eleven rating buckets, indexed by the rating in half-star steps
         * (index {@code 0} is a rating of 0.0, index {@code 10} a rating of 5.0).
         */
        private final String[] m_starLabels = {"00", "05", "10", "15", "20", "25", "30", "35", "40", "45", "50"};

        /**
         * Prepares the entry that the following {@link #next()} call will return.
         *
         * <p>Sources are consumed in this order: rows of the CSV file that is currently streamed
         * (ratings take precedence over tags), then whatever {@code processNonStreamingPartsIfNext()}
         * selects next, and, once that method returns {@code null}, one aggregated entry per movie
         * followed by one aggregated entry per user.
         *
         * <p>Calling this method repeatedly without calling {@code next()} does not consume further
         * input, and {@code true} is only returned when an entry has actually been prepared: when a
         * streamed CSV file runs out, the method moves on to the next source instead of reporting an
         * entry that does not exist.
         *
         * @return {@code true} if an entry is waiting to be fetched with {@code next()}
         * @throws RuntimeException wrapping any exception raised while reading or converting the data
         */
        @Override // java.util.Iterator
        public boolean hasNext() {
            if (this.m_nextEntry != null) {
                // An entry is already pending; reading on would silently drop it.
                return true;
            }
            try {
                while (true) {
                    if (!this.m_bInRatings && !this.m_bInTags && processNonStreamingPartsIfNext() == null) {
                        return prepareAggregatedEntry();
                    }
                    if (this.m_bInRatings) {
                        String[] ratingRow = this.m_csvReader.readNext();
                        if (ratingRow != null) {
                            this.m_nextEntry = buildRatingEntry(ratingRow);
                            return true;
                        }
                        // End of the ratings file: fall through to the next source.
                        this.m_bInRatings = false;
                    } else if (this.m_bInTags) {
                        String[] tagRow = this.m_csvReader.readNext();
                        if (tagRow != null) {
                            this.m_nextEntry = buildTagEntry(tagRow);
                            return true;
                        }
                        // End of the tags file: fall through to the next source.
                        this.m_bInTags = false;
                    } else {
                        // A zip entry was reported but no streaming mode was switched on.
                        return false;
                    }
                }
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }

        /** Converts one tag row (user, movie, tag, epoch seconds) into a "tag" entry. */
        private MultiValueHashMap<String, Object> buildTagEntry(String[] row) {
            String userId = "u" + row[0];
            String movieId = "m" + row[1];
            String tag = row[2];
            Long timestamp = Long.valueOf(row[3]);

            MultiValueHashMap<String, Object> entry = new MultiValueHashMap<>();
            entry.add("dataEntityId", userId + "_" + tag + "_" + movieId + "_" + timestamp);
            entry.add("userId_sv", userId);
            entry.add("movieId_sv", movieId);
            entry.add("tag_ss", tag);
            entry.add("timestamp_dt", DateUtils.date2SolrString(new Date(timestamp.longValue() * 1000)));
            entry.add("Content-Type", "tag");
            return entry;
        }

        /**
         * Converts one rating row (user, movie, rating, epoch seconds) into a "rating" entry and
         * records the rating in the per-user and per-movie aggregates.
         */
        private MultiValueHashMap<String, Object> buildRatingEntry(String[] row) {
            String userId = "u" + row[0];
            String movieId = "m" + row[1];
            Float rating = Float.valueOf(row[2]);
            Long timestamp = Long.valueOf(row[3]);

            User user = this.m_hsUser.get(userId);
            if (user == null) {
                user = new User();
                user.userId = userId;
                this.m_hsUser.put(userId, user);
            }
            Movie movie = this.m_hsMovies.get(movieId);
            if (movie == null) {
                movie = new Movie();
                movie.movieId = movieId;
                this.m_hsMovies.put(movieId, movie);
            }

            // Ratings that are not one of the eleven half-star values are not aggregated.
            int bucket = starBucketIndex(rating.floatValue());
            if (bucket >= 0) {
                moviesOfBucket(user, bucket).add(movieId);
                usersOfBucket(movie, bucket).add(userId);
            }

            MultiValueHashMap<String, Object> entry = new MultiValueHashMap<>();
            entry.add("dataEntityId", userId + "_" + rating + "_" + movieId + "_" + timestamp);
            entry.add("userId_sv", userId);
            entry.add("movieId_sv", movieId);
            entry.add("rating_tf", rating);
            entry.add("timestamp_dt", DateUtils.date2SolrString(new Date(timestamp.longValue() * 1000)));
            entry.add("Content-Type", "rating");
            return entry;
        }

        /**
         * Closes the archive (once) and prepares the next aggregated entry: all movies first, then
         * all users.
         *
         * @return {@code false} when neither a movie nor a user is left
         */
        private boolean prepareAggregatedEntry() throws java.io.IOException {
            if (this.m_zipInputStream != null) {
                this.m_zipInputStream.close();
                this.m_zipInputStream = null;
            }
            if (this.m_itMovies == null) {
                this.m_itMovies = this.m_hsMovies.entrySet().iterator();
            }
            if (this.m_itMovies.hasNext()) {
                this.m_nextEntry = buildMovieEntry(this.m_itMovies.next().getValue());
                return true;
            }
            if (this.m_itUser == null) {
                this.m_itUser = this.m_hsUser.entrySet().iterator();
            }
            if (!this.m_itUser.hasNext()) {
                return false;
            }
            this.m_nextEntry = buildUserEntry(this.m_itUser.next().getValue());
            return true;
        }

        /** Builds the aggregated "movie" entry: ids, title, genres and the users per rating bucket. */
        private MultiValueHashMap<String, Object> buildMovieEntry(Movie movie) {
            MultiValueHashMap<String, Object> entry = new MultiValueHashMap<>();
            entry.add("dataEntityId", movie.movieId);
            entry.add("imdbId_s", movie.imdbId);
            entry.add("movieId_sv", movie.movieId);
            entry.add("tmdbId_s", movie.tmdbId);
            entry.add("title", movie.title);
            // A movie that was first seen in a rating row may never have received a genre set.
            if (movie.genres != null) {
                for (String genre : movie.genres) {
                    entry.add("genre_ss", genre);
                }
            }
            for (int bucket = 0; bucket < this.m_starLabels.length; bucket++) {
                String fieldName = this.m_starLabels[bucket] + "StarUser_ss";
                for (String userId : usersOfBucket(movie, bucket)) {
                    entry.add(fieldName, userId);
                }
            }
            entry.add("Content-Type", "movie");
            return entry;
        }

        /** Builds the aggregated "user" entry: the user id and the movies per rating bucket. */
        private MultiValueHashMap<String, Object> buildUserEntry(User user) {
            MultiValueHashMap<String, Object> entry = new MultiValueHashMap<>();
            entry.add("dataEntityId", user.userId);
            entry.add("userId_sv", user.userId);
            for (int bucket = 0; bucket < this.m_starLabels.length; bucket++) {
                String fieldName = this.m_starLabels[bucket] + "StarMovie_ss";
                for (String movieId : moviesOfBucket(user, bucket)) {
                    entry.add(fieldName, movieId);
                }
            }
            entry.add("Content-Type", "user");
            return entry;
        }

        /**
         * Maps a rating to its bucket index (rating in half-star steps), or {@code -1} when the
         * rating is not exactly one of 0.0, 0.5, ..., 5.0.
         */
        private int starBucketIndex(float rating) {
            float halfStars = rating * 2.0f;
            int index = (int) halfStars;
            boolean onGrid = index == halfStars && index >= 0 && index < this.m_starLabels.length;
            return onGrid ? index : -1;
        }

        /** Returns the set of movies the user rated with the given bucket. */
        private Set<String> moviesOfBucket(User user, int bucket) {
            switch (bucket) {
                case 0:
                    return user.hs00StarMovies;
                case 1:
                    return user.hs05StarMovies;
                case 2:
                    return user.hs10StarMovies;
                case 3:
                    return user.hs15StarMovies;
                case 4:
                    return user.hs20StarMovies;
                case 5:
                    return user.hs25StarMovies;
                case 6:
                    return user.hs30StarMovies;
                case 7:
                    return user.hs35StarMovies;
                case 8:
                    return user.hs40StarMovies;
                case 9:
                    return user.hs45StarMovies;
                case 10:
                    return user.hs50StarMovies;
                default:
                    throw new IllegalArgumentException("rating bucket out of range: " + bucket);
            }
        }

        /** Returns the set of users that rated the movie with the given bucket. */
        private Set<String> usersOfBucket(Movie movie, int bucket) {
            switch (bucket) {
                case 0:
                    return movie.hs00StarUsers;
                case 1:
                    return movie.hs05StarUsers;
                case 2:
                    return movie.hs10StarUsers;
                case 3:
                    return movie.hs15StarUsers;
                case 4:
                    return movie.hs20StarUsers;
                case 5:
                    return movie.hs25StarUsers;
                case 6:
                    return movie.hs30StarUsers;
                case 7:
                    return movie.hs35StarUsers;
                case 8:
                    return movie.hs40StarUsers;
                case 9:
                    return movie.hs45StarUsers;
                case 10:
                    return movie.hs50StarUsers;
                default:
                    throw new IllegalArgumentException("rating bucket out of range: " + bucket);
            }
        }

        /* JADX WARN: Can't rename method to resolve collision */
        /**
         * Hands out the entry prepared by the preceding {@code hasNext()} call and clears the slot.
         *
         * @return the pending entry, or {@code null} when no entry is pending
         */
        @Override // java.util.Iterator
        public MultiValueHashMap<String, Object> next() {
            try {
                return this.m_nextEntry;
            } finally {
                // The slot is emptied after the return value has been captured.
                this.m_nextEntry = null;
            }
        }

        /**
         * Removal is not supported by this iterator.
         *
         * @throws UnsupportedOperationException always
         */
        @Override // java.util.Iterator
        public void remove() {
            final UnsupportedOperationException notSupported = new UnsupportedOperationException();
            throw notSupported;
        }

        /* JADX WARN: Code restructure failed: missing block: B:65:0x0247, code lost:
        
            if (r9 != null) goto L56;
         */
        /* JADX WARN: Code restructure failed: missing block: B:67:0x0254, code lost:
        
            if (r8.m_lStreamingFileNames.contains(r10) != false) goto L92;
         */
        /* JADX WARN: Code restructure failed: missing block: B:68:0x0257, code lost:
        
            r9 = r8.m_zipInputStream.getNextEntry();
         */
        /* JADX WARN: Code restructure failed: missing block: B:69:0x0260, code lost:
        
            if (r9 != null) goto L61;
         */
        /* JADX WARN: Code restructure failed: missing block: B:70:0x0263, code lost:
        
            r0 = null;
         */
        /* JADX WARN: Code restructure failed: missing block: B:71:0x0274, code lost:
        
            r10 = r0;
         */
        /* JADX WARN: Code restructure failed: missing block: B:72:0x0276, code lost:
        
            if (r9 != null) goto L94;
         */
        /* JADX WARN: Code restructure failed: missing block: B:75:0x0282, code lost:
        
            if (r10.equals("ratings.csv") == false) goto L68;
         */
        /* JADX WARN: Code restructure failed: missing block: B:76:0x0285, code lost:
        
            org.slf4j.LoggerFactory.getLogger(de.dfki.leech.movielens.MovieLensParser.class.getName()).info("process ratings.csv");
            r8.m_bInRatings = true;
         */
        /* JADX WARN: Code restructure failed: missing block: B:77:0x02bd, code lost:
        
            r8.m_csvReader = new au.com.bytecode.opencsv.CSVReader(new java.io.InputStreamReader(r8.m_zipInputStream, java.nio.charset.Charset.forName("UTF-8")));
            r8.m_csvReader.readNext();
         */
        /* JADX WARN: Code restructure failed: missing block: B:79:0x02a4, code lost:
        
            if (r10.equals("tags.csv") == false) goto L71;
         */
        /* JADX WARN: Code restructure failed: missing block: B:80:0x02a7, code lost:
        
            org.slf4j.LoggerFactory.getLogger(de.dfki.leech.movielens.MovieLensParser.class.getName()).info("process tags.csv");
            r8.m_bInTags = true;
         */
        /* JADX WARN: Code restructure failed: missing block: B:82:0x0267, code lost:
        
            r0 = r9.getName().replaceFirst(".+/", "");
         */
        /* JADX WARN: Code restructure failed: missing block: B:85:0x02e2, code lost:
        
            return r9;
         */
        /*
            Code decompiled incorrectly, please refer to instructions dump.
            To view partially-correct add '--show-bad-code' argument
        */
        /**
         * Called by {@code hasNext()} whenever neither ratings nor tags are being streamed. A
         * {@code null} result makes {@code hasNext()} close the archive and switch to emitting the
         * aggregated movie and user entries.
         *
         * <p><b>The real implementation is missing:</b> the decompiler could not restructure this
         * method, so the body below is a placeholder that always throws
         * {@link UnsupportedOperationException}. In this state {@code hasNext()} fails (wrapped in a
         * {@code RuntimeException}) as soon as it reaches this call.
         *
         * <p>NOTE(review): judging only from the decompiler fragments in the comments preceding this
         * method, the original presumably advances {@code m_zipInputStream} entry by entry, switches
         * on {@code m_bInRatings} / {@code m_bInTags} for "ratings.csv" / "tags.csv", and opens a
         * UTF-8 {@code CSVReader} on the entry whose first row is skipped. Restore it from the
         * original source or the bytecode dump before relying on any of this.
         *
         * @return presumably the zip entry that was selected, or {@code null} at the end of the
         *         archive (TODO confirm)
         * @throws java.io.IOException declared by the original signature
         */
        protected java.util.zip.ZipEntry processNonStreamingPartsIfNext() throws java.io.IOException {
            /*
                Method dump skipped, instructions count: 739
                To view this dump add '--comments-level debug' option
            */
            throw new UnsupportedOperationException("Method not decompiled: de.dfki.leech.movielens.MovieLensParser.MovieLensParsingInterator.processNonStreamingPartsIfNext():java.util.zip.ZipEntry");
        }
    }

    /* loaded from: input_file:de/dfki/leech/movielens/MovieLensParser$Rating.class */
    /**
     * Plain holder for a single rating record. It is not referenced anywhere else in the visible
     * part of this file.
     */
    protected class Rating {
        /** Identifier of the record itself. */
        public String id;
        /** Identifier of the rating user. */
        public String userId;
        /** Identifier of the rated movie. */
        public String movieId;
        /** The rating value. */
        public float rating;
        /** Time of the rating, kept as text. */
        public String timestamp;

        protected Rating() {
        }
    }

    /* loaded from: input_file:de/dfki/leech/movielens/MovieLensParser$Tag.class */
    /**
     * Plain holder for a single tag record. It is not referenced anywhere else in the visible part
     * of this file.
     */
    protected class Tag {
        /** Identifier of the record itself. */
        public String id;
        /** Identifier of the tagging user. */
        public String userId;
        /** Identifier of the tagged movie. */
        public String movieId;
        /** The tag text. */
        public String tag;
        /** Time of the tagging, kept as text. */
        public String timestamp;

        protected Tag() {
        }
    }

    /* loaded from: input_file:de/dfki/leech/movielens/MovieLensParser$User.class */
    /**
     * Mutable holder for everything collected about a single user while the ratings are read.
     *
     * <p>Each {@code hsNNStarMovies} set holds the ids of the movies this user rated with
     * {@code N.N} (for example {@code hs45StarMovies} holds the movies rated 4.5).
     */
    protected class User implements Serializable {
        private static final long serialVersionUID = -941033865400866018L;

        /** Not assigned anywhere in the visible code. */
        public String id;
        /** User id as used in the emitted entries; rating rows store it as {@code "u" + <csv id>}. */
        public String userId;

        // One bucket per half-star rating step, from 0.0 up to 5.0.
        public HashSet<String> hs00StarMovies = new HashSet<>();
        public HashSet<String> hs05StarMovies = new HashSet<>();
        public HashSet<String> hs10StarMovies = new HashSet<>();
        public HashSet<String> hs15StarMovies = new HashSet<>();
        public HashSet<String> hs20StarMovies = new HashSet<>();
        public HashSet<String> hs25StarMovies = new HashSet<>();
        public HashSet<String> hs30StarMovies = new HashSet<>();
        public HashSet<String> hs35StarMovies = new HashSet<>();
        public HashSet<String> hs40StarMovies = new HashSet<>();
        public HashSet<String> hs45StarMovies = new HashSet<>();
        public HashSet<String> hs50StarMovies = new HashSet<>();

        protected User() {
        }
    }

    /**
     * Manual smoke test: crawls a MovieLens archive and prints the first data entity in full,
     * together with a cyclic crawl report every 60000 ms.
     *
     * @param strArr optional; the first element is the path of the archive to crawl. Without
     *               arguments the original developer path is used.
     * @throws Exception whatever the crawl raises
     */
    public static void main(String[] strArr) throws Exception {
        String archivePath = "/home/reuschling/downloads/movielens/ml-latest-small.zip";
        if (strArr != null && strArr.length > 0) {
            // Lets the smoke test run on machines other than the original developer's.
            archivePath = strArr[0];
        }
        new Leech().parse(archivePath, new CrawlReportContentHandler(new PrintlnContentHandler(PrintlnContentHandler.Verbosity.all) { // from class: de.dfki.leech.movielens.MovieLensParser.1
            // Despite the name, this flag becomes true once the first entity HAS been printed.
            boolean m_bFirst = false;

            public void processNewData(Metadata metadata, String str) {
                if (this.m_bFirst) {
                    return;
                }
                super.processNewData(metadata, str);
                this.m_bFirst = true;
            }
        }).setCyclicReportPrintln(60000L), new ParseContext());
    }

    /**
     * Reports the media types this parser handles.
     *
     * @param parseContext not used
     * @return the shared, unmodifiable set of supported types
     */
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        final Set<MediaType> supported = SUPPORTED_TYPES;
        return supported;
    }

    /**
     * Creates the iterator that walks the archive and yields one entry per data entity.
     *
     * @param inputStream    the archive stream
     * @param contentHandler passed through to the iterator
     * @param metadata       passed through to the iterator
     * @param parseContext   passed through to the iterator
     * @return a fresh {@link MovieLensParsingInterator} over the given stream
     * @throws Exception declared by the signature; the construction itself reads nothing
     */
    protected Iterator<MultiValueHashMap<String, Object>> getSubDataEntitiesInformation(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws Exception {
        final MovieLensParsingInterator entities = new MovieLensParsingInterator(inputStream, contentHandler, metadata, parseContext);
        return entities;
    }
}
