package com.medallia.word2vec;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.primitives.Doubles;
import com.medallia.word2vec.thrift.Word2VecModelThrift;
import com.medallia.word2vec.util.AC;
import com.medallia.word2vec.util.Common;
import com.medallia.word2vec.util.ProfilingTimer;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.DoubleBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:com/medallia/word2vec/Word2VecModel.class */
/**
 * A word2vec model held in memory: a vocabulary, the size of each word vector, and a flat buffer
 * holding every vector component.
 *
 * <p>The vector of the word at index {@code i} of {@link #vocab} occupies positions
 * {@code [i * layerSize, (i + 1) * layerSize)} of {@link #vectors}.
 */
public class Word2VecModel {
    /** Number of bytes in one gigabyte; the stride at which {@code fromBinFile} re-maps the file. */
    private static final long ONE_GB = 1024L * 1024L * 1024L;

    /** Words of the model, in the same order as their vectors in {@link #vectors}. */
    final List<String> vocab;

    /** Number of components in each word vector. */
    final int layerSize;

    /** All vector components, stored word after word. */
    final DoubleBuffer vectors;

    /**
     * Creates a model over an existing buffer of vector components.
     *
     * @param vocab words of the model; copied into an immutable list
     * @param layerSize number of components per word vector
     * @param vectors vector components, {@code layerSize} values per word; used as-is, not copied
     */
    public Word2VecModel(Iterable<String> vocab, int layerSize, DoubleBuffer vectors) {
        this.vocab = ImmutableList.copyOf(vocab);
        this.layerSize = layerSize;
        this.vectors = vectors;
    }

    /**
     * Creates a model over an array of vector components. The array is wrapped, not copied.
     *
     * @param vocab words of the model; copied into an immutable list
     * @param layerSize number of components per word vector
     * @param vectors vector components, {@code layerSize} values per word
     */
    public Word2VecModel(Iterable<String> vocab, int layerSize, double[] vectors) {
        this(vocab, layerSize, DoubleBuffer.wrap(vectors));
    }

    /** @return the vocabulary of this model */
    public Iterable<String> getVocab() {
        return vocab;
    }

    /** @return a new {@link SearcherImpl} built over this model */
    public Searcher forSearch() {
        return new SearcherImpl(this);
    }

    /**
     * Converts this model to its Thrift representation.
     *
     * <p>When the vectors are backed by an accessible array that exactly matches the buffer, that
     * array is handed to Thrift through {@code Doubles.asList} without copying. Otherwise the
     * components are copied out through a duplicate of the buffer, so the position of the shared
     * {@link #vectors} buffer is never moved.
     *
     * @return a Thrift object carrying the vocabulary, layer size and vector components
     */
    public Word2VecModelThrift toThrift() {
        double[] values;
        // array() returns the WHOLE backing array; it only equals the buffer's content when the
        // buffer starts at offset 0 and spans the entire array.
        boolean arrayMatchesBuffer = vectors.hasArray()
                && vectors.arrayOffset() == 0
                && vectors.array().length == vectors.limit();
        if (arrayMatchesBuffer) {
            values = vectors.array();
        } else {
            DoubleBuffer view = vectors.duplicate();
            view.position(0);
            values = new double[view.remaining()];
            view.get(values);
        }
        return new Word2VecModelThrift()
                .setVocab(vocab)
                .setLayerSize(layerSize)
                .setVectors(Doubles.asList(values));
    }

    /**
     * Builds a model from its Thrift representation.
     *
     * @param thrift Thrift object providing vocabulary, layer size and vector components
     * @return the equivalent model
     */
    public static Word2VecModel fromThrift(Word2VecModelThrift thrift) {
        return new Word2VecModel(
                thrift.getVocab(),
                thrift.getLayerSize(),
                Doubles.toArray(thrift.getVectors()));
    }

    /**
     * Loads a model from a text file whose first line is {@code "<vocabSize> <layerSize>"} and
     * whose remaining lines are {@code "<word> <v1> ... <vN>"}.
     *
     * @param file the text file to read
     * @return the loaded model
     * @throws IOException if the file cannot be read
     */
    public static Word2VecModel fromTextFile(File file) throws IOException {
        return fromTextFile(file.getAbsolutePath(), Common.readToList(file));
    }

    /**
     * Loads a model from a binary file, reading floats as little-endian and without profiling.
     *
     * @param file the binary file to read
     * @return the loaded model
     * @throws IOException if the file cannot be read
     */
    public static Word2VecModel fromBinFile(File file) throws IOException {
        return fromBinFile(file, ByteOrder.LITTLE_ENDIAN, ProfilingTimer.NONE);
    }

    /**
     * Loads a model from a binary file without profiling.
     *
     * @param file the binary file to read
     * @param byteOrder byte order of the floats stored in the file
     * @return the loaded model
     * @throws IOException if the file cannot be read
     */
    public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder) throws IOException {
        return fromBinFile(file, byteOrder, ProfilingTimer.NONE);
    }

    /**
     * Loads a model from a binary file.
     *
     * <p>The file starts with a text line {@code "<vocabSize> <layerSize>\n"}; each entry is then a
     * word terminated by a space, followed by {@code layerSize} 4-byte floats. Newline bytes found
     * while reading a word are dropped, so files with or without a newline after each vector are
     * both accepted. Words are decoded as UTF-8, matching what {@link #toBinFile(OutputStream)}
     * writes.
     *
     * @param file the binary file to read
     * @param byteOrder byte order of the floats stored in the file
     * @param timer receives progress messages while loading
     * @return the loaded model, with vectors widened to doubles in a direct buffer
     * @throws IOException if the file cannot be opened or mapped
     * @throws IllegalStateException if the header line has no space, or if the vector data
     *     described by the header does not fit in a single buffer
     */
    public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder, ProfilingTimer timer) throws IOException {
        try (FileInputStream input = new FileInputStream(file);
                AC loadTimer = timer.start("Loading vectors from bin file", new Object[0])) {
            FileChannel channel = input.getChannel();
            timer.start("Reading gigabyte #1", new Object[0]);
            MappedByteBuffer buffer = channel.map(
                    FileChannel.MapMode.READ_ONLY, 0L, Math.min(channel.size(), Integer.MAX_VALUE));
            buffer.order(byteOrder);
            // A single mapping is limited to 2GB, so the file is re-mapped one gigabyte further
            // each time the read position passes 1GB. This counts the mappings made so far and
            // therefore gives the file offset of the next one.
            int bufferCount = 1;

            ByteArrayOutputStream scratch = new ByteArrayOutputStream();
            String firstLine = readToken(buffer, (byte) '\n', scratch);
            int separator = firstLine.indexOf(' ');
            Preconditions.checkState(separator != -1,
                    "Expected a space in the first line of file '%s': '%s'",
                    file.getAbsolutePath(), firstLine);
            int vocabSize = Integer.parseInt(firstLine.substring(0, separator));
            int layerSize = Integer.parseInt(firstLine.substring(separator + 1));
            timer.appendToLog(String.format(
                    "Loading %d vectors with dimensionality %d", vocabSize, layerSize));

            // Computed in long arithmetic: the int product silently overflows for large models.
            long vectorBytes = (long) vocabSize * layerSize * 8L;
            Preconditions.checkState(
                    vocabSize >= 0 && layerSize >= 0 && vectorBytes <= Integer.MAX_VALUE,
                    "Cannot load %s vectors of dimensionality %s from file '%s': "
                            + "the vector data does not fit in a single buffer",
                    vocabSize, layerSize, file.getAbsolutePath());

            List<String> words = new ArrayList<>(vocabSize);
            DoubleBuffer vectors = ByteBuffer.allocateDirect((int) vectorBytes).asDoubleBuffer();

            long lastLogMessage = System.currentTimeMillis();
            float[] row = new float[layerSize];
            for (int lineNo = 0; lineNo < vocabSize; lineNo++) {
                words.add(readToken(buffer, (byte) ' ', scratch));

                // The float view starts at the current position but does not advance it,
                // hence the explicit position update below.
                buffer.asFloatBuffer().get(row);
                int base = lineNo * layerSize;
                for (int j = 0; j < layerSize; j++) {
                    vectors.put(base + j, row[j]);
                }
                buffer.position(buffer.position() + 4 * layerSize);

                long now = System.currentTimeMillis();
                if (now - lastLogMessage > 1000) {
                    // Floating-point division; integer division would report 0% until the end.
                    double percentage = 100.0d * (lineNo + 1) / vocabSize;
                    timer.appendToLog(String.format(
                            "Loaded %d/%d vectors (%f%%)", lineNo + 1, vocabSize, percentage));
                    lastLogMessage = now;
                }

                if (buffer.position() > ONE_GB) {
                    int newPosition = (int) (buffer.position() - ONE_GB);
                    long offset = ONE_GB * bufferCount;
                    long size = Math.min(channel.size() - offset, Integer.MAX_VALUE);
                    timer.endAndStart(
                            "Reading gigabyte #%d. Start: %d, size: %d",
                            bufferCount + 1, offset, size);
                    buffer = channel.map(FileChannel.MapMode.READ_ONLY, offset, size);
                    buffer.order(byteOrder);
                    buffer.position(newPosition);
                    bufferCount++;
                }
            }
            timer.end();

            return new Word2VecModel(words, layerSize, vectors);
        }
    }

    /**
     * Reads bytes from {@code buffer} up to (and consuming) {@code delimiter} and decodes them as
     * UTF-8. Newline bytes encountered before the delimiter are skipped.
     *
     * <p>Working on raw bytes (instead of casting each byte to {@code char}) keeps multi-byte
     * UTF-8 sequences intact; neither space nor newline can occur inside such a sequence.
     */
    private static String readToken(ByteBuffer buffer, byte delimiter, ByteArrayOutputStream scratch) {
        scratch.reset();
        for (byte b = buffer.get(); b != delimiter; b = buffer.get()) {
            if (b != '\n') {
                scratch.write(b);
            }
        }
        return new String(scratch.toByteArray(), StandardCharsets.UTF_8);
    }

    /**
     * Writes this model in the binary format read by {@code fromBinFile}: a header line
     * {@code "<vocabSize> <layerSize>\n"}, then for every word its UTF-8 bytes, a space,
     * {@code layerSize} little-endian 4-byte floats and a newline.
     *
     * <p>Vectors are read through a duplicate of the buffer, so the position of the shared
     * {@link #vectors} buffer is not moved. The stream is flushed but not closed.
     *
     * @param outputStream destination of the binary model
     * @throws IOException if writing to the stream fails
     */
    public void toBinFile(OutputStream outputStream) throws IOException {
        // Plain concatenation rather than String.format("%d ..."): the header must always use
        // ASCII digits, whereas %d localizes digits according to the default locale.
        String header = vocab.size() + " " + layerSize + "\n";
        outputStream.write(header.getBytes(StandardCharsets.UTF_8));

        DoubleBuffer view = vectors.duplicate();
        double[] row = new double[layerSize];
        ByteBuffer encoded = ByteBuffer.allocate(4 * layerSize).order(ByteOrder.LITTLE_ENDIAN);
        for (int i = 0; i < vocab.size(); i++) {
            outputStream.write((vocab.get(i) + " ").getBytes(StandardCharsets.UTF_8));
            view.position(i * layerSize);
            view.get(row);
            encoded.clear();
            for (double value : row) {
                encoded.putFloat((float) value);
            }
            outputStream.write(encoded.array());
            outputStream.write('\n');
        }
        outputStream.flush();
    }

    /**
     * Builds a model from the lines of a text file.
     *
     * @param filename name of the file the lines came from; only used in error messages
     * @param lines file content: a header line {@code "<vocabSize> <layerSize>"} followed by one
     *     {@code "<word> <v1> ... <vN>"} line per word, tokens separated by single spaces
     * @return the loaded model
     * @throws IOException declared for callers; not thrown by the parsing itself
     * @throws IllegalArgumentException if the header is missing or malformed, or if the number of
     *     lines or the number of values on a line disagrees with the header
     * @throws NumberFormatException if a size or a vector component is not a valid number
     */
    @VisibleForTesting
    static Word2VecModel fromTextFile(String filename, List<String> lines) throws IOException {
        Preconditions.checkArgument(!lines.isEmpty(),
                "File '%s' is empty, expected a header line with vocab size and layer size",
                filename);
        String[] header = lines.get(0).split(" ");
        Preconditions.checkArgument(header.length >= 2,
                "For file '%s', expected vocab size and layer size on the first line, but found '%s'",
                filename, lines.get(0));
        int vocabSize = Integer.parseInt(header[0]);
        int layerSize = Integer.parseInt(header[1]);
        Preconditions.checkArgument(vocabSize == lines.size() - 1,
                "For file '%s', vocab size is %s, but there are %s word vectors in the file",
                filename, vocabSize, lines.size() - 1);
        Preconditions.checkArgument(
                layerSize >= 0 && (long) vocabSize * layerSize <= Integer.MAX_VALUE,
                "For file '%s', cannot hold %s word vectors of layer size %s in a single array",
                filename, vocabSize, layerSize);

        // Parse straight into a primitive array instead of boxing every value into a list.
        List<String> words = new ArrayList<>(vocabSize);
        double[] values = new double[vocabSize * layerSize];
        int next = 0;
        for (int lineNo = 1; lineNo < lines.size(); lineNo++) {
            String[] tokens = lines.get(lineNo).split(" ");
            // Validated before tokens[0] is read: split() yields an empty array for a line
            // consisting only of spaces.
            Preconditions.checkArgument(layerSize == tokens.length - 1,
                    "For file '%s', on line %s, layer size is %s, but found %s values in the word vector",
                    filename, lineNo, layerSize, tokens.length - 1);
            words.add(tokens[0]);
            for (int t = 1; t < tokens.length; t++) {
                values[next++] = Double.parseDouble(tokens[t]);
            }
        }
        return new Word2VecModel(words, layerSize, values);
    }

    /** @return a new {@link Word2VecTrainerBuilder} */
    public static Word2VecTrainerBuilder trainer() {
        return new Word2VecTrainerBuilder();
    }
}
