/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.nlp.encoder;

import ai.grazie.utils.cache.Cache;
import ai.grazie.utils.cache.Caching;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.collections.SetsKt;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={1, 7, 0}, k=1, xi=48, d1={"\u00002\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010$\n\u0002\u0010\b\n\u0000\n\u0002\u0010\"\n\u0002\b\u0005\u0018\u00002\u00020\u0001B\u001f\u0012\u0018\u0010\u0002\u001a\u0014\u0012\u0010\u0012\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\u00040\u0003\u00a2\u0006\u0002\u0010\u0006J(\u0010\f\u001a\u0014\u0012\u0010\u0012\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\u00040\r2\f\u0010\u000e\u001a\b\u0012\u0004\u0012\u00020\u00050\u0003H\u0002J\u0010\u0010\u000f\u001a\u00020\u00052\u0006\u0010\u0010\u001a\u00020\u0005H\u0002J\u000e\u0010\u0011\u001a\u00020\u00052\u0006\u0010\u0010\u001a\u00020\u0005R\u001a\u0010\u0007\u001a\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\bX\u0082\u0004\u00a2\u0006\u0002\n\u0000R&\u0010\t\u001a\u001a\u0012\u0010\u0012\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\u0004\u0012\u0004\u0012\u00020\u000b0\nX\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0012"}, d2={"Lai/grazie/nlp/encoder/BPE;", "", "bpeMerges", "", "Lkotlin/Pair;", "", "(Ljava/util/List;)V", "bpeCache", "Lai/grazie/utils/cache/Cache;", "bpeRanks", "", "", "getPairs", "", "bpeWord", "runBpeTokenization", "word", "tokenize", "nlp-encoder-engine"})
@SourceDebugExtension(value={"SMAP\nBPE.kt\nKotlin\n*S Kotlin\n*F\n+ 1 BPE.kt\nai/grazie/nlp/encoder/BPE\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,96:1\n1559#2:97\n1590#2,4:98\n1549#2:102\n1620#2,3:103\n2333#2,14:106\n*S KotlinDebug\n*F\n+ 1 BPE.kt\nai/grazie/nlp/encoder/BPE\n*L\n15#1:97\n15#1:98,4\n32#1:102\n32#1:103,3\n40#1:106,14\n*E\n"})
/**
 * Splits a word into symbols by repeatedly merging adjacent symbol pairs in rank order (BPE).
 *
 * <p>Each merge pair's rank is its position in the list given to the constructor; a lower
 * rank is merged earlier. Results of {@link #tokenize(String)} are memoized in a cache.
 *
 * <p>This file is decompiler output of a Kotlin class; the body below has been rewritten
 * into well-formed, generically typed Java with the same behavior.
 */
public final class BPE {
    /** Rank of every known merge pair: its index in the constructor's {@code bpeMerges} list. */
    @NotNull
    private final Map<Pair<String, String>, Integer> bpeRanks;
    /** Memoized tokenization results, keyed by the input word. */
    @NotNull
    private final Cache<String, String> bpeCache;

    /**
     * Builds the rank table from an ordered list of merge pairs.
     *
     * @param bpeMerges merge pairs in priority order; the pair at index {@code i} gets rank
     *                  {@code i}. If a pair occurs more than once, the last index wins.
     */
    public BPE(@NotNull List<Pair<String, String>> bpeMerges) {
        Intrinsics.checkNotNullParameter(bpeMerges, (String)"bpeMerges");
        List<Pair<Pair<String, String>, Integer>> rankedMerges =
                new ArrayList<Pair<Pair<String, String>, Integer>>(bpeMerges.size());
        int rank = 0;
        for (Pair<String, String> merge : bpeMerges) {
            // Same guard Kotlin's mapIndexed emits: fail instead of wrapping to a negative index.
            if (rank < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            rankedMerges.add(TuplesKt.to(merge, Integer.valueOf(rank)));
            ++rank;
        }
        this.bpeRanks = MapsKt.toMap(rankedMerges);
        // NOTE(review): `default` is a reserved word in Java, so this call (kept verbatim from the
        // decompiler output) only compiles from Kotlin. The argument 5000 is presumably the cache
        // capacity - confirm against ai.grazie.utils.cache.Caching.
        this.bpeCache = Caching.INSTANCE.default(5000);
    }

    /**
     * Tokenizes a single word, consulting the cache first.
     *
     * @param word the word to split; must not be {@code null}
     * @return the word's symbols joined by single spaces, or {@code word} itself when it has
     *         fewer than two characters
     */
    @NotNull
    public final String tokenize(@NotNull String word) {
        Intrinsics.checkNotNullParameter((Object)word, (String)"word");
        String cached = this.bpeCache.get(word);
        if (cached != null) {
            return cached;
        }
        String bpeWord = this.runBpeTokenization(word);
        this.bpeCache.put(word, bpeWord);
        return bpeWord;
    }

    /**
     * Runs the merge loop for one word without touching the cache.
     *
     * <p>The word starts as one symbol per {@code char}. On each round the adjacent pair with
     * the lowest rank is merged everywhere it occurs; the loop stops when the best pair has no
     * rank or only one symbol is left.
     */
    private final String runBpeTokenization(String word) {
        List<String> bpeWord = new ArrayList<String>(word.length());
        for (int k = 0; k < word.length(); ++k) {
            bpeWord.add(String.valueOf(word.charAt(k)));
        }
        Set<Pair<String, String>> pairs = this.getPairs(bpeWord);
        if (pairs.isEmpty()) {
            // Zero or one symbol: nothing can be merged.
            return word;
        }
        while (true) {
            Pair<String, String> bigram = this.lowestRankedPair(pairs);
            // pairs is never empty here (checked above, and the loop exits at one symbol).
            Intrinsics.checkNotNull(bigram);
            if (!this.bpeRanks.containsKey(bigram)) {
                break;
            }
            bpeWord = this.mergePair(bpeWord, bigram.component1(), bigram.component2());
            if (bpeWord.size() == 1) {
                break;
            }
            pairs = this.getPairs(bpeWord);
        }
        return String.join(" ", bpeWord);
    }

    /**
     * Returns the pair with the smallest rank, treating unranked pairs as
     * {@link Integer#MAX_VALUE}. Ties keep the pair encountered first in iteration order.
     * Returns {@code null} only when {@code pairs} is empty.
     */
    private Pair<String, String> lowestRankedPair(Set<Pair<String, String>> pairs) {
        Pair<String, String> best = null;
        int bestRank = Integer.MAX_VALUE;
        for (Pair<String, String> candidate : pairs) {
            int candidateRank = this.bpeRanks.getOrDefault(candidate, Integer.MAX_VALUE);
            if (best == null || candidateRank < bestRank) {
                best = candidate;
                bestRank = candidateRank;
            }
        }
        return best;
    }

    /**
     * Produces a new symbol list in which every adjacent occurrence of
     * {@code firstSymbol, secondSymbol} is replaced by their concatenation.
     */
    private List<String> mergePair(List<String> symbols, String firstSymbol, String secondSymbol) {
        List<String> merged = new ArrayList<String>();
        int i = 0;
        while (i < symbols.size()) {
            // Look up the remainder once per step; it is used for both the search and the tail copy.
            List<String> rest = ai.grazie.utils.CollectionsKt.suffix(symbols, i);
            int j = rest.indexOf(firstSymbol);
            if (j == -1) {
                // No further occurrence of firstSymbol: keep the remainder unchanged.
                merged.addAll(rest);
                break;
            }
            // Copy everything before the occurrence, then step onto it.
            merged.addAll(symbols.subList(i, i + j));
            i += j;
            boolean followedBySecond = symbols.get(i).equals(firstSymbol)
                    && i < symbols.size() - 1
                    && symbols.get(i + 1).equals(secondSymbol);
            if (followedBySecond) {
                merged.add(firstSymbol + secondSymbol);
                i += 2;
            } else {
                merged.add(firstSymbol);
                ++i;
            }
        }
        return merged;
    }

    /**
     * Collects the distinct pairs of adjacent symbols in {@code bpeWord}.
     *
     * @return an empty set when {@code bpeWord} has fewer than two symbols
     */
    private final Set<Pair<String, String>> getPairs(List<String> bpeWord) {
        if (bpeWord.isEmpty()) {
            return SetsKt.emptySet();
        }
        Set<Pair<String, String>> pairs = new HashSet<Pair<String, String>>();
        String prevSymbol = bpeWord.get(0);
        int n = bpeWord.size();
        for (int idx = 1; idx < n; ++idx) {
            String symbol = bpeWord.get(idx);
            pairs.add(TuplesKt.to(prevSymbol, symbol));
            prevSymbol = symbol;
        }
        return pairs;
    }
}

