package ai.platon.scent.ml;

import ai.platon.pulsar.common.AppPaths;
import ai.platon.pulsar.common.math.vectors.VectorsKt;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.dom.features.FeatureRegistry;
import ai.platon.pulsar.dom.features.NodeFeature;
import ai.platon.pulsar.dom.nodes.NodesKt;
import ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt;
import ai.platon.pulsar.skeleton.common.options.LoadOptions;
import ai.platon.scent.BasicScentSession;
import ai.platon.scent.dom.features.defined.DefinedFeaturesKt;
import ai.platon.scent.ql.h2.context.ScentSQLContexts;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import kotlin.Metadata;
import kotlin.Unit;
import kotlin.collections.ArraysKt;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Ref;
import kotlin.jvm.internal.Reflection;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.jvm.internal.SpreadBuilder;
import kotlin.reflect.full.KTypes;
import kotlin.test.AssertionsKt;
import kotlin.text.StringsKt;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.RealVector;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.kotlinx.dataframe.DataColumn;
import org.jetbrains.kotlinx.dataframe.DataFrame;
import org.jetbrains.kotlinx.dataframe.api.AppendKt;
import org.jetbrains.kotlinx.dataframe.api.ConstructorsKt;
import org.jetbrains.kotlinx.dataframe.api.CountKt;
import org.jetbrains.kotlinx.dataframe.api.DataFrameBuilder;
import org.jetbrains.kotlinx.dataframe.api.Infer;
import org.jetbrains.kotlinx.dataframe.api.MeanKt;
import org.jetbrains.kotlinx.dataframe.api.PrintKt;
import org.jetbrains.kotlinx.dataframe.api.TypeConversionsKt;
import org.jetbrains.kotlinx.dataframe.io.CsvKt;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.junit.jupiter.api.Test;

/* compiled from: EncoderTests.kt */
@Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��R\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000e\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010 \n\u0002\u0010\b\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0006\u0018��2\u00020\u0001B\u0005¢\u0006\u0002\u0010\u0002J\u0018\u0010\u0012\u001a\n\u0012\u0002\b\u00030\u0013j\u0002`\u00142\u0006\u0010\u0015\u001a\u00020\u0016H\u0002J\u0018\u0010\u0017\u001a\u00020\u00182\u0006\u0010\u0015\u001a\u00020\u00162\u0006\u0010\u0019\u001a\u00020\u001aH\u0002J\b\u0010\u001b\u001a\u00020\u0018H\u0007J\b\u0010\u001c\u001a\u00020\u0018H\u0007J\b\u0010\u001d\u001a\u00020\u0018H\u0007J\b\u0010\u001e\u001a\u00020\u0018H\u0007J\b\u0010\u001f\u001a\u00020\u0018H\u0007R\u000e\u0010\u0003\u001a\u00020\u0004X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0005\u001a\u00020\u0006X\u0082D¢\u0006\u0002\n��R\u0016\u0010\u0007\u001a\n \t*\u0004\u0018\u00010\b0\bX\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\n\u001a\b\u0012\u0004\u0012\u00020\f0\u000bX\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\r\u001a\b\u0012\u0004\u0012\u00020\u00060\u000bX\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u000e\u001a\b\u0012\u0004\u0012\u00020\u00060\u000bX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u000f\u001a\u00020\u0010X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0011\u001a\u00020\u0006X\u0082D¢\u0006\u0002\n��¨\u0006 "}, d2 = {"Lai/platon/scent/ml/EncoderTests;", "", "()V", "FEATURE_FORMATTER", "Ljava/text/DecimalFormat;", "args", "", "csvFormat", "Lorg/apache/commons/csv/CSVFormat;", "kotlin.jvm.PlatformType", "primaryFeatureKeys", "", "", "primaryFeatureNames", "productUrls", "session", "Lai/platon/scent/BasicScentSession;", "url", "encode", "Lorg/jetbrains/kotlinx/dataframe/DataFrame;", "Lorg/jetbrains/kotlinx/dataframe/AnyFrame;", "document", 
"Lai/platon/pulsar/dom/FeaturedDocument;", "encodeDirect", "", "pr", "Lorg/apache/commons/csv/CSVPrinter;", "testEncodeDocuments", "testEncodeDocumentsAndExportWithCSVPrinter", "testEncodeSingleDocument", "testRegisteredFeatures", "testSerializeRecordToCSV", "scent-tests"})
@SourceDebugExtension({"SMAP\nEncoderTests.kt\nKotlin\n*S Kotlin\n*F\n+ 1 EncoderTests.kt\nai/platon/scent/ml/EncoderTests\n+ 2 fake.kt\nkotlin/jvm/internal/FakeKt\n+ 3 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 4 constructors.kt\norg/jetbrains/kotlinx/dataframe/api/DataFrameBuilder\n*L\n1#1,178:1\n1#2:179\n1855#3,2:180\n1864#3,3:182\n1855#3,2:185\n1864#3,3:187\n344#4,7:190\n344#4,7:197\n*S KotlinDebug\n*F\n+ 1 EncoderTests.kt\nai/platon/scent/ml/EncoderTests\n*L\n75#1:180,2\n109#1:182,3\n131#1:185,2\n136#1:187,3\n145#1:190,7\n146#1:197,7\n*E\n"})
/* loaded from: input_file:ai/platon/scent/ml/EncoderTests.class */
public final class EncoderTests {

    /** Session used by every test to load documents; created through {@code ScentSQLContexts}. */
    @NotNull
    private final BasicScentSession session = ScentSQLContexts.INSTANCE.createSession();

    /** Page loaded by the single-document tests ({@code testSerializeRecordToCSV}, {@code testEncodeSingleDocument}). */
    @NotNull
    private final String url = "https://www.amazon.com/dp/B0C1H26C46";

    /**
     * Load-option string handed to {@code session.options(...)} by the multi-document tests.
     * NOTE(review): presumably enforces a minimum page size - confirm against LoadOptions.
     */
    @NotNull
    private final String args = "-requireSize 1000000";

    /** Pages loaded and encoded by the multi-document tests; the list size is also used in the export file names. */
    @NotNull
    private final List<String> productUrls = CollectionsKt.listOf(new String[]{"https://www.amazon.com/dp/B009FUF6DM", "https://www.amazon.com/dp/B083XFNH46", "https://www.amazon.com/dp/B005NAG9BQ", "https://www.amazon.com/dp/B00K5ZL266", "https://www.amazon.com/dp/B07QFBVZK2", "https://www.amazon.com/dp/B07R4T9WCD", "https://www.amazon.com/dp/B0025TZ26Q", "https://www.amazon.com/dp/B07Y7FXDGW"});
    /** CSV dialect used for the round-trip check and for the CSVPrinter based export. */
    private final CSVFormat csvFormat = CSVFormat.EXCEL;

    /** Decimal formatter with at most four fraction digits; not referenced by any method visible in this class. */
    @NotNull
    private final DecimalFormat FEATURE_FORMATTER = new DecimalFormat("#.####");

    /**
     * Snapshot of the registry's primary feature names taken at construction time.
     * Not read by the methods visible here; the CSVPrinter export test queries the registry again for its header.
     */
    @NotNull
    private final List<String> primaryFeatureNames = FeatureRegistry.INSTANCE.getPrimaryFeatureNames();

    /** Indices into a node's feature array that {@code encodeDirect} writes out for each row. */
    @NotNull
    private final List<Integer> primaryFeatureKeys = FeatureRegistry.INSTANCE.getPrimaryFeatureKeys();

    /**
     * Round-trips one record through {@link #csvFormat} and checks that nothing is lost.
     *
     * <p>The record consists of the body's feature values, a text field that contains
     * commas and a double quote, and the document's normalized URI. The record is
     * formatted to a CSV line, parsed back, and the first two feature values, the text
     * and the URI are expected to be present in the parsed record.
     *
     * <p>The test returns without asserting anything when the body has no owner document
     * or the normalized URI is {@code null}.
     */
    @Test
    public final void testSerializeRecordToCSV() {
        Element body = this.session.loadDocument(this.url).getBody();
        RealVector rawFeatures = body.getExtension().getFeatures();
        Intrinsics.checkNotNull(rawFeatures, "null cannot be cast to non-null type org.apache.commons.math3.linear.ArrayRealVector");
        // Must be typed as ArrayRealVector: getDataRef() is not declared on RealVector.
        ArrayRealVector featureVector = (ArrayRealVector) rawFeatures;

        Document ownerDocument = body.ownerDocument();
        String normalizedURI = ownerDocument == null ? null : NodeExtKt.getNormalizedURI(ownerDocument);
        if (normalizedURI == null) {
            return;
        }

        // Contains the separator and the quote character on purpose to exercise CSV escaping.
        final String trickyText = "a,b,\"c*%#d-.e";

        double[] featureValues = featureVector.getDataRef();
        Intrinsics.checkNotNullExpressionValue(featureValues, "getDataRef(...)");
        List<Object> fields = new ArrayList<>(featureValues.length + 2);
        for (double featureValue : featureValues) {
            fields.add(Double.valueOf(featureValue));
        }
        fields.add(trickyText);
        fields.add(normalizedURI);
        Object[] values = fields.toArray();
        System.out.println(fields);
        AssertionsKt.assertEquals$default(Integer.valueOf(DefinedFeaturesKt.getN2() + 2), Integer.valueOf(values.length), (String) null, 4, (Object) null);

        String formatted = this.csvFormat.format(values);
        System.out.println(formatted);
        Intrinsics.checkNotNull(formatted);
        AssertionsKt.assertTrue(StringsKt.contains$default(formatted, "c*%#d-.e", false, 2, (Object) null), (String) null);

        // The parser is Closeable; scope it so it is released even when an assertion fails.
        try (CSVParser parser = CSVParser.parse(formatted, this.csvFormat)) {
            // Typed as CSVRecord so that toList() below resolves.
            CSVRecord parsed = (CSVRecord) CollectionsKt.first(parser);
            Intrinsics.checkNotNull(parsed);
            AssertionsKt.assertTrue(CollectionsKt.contains(parsed, String.valueOf(VectorsKt.get(featureVector, 0))), (String) null);
            AssertionsKt.assertTrue(CollectionsKt.contains(parsed, String.valueOf(VectorsKt.get(featureVector, 1))), (String) null);
            AssertionsKt.assertTrue(CollectionsKt.contains(parsed, trickyText), (String) null);
            AssertionsKt.assertTrue(CollectionsKt.contains(parsed, normalizedURI), (String) null);
            System.out.println(parsed.toList());
        }
    }

    /**
     * Prints every feature known to the {@code FeatureRegistry} and checks that the
     * number of registered features equals {@code DefinedFeaturesKt.getN2()}.
     */
    @Test
    public final void testRegisteredFeatures() {
        for (Object registered : FeatureRegistry.INSTANCE.getRegisteredFeatures()) {
            NodeFeature feature = (NodeFeature) registered;
            System.out.println(feature);
        }

        Integer expectedCount = Integer.valueOf(DefinedFeaturesKt.getN2());
        Integer actualCount = Integer.valueOf(FeatureRegistry.INSTANCE.getRegisteredFeatures().size());
        AssertionsKt.assertEquals$default(expectedCount, actualCount, (String) null, 4, (Object) null);
    }

    /**
     * Loads {@link #url}, checks the dimension of the body's feature vector against
     * {@code DefinedFeaturesKt.getN2()}, encodes the document with
     * {@link #encode(FeaturedDocument)}, prints the frame together with the mean of the
     * {@code top}, {@code left}, {@code width} and {@code height} columns, and writes the
     * frame to a CSV file under the application's temporary directory.
     */
    @Test
    public final void testEncodeSingleDocument() {
        FeaturedDocument document = this.session.loadDocument(this.url);

        RealVector rawFeatures = document.getBody().getExtension().getFeatures();
        Intrinsics.checkNotNull(rawFeatures, "null cannot be cast to non-null type org.apache.commons.math3.linear.ArrayRealVector");
        // Explicit cast: the getter is typed as RealVector, the check above only guards null.
        ArrayRealVector features = (ArrayRealVector) rawFeatures;
        AssertionsKt.assertEquals$default(Integer.valueOf(DefinedFeaturesKt.getN2()), Integer.valueOf(features.getDimension()), (String) null, 4, (Object) null);

        DataFrame<?> frame = encode(document);
        PrintKt.print$default(frame, 0, 0, true, false, true, true, 11, (Object) null);
        PrintKt.print(MeanKt.meanFor$default(frame, new String[]{"top", "left", "width", "height"}, false, 2, (Object) null));

        // The file name is derived from the URI so repeated runs overwrite the same file.
        Path exportPath = AppPaths.INSTANCE.getTmp("test", new String[]{"dataframe-" + AppPaths.INSTANCE.fileId(this.url) + ".csv"});
        File exportFile = exportPath.toFile();
        Intrinsics.checkNotNullExpressionValue(exportFile, "toFile(...)");
        CsvKt.writeCSV$default(frame, exportFile, (CSVFormat) null, 2, (Object) null);
        System.out.println("Dataframe is exported | " + exportPath);
    }

    /**
     * Encodes every page in {@link #productUrls} and appends the resulting frames to a
     * single CSV file named {@code dataframe-<count>.csv} in the test directory.
     *
     * <p>An existing file is deleted first. Each document is written through its own
     * appending writer; the CSV format passed for every document after the first has
     * {@code skipHeaderRecord} set so that the header is meant to be emitted only once.
     * The total number of encoded rows is printed at the end.
     */
    @Test
    public final void testEncodeDocuments() {
        LoadOptions options = this.session.options(this.args);
        int documentCount = this.productUrls.size();
        Path target = AppPaths.INSTANCE.getTEST_DIR().resolve("dataframe-" + documentCount + ".csv");
        Files.deleteIfExists(target);

        int totalRows = 0;
        int index = 0;
        for (String productUrl : this.productUrls) {
            DataFrame<?> frame = encode(this.session.loadDocument(productUrl, options));
            totalRows += CountKt.count(frame);

            CSVFormat format = CSVFormat.DEFAULT.withSkipHeaderRecord(index > 0);
            Intrinsics.checkNotNull(format);
            // Scope the appending writer so it is flushed and closed for every document,
            // without depending on writeCSV to release it (closing twice is harmless).
            try (FileWriter writer = new FileWriter(target.toFile(), true)) {
                CsvKt.writeCSV(frame, writer, format);
            }
            index++;
        }
        System.out.println("Total " + totalRows + " rows in " + documentCount + " documents");
    }

    /**
     * Encodes every page in {@link #productUrls} straight into a CSV file named
     * {@code dataframe-<count>-csv-printer.csv} in the test directory.
     *
     * <p>An existing file is deleted first. The header row consists of the registry's
     * primary feature names followed by {@code text} and {@code url}; the data rows are
     * produced by {@link #encodeDirect(FeaturedDocument, CSVPrinter)}.
     */
    @Test
    public final void testEncodeDocumentsAndExportWithCSVPrinter() {
        LoadOptions options = this.session.options(this.args);
        Path target = AppPaths.INSTANCE.getTEST_DIR().resolve("dataframe-" + this.productUrls.size() + "-csv-printer.csv");
        Files.deleteIfExists(target);

        // The printer wraps a BufferedWriter: without close() the tail of the output can
        // stay in the buffer and never reach the file, and the file handle is leaked.
        try (CSVPrinter printer = new CSVPrinter(new BufferedWriter(new FileWriter(target.toFile(), true)), this.csvFormat)) {
            for (Object featureName : FeatureRegistry.INSTANCE.getPrimaryFeatureNames()) {
                printer.print((String) featureName);
            }
            printer.print("text");
            printer.print("url");
            printer.println();

            for (String productUrl : this.productUrls) {
                encodeDirect(this.session.loadDocument(productUrl, options), printer);
            }
        }
    }

    /**
     * Encodes the text nodes of a document as a data frame, one row per node.
     *
     * <p>Only {@link TextNode}s whose left and top coordinates are both positive and
     * whose clean text is not blank are encoded, and at most 200000 of them. Each row
     * holds the node's feature values (one column per name reported by
     * {@code FeatureRegistry.getFeatureNames()}), the node's clean text and the
     * document's normalized URI.
     *
     * @param featuredDocument the document whose body is traversed
     * @return the feature columns grouped under {@code numericFeatures}, followed by the
     *         {@code text} and {@code url} columns
     */
    private final DataFrame<?> encode(final FeaturedDocument featuredDocument) {
        // Mutable holders, because the traversal callback below has to update them.
        final Ref.IntRef remainingRows = new Ref.IntRef();
        remainingRows.element = 200000;

        // Both frames start with zero rows; the builders only fix column names and types.
        final Function1<String, DataColumn<?>> emptyDoubleColumn = name -> {
            Intrinsics.checkNotNullParameter(name, "name");
            DataColumn.Companion companion = DataColumn.Companion;
            return DataColumn.Companion.createValueColumn$default(companion, name, new ArrayList<Double>(0), KTypes.withNullability(Reflection.typeOf(Double.TYPE), false), (Infer) null, (Object) null, 24, (Object) null);
        };
        final Function1<String, DataColumn<?>> emptyStringColumn = name -> {
            Intrinsics.checkNotNullParameter(name, "name");
            DataColumn.Companion companion = DataColumn.Companion;
            return DataColumn.Companion.createValueColumn$default(companion, name, new ArrayList<String>(0), KTypes.withNullability(Reflection.typeOf(String.class), false), (Infer) null, (Object) null, 24, (Object) null);
        };

        final Ref.ObjectRef<DataFrame<?>> numericFrame = new Ref.ObjectRef<>();
        numericFrame.element = ConstructorsKt.dataFrameOf(FeatureRegistry.INSTANCE.getFeatureNames()).withColumns(emptyDoubleColumn);
        final Ref.ObjectRef<DataFrame<?>> textFrame = new Ref.ObjectRef<>();
        textFrame.element = ConstructorsKt.dataFrameOf(new String[]{"text", "url"}).withColumns(emptyStringColumn);

        final Function1<Node, Unit> collectRow = node -> {
            Intrinsics.checkNotNullParameter(node, "node");
            boolean positionedText = node instanceof TextNode && NodeExtKt.getLeft(node) > 0 && NodeExtKt.getTop(node) > 0;
            if (!positionedText || StringsKt.isBlank(NodeExtKt.getCleanText(node))) {
                return Unit.INSTANCE;
            }
            // The budget is consumed by every qualifying node, including those past the limit.
            if (remainingRows.element-- <= 0) {
                return Unit.INSTANCE;
            }

            RealVector rawFeatures = ((TextNode) node).getExtension().getFeatures();
            Intrinsics.checkNotNull(rawFeatures, "null cannot be cast to non-null type org.apache.commons.math3.linear.ArrayRealVector");
            double[] featureValues = ((ArrayRealVector) rawFeatures).getDataRef();
            Intrinsics.checkNotNullExpressionValue(featureValues, "getDataRef(...)");
            Double[] boxedValues = ArraysKt.toTypedArray(featureValues);
            AssertionsKt.assertEquals$default(Integer.valueOf(DefinedFeaturesKt.getN2()), Integer.valueOf(boxedValues.length), (String) null, 4, (Object) null);

            // append returns a new frame, so the holders are reassigned for every row.
            numericFrame.element = AppendKt.append(numericFrame.element, (Object[]) boxedValues);
            textFrame.element = AppendKt.append(textFrame.element, new Object[]{NodeExtKt.getCleanText(node), featuredDocument.getNormalizedURI()});
            return Unit.INSTANCE;
        };
        NodesKt.forEach$default(featuredDocument.getBody(), false, collectRow, 1, (Object) null);

        return TypeConversionsKt.asColumnGroup(numericFrame.element, "numericFeatures")
                .plus(textFrame.element.get("text"))
                .plus(textFrame.element.get("url"));
    }

    /**
     * Writes one CSV row per encoded text node of a document directly to a printer.
     *
     * <p>Only {@link TextNode}s whose left and top coordinates are both positive and
     * whose clean text is not blank are written. A row consists of the feature values at
     * the indices listed in {@link #primaryFeatureKeys}, the node's clean text and the
     * document's normalized URI.
     *
     * @param featuredDocument the document whose body is traversed
     * @param cSVPrinter       the printer that receives the rows; it is neither flushed
     *                         nor closed here
     */
    private final void encodeDirect(final FeaturedDocument featuredDocument, final CSVPrinter cSVPrinter) {
        final Function1<Node, Unit> writeRow = node -> {
            Intrinsics.checkNotNullParameter(node, "node");
            boolean positionedText = node instanceof TextNode && NodeExtKt.getLeft(node) > 0 && NodeExtKt.getTop(node) > 0;
            if (!positionedText || StringsKt.isBlank(NodeExtKt.getCleanText(node))) {
                return Unit.INSTANCE;
            }

            RealVector rawFeatures = ((TextNode) node).getExtension().getFeatures();
            Intrinsics.checkNotNull(rawFeatures, "null cannot be cast to non-null type org.apache.commons.math3.linear.ArrayRealVector");
            // Fetch the backing array once instead of once per selected feature.
            double[] featureValues = ((ArrayRealVector) rawFeatures).getDataRef();
            for (Integer featureKey : this.primaryFeatureKeys) {
                cSVPrinter.print(Double.valueOf(featureValues[featureKey.intValue()]));
            }
            cSVPrinter.print(NodeExtKt.getCleanText(node));
            cSVPrinter.print(featuredDocument.getNormalizedURI());
            cSVPrinter.println();
            return Unit.INSTANCE;
        };
        NodesKt.forEach$default(featuredDocument.getBody(), false, writeRow, 1, (Object) null);
    }
}
