/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.scent.tools;

import ai.platon.pulsar.common.ResourceLoader;
import ai.platon.pulsar.common.urls.Hyperlink;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.skeleton.common.options.LoadOptions;
import ai.platon.pulsar.skeleton.crawl.filter.ChainedUrlNormalizer;
import ai.platon.pulsar.skeleton.crawl.filter.ScopedUrlNormalizer;
import ai.platon.scent.BasicScentSession;
import ai.platon.scent.common.MLPaths;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.ml.EncodeOptions;
import ai.platon.scent.ml.data.SimpleDataFrame;
import ai.platon.scent.ml.encoding.EncodeProject;
import ai.platon.scent.ml.harvest.HarvestProject;
import ai.platon.scent.tools.EBayHarvester;
import ai.platon.scent.tools.EBayProductUrlNormalizer;
import ai.platon.scent.tools.SimpleCrawler;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.attribute.FileAttribute;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.collections.CollectionsKt;
import kotlin.io.path.PathsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.functions.Function2;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.text.StringsKt;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.kotlinx.dataframe.DataColumn;
import org.jetbrains.kotlinx.dataframe.DataFrame;
import org.jetbrains.kotlinx.dataframe.api.ChunkedKt;
import org.jetbrains.kotlinx.dataframe.api.GroupByKt;
import org.jetbrains.kotlinx.dataframe.api.PrintKt;
import org.jetbrains.kotlinx.dataframe.api.RenameKt;
import org.jetbrains.kotlinx.dataframe.api.SelectKt;
import org.jetbrains.kotlinx.dataframe.api.Split;
import org.jetbrains.kotlinx.dataframe.api.SplitKt;
import org.jetbrains.kotlinx.dataframe.io.CsvKt;
import org.jetbrains.kotlinx.dataframe.io.DataFrameHtmlData;
import org.jetbrains.kotlinx.dataframe.io.GuessKt;
import org.jetbrains.kotlinx.dataframe.io.HtmlKt;
import org.jsoup.nodes.Node;

@Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000V\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0010\u000e\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000b\n\u0002\b\u0005\u0018\u00002\u00020\u0001B\u0017\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\b\b\u0002\u0010\u0004\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0005J\f\u0010\u0014\u001a\b\u0012\u0004\u0012\u00020\u00160\u0015J\u0006\u0010\u0017\u001a\u00020\u0018J\u0006\u0010\u0019\u001a\u00020\u0018J(\u0010\u001a\u001a\u00020\u00182\f\u0010\u001b\u001a\b\u0012\u0004\u0012\u00020\u00030\u00152\u0012\u0010\u001c\u001a\u000e\u0012\u0004\u0012\u00020\u001e\u0012\u0004\u0012\u00020\u001f0\u001dJ\u0010\u0010 \u001a\u00020\u00032\u0006\u0010!\u001a\u00020\u0003H\u0002J\u001a\u0010\"\u001a\b\u0012\u0004\u0012\u00020\u00030\u00152\f\u0010#\u001a\b\u0012\u0004\u0012\u00020\u00030\u0015R\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0006\u0010\u0007R\u000e\u0010\b\u001a\u00020\tX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0016\u0010\n\u001a\n \f*\u0004\u0018\u00010\u000b0\u000bX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\r\u001a\u00020\u000eX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u000f\u001a\u00020\u0010X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0011\u0010\u0004\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0011\u0010\u0007R\u000e\u0010\u0012\u001a\u00020\u0013X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006$"}, d2={"Lai/platon/scent/tools/EBayHarvester;", "", "args", "", "projectInfo", "(Ljava/lang/String;Ljava/lang/String;)V", "getArgs", "()Ljava/lang/String;", "crawler", "Lai/platon/scent/tools/SimpleCrawler;", "datasetPath", "Ljava/nio/file/Path;", "kotlin.jvm.PlatformType", 
"encodeOptions", "Lai/platon/scent/ml/EncodeOptions;", "project", "Lai/platon/scent/ml/encoding/EncodeProject;", "getProjectInfo", "session", "Lai/platon/scent/BasicScentSession;", "collectListPageLinks", "", "Lai/platon/pulsar/common/urls/Hyperlink;", "createHarvestResultDatasetView", "", "createPredictionAndMinimalDataset", "encodeAll", "urls", "nodeFilter", "Lkotlin/Function1;", "Lorg/jsoup/nodes/Node;", "", "getOrCreateProjectInfo", "more", "loadOutPages", "portalUrls", "scent-boot"})
@SourceDebugExtension(value={"SMAP\nEBayHarvest.kt\nKotlin\n*S Kotlin\n*F\n+ 1 EBayHarvest.kt\nai/platon/scent/tools/EBayHarvester\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 fake.kt\nkotlin/jvm/internal/FakeKt\n+ 4 forEach.kt\norg/jetbrains/kotlinx/dataframe/api/ForEachKt\n*L\n1#1,203:1\n1360#2:204\n1446#2,5:205\n1611#2:210\n1855#2:211\n1856#2:213\n1612#2:214\n1549#2:215\n1620#2,3:216\n1549#2:220\n1620#2,3:221\n1360#2:224\n1446#2,5:225\n1864#2,3:231\n1#3:212\n1#3:219\n12#4:230\n*S KotlinDebug\n*F\n+ 1 EBayHarvest.kt\nai/platon/scent/tools/EBayHarvester\n*L\n82#1:204\n82#1:205,5\n83#1:210\n83#1:211\n83#1:213\n83#1:214\n84#1:215\n84#1:216,3\n99#1:220\n99#1:221,3\n100#1:224\n100#1:225,5\n138#1:231,3\n83#1:212\n138#1:230\n*E\n"})
public final class EBayHarvester {
    /** Raw argument string; turned into harvest options via {@code session.options(args)}. */
    @NotNull
    private final String args;
    /** Caller-supplied project description; when blank, a description is generated on demand. */
    @NotNull
    private final String projectInfo;
    /** Crawler created in the constructor; it supplies the session used by this class. */
    @NotNull
    private final SimpleCrawler crawler;
    /** Session obtained from {@link #crawler}; used to load documents and encode elements. */
    @NotNull
    private final BasicScentSession session;
    /** Encode project created with {@code EncodeProject.Type.PREDICT}. */
    @NotNull
    private final EncodeProject project;
    /** Dataset path reported by {@link #project}; carries no nullability annotation. */
    private final Path datasetPath;
    /** Encode options built around {@link #datasetPath}. */
    @NotNull
    private final EncodeOptions encodeOptions;

    public EBayHarvester(@NotNull String args, @NotNull String projectInfo) {
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        Intrinsics.checkNotNullParameter((Object)projectInfo, (String)"projectInfo");
        this.args = args;
        this.projectInfo = projectInfo;
        this.crawler = new SimpleCrawler(null, null, 3, null);
        this.session = this.crawler.getSession();
        this.project = EncodeProject.Companion.newProject(EncodeProject.Type.PREDICT);
        this.datasetPath = this.project.getDatasetPath();
        this.encodeOptions = new EncodeOptions(this.datasetPath, false, null, 0, 0, 30, null);
        Files.createDirectories(this.datasetPath.getParent(), new FileAttribute[0]);
        ChainedUrlNormalizer.add$default((ChainedUrlNormalizer)this.session.getContext().getUrlNormalizer(), (ScopedUrlNormalizer)((ScopedUrlNormalizer)new EBayProductUrlNormalizer()), null, (int)2, null);
    }

    /**
     * Default-argument bridge generated by the Kotlin compiler for the primary constructor.
     *
     * @param string                    the {@code args} value
     * @param string2                   the {@code projectInfo} value; ignored when bit 1 of {@code n} is set
     * @param n                         bit mask of defaulted parameters; bit value 2 selects the
     *                                  default (empty string) for {@code projectInfo}
     * @param defaultConstructorMarker  unused marker that only disambiguates the overload
     */
    public /* synthetic */ EBayHarvester(String string, String string2, int n, DefaultConstructorMarker defaultConstructorMarker) {
        // The explicit constructor call must be the first statement, so the default is
        // resolved inside the argument expression instead of in a preceding `if`.
        this(string, (n & 2) != 0 ? "" : string2);
    }

    /**
     * Returns the argument string this harvester was constructed with.
     *
     * @return the value of {@code args}
     */
    @NotNull
    public final String getArgs() {
        return args;
    }

    /**
     * Returns the project description this harvester was constructed with.
     *
     * @return the value of {@code projectInfo}; may be blank
     */
    @NotNull
    public final String getProjectInfo() {
        return projectInfo;
    }

    /**
     * Loads a fixed eBay brand page and returns the hyperlinks on it that match
     * {@code a[href~=/b/]}.
     *
     * @return the hyperlinks selected from the loaded page
     */
    @NotNull
    public final List<Hyperlink> collectListPageLinks() {
        final String portalUrl = "https://www.ebay.com/b/Apple/bn_21819543";
        final String linkSelector = "a[href~=/b/]";

        FeaturedDocument portalPage = this.session.loadDocument(portalUrl);
        // The remaining selectHyperlinks parameters keep their Kotlin defaults (mask 6).
        return FeaturedDocument.selectHyperlinks$default(portalPage, linkSelector, 0, 0, 6, null);
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    public final List<String> loadOutPages(@NotNull List<String> portalUrls) {
        void $this$mapTo$iv$iv;
        void $this$map$iv;
        Object element$iv;
        Iterable $this$mapNotNullTo$iv;
        FeaturedDocument it;
        void $this$flatMapTo$iv$iv;
        Intrinsics.checkNotNullParameter(portalUrls, (String)"portalUrls");
        HarvestOptions options = this.session.options(this.args);
        HarvestOptions itemOptions = options.createItemOptions();
        List documents2 = this.session.loadDocuments((Iterable)portalUrls, options);
        EBayProductUrlNormalizer urlNormalizer = new EBayProductUrlNormalizer();
        Iterable $this$flatMap$iv = documents2;
        boolean $i$f$flatMap = false;
        Iterable iterable = $this$flatMap$iv;
        Collection destination$iv$iv = new ArrayList();
        boolean $i$f$flatMapTo = false;
        for (Object element$iv$iv : $this$flatMapTo$iv$iv) {
            it = (FeaturedDocument)element$iv$iv;
            boolean bl = false;
            Iterable list$iv$iv = FeaturedDocument.selectHyperlinks$default((FeaturedDocument)it, (String)options.getOutLinkSelector(), (int)0, (int)0, (int)6, null);
            CollectionsKt.addAll((Collection)destination$iv$iv, (Iterable)list$iv$iv);
        }
        $this$flatMap$iv = (List)destination$iv$iv;
        Collection destination$iv = new HashSet();
        boolean $i$f$mapNotNullTo22 = false;
        void $this$forEach$iv$iv = $this$mapNotNullTo$iv;
        boolean $i$f$forEach = false;
        Iterator iterator = $this$forEach$iv$iv.iterator();
        while (iterator.hasNext()) {
            Hyperlink it$iv;
            Object element$iv$iv;
            element$iv = element$iv$iv = iterator.next();
            boolean bl = false;
            Hyperlink it2 = (Hyperlink)element$iv;
            boolean bl2 = false;
            if (EBayProductUrlNormalizer.normalize$default(urlNormalizer, it2, null, 2, null) == null) continue;
            boolean bl3 = false;
            destination$iv.add(it$iv);
        }
        $this$mapNotNullTo$iv = destination$iv;
        boolean $i$f$map = false;
        void $i$f$mapNotNullTo22 = $this$map$iv;
        destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
        boolean $i$f$mapTo = false;
        for (Object item$iv$iv : $this$mapTo$iv$iv) {
            element$iv = (Hyperlink)item$iv$iv;
            Collection collection = destination$iv$iv;
            boolean bl = false;
            collection.add(it.getUrl());
        }
        List urls = (List)destination$iv$iv;
        this.session.submitAll((Iterable)urls, (LoadOptions)itemOptions);
        this.session.getContext().await();
        return urls;
    }

    /*
     * WARNING - void declaration
     */
    public final void encodeAll(@NotNull List<String> urls, @NotNull Function1<? super Node, Boolean> nodeFilter2) {
        void $this$flatMapTo$iv$iv;
        void $this$flatMap$iv;
        FeaturedDocument it;
        void $this$mapTo$iv$iv;
        Iterable $this$map$iv;
        Intrinsics.checkNotNullParameter(urls, (String)"urls");
        Intrinsics.checkNotNullParameter(nodeFilter2, (String)"nodeFilter");
        HarvestOptions options = this.session.options(this.args);
        HarvestOptions itemOptions = options.createItemOptions();
        ArrayList componentSelectors = itemOptions.getComponentSelectors();
        if (!(!((Collection)componentSelectors).isEmpty())) {
            boolean $i$a$-require-EBayHarvester$encodeAll$22 = false;
            String $i$a$-require-EBayHarvester$encodeAll$22 = "Component selectors cannot be empty!";
            throw new IllegalArgumentException($i$a$-require-EBayHarvester$encodeAll$22.toString());
        }
        Iterable $i$a$-require-EBayHarvester$encodeAll$22 = urls;
        boolean $i$f$map = false;
        void var9_11 = $this$map$iv;
        Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
        boolean $i$f$mapTo = false;
        for (Object item$iv$iv : $this$mapTo$iv$iv) {
            String string = (String)item$iv$iv;
            Collection collection = destination$iv$iv;
            boolean bl = false;
            collection.add(this.session.loadDocument((String)it));
        }
        $this$map$iv = (List)destination$iv$iv;
        boolean $i$f$flatMap = false;
        $this$mapTo$iv$iv = $this$flatMap$iv;
        destination$iv$iv = new ArrayList();
        boolean $i$f$flatMapTo = false;
        for (Object element$iv$iv : $this$flatMapTo$iv$iv) {
            it = (FeaturedDocument)element$iv$iv;
            boolean bl = false;
            Iterable list$iv$iv = (Iterable)FeaturedDocument.select$default((FeaturedDocument)it, (String)CollectionsKt.joinToString$default((Iterable)componentSelectors, null, null, null, (int)0, null, null, (int)63, null), (int)0, (int)0, (int)6, null);
            CollectionsKt.addAll((Collection)destination$iv$iv, (Iterable)list$iv$iv);
        }
        List rootElements2 = (List)destination$iv$iv;
        SimpleDataFrame dataFrame2 = this.session.encodeForElements((Iterable)rootElements2, this.encodeOptions, (Function1)new Function1<Node, Boolean>(nodeFilter2){
            final /* synthetic */ Function1<Node, Boolean> $nodeFilter;
            {
                this.$nodeFilter = $nodeFilter;
                super(1);
            }

            @NotNull
            public final Boolean invoke(@NotNull Node it) {
                Intrinsics.checkNotNullParameter((Object)it, (String)"it");
                return (Boolean)this.$nodeFilter.invoke((Object)it);
            }
        });
        System.out.println((Object)("Total " + dataFrame2.getRecordCount() + " records in " + dataFrame2.getSize() + " documents are encoded."));
        dataFrame2.export();
        if (Files.exists(this.datasetPath, new LinkOption[0])) {
            Path path = this.datasetPath;
            Intrinsics.checkNotNullExpressionValue((Object)path, (String)"datasetPath");
            Path projectInfoPath = this.datasetPath.resolveSibling(PathsKt.getNameWithoutExtension((Path)path) + ".info.txt");
            String info = this.getOrCreateProjectInfo("dataset base directory: " + projectInfoPath);
            Files.writeString(projectInfoPath, (CharSequence)info, new OpenOption[0]);
            Path path2 = this.datasetPath;
            Intrinsics.checkNotNullExpressionValue((Object)path2, (String)"datasetPath");
            MLPaths.INSTANCE.copyToLearnUnsupervised(path2);
        } else {
            System.out.println((Object)("Dataset is not saved to " + this.datasetPath));
        }
    }

    /**
     * Produces the text stored in the project info file.
     *
     * <p>When {@code projectInfo} is not blank, the result is {@code projectInfo} directly
     * followed by {@code more}. Otherwise a description is generated that contains the current
     * time, a fixed {@code nodeFilter} line, the {@code args} string and {@code more}.
     *
     * @param more additional text to append
     * @return the project description text
     */
    private final String getOrCreateProjectInfo(String more) {
        if (StringsKt.isBlank(this.projectInfo)) {
            // The nodeFilter line is literal text; it is not derived from any filter object.
            String generated = "\n            buildTime: " + OffsetDateTime.now()
                    + "\n            nodeFilter: it.isRegularText && it.nthScreen <= 2"
                    + "\n            args: " + this.args
                    + "\n            " + more
                    + "\n    ";
            return StringsKt.trimIndent(generated);
        }
        return this.projectInfo + more;
    }

    /*
     * WARNING - void declaration
     */
    public final void createPredictionAndMinimalDataset() {
        HarvestProject project = new HarvestProject(this.project.getId());
        File file = project.getEncodeProject().getDatasetPath().toFile();
        Intrinsics.checkNotNullExpressionValue((Object)file, (String)"toFile(...)");
        DataFrame df2 = GuessKt.read$default((DataFrame.Companion)DataFrame.Companion, (File)file, null, (int)2, null);
        String[] stringArray = new String[]{"label", "prediction", "top-g0", "left-g0", "width-g0", "height-g0", "seq-g0", "text", "url"};
        DataFrame dataFrame2 = SelectKt.select((DataFrame)df2, (String[])stringArray);
        stringArray = new Pair[]{TuplesKt.to((Object)"top-g0", (Object)"top"), TuplesKt.to((Object)"left-g0", (Object)"left"), TuplesKt.to((Object)"width-g0", (Object)"width"), TuplesKt.to((Object)"height-g0", (Object)"height"), TuplesKt.to((Object)"seq-g0", (Object)"seq")};
        df2 = RenameKt.rename((DataFrame)dataFrame2, (Pair[])stringArray);
        FileUtils.deleteDirectory((File)project.getPredictionAndMinimalFeaturesBaseDir().toFile());
        Files.createDirectories(project.getPredictionAndMinimalFeaturesBaseDir(), new FileAttribute[0]);
        DataColumn $this$forEachIndexed$iv = (DataColumn)ChunkedKt.chunked$default((DataFrame)df2, (int)2000, null, (int)2, null);
        boolean $i$f$forEachIndexed = false;
        Iterable $this$forEachIndexed$iv$iv = $this$forEachIndexed$iv.values();
        boolean $i$f$forEachIndexed2 = false;
        int index$iv$iv = 0;
        for (Object item$iv$iv : $this$forEachIndexed$iv$iv) {
            void chunk;
            int n;
            if ((n = index$iv$iv++) < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            DataFrame dataFrame3 = (DataFrame)item$iv$iv;
            int i = n;
            boolean bl = false;
            UUID uuid = UUID.randomUUID();
            String seq = StringUtils.leftPad((String)String.valueOf(i), (int)5, (String)"0");
            File file2 = project.getPredictionAndMinimalFeaturesBaseDir().resolve("part-" + seq + "-" + uuid + ".csv").toFile();
            Intrinsics.checkNotNull((Object)file2);
            CsvKt.writeCSV$default((DataFrame)chunk, (File)file2, null, (int)2, null);
        }
    }

    /**
     * Builds an HTML view of the dataset grouped by URL and writes it to
     * {@code harvest-result-dataset-view.html} in the project's
     * {@code getPredictionAndMinimalFeaturesBaseDir()} directory.
     *
     * <p>The dataset is reduced to a fixed set of columns, the {@code *-g0} columns are renamed
     * to their short names, rows are grouped by {@code url} and aggregated, the aggregated column
     * is renamed to {@code texts} and split, and the result is printed and rendered to HTML with
     * the stylesheet from {@code wwwroot/template/page.table.css}.
     *
     * <p>NOTE(review): {@code createHarvestResultDatasetView.1.INSTANCE} and
     * {@code createHarvestResultDatasetView.2.INSTANCE} are decompiler references to compiled
     * Kotlin lambda classes whose bodies are not part of this file. They are kept verbatim and
     * must be replaced with the real aggregate/split lambdas before this method can compile.
     *
     * <p>NOTE(review): the output directory is deleted first; it is the same directory that
     * {@code createPredictionAndMinimalDataset} writes its part files to - confirm this is intended.
     */
    public final void createHarvestResultDatasetView() {
        File datasetFile = this.project.getDatasetPath().toFile();
        Intrinsics.checkNotNullExpressionValue(datasetFile, "toFile(...)");
        DataFrame<?> dataset = GuessKt.read$default(DataFrame.Companion, datasetFile, null, 2, null);

        // Separate, correctly typed arrays for column names and rename pairs.
        String[] selectedColumns = {"label", "prediction", "top-g0", "left-g0", "width-g0", "height-g0", "seq-g0", "text", "url"};
        Pair[] columnRenames = {
            TuplesKt.to("top-g0", "top"),
            TuplesKt.to("left-g0", "left"),
            TuplesKt.to("width-g0", "width"),
            TuplesKt.to("height-g0", "height"),
            TuplesKt.to("seq-g0", "seq")
        };
        DataFrame<?> minimal = RenameKt.rename(SelectKt.select(dataset, selectedColumns), columnRenames);

        String[] groupColumns = {"url"};
        DataFrame aggregated = GroupByKt.groupBy((DataFrame)minimal, groupColumns).aggregate((Function2)createHarvestResultDatasetView.1.INSTANCE);

        Pair[] aggregateRenames = {TuplesKt.to("aggregated", "texts")};
        DataFrame renamed = RenameKt.rename(aggregated, aggregateRenames);

        String[] textColumns = {"texts"};
        DataFrame view = SplitKt.into$default((Split)SplitKt.split((DataFrame)SelectKt.select(renamed, textColumns), (Function2)createHarvestResultDatasetView.2.INSTANCE), new String[0], null, 2, null);
        PrintKt.print$default(view, 0, 0, false, false, false, false, 63, null);

        HarvestProject harvestProject = new HarvestProject(this.project.getId());
        FileUtils.deleteDirectory(harvestProject.getPredictionAndMinimalFeaturesBaseDir().toFile());
        Files.createDirectories(harvestProject.getPredictionAndMinimalFeaturesBaseDir());

        File htmlFile = harvestProject.getPredictionAndMinimalFeaturesBaseDir().resolve("harvest-result-dataset-view.html").toFile();
        String style = ResourceLoader.INSTANCE.readString("wwwroot/template/page.table.css");
        DataFrameHtmlData html = HtmlKt.toHTML$default(view, null, null, null, 7, null).plus(new DataFrameHtmlData(style, null, null, 6, null));
        Intrinsics.checkNotNull(htmlFile);
        html.writeHTML(htmlFile);
    }
}

