/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.scent.tools;

import ai.platon.pulsar.common.AppPaths;
import ai.platon.pulsar.common.urls.Hyperlink;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.skeleton.common.options.LoadOptions;
import ai.platon.pulsar.skeleton.crawl.filter.ChainedUrlNormalizer;
import ai.platon.pulsar.skeleton.crawl.filter.ScopedUrlNormalizer;
import ai.platon.scent.BasicScentSession;
import ai.platon.scent.common.MLPaths;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.ml.EncodeOptions;
import ai.platon.scent.ml.data.SimpleDataFrame;
import ai.platon.scent.tools.EBayProductUrlNormalizer;
import ai.platon.scent.tools.VerboseCrawler;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.attribute.FileAttribute;
import java.time.Instant;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.io.path.PathsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.jsoup.nodes.Node;

@Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000P\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0010\u000e\n\u0002\b\u0006\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000b\n\u0002\b\u0005\u0018\u00002\u00020\u0001B!\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\b\b\u0002\u0010\u0004\u001a\u00020\u0003\u0012\b\b\u0002\u0010\u0005\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0006J\f\u0010\u0014\u001a\b\u0012\u0004\u0012\u00020\u00160\u0015J(\u0010\u0017\u001a\u00020\u00182\f\u0010\u0019\u001a\b\u0012\u0004\u0012\u00020\u00030\u00152\u0012\u0010\u001a\u001a\u000e\u0012\u0004\u0012\u00020\u001c\u0012\u0004\u0012\u00020\u001d0\u001bJ\u0010\u0010\u001e\u001a\u00020\u00032\u0006\u0010\u001f\u001a\u00020\u0003H\u0002J\u001a\u0010 \u001a\b\u0012\u0004\u0012\u00020\u00030\u00152\f\u0010!\u001a\b\u0012\u0004\u0012\u00020\u00030\u0015R\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0007\u0010\bR\u000e\u0010\t\u001a\u00020\nX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0016\u0010\u000b\u001a\n \r*\u0004\u0018\u00010\f0\fX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u000e\u001a\u00020\u000fX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0011\u0010\u0005\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0010\u0010\bR\u0011\u0010\u0004\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0011\u0010\bR\u000e\u0010\u0012\u001a\u00020\u0013X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\""}, d2={"Lai/platon/scent/tools/EBayHarvester;", "", "args", "", "projectName", "projectInfo", "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V", "getArgs", "()Ljava/lang/String;", "crawler", "Lai/platon/scent/tools/VerboseCrawler;", "datasetPath", "Ljava/nio/file/Path;", "kotlin.jvm.PlatformType", "encodeOptions", 
"Lai/platon/scent/ml/EncodeOptions;", "getProjectInfo", "getProjectName", "session", "Lai/platon/scent/BasicScentSession;", "collectListPageLinks", "", "Lai/platon/pulsar/common/urls/Hyperlink;", "encodeAll", "", "urls", "nodeFilter", "Lkotlin/Function1;", "Lorg/jsoup/nodes/Node;", "", "getOrCreateProjectInfo", "more", "loadAndSelectHyperlinks", "portalUrls", "scent-boot"})
@SourceDebugExtension(value={"SMAP\nEBayHarvest.kt\nKotlin\n*S Kotlin\n*F\n+ 1 EBayHarvest.kt\nai/platon/scent/tools/EBayHarvester\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,150:1\n1360#2:151\n1446#2,5:152\n1611#2:157\n1855#2:158\n1856#2:160\n1612#2:161\n1549#2:162\n1620#2,3:163\n1549#2:167\n1620#2,3:168\n1360#2:171\n1446#2,5:172\n1#3:159\n1#3:166\n*S KotlinDebug\n*F\n+ 1 EBayHarvest.kt\nai/platon/scent/tools/EBayHarvester\n*L\n71#1:151\n71#1:152,5\n72#1:157\n72#1:158\n72#1:160\n72#1:161\n73#1:162\n73#1:163,3\n88#1:167\n88#1:168,3\n89#1:171\n89#1:172,5\n72#1:159\n*E\n"})
public final class EBayHarvester {
    @NotNull
    private final String args;
    @NotNull
    private final String projectName;
    @NotNull
    private final String projectInfo;
    @NotNull
    private final VerboseCrawler crawler;
    @NotNull
    private final BasicScentSession session;
    private final Path datasetPath;
    @NotNull
    private final EncodeOptions encodeOptions;

    public EBayHarvester(@NotNull String args, @NotNull String projectName, @NotNull String projectInfo) {
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        Intrinsics.checkNotNullParameter((Object)projectName, (String)"projectName");
        Intrinsics.checkNotNullParameter((Object)projectInfo, (String)"projectInfo");
        this.args = args;
        this.projectName = projectName;
        this.projectInfo = projectInfo;
        this.crawler = new VerboseCrawler(null, 1, null);
        this.session = this.crawler.getSession();
        this.datasetPath = AppPaths.INSTANCE.getProcTmp("ml/dataset", new String[0]).resolve("ebay.com/dataset-" + this.projectName + ".csv");
        this.encodeOptions = new EncodeOptions(this.datasetPath, false, null, 0, 0, 30, null);
        Files.createDirectories(this.datasetPath.getParent(), new FileAttribute[0]);
        ChainedUrlNormalizer.add$default((ChainedUrlNormalizer)this.session.getContext().getUrlNormalizer(), (ScopedUrlNormalizer)((ScopedUrlNormalizer)new EBayProductUrlNormalizer()), null, (int)2, null);
    }

    public /* synthetic */ EBayHarvester(String string, String object, String string2, int n, DefaultConstructorMarker defaultConstructorMarker) {
        if ((n & 2) != 0) {
            object = "p" + Instant.now().getEpochSecond();
        }
        if ((n & 4) != 0) {
            string2 = "";
        }
        this(string, (String)object, string2);
    }

    @NotNull
    public final String getArgs() {
        return this.args;
    }

    @NotNull
    public final String getProjectName() {
        return this.projectName;
    }

    @NotNull
    public final String getProjectInfo() {
        return this.projectInfo;
    }

    @NotNull
    public final List<Hyperlink> collectListPageLinks() {
        return FeaturedDocument.selectHyperlinks$default((FeaturedDocument)this.session.loadDocument("https://www.ebay.com/b/Apple/bn_21819543"), (String)"a[href~=/b/]", (int)0, (int)0, (int)6, null);
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    public final List<String> loadAndSelectHyperlinks(@NotNull List<String> portalUrls) {
        void $this$mapTo$iv$iv;
        void $this$map$iv;
        Object element$iv;
        Iterable $this$mapNotNullTo$iv;
        FeaturedDocument it;
        void $this$flatMapTo$iv$iv;
        Intrinsics.checkNotNullParameter(portalUrls, (String)"portalUrls");
        HarvestOptions options = this.session.options(this.args);
        HarvestOptions itemOptions = options.createItemOptions();
        List documents2 = this.session.loadDocuments((Iterable)portalUrls, options);
        EBayProductUrlNormalizer urlNormalizer = new EBayProductUrlNormalizer();
        Iterable $this$flatMap$iv = documents2;
        boolean $i$f$flatMap = false;
        Iterable iterable = $this$flatMap$iv;
        Collection destination$iv$iv = new ArrayList();
        boolean $i$f$flatMapTo = false;
        for (Object element$iv$iv : $this$flatMapTo$iv$iv) {
            it = (FeaturedDocument)element$iv$iv;
            boolean bl = false;
            Iterable list$iv$iv = FeaturedDocument.selectHyperlinks$default((FeaturedDocument)it, (String)options.getOutLinkSelector(), (int)0, (int)0, (int)6, null);
            CollectionsKt.addAll((Collection)destination$iv$iv, (Iterable)list$iv$iv);
        }
        $this$flatMap$iv = (List)destination$iv$iv;
        Collection destination$iv = new HashSet();
        boolean $i$f$mapNotNullTo22 = false;
        void $this$forEach$iv$iv = $this$mapNotNullTo$iv;
        boolean $i$f$forEach = false;
        Iterator iterator = $this$forEach$iv$iv.iterator();
        while (iterator.hasNext()) {
            Hyperlink it$iv;
            Object element$iv$iv;
            element$iv = element$iv$iv = iterator.next();
            boolean bl = false;
            Hyperlink it2 = (Hyperlink)element$iv;
            boolean bl2 = false;
            if (EBayProductUrlNormalizer.normalize$default(urlNormalizer, it2, null, 2, null) == null) continue;
            boolean bl3 = false;
            destination$iv.add(it$iv);
        }
        $this$mapNotNullTo$iv = destination$iv;
        boolean $i$f$map = false;
        void $i$f$mapNotNullTo22 = $this$map$iv;
        destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
        boolean $i$f$mapTo = false;
        for (Object item$iv$iv : $this$mapTo$iv$iv) {
            element$iv = (Hyperlink)item$iv$iv;
            Collection collection = destination$iv$iv;
            boolean bl = false;
            collection.add(it.getUrl());
        }
        List urls = (List)destination$iv$iv;
        this.session.submitAll((Iterable)urls, (LoadOptions)itemOptions);
        this.session.getContext().await();
        return urls;
    }

    /*
     * WARNING - void declaration
     */
    public final void encodeAll(@NotNull List<String> urls, @NotNull Function1<? super Node, Boolean> nodeFilter2) {
        void $this$flatMapTo$iv$iv;
        void $this$flatMap$iv;
        FeaturedDocument it;
        void $this$mapTo$iv$iv;
        Iterable $this$map$iv;
        Intrinsics.checkNotNullParameter(urls, (String)"urls");
        Intrinsics.checkNotNullParameter(nodeFilter2, (String)"nodeFilter");
        HarvestOptions options = this.session.options(this.args);
        HarvestOptions itemOptions = options.createItemOptions();
        ArrayList componentSelectors = itemOptions.getComponentSelectors();
        if (!(!((Collection)componentSelectors).isEmpty())) {
            boolean $i$a$-require-EBayHarvester$encodeAll$22 = false;
            String $i$a$-require-EBayHarvester$encodeAll$22 = "Component selectors cannot be empty!";
            throw new IllegalArgumentException($i$a$-require-EBayHarvester$encodeAll$22.toString());
        }
        Iterable $i$a$-require-EBayHarvester$encodeAll$22 = urls;
        boolean $i$f$map = false;
        void var9_11 = $this$map$iv;
        Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
        boolean $i$f$mapTo = false;
        for (Object item$iv$iv : $this$mapTo$iv$iv) {
            String string = (String)item$iv$iv;
            Collection collection = destination$iv$iv;
            boolean bl = false;
            collection.add(this.session.loadDocument((String)it));
        }
        $this$map$iv = (List)destination$iv$iv;
        boolean $i$f$flatMap = false;
        $this$mapTo$iv$iv = $this$flatMap$iv;
        destination$iv$iv = new ArrayList();
        boolean $i$f$flatMapTo = false;
        for (Object element$iv$iv : $this$flatMapTo$iv$iv) {
            it = (FeaturedDocument)element$iv$iv;
            boolean bl = false;
            Iterable list$iv$iv = (Iterable)FeaturedDocument.select$default((FeaturedDocument)it, (String)CollectionsKt.joinToString$default((Iterable)componentSelectors, null, null, null, (int)0, null, null, (int)63, null), (int)0, (int)0, (int)6, null);
            CollectionsKt.addAll((Collection)destination$iv$iv, (Iterable)list$iv$iv);
        }
        List rootElements2 = (List)destination$iv$iv;
        SimpleDataFrame dataFrame2 = this.session.encodeForElements((Iterable)rootElements2, this.encodeOptions, (Function1)new Function1<Node, Boolean>(nodeFilter2){
            final /* synthetic */ Function1<Node, Boolean> $nodeFilter;
            {
                this.$nodeFilter = $nodeFilter;
                super(1);
            }

            @NotNull
            public final Boolean invoke(@NotNull Node it) {
                Intrinsics.checkNotNullParameter((Object)it, (String)"it");
                return (Boolean)this.$nodeFilter.invoke((Object)it);
            }
        });
        System.out.println((Object)("Total " + dataFrame2.getRecordCount() + " records in " + dataFrame2.getSize() + " documents are encoded."));
        dataFrame2.export();
        if (Files.exists(this.datasetPath, new LinkOption[0])) {
            Path path = this.datasetPath;
            Intrinsics.checkNotNullExpressionValue((Object)path, (String)"datasetPath");
            Path projectInfoPath = this.datasetPath.resolveSibling(PathsKt.getNameWithoutExtension((Path)path) + ".info.txt");
            String info = this.getOrCreateProjectInfo("dataset base directory: " + projectInfoPath);
            Files.writeString(projectInfoPath, (CharSequence)info, new OpenOption[0]);
            Path path2 = this.datasetPath;
            Intrinsics.checkNotNullExpressionValue((Object)path2, (String)"datasetPath");
            MLPaths.INSTANCE.copyToLearnUnsupervised(path2);
        } else {
            System.out.println((Object)("Dataset is not saved to " + this.datasetPath));
        }
    }

    private final String getOrCreateProjectInfo(String more) {
        if (!StringsKt.isBlank((CharSequence)this.projectInfo)) {
            return this.projectInfo + more;
        }
        return StringsKt.trimIndent((String)("\n            buildTime: " + LocalDateTime.now() + "\n            nodeFilter: it.isRegularText && it.nthScreen <= 2\n            args: " + this.args + "\n            " + more + "\n    "));
    }
}

