/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.scent.tools;

import ai.platon.pulsar.common.AppPaths;
import ai.platon.pulsar.common.urls.Hyperlink;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.skeleton.common.options.LoadOptions;
import ai.platon.pulsar.skeleton.crawl.filter.AbstractScopedUrlNormalizer;
import ai.platon.pulsar.skeleton.crawl.filter.ChainedUrlNormalizer;
import ai.platon.pulsar.skeleton.crawl.filter.ScopedUrlNormalizer;
import ai.platon.scent.BasicScentSession;
import ai.platon.scent.common.MLPaths;
import ai.platon.scent.common.sites.amazon.AmazonAsinUrlNormalizer;
import ai.platon.scent.common.sites.amazon.AmazonUrls;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.ml.EncodeOptions;
import ai.platon.scent.ml.data.SimpleDataFrame;
import ai.platon.scent.tools.VerboseCrawler;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.attribute.FileAttribute;
import java.time.Instant;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.io.path.PathsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;

@Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000H\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0010 \n\u0002\u0010\u000e\n\u0002\b\u0007\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\t\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000b\n\u0002\b\u0002\u0018\u00002\u00020\u0001:\u0001\"B1\u0012\f\u0010\u0002\u001a\b\u0012\u0004\u0012\u00020\u00040\u0003\u0012\b\b\u0002\u0010\u0005\u001a\u00020\u0004\u0012\b\b\u0002\u0010\u0006\u001a\u00020\u0004\u0012\b\b\u0002\u0010\u0007\u001a\u00020\u0004\u00a2\u0006\u0002\u0010\bJ\u001a\u0010\u001c\u001a\u00020\u001d2\u0012\u0010\u001e\u001a\u000e\u0012\u0004\u0012\u00020 \u0012\u0004\u0012\u00020!0\u001fR\u0011\u0010\u0005\u001a\u00020\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\t\u0010\nR\u000e\u0010\u000b\u001a\u00020\fX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0016\u0010\r\u001a\n \u000f*\u0004\u0018\u00010\u000e0\u000eX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0010\u001a\u00020\u0011X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0017\u0010\u0002\u001a\b\u0012\u0004\u0012\u00020\u00040\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0012\u0010\u0013R\u001a\u0010\u0014\u001a\u00020\u0004X\u0086\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u0015\u0010\n\"\u0004\b\u0016\u0010\u0017R\u0011\u0010\u0007\u001a\u00020\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0018\u0010\nR\u0011\u0010\u0006\u001a\u00020\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0019\u0010\nR\u000e\u0010\u001a\u001a\u00020\u001bX\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006#"}, d2={"Lai/platon/scent/tools/AmazonHarvester;", "", "portalUrls", "", "", "args", "restrictCss", "projectName", "(Ljava/util/List;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V", "getArgs", "()Ljava/lang/String;", "crawler", "Lai/platon/scent/tools/VerboseCrawler;", "datasetPath", "Ljava/nio/file/Path;", "kotlin.jvm.PlatformType", "encodeOptions", 
"Lai/platon/scent/ml/EncodeOptions;", "getPortalUrls", "()Ljava/util/List;", "projectInfo", "getProjectInfo", "setProjectInfo", "(Ljava/lang/String;)V", "getProjectName", "getRestrictCss", "session", "Lai/platon/scent/BasicScentSession;", "encodeAll", "", "nodeFilter", "Lkotlin/Function1;", "Lorg/jsoup/nodes/Node;", "", "ItemUrlNormalizer", "scent-boot"})
@SourceDebugExtension(value={"SMAP\nAmazonHarvest.kt\nKotlin\n*S Kotlin\n*F\n+ 1 AmazonHarvest.kt\nai/platon/scent/tools/AmazonHarvester\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,118:1\n1360#2:119\n1446#2,5:120\n1611#2:125\n1855#2:126\n1856#2:128\n1612#2:129\n1549#2:130\n1620#2,3:131\n1549#2:134\n1620#2,3:135\n1603#2,9:138\n1855#2:147\n1856#2:149\n1612#2:150\n1#3:127\n1#3:148\n*S KotlinDebug\n*F\n+ 1 AmazonHarvest.kt\nai/platon/scent/tools/AmazonHarvester\n*L\n59#1:119\n59#1:120,5\n60#1:125\n60#1:126\n60#1:128\n60#1:129\n61#1:130\n61#1:131,3\n66#1:134\n66#1:135,3\n67#1:138,9\n67#1:147\n67#1:149\n67#1:150\n60#1:127\n67#1:148\n*E\n"})
public final class AmazonHarvester {
    // Portal page URLs; loaded first in encodeAll() to discover out-links.
    @NotNull
    private final List<String> portalUrls;
    // Load arguments; turned into HarvestOptions via session.options(args).
    @NotNull
    private final String args;
    // CSS selector used to pick the single root element of every loaded item document.
    @NotNull
    private final String restrictCss;
    // Project name; becomes part of the dataset file name ("dataset-<projectName>.csv").
    @NotNull
    private final String projectName;
    // Crawler created in the constructor; it supplies the session below.
    @NotNull
    private final VerboseCrawler crawler;
    // Session obtained from the crawler; used for loading, submitting and encoding.
    @NotNull
    private final BasicScentSession session;
    // Location of the dataset CSV file; its parent directory is created in the constructor.
    private final Path datasetPath;
    // Encoding options built around datasetPath and handed to session.encodeForElements().
    @NotNull
    private final EncodeOptions encodeOptions;
    // Free-form project description; written to "<dataset name>.info.txt" next to the dataset.
    @NotNull
    private String projectInfo;

    public AmazonHarvester(@NotNull List<String> portalUrls, @NotNull String args, @NotNull String restrictCss, @NotNull String projectName) {
        Intrinsics.checkNotNullParameter(portalUrls, (String)"portalUrls");
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        Intrinsics.checkNotNullParameter((Object)restrictCss, (String)"restrictCss");
        Intrinsics.checkNotNullParameter((Object)projectName, (String)"projectName");
        this.portalUrls = portalUrls;
        this.args = args;
        this.restrictCss = restrictCss;
        this.projectName = projectName;
        this.crawler = new VerboseCrawler(null, 1, null);
        this.session = this.crawler.getSession();
        this.datasetPath = AppPaths.INSTANCE.getProcTmp("ml/dataset", new String[0]).resolve("amazon.com/dataset-" + this.projectName + ".csv");
        this.encodeOptions = new EncodeOptions(this.datasetPath, false, null, 0, 0, 30, null);
        this.projectInfo = "buildTime: " + LocalDateTime.now() + " \nargs: " + this.args + " \nrestrictCss: " + this.restrictCss;
        Files.createDirectories(this.datasetPath.getParent(), new FileAttribute[0]);
        ChainedUrlNormalizer.add$default((ChainedUrlNormalizer)this.session.getContext().getUrlNormalizer(), (ScopedUrlNormalizer)((ScopedUrlNormalizer)new AmazonAsinUrlNormalizer()), null, (int)2, null);
    }

    /**
     * Synthetic bridge that applies default argument values before delegating to the primary
     * constructor.
     *
     * <p>Each bit of {@code n} marks a parameter whose default should be used:
     * bit 2 selects {@code "-tl 100 -ol a[href*=/dp/]"} for the load arguments, bit 4 selects
     * {@code "body"} for the restricting CSS selector, and bit 8 selects {@code "p"} followed by
     * the current epoch second for the project name.
     *
     * <p>The defaults are resolved inside the {@code this(...)} argument list because an explicit
     * constructor invocation has to be the first statement of a constructor body.
     *
     * @param list                      portal URLs, passed through unchanged
     * @param string                    load arguments, used unless bit 2 of {@code n} is set
     * @param string2                   restricting CSS selector, used unless bit 4 of {@code n} is set
     * @param object                    project name, used unless bit 8 of {@code n} is set
     * @param n                         bit mask of parameters to default
     * @param defaultConstructorMarker  unused marker that distinguishes this overload
     */
    public /* synthetic */ AmazonHarvester(List list, String string, String string2, String object, int n, DefaultConstructorMarker defaultConstructorMarker) {
        this(
                list,
                (n & 2) != 0 ? "-tl 100 -ol a[href*=/dp/]" : string,
                (n & 4) != 0 ? "body" : string2,
                (n & 8) != 0 ? "p" + Instant.now().getEpochSecond() : object);
    }

    /**
     * Returns the portal URLs given at construction (the stored list itself, not a copy).
     */
    @NotNull
    public final List<String> getPortalUrls() {
        return this.portalUrls;
    }

    /**
     * Returns the load arguments given at construction.
     */
    @NotNull
    public final String getArgs() {
        return this.args;
    }

    /**
     * Returns the CSS selector used to pick the root element of each item document.
     */
    @NotNull
    public final String getRestrictCss() {
        return this.restrictCss;
    }

    /**
     * Returns the project name that is embedded in the dataset file name.
     */
    @NotNull
    public final String getProjectName() {
        return this.projectName;
    }

    /**
     * Returns the project description that encodeAll() writes next to the dataset file.
     */
    @NotNull
    public final String getProjectInfo() {
        return this.projectInfo;
    }

    /**
     * Replaces the project description.
     *
     * @param string the new description; rejected by the Kotlin null check if null
     */
    public final void setProjectInfo(@NotNull String string) {
        // "<set-?>" is the parameter name reported by the null check for this setter.
        Intrinsics.checkNotNullParameter((Object)string, (String)"<set-?>");
        this.projectInfo = string;
    }

    /*
     * WARNING - void declaration
     */
    public final void encodeAll(@NotNull Function1<? super Node, Boolean> nodeFilter2) {
        void $this$mapNotNullTo$iv$iv;
        void $this$mapNotNull$iv;
        void $this$mapTo$iv$iv;
        Iterable $this$map$iv;
        Collection collection;
        void $this$mapTo$iv$iv2;
        void $this$map$iv2;
        Object element$iv;
        Iterable $this$mapNotNullTo$iv;
        FeaturedDocument it;
        void $this$flatMapTo$iv$iv;
        Intrinsics.checkNotNullParameter(nodeFilter2, (String)"nodeFilter");
        HarvestOptions options = this.session.options(this.args);
        HarvestOptions itemOptions = options.createItemOptions();
        List documents2 = this.session.loadDocuments((Iterable)this.portalUrls, options);
        ItemUrlNormalizer urlNormalizer = new ItemUrlNormalizer();
        Iterable $this$flatMap$iv = documents2;
        boolean $i$f$flatMap = false;
        Iterable iterable = $this$flatMap$iv;
        Collection destination$iv$iv = new ArrayList();
        boolean $i$f$flatMapTo = false;
        for (Object element$iv$iv : $this$flatMapTo$iv$iv) {
            it = (FeaturedDocument)element$iv$iv;
            boolean bl = false;
            Iterable list$iv$iv = FeaturedDocument.selectHyperlinks$default((FeaturedDocument)it, (String)options.getOutLinkSelector(), (int)0, (int)0, (int)6, null);
            CollectionsKt.addAll((Collection)destination$iv$iv, (Iterable)list$iv$iv);
        }
        $this$flatMap$iv = (List)destination$iv$iv;
        Collection destination$iv = new HashSet();
        boolean $i$f$mapNotNullTo22 = false;
        void $this$forEach$iv$iv = $this$mapNotNullTo$iv;
        boolean $i$f$forEach = false;
        Iterator iterator = $this$forEach$iv$iv.iterator();
        while (iterator.hasNext()) {
            Hyperlink it$iv;
            Object element$iv$iv;
            element$iv = element$iv$iv = iterator.next();
            boolean bl = false;
            Hyperlink it2 = (Hyperlink)element$iv;
            boolean bl2 = false;
            if (ItemUrlNormalizer.normalize$default(urlNormalizer, it2, null, 2, null) == null) continue;
            boolean bl3 = false;
            destination$iv.add(it$iv);
        }
        $this$mapNotNullTo$iv = destination$iv;
        boolean $i$f$map22 = false;
        void $i$f$mapNotNullTo22 = $this$map$iv2;
        destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv2, (int)10));
        boolean $i$f$mapTo = false;
        for (Object item$iv$iv : $this$mapTo$iv$iv2) {
            element$iv = (Hyperlink)item$iv$iv;
            collection = destination$iv$iv;
            boolean bl = false;
            collection.add(it.getUrl());
        }
        List urls = (List)destination$iv$iv;
        this.session.submitAll((Iterable)urls, (LoadOptions)itemOptions);
        this.session.getContext().await();
        Iterable $i$f$map22 = urls;
        boolean $i$f$map = false;
        destination$iv$iv = $this$map$iv;
        Collection destination$iv$iv2 = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
        boolean $i$f$mapTo2 = false;
        for (Object item$iv$iv : $this$mapTo$iv$iv) {
            void it3;
            String bl = (String)item$iv$iv;
            collection = destination$iv$iv2;
            boolean bl4 = false;
            collection.add(this.session.loadDocument((String)it3));
        }
        $this$map$iv = (List)destination$iv$iv2;
        boolean $i$f$mapNotNull = false;
        $this$mapTo$iv$iv = $this$mapNotNull$iv;
        destination$iv$iv2 = new ArrayList();
        boolean $i$f$mapNotNullTo = false;
        void $this$forEach$iv$iv$iv = $this$mapNotNullTo$iv$iv;
        boolean $i$f$forEach2 = false;
        Iterator iterator2 = $this$forEach$iv$iv$iv.iterator();
        while (iterator2.hasNext()) {
            Element it$iv$iv;
            Object element$iv$iv$iv;
            Object element$iv$iv = element$iv$iv$iv = iterator2.next();
            boolean bl = false;
            FeaturedDocument it4 = (FeaturedDocument)element$iv$iv;
            boolean bl5 = false;
            if (it4.selectFirstOrNull(this.restrictCss) == null) continue;
            boolean bl6 = false;
            destination$iv$iv2.add(it$iv$iv);
        }
        List rootElements2 = (List)destination$iv$iv2;
        SimpleDataFrame dataFrame2 = this.session.encodeForElements((Iterable)rootElements2, this.encodeOptions, (Function1)new Function1<Node, Boolean>(nodeFilter2){
            final /* synthetic */ Function1<Node, Boolean> $nodeFilter;
            {
                this.$nodeFilter = $nodeFilter;
                super(1);
            }

            @NotNull
            public final Boolean invoke(@NotNull Node it) {
                Intrinsics.checkNotNullParameter((Object)it, (String)"it");
                return (Boolean)this.$nodeFilter.invoke((Object)it);
            }
        });
        System.out.println((Object)("Total " + dataFrame2.getRecordCount() + " records in " + dataFrame2.getSize() + " documents are encoded."));
        dataFrame2.export();
        if (Files.exists(this.datasetPath, new LinkOption[0])) {
            Path path = this.datasetPath;
            Intrinsics.checkNotNullExpressionValue((Object)path, (String)"datasetPath");
            Path projectInfoPath = this.datasetPath.resolveSibling(PathsKt.getNameWithoutExtension((Path)path) + ".info.txt");
            Files.writeString(projectInfoPath, (CharSequence)this.projectInfo, new OpenOption[0]);
            Path path2 = this.datasetPath;
            Intrinsics.checkNotNullExpressionValue((Object)path2, (String)"datasetPath");
            MLPaths.INSTANCE.copyToLearnUnsupervised(path2);
        } else {
            System.out.println((Object)("Dataset is not saved to " + this.datasetPath));
        }
    }

    @Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000\"\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u000b\n\u0000\n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\u0018\u00002\u00020\u0001B\u0005\u00a2\u0006\u0002\u0010\u0002J\u0018\u0010\u0003\u001a\u00020\u00042\u0006\u0010\u0005\u001a\u00020\u00062\u0006\u0010\u0007\u001a\u00020\u0006H\u0016J\u001a\u0010\b\u001a\u0004\u0018\u00010\t2\u0006\u0010\n\u001a\u00020\t2\b\b\u0002\u0010\u0007\u001a\u00020\u0006J\u001a\u0010\b\u001a\u0004\u0018\u00010\u00062\u0006\u0010\u0005\u001a\u00020\u00062\u0006\u0010\u0007\u001a\u00020\u0006H\u0016\u00a8\u0006\u000b"}, d2={"Lai/platon/scent/tools/AmazonHarvester$ItemUrlNormalizer;", "Lai/platon/pulsar/skeleton/crawl/filter/AbstractScopedUrlNormalizer;", "()V", "isRelevant", "", "url", "", "scope", "normalize", "Lai/platon/pulsar/common/urls/Hyperlink;", "link", "scent-boot"})
    /*
     * URL normalizer for Amazon item pages. Every decision is delegated to AmazonUrls.INSTANCE;
     * the scope argument is null-checked but otherwise not consulted by any method here.
     */
    public static final class ItemUrlNormalizer
    extends AbstractScopedUrlNormalizer {
        /**
         * Tells whether {@code url} is an item page according to {@code AmazonUrls.INSTANCE.isItemPage}.
         *
         * @param url   the URL to test; must not be null
         * @param scope must not be null; not used otherwise
         * @return the result of the item-page check
         */
        public boolean isRelevant(@NotNull String url, @NotNull String scope) {
            Intrinsics.checkNotNullParameter((Object)url, (String)"url");
            Intrinsics.checkNotNullParameter((Object)scope, (String)"scope");
            final boolean itemPage = AmazonUrls.INSTANCE.isItemPage(url);
            return itemPage;
        }

        /**
         * Normalizes {@code url} with {@code AmazonUrls.INSTANCE.normalizeAsinUrl}.
         *
         * @param url   the URL to normalize; must not be null
         * @param scope must not be null; not used otherwise
         * @return whatever the delegate returns, possibly {@code null}
         */
        @Nullable
        public String normalize(@NotNull String url, @NotNull String scope) {
            Intrinsics.checkNotNullParameter((Object)url, (String)"url");
            Intrinsics.checkNotNullParameter((Object)scope, (String)"scope");
            final String normalized = AmazonUrls.INSTANCE.normalizeAsinUrl(url);
            return normalized;
        }

        /**
         * Normalizes a hyperlink in place.
         *
         * <p>When the delegate yields a normalized URL, the link's current URL is first copied
         * into its href and the URL is then replaced by the normalized one.
         *
         * @param link  the hyperlink to rewrite; must not be null
         * @param scope must not be null; not used otherwise
         * @return the same {@code link} instance after rewriting, or {@code null} when the
         *         delegate returned {@code null} (the link is left untouched in that case)
         */
        @Nullable
        public final Hyperlink normalize(@NotNull Hyperlink link, @NotNull String scope) {
            Intrinsics.checkNotNullParameter((Object)link, (String)"link");
            Intrinsics.checkNotNullParameter((Object)scope, (String)"scope");
            final String normalizedUrl = AmazonUrls.INSTANCE.normalizeAsinUrl(link.getUrl());
            if (normalizedUrl != null) {
                // Preserve the pre-normalization URL in href before overwriting url.
                link.setHref(link.getUrl());
                link.setUrl(normalizedUrl);
                return link;
            }
            return null;
        }

        /**
         * Synthetic bridge for the hyperlink overload: substitutes the empty string for the scope
         * when bit 2 of {@code n} is set, then delegates.
         */
        public static /* synthetic */ Hyperlink normalize$default(ItemUrlNormalizer itemUrlNormalizer, Hyperlink hyperlink, String string, int n, Object object) {
            final String effectiveScope = (n & 2) != 0 ? "" : string;
            return itemUrlNormalizer.normalize(hyperlink, effectiveScope);
        }
    }
}

