/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.scent.tools;

import ai.platon.pulsar.common.LogsKt;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.dom.nodes.node.ext.ExportPaths;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.skeleton.common.options.LoadOptions;
import ai.platon.pulsar.skeleton.context.PulsarContext;
import ai.platon.pulsar.skeleton.crawl.filter.ScopedUrlNormalizer;
import ai.platon.scent.BasicScentSession;
import ai.platon.scent.analysis.corpus.FullFeaturedDocumentKt;
import ai.platon.scent.dom.HNormUrl;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.dom.nodes.AnchorGroup;
import ai.platon.scent.entities.HarvestResult;
import ai.platon.scent.ml.harvest.HarvestProject;
import ai.platon.scent.ql.h2.context.ScentSQLContexts;
import ai.platon.scent.skeleton.ScentContext;
import ai.platon.scent.skeleton.ScentSession;
import ai.platon.scent.tools.BasicWebHarvester;
import ai.platon.scent.tools.SimpleCrawler;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.concurrent.atomic.AtomicBoolean;
import kotlin.Metadata;
import kotlin.ResultKt;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.coroutines.Continuation;
import kotlin.coroutines.intrinsics.IntrinsicsKt;
import kotlin.io.path.PathsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.functions.Function2;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlinx.coroutines.BuildersKt;
import kotlinx.coroutines.CoroutineScope;
import org.apache.commons.io.FileUtils;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000t\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0010\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\u001c\n\u0000\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\b\n\u0002\b\u0002\b\u0016\u0018\u00002\u00020\u0001B\u001b\u0012\b\b\u0002\u0010\u0002\u001a\u00020\u0003\u0012\n\b\u0002\u0010\u0004\u001a\u0004\u0018\u00010\u0005\u00a2\u0006\u0002\u0010\u0006J\u0018\u0010\f\u001a\u0004\u0018\u00010\r2\u0006\u0010\u000e\u001a\u00020\u000f2\u0006\u0010\u0010\u001a\u00020\u000fJ\u0016\u0010\u0011\u001a\u00020\u00122\u0006\u0010\u0013\u001a\u00020\u00142\u0006\u0010\u0015\u001a\u00020\u0016J\u000e\u0010\u0017\u001a\u00020\u00122\u0006\u0010\u0018\u001a\u00020\u0019J\u000e\u0010\u0017\u001a\u00020\u00122\u0006\u0010\u001a\u001a\u00020\u000fJ\b\u0010\u001b\u001a\u00020\u0012H\u0016J\u0016\u0010\u001c\u001a\u00020\u00142\u0006\u0010\u000e\u001a\u00020\u000f2\u0006\u0010\u0015\u001a\u00020\u0016J\u0016\u0010\u001c\u001a\u00020\u00142\u0006\u0010\u000e\u001a\u00020\u000f2\u0006\u0010\u0010\u001a\u00020\u000fJ\u001c\u0010\u001c\u001a\u00020\u00142\f\u0010\u001d\u001a\b\u0012\u0004\u0012\u00020\u000f0\u001e2\u0006\u0010\u0010\u001a\u00020\u000fJ\u001c\u0010\u001c\u001a\u00020\u00142\f\u0010\u001f\u001a\b\u0012\u0004\u0012\u00020!0 
2\u0006\u0010\u0015\u001a\u00020\u0016J\u001c\u0010\u001c\u001a\u00020\u00142\f\u0010\u001f\u001a\b\u0012\u0004\u0012\u00020!0\"2\u0006\u0010\u0015\u001a\u00020\u0016J$\u0010#\u001a\b\u0012\u0004\u0012\u00020!0\"2\u0006\u0010$\u001a\u00020%2\u0006\u0010&\u001a\u00020'2\u0006\u0010(\u001a\u00020'R\u000e\u0010\u0007\u001a\u00020\bX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0016\u0010\t\u001a\n \u000b*\u0004\u0018\u00010\n0\nX\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006)"}, d2={"Lai/platon/scent/tools/BasicWebHarvester;", "Lai/platon/scent/tools/SimpleCrawler;", "context", "Lai/platon/scent/skeleton/ScentContext;", "normalizer", "Lai/platon/pulsar/skeleton/crawl/filter/ScopedUrlNormalizer;", "(Lai/platon/scent/skeleton/ScentContext;Lai/platon/pulsar/skeleton/crawl/filter/ScopedUrlNormalizer;)V", "closed", "Ljava/util/concurrent/atomic/AtomicBoolean;", "logger", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "arrangeDocument", "Lai/platon/scent/dom/nodes/AnchorGroup;", "portalUrl", "", "args", "buildViews", "", "result", "Lai/platon/scent/entities/HarvestResult;", "options", "Lai/platon/scent/dom/HarvestOptions;", "clearViews", "project", "Lai/platon/scent/ml/harvest/HarvestProject;", "projectId", "close", "harvest", "urls", "", "documents", "", "Lai/platon/pulsar/dom/FeaturedDocument;", "Lkotlin/sequences/Sequence;", "loadDocuments", "htmlBaseDir", "Ljava/nio/file/Path;", "start", "", "limit", "scent-boot"})
@SourceDebugExtension(value={"SMAP\nBasicWebHarvester.kt\nKotlin\n*S Kotlin\n*F\n+ 1 BasicWebHarvester.kt\nai/platon/scent/tools/BasicWebHarvester\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,127:1\n1855#2:128\n1864#2,3:129\n1549#2:132\n1620#2,3:133\n1549#2:136\n1620#2,3:137\n1856#2:141\n1#3:140\n*S KotlinDebug\n*F\n+ 1 BasicWebHarvester.kt\nai/platon/scent/tools/BasicWebHarvester\n*L\n40#1:128\n41#1:129,3\n43#1:132\n43#1:133,3\n44#1:136\n44#1:137,3\n40#1:141\n*E\n"})
public class BasicWebHarvester
extends SimpleCrawler {
    private final Logger logger;
    @NotNull
    private final AtomicBoolean closed;

    /**
     * Creates a harvester on top of the given context.
     *
     * <p>Decompiler note: the null check is emitted before the {@code super(...)} call because that is
     * the order in the compiled bytecode; a {@code null} context therefore fails in
     * {@code Intrinsics.checkNotNullParameter} before the superclass constructor runs.
     *
     * @param context    the context handed to the {@code SimpleCrawler} constructor (as a
     *                   {@code PulsarContext}); must not be {@code null}
     * @param normalizer optional URL normalizer, passed to the superclass constructor unchanged
     */
    public BasicWebHarvester(@NotNull ScentContext context, @Nullable ScopedUrlNormalizer normalizer) {
        // Kotlin non-null parameter contract: rejects a null context.
        Intrinsics.checkNotNullParameter((Object)context, (String)"context");
        super((PulsarContext)context, normalizer);
        this.logger = LoggerFactory.getLogger(BasicWebHarvester.class);
        // Initially false; flipped to true by close().
        this.closed = new AtomicBoolean();
    }

    /**
     * Synthetic constructor that applies the default values of the primary constructor.
     *
     * <p>The defaults are selected by the bit mask {@code n}:
     * <ul>
     *   <li>bit {@code 1} set: {@code scentContext} is ignored and a context obtained from
     *       {@code ScentSQLContexts.INSTANCE.create()} is used instead;</li>
     *   <li>bit {@code 2} set: {@code scopedUrlNormalizer} is ignored and {@code null} is used.</li>
     * </ul>
     *
     * <p>The defaults are computed as argument expressions so that the explicit constructor call is the
     * first statement of the body; evaluation order (context first, then normalizer) is the same as in
     * the statement-based form.
     *
     * @param scentContext             the context to use when bit 1 of {@code n} is clear
     * @param scopedUrlNormalizer      the normalizer to use when bit 2 of {@code n} is clear
     * @param n                        bit mask selecting which parameters take their default value
     * @param defaultConstructorMarker unused marker that only disambiguates this overload
     */
    public /* synthetic */ BasicWebHarvester(ScentContext scentContext, ScopedUrlNormalizer scopedUrlNormalizer, int n, DefaultConstructorMarker defaultConstructorMarker) {
        this(
            (n & 1) != 0 ? (ScentContext)ScentSQLContexts.INSTANCE.create() : scentContext,
            (n & 2) != 0 ? null : scopedUrlNormalizer
        );
    }

    /*
     * WARNING - void declaration
     */
    @Nullable
    public final AnchorGroup arrangeDocument(@NotNull String portalUrl, @NotNull String args) {
        FeaturedDocument featuredDocument;
        Intrinsics.checkNotNullParameter((Object)portalUrl, (String)"portalUrl");
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        HNormUrl normUrl = ScentSession.DefaultImpls.normalize$default((ScentSession)((ScentSession)this.getSession()), (String)portalUrl, (HarvestOptions)this.getSession().options(args), (boolean)false, (int)4, null);
        HarvestOptions options = normUrl.getHOptions();
        WebPage portalPage = this.getSession().load(normUrl);
        FeaturedDocument portalDocument = this.getSession().parse(portalPage);
        SortedSet anchorGroups = this.getSession().arrangeLinks(normUrl, portalDocument);
        this.logger.info("------------------------------");
        Iterable $this$forEach$iv = CollectionsKt.take((Iterable)anchorGroups, (int)1);
        boolean $i$f$forEach = false;
        for (Object element$iv : $this$forEach$iv) {
            Collection collection;
            Iterable $this$mapTo$iv$iv;
            Iterable $this$map$iv;
            AnchorGroup it = (AnchorGroup)element$iv;
            boolean bl = false;
            Iterable $this$forEachIndexed$iv = CollectionsKt.take((Iterable)CollectionsKt.shuffled((Iterable)it.getUrlStrings()), (int)10);
            boolean $i$f$forEachIndexed = false;
            int index$iv22 = 0;
            for (Object item$iv : $this$forEachIndexed$iv) {
                void url;
                int n;
                if ((n = index$iv22++) < 0) {
                    CollectionsKt.throwIndexOverflow();
                }
                String string = (String)item$iv;
                int i = n;
                boolean bl2 = false;
                System.out.println((Object)(1 + i + ".\t" + (String)url));
            }
            $this$forEachIndexed$iv = CollectionsKt.take((Iterable)it.getUrlStrings(), (int)options.getTopLinks());
            boolean $i$f$map = false;
            void index$iv22 = $this$map$iv;
            Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                void it2;
                String i = (String)item$iv$iv;
                collection = destination$iv$iv;
                boolean bl3 = false;
                BasicScentSession basicScentSession = this.getSession();
                Intrinsics.checkNotNull((Object)it2);
                collection.add(basicScentSession.load((String)it2, (LoadOptions)options));
            }
            $this$map$iv = (List)destination$iv$iv;
            $i$f$map = false;
            $this$mapTo$iv$iv = $this$map$iv;
            destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                WebPage it2 = (WebPage)item$iv$iv;
                collection = destination$iv$iv;
                boolean bl4 = false;
                collection.add(ScentSession.DefaultImpls.parse$default((ScentSession)((ScentSession)this.getSession()), (WebPage)it2, (HarvestOptions)options, (boolean)false, (int)4, null));
            }
            List it3 = (List)destination$iv$iv;
            boolean bl5 = false;
            this.getSession().arrangeDocuments(normUrl, portalPage, CollectionsKt.asSequence((Iterable)it3));
        }
        FeaturedDocument it = featuredDocument = portalDocument;
        boolean bl = false;
        FullFeaturedDocumentKt.annotateNodes((FeaturedDocument)it, (HarvestOptions)options);
        it = featuredDocument;
        boolean bl6 = false;
        ScentSession.DefaultImpls.export$default((ScentSession)((ScentSession)this.getSession()), (FeaturedDocument)it, (ExportPaths.Type)ExportPaths.Type.PORTAL, (boolean)false, (int)4, null);
        return (AnchorGroup)CollectionsKt.firstOrNull((Iterable)anchorGroups);
    }

    /**
     * Harvests the portal at {@code portalUrl} using options parsed from an argument string.
     *
     * <p>Parses {@code args} through the session and delegates to
     * {@code harvest(String, HarvestOptions)}.
     *
     * @param portalUrl the URL of the portal page; must not be {@code null}
     * @param args      the argument string to parse into harvest options; must not be {@code null}
     * @return the harvest result produced by the delegate
     */
    @NotNull
    public final HarvestResult harvest(@NotNull String portalUrl, @NotNull String args) {
        Intrinsics.checkNotNullParameter((Object)portalUrl, (String)"portalUrl");
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        HarvestOptions parsedOptions = this.getSession().options(args);
        return this.harvest(portalUrl, parsedOptions);
    }

    /**
     * Harvests an already loaded list of documents and builds the views for the result.
     *
     * <p>The session's {@code harvest(Sequence, HarvestOptions)} is invoked from inside a
     * {@code runBlocking} coroutine; the coroutine body below has a single state and no suspension
     * point. Afterwards {@link #buildViews} is called with the result.
     *
     * <p>Decompiler note: the anonymous {@code Function2} is the compiled form of a Kotlin suspend
     * lambda. Its initializer block and {@code create} method are decompiler placeholders and are not
     * valid Java as written.
     *
     * @param documents2 the documents to harvest; must not be {@code null}
     * @param options    the harvest options; must not be {@code null}
     * @return the harvest result returned by the session
     */
    @NotNull
    public final HarvestResult harvest(@NotNull List<? extends FeaturedDocument> documents2, @NotNull HarvestOptions options) {
        Intrinsics.checkNotNullParameter(documents2, (String)"documents");
        Intrinsics.checkNotNullParameter((Object)options, (String)"options");
        // NOTE(review): the trailing (1, null) presumably selects the default coroutine context for
        // runBlocking (Kotlin "$default" bit mask) -- confirm against kotlinx.coroutines.
        HarvestResult result2 = (HarvestResult)BuildersKt.runBlocking$default(null, (Function2)((Function2)new Function2<CoroutineScope, Continuation<? super HarvestResult>, Object>(this, documents2, options, null){
            // Coroutine state; only state 0 is handled by invokeSuspend.
            int label;
            // Captured values of the enclosing call.
            final /* synthetic */ BasicWebHarvester this$0;
            final /* synthetic */ List<FeaturedDocument> $documents;
            final /* synthetic */ HarvestOptions $options;
            {
                this.this$0 = $receiver;
                this.$documents = $documents;
                this.$options = $options;
                super(2, $completion);
            }

            @Nullable
            public final Object invokeSuspend(@NotNull Object object) {
                IntrinsicsKt.getCOROUTINE_SUSPENDED();
                switch (this.label) {
                    case 0: {
                        // Rethrows if the coroutine was started with a failed result.
                        ResultKt.throwOnFailure((Object)object);
                        // The list is exposed as a sequence and harvested directly; no suspension.
                        return this.this$0.getSession().harvest(CollectionsKt.asSequence((Iterable)this.$documents), this.$options);
                    }
                }
                throw new IllegalStateException("call to 'resume' before 'invoke' with coroutine");
            }

            @NotNull
            public final Continuation<Unit> create(@Nullable Object value, @NotNull Continuation<?> $completion) {
                return (Continuation)new /* invalid duplicate definition of identical inner class */;
            }

            @Nullable
            public final Object invoke(@NotNull CoroutineScope p1, @Nullable Continuation<? super HarvestResult> p2) {
                return (this.create(p1, p2)).invokeSuspend(Unit.INSTANCE);
            }
        }), (int)1, null);
        this.buildViews(result2, options);
        return result2;
    }

    /**
     * Harvests the portal at {@code portalUrl} with the given options and builds the views for the
     * result.
     *
     * <p>The work runs inside a {@code runBlocking} coroutine. Before harvesting, if
     * {@code options.isDefault("-topLinks")} is true, {@code options.setTopLinks(40)} is called, so the
     * caller's {@code options} object is modified. The session's suspending
     * {@code harvest(String, HarvestOptions, Continuation)} is then invoked, and {@link #buildViews} is
     * called with its result.
     *
     * <p>Decompiler note: the anonymous {@code Function2} is the compiled state machine of a Kotlin
     * suspend lambda. Its initializer block, the {@code create} method and the {@code void $result}
     * local are decompiler artefacts and are not valid Java as written.
     *
     * @param portalUrl the URL of the portal page; must not be {@code null}
     * @param options   the harvest options; must not be {@code null}; may have its top-links value
     *                  changed as described above
     * @return the harvest result returned by the session
     */
    @NotNull
    public final HarvestResult harvest(@NotNull String portalUrl, @NotNull HarvestOptions options) {
        Intrinsics.checkNotNullParameter((Object)portalUrl, (String)"portalUrl");
        Intrinsics.checkNotNullParameter((Object)options, (String)"options");
        HarvestResult result2 = (HarvestResult)BuildersKt.runBlocking$default(null, (Function2)((Function2)new Function2<CoroutineScope, Continuation<? super HarvestResult>, Object>(options, this, portalUrl, null){
            // Coroutine state: 0 = not started, 1 = resumed after the session's harvest call.
            int label;
            // Captured values of the enclosing call.
            final /* synthetic */ HarvestOptions $options;
            final /* synthetic */ BasicWebHarvester this$0;
            final /* synthetic */ String $portalUrl;
            {
                this.$options = $options;
                this.this$0 = $receiver;
                this.$portalUrl = $portalUrl;
                super(2, $completion);
            }

            /*
             * Decompiler warnings (CFR):
             * WARNING - void declaration
             * Enabled force condition propagation
             * Lifted jumps to return sites
             */
            @Nullable
            public final Object invokeSuspend(@NotNull Object object) {
                // Marker value a suspending call returns when it has actually suspended.
                Object object2 = IntrinsicsKt.getCOROUTINE_SUSPENDED();
                switch (this.label) {
                    case 0: {
                        ResultKt.throwOnFailure((Object)object);
                        // Use 40 top links unless the caller set "-topLinks" explicitly.
                        if (this.$options.isDefault("-topLinks")) {
                            this.$options.setTopLinks(40);
                        }
                        // Record the resume point before making the suspending call.
                        this.label = 1;
                        Object object3 = this.this$0.getSession().harvest(this.$portalUrl, this.$options, (Continuation)this);
                        // Completed without suspending: return the result directly.
                        if (object3 != object2) return object3;
                        // Suspended: propagate the marker; execution continues in case 1.
                        return object2;
                    }
                    case 1: {
                        // NOTE(review): $result is presumably the value passed to invokeSuspend on
                        // resumption; the decompiler lost that binding -- confirm against the bytecode.
                        void $result;
                        ResultKt.throwOnFailure((Object)$result);
                        Object object3 = $result;
                        return object3;
                    }
                }
                throw new IllegalStateException("call to 'resume' before 'invoke' with coroutine");
            }

            @NotNull
            public final Continuation<Unit> create(@Nullable Object value, @NotNull Continuation<?> $completion) {
                return (Continuation)new /* invalid duplicate definition of identical inner class */;
            }

            @Nullable
            public final Object invoke(@NotNull CoroutineScope p1, @Nullable Continuation<? super HarvestResult> p2) {
                return (this.create(p1, p2)).invokeSuspend(Unit.INSTANCE);
            }
        }), (int)1, null);
        this.buildViews(result2, options);
        return result2;
    }

    /**
     * Harvests the documents behind the given URLs, using options parsed from an argument string.
     *
     * <p>The URLs are mapped to documents through a Kotlin sequence, so
     * {@code getSession().loadDocument(url)} is only called when the sequence is consumed by
     * {@code harvest(Sequence, HarvestOptions)}, to which this method delegates.
     *
     * @param urls the URLs of the documents to harvest; must not be {@code null}
     * @param args the argument string to parse into harvest options; must not be {@code null}
     * @return the harvest result produced by the delegate
     */
    @NotNull
    public final HarvestResult harvest(@NotNull Iterable<String> urls, @NotNull String args) {
        Intrinsics.checkNotNullParameter(urls, (String)"urls");
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        // A lambda replaces the decompiled anonymous Function1 subclass.
        Function1<String, FeaturedDocument> loadDocument = it -> {
            Intrinsics.checkNotNullParameter((Object)it, (String)"it");
            return this.getSession().loadDocument(it);
        };
        Sequence<FeaturedDocument> documents = SequencesKt.map(CollectionsKt.asSequence(urls), loadDocument);
        return this.harvest(documents, this.getSession().options(args));
    }

    /**
     * Harvests a sequence of documents and builds the views for the result.
     *
     * <p>Delegates the harvesting to the session, logs two progress messages, then calls
     * {@link #buildViews}.
     *
     * @param documents2 the documents to harvest; must not be {@code null}
     * @param options    the harvest options; must not be {@code null}
     * @return the harvest result returned by the session
     */
    @NotNull
    public final HarvestResult harvest(@NotNull Sequence<? extends FeaturedDocument> documents2, @NotNull HarvestOptions options) {
        Intrinsics.checkNotNullParameter(documents2, (String)"documents");
        Intrinsics.checkNotNullParameter((Object)options, (String)"options");

        final HarvestResult harvested = this.getSession().harvest(documents2, options);

        final Logger log = this.logger;
        log.info("Harvest finished.");
        log.info("Ready to build views for the harvest result ...");

        this.buildViews(harvested, options);
        return harvested;
    }

    /**
     * Builds all views for a harvest result and logs where they were written.
     *
     * <p>Calls the session's {@code buildAll} on the result's table group. If at least one document was
     * exported and the first exported path has a parent directory, that directory's URI is logged.
     * Any {@link Exception} raised along the way is reported through {@code LogsKt.warnUnexpected} and
     * not rethrown.
     *
     * @param result2 the harvest result whose table group is rendered; must not be {@code null}
     * @param options the harvest options passed to {@code buildAll}; must not be {@code null}
     */
    public final void buildViews(@NotNull HarvestResult result2, @NotNull HarvestOptions options) {
        Intrinsics.checkNotNullParameter((Object)result2, (String)"result");
        Intrinsics.checkNotNullParameter((Object)options, (String)"options");
        try {
            Map exported = this.getSession().buildAll(result2.getTableGroup(), options);

            Path firstExport = (Path)CollectionsKt.firstOrNull((Iterable)exported.keySet());
            if (firstExport == null) {
                // Nothing was exported, so there is nothing to report.
                return;
            }

            Path baseDir = firstExport.getParent();
            if (baseDir == null) {
                return;
            }

            this.logger.info("Harvest result: " + baseDir.toUri());
        }
        catch (Exception e) {
            LogsKt.warnUnexpected((Object)this, (Throwable)e, (String)"Failed to report harvest result", (Object[])new Object[0]);
        }
    }

    /**
     * Deletes the result base directory of the harvest project with the given id.
     *
     * <p>A {@code HarvestProject} is created from {@code projectId} and its result base directory is
     * removed with {@code FileUtils.deleteDirectory}.
     *
     * @param projectId the id used to construct the {@code HarvestProject}; must not be {@code null}
     */
    public final void clearViews(@NotNull String projectId) {
        Intrinsics.checkNotNullParameter((Object)projectId, (String)"projectId");
        HarvestProject project = new HarvestProject(projectId);
        File resultDir = project.getResultBaseDir().toFile();
        FileUtils.deleteDirectory(resultDir);
    }

    /**
     * Deletes the result base directory of the given harvest project.
     *
     * <p>The directory is removed with {@code FileUtils.deleteDirectory}.
     *
     * @param project the project whose result base directory is deleted; must not be {@code null}
     */
    public final void clearViews(@NotNull HarvestProject project) {
        Intrinsics.checkNotNullParameter((Object)project, (String)"project");
        File resultDir = project.getResultBaseDir().toFile();
        FileUtils.deleteDirectory(resultDir);
    }

    /**
     * Builds a lazy sequence of documents from the {@code *.html} files in a directory.
     *
     * <p>The directory entries accepted by the counting predicate are counted first. If the directory
     * does not exist or the count is zero, an empty sequence is returned; if the count is below 20 a
     * warning is logged. Otherwise the {@code *.html} entries are listed, the first {@code start}
     * entries are skipped, at most {@code limit} entries are kept, and each one is mapped and
     * post-processed by two function objects defined outside this method.
     *
     * <p>The stream returned by {@link Files#list} holds an open directory handle, so it is closed
     * here with try-with-resources as soon as the count is known.
     *
     * <p>NOTE(review): the counting predicate ({@code loadDocuments.count.1}) and the mapping
     * functions ({@code loadDocuments.documents.1/2}) are not visible here; whether the count and the
     * {@code *.html} listing select the same entries should be confirmed.
     *
     * @param htmlBaseDir the directory to read; must not be {@code null}
     * @param start       the number of leading {@code *.html} entries to skip
     * @param limit       the maximum number of entries to keep after skipping
     * @return a sequence of documents, empty if the directory is missing or no entry was counted
     */
    @NotNull
    public final Sequence<FeaturedDocument> loadDocuments(@NotNull Path htmlBaseDir, int start, int limit) {
        Intrinsics.checkNotNullParameter((Object)htmlBaseDir, (String)"htmlBaseDir");

        long sampleCount = 0L;
        if (!Files.notExists(htmlBaseDir, new LinkOption[0])) {
            // Close the directory stream deterministically instead of leaving it to the GC.
            try (java.util.stream.Stream<Path> entries = Files.list(htmlBaseDir)) {
                sampleCount = entries.filter(arg_0 -> BasicWebHarvester.loadDocuments$lambda$7(loadDocuments.count.1.INSTANCE, arg_0)).count();
            }
        }

        if (sampleCount == 0L) {
            return SequencesKt.sequenceOf((Object[])new FeaturedDocument[0]);
        }
        if (sampleCount < 20L) {
            this.logger.warn("Too few samples, might not generate a good result");
        }

        // Lazy pipeline: list *.html -> skip `start` -> keep `limit` -> map -> onEach.
        return SequencesKt.onEach((Sequence)SequencesKt.map((Sequence)SequencesKt.take((Sequence)SequencesKt.drop((Sequence)CollectionsKt.asSequence((Iterable)PathsKt.listDirectoryEntries((Path)htmlBaseDir, (String)"*.html")), (int)start), (int)limit), (Function1)loadDocuments.documents.1.INSTANCE), (Function1)loadDocuments.documents.2.INSTANCE);
    }

    /**
     * Marks this harvester as closed.
     *
     * <p>Atomically flips the {@code closed} flag from {@code false} to {@code true}; nothing else is
     * done, whether or not the flag was already set.
     *
     * <p>NOTE(review): this override does not call {@code super.close()} -- confirm that this is
     * intended.
     */
    @Override
    public void close() {
        this.closed.compareAndSet(false, true);
    }

    /**
     * Synthetic adapter that lets a Kotlin {@code Function1} be used as a boolean predicate.
     *
     * @param $tmp0 the function to invoke; must not be {@code null}
     * @param p0    the argument passed to the function
     * @return the unboxed {@code Boolean} returned by {@code $tmp0.invoke(p0)}
     */
    private static final boolean loadDocuments$lambda$7(Function1 $tmp0, Object p0) {
        Intrinsics.checkNotNullParameter((Object)$tmp0, (String)"$tmp0");
        Object verdict = $tmp0.invoke(p0);
        return ((Boolean)verdict).booleanValue();
    }

    /**
     * Creates a harvester with every constructor parameter defaulted.
     *
     * <p>Delegates to the synthetic four-argument constructor with mask {@code 3} (bits 1 and 2 set),
     * which replaces the context with {@code ScentSQLContexts.INSTANCE.create()} and uses a
     * {@code null} normalizer; the {@code null} arguments passed here are therefore ignored.
     */
    public BasicWebHarvester() {
        this((ScentContext)null, (ScopedUrlNormalizer)null, 3, (DefaultConstructorMarker)null);
    }
}

