/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.scent.tools;

import ai.platon.pulsar.common.AppPaths;
import ai.platon.pulsar.common.ExceptionsKt;
import ai.platon.pulsar.common.NetUtil;
import ai.platon.pulsar.common.options.LoadOptions;
import ai.platon.pulsar.common.urls.NormUrl;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.session.PulsarSession;
import ai.platon.scent.ScentContext;
import ai.platon.scent.ScentSession;
import ai.platon.scent.context.ScentContexts;
import ai.platon.scent.dom.HNormUrl;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.dom.nodes.AnchorGroup;
import ai.platon.scent.dom.nodes.FullFeaturedDocumentKt;
import ai.platon.scent.entities.HarvestResult;
import ai.platon.scent.ql.h2.context.ScentSQLContexts;
import ai.platon.scent.tools.VerboseCrawler;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.attribute.FileAttribute;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.SortedSet;
import java.util.concurrent.atomic.AtomicBoolean;
import kotlin.Metadata;
import kotlin.ResultKt;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.coroutines.Continuation;
import kotlin.coroutines.intrinsics.IntrinsicsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.functions.Function2;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlin.text.StringsKt;
import kotlinx.coroutines.BuildersKt;
import kotlinx.coroutines.CoroutineScope;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000l\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0010\u000b\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u001e\n\u0002\u0018\u0002\n\u0002\b\u0003\b\u0016\u0018\u00002\u00020\u0001B\u000f\u0012\b\b\u0002\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0004J\u0016\u0010\u001b\u001a\u00020\u001c2\u0006\u0010\u001d\u001a\u00020\f2\u0006\u0010\u001e\u001a\u00020\fJ\b\u0010\u001f\u001a\u00020\u001cH\u0016J\u001e\u0010 \u001a\u00020!2\u0006\u0010\u0017\u001a\u00020\u00182\u0006\u0010\"\u001a\u00020\f2\u0006\u0010#\u001a\u00020$J\u0016\u0010 \u001a\u00020!2\u0006\u0010\"\u001a\u00020\f2\u0006\u0010#\u001a\u00020$J\u0016\u0010 \u001a\u00020!2\u0006\u0010\"\u001a\u00020\f2\u0006\u0010\u001e\u001a\u00020\fJ\u001c\u0010 \u001a\u00020!2\f\u0010%\u001a\b\u0012\u0004\u0012\u00020'0&2\u0006\u0010#\u001a\u00020$J\u001c\u0010 
\u001a\u00020!2\f\u0010%\u001a\b\u0012\u0004\u0012\u00020'0&2\u0006\u0010\u001e\u001a\u00020\fJ\u0016\u0010(\u001a\u00020\u001c2\u0006\u0010\"\u001a\u00020\f2\u0006\u0010#\u001a\u00020)J\u0016\u0010(\u001a\u00020\u001c2\u0006\u0010\"\u001a\u00020\f2\u0006\u0010\u001e\u001a\u00020\fJ\u001c\u0010*\u001a\b\u0012\u0004\u0012\u00020,0+2\u0006\u0010\u001d\u001a\u00020\f2\u0006\u0010#\u001a\u00020)J\u001c\u0010*\u001a\b\u0012\u0004\u0012\u00020,0+2\u0006\u0010\u001d\u001a\u00020\f2\u0006\u0010\u001e\u001a\u00020\fJ\u0018\u0010-\u001a\u00020\u001c2\u0006\u0010.\u001a\u00020!2\u0006\u0010#\u001a\u00020$H\u0002R\u0011\u0010\u0005\u001a\u00020\u0006\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0007\u0010\bR\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\t\u0010\nR\u0014\u0010\u000b\u001a\u00020\fX\u0086D\u00a2\u0006\b\n\u0000\u001a\u0004\b\r\u0010\u000eR\u0011\u0010\u000f\u001a\u00020\u00108F\u00a2\u0006\u0006\u001a\u0004\b\u000f\u0010\u0011R\u0019\u0010\u0012\u001a\n \u0014*\u0004\u0018\u00010\u00130\u0013\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0015\u0010\u0016R\u0011\u0010\u0017\u001a\u00020\u0018\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0019\u0010\u001a\u00a8\u0006/"}, d2={"Lai/platon/scent/tools/VerboseCrawler;", "Ljava/lang/AutoCloseable;", "context", "Lai/platon/scent/ScentContext;", "(Lai/platon/scent/ScentContext;)V", "closed", "Ljava/util/concurrent/atomic/AtomicBoolean;", "getClosed", "()Ljava/util/concurrent/atomic/AtomicBoolean;", "getContext", "()Lai/platon/scent/ScentContext;", "defaultHarvestArgs", "", "getDefaultHarvestArgs", "()Ljava/lang/String;", "isActive", "", "()Z", "logger", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "getLogger", "()Lorg/slf4j/Logger;", "session", "Lai/platon/scent/ScentSession;", "getSession", "()Lai/platon/scent/ScentSession;", "arrangeDocument", "", "portalUrl", "args", "close", "harvest", "Lai/platon/scent/entities/HarvestResult;", "url", "options", "Lai/platon/scent/dom/HarvestOptions;", "documents", "", 
"Lai/platon/pulsar/dom/FeaturedDocument;", "load", "Lai/platon/pulsar/common/options/LoadOptions;", "loadOutPages", "", "Lai/platon/pulsar/persist/WebPage;", "report", "result", "scent-engine"})
public class VerboseCrawler
implements AutoCloseable {
    /** Context handed to the constructor; returned by {@link #getContext()}. */
    @NotNull
    private final ScentContext context;
    /** SLF4J logger obtained for {@code VerboseCrawler.class}. */
    private final Logger logger;
    /** Set to {@code true} by {@link #close()} and read by {@link #isActive()}. */
    @NotNull
    private final AtomicBoolean closed;
    /** Argument string exposed through {@link #getDefaultHarvestArgs()}; not otherwise used in this class. */
    @NotNull
    private final String defaultHarvestArgs;
    /** Session created in the constructor through {@code ScentContexts.INSTANCE.createSession()}; all load/harvest methods go through it. */
    @NotNull
    private final ScentSession session;

    /**
     * Creates a crawler that stores the given context and opens a new session.
     *
     * <p>The session comes from {@code ScentContexts.INSTANCE.createSession()}; the
     * {@code context} argument is only stored, it is not used to create the session.
     *
     * @param context the context to keep; a {@code null} value is rejected by the Kotlin intrinsic check
     */
    public VerboseCrawler(@NotNull ScentContext context) {
        Intrinsics.checkNotNullParameter(context, "context");
        this.context = context;
        this.closed = new AtomicBoolean(false);
        this.defaultHarvestArgs = " -nScreens 5 -diagnose -nVerbose 1 -showTip -showImage";
        this.logger = LoggerFactory.getLogger(VerboseCrawler.class);
        this.session = ScentContexts.INSTANCE.createSession();
    }

    /**
     * Synthetic constructor; it appears to be the Kotlin-generated overload that backs the
     * default value of the {@code context} parameter.
     *
     * <p>When bit 0 of {@code n} is set, the passed context is ignored and a fresh one from
     * {@code ScentSQLContexts.INSTANCE.create()} is used; otherwise {@code scentContext} is
     * forwarded unchanged.
     *
     * @param scentContext the explicit context, used only when bit 0 of {@code n} is clear
     * @param n bit mask; bit 0 selects the default context
     * @param defaultConstructorMarker marker argument, not read by this constructor
     */
    public /* synthetic */ VerboseCrawler(ScentContext scentContext, int n, DefaultConstructorMarker defaultConstructorMarker) {
        // An explicit constructor invocation has to be the first statement, so the
        // default is selected inside the argument expression instead of a preceding if.
        this((n & 1) != 0 ? ScentSQLContexts.INSTANCE.create() : scentContext);
    }

    /**
     * Returns the context this crawler was constructed with.
     *
     * @return the stored context
     */
    @NotNull
    public final ScentContext getContext() {
        return context;
    }

    /**
     * Returns the logger used by this crawler.
     *
     * @return the SLF4J logger created in the constructor
     */
    public final Logger getLogger() {
        return logger;
    }

    /**
     * Returns the flag that records whether {@link #close()} has been called.
     *
     * @return the live {@link AtomicBoolean}, not a copy
     */
    @NotNull
    public final AtomicBoolean getClosed() {
        return closed;
    }

    /**
     * Tells whether this crawler can still be used.
     *
     * @return {@code false} once the closed flag is set; otherwise whatever the session's
     *         own {@code isActive()} reports
     */
    public final boolean isActive() {
        // The session is only consulted while the crawler itself has not been closed.
        if (closed.get()) {
            return false;
        }
        return session.isActive();
    }

    /**
     * Returns the default harvest argument string set in the constructor.
     *
     * @return the default harvest arguments
     */
    @NotNull
    public final String getDefaultHarvestArgs() {
        return defaultHarvestArgs;
    }

    /**
     * Returns the session all load and harvest calls of this crawler go through.
     *
     * @return the session created in the constructor
     */
    @NotNull
    public final ScentSession getSession() {
        return session;
    }

    /**
     * Convenience overload: turns {@code args} into load options through the session and
     * delegates to {@link #load(String, LoadOptions)}.
     *
     * @param url the page URL
     * @param args load arguments in string form
     */
    public final void load(@NotNull String url, @NotNull String args) {
        Intrinsics.checkNotNullParameter(url, "url");
        Intrinsics.checkNotNullParameter(args, "args");
        LoadOptions parsedOptions = PulsarSession.DefaultImpls.options$default((PulsarSession) this.session, args, null, 2, null);
        this.load(url, parsedOptions);
    }

    /**
     * Loads and parses {@code url}, prints up to ten reachable ".com" link prefixes found
     * on the page, then exports the document and logs the export path.
     *
     * <p>Decompiler note (CFR): WARNING - void declaration. Several locals are declared
     * {@code void} or read before they are assigned, and the {@code load.N.INSTANCE}
     * references point at lambda classes that are not part of this file. The body
     * documents intent; it is not compilable Java as shown.
     *
     * <p>NOTE(review): {@code options} is only used for {@code getOutLinkSelector()}; the page
     * itself is fetched with {@code session.load(url)}, without the options, whereas
     * {@code loadOutPages(String, LoadOptions)} passes its options to the session. Confirm
     * whether that difference is intended.
     *
     * @param url the page URL
     * @param options load options; only the out-link selector is read here
     */
    public final void load(@NotNull String url, @NotNull LoadOptions options) {
        void $this$filterTo$iv$iv;
        String $this$filter$iv;
        Object object;
        String it;
        Collection collection;
        Object $this$mapTo$iv;
        Intrinsics.checkNotNullParameter((Object)url, (String)"url");
        Intrinsics.checkNotNullParameter((Object)options, (String)"options");
        WebPage page = this.session.load(url);
        FeaturedDocument doc = this.session.parse(page);
        doc.absoluteLinks();
        doc.stripScripts();
        // Select elements with the out-link selector, map them with load.1 and filter with load.2
        // (both lambdas are not visible in this file).
        Sequence sequence = SequencesKt.filter((Sequence)CollectionsKt.asSequence((Iterable)FeaturedDocument.select$default((FeaturedDocument)doc, (String)options.getOutLinkSelector(), (int)0, (int)0, (Function1)load.1.INSTANCE, (int)6, null)), (Function1)load.2.INSTANCE);
        // First pass: keep the part of each string before ".com", de-duplicated via a HashSet.
        Collection destination$iv = new HashSet();
        boolean $i$f$mapTo = false;
        for (Object item$iv : $this$mapTo$iv) {
            String string = (String)item$iv;
            collection = destination$iv;
            boolean bl = false;
            Intrinsics.checkNotNullExpressionValue((Object)it, (String)"it");
            object = StringsKt.substringBefore$default((String)it, (String)".com", null, (int)2, null);
            collection.add(object);
        }
        // Second pass: filter with load.4 (not visible here) and append ".com" again.
        $this$mapTo$iv = SequencesKt.filter((Sequence)CollectionsKt.asSequence((Iterable)destination$iv), (Function1)load.4.INSTANCE);
        destination$iv = new HashSet();
        $i$f$mapTo = false;
        for (Object item$iv : $this$mapTo$iv) {
            it = (String)item$iv;
            collection = destination$iv;
            boolean bl = false;
            object = it + ".com";
            collection.add(object);
        }
        $this$mapTo$iv = destination$iv;
        boolean $i$f$filter = false;
        void $i$f$mapTo2 = $this$filter$iv;
        // Keep only the entries for which NetUtil.testHttpNetwork returns true.
        // NOTE(review): new URL(...) can throw MalformedURLException; nothing here catches it.
        Collection destination$iv$iv = new ArrayList();
        boolean $i$f$filterTo = false;
        for (Object element$iv$iv : $this$filterTo$iv$iv) {
            String it2 = (String)element$iv$iv;
            boolean bl = false;
            if (!NetUtil.testHttpNetwork((URL)new URL(it2))) continue;
            destination$iv$iv.add(element$iv$iv);
        }
        // Join at most ten survivors with newlines (load.7 is the per-item transform) and print them.
        $this$filter$iv = CollectionsKt.joinToString$default((Iterable)CollectionsKt.take((Iterable)((List)destination$iv$iv), (int)10), (CharSequence)"\n", null, null, (int)0, null, (Function1)load.7.INSTANCE, (int)30, null);
        boolean bl = false;
        boolean bl2 = false;
        String it3 = $this$filter$iv;
        boolean bl3 = false;
        boolean bl4 = false;
        System.out.println((Object)it3);
        // Export the (link-absolutized, script-stripped) document and report where it went.
        Path path = this.session.export(doc);
        this.logger.info("Export to: file://{}", (Object)path);
    }

    /**
     * Convenience overload: turns {@code args} into load options through the session and
     * delegates to {@link #loadOutPages(String, LoadOptions)}.
     *
     * @param portalUrl URL of the portal page
     * @param args load arguments in string form
     * @return the pages returned by the delegate
     */
    @NotNull
    public final Collection<WebPage> loadOutPages(@NotNull String portalUrl, @NotNull String args) {
        Intrinsics.checkNotNullParameter(portalUrl, "portalUrl");
        Intrinsics.checkNotNullParameter(args, "args");
        LoadOptions parsedOptions = PulsarSession.DefaultImpls.options$default((PulsarSession) this.session, args, null, 2, null);
        return this.loadOutPages(portalUrl, parsedOptions);
    }

    /**
     * Loads the portal page, exports it, collects its out links and loads the linked pages.
     *
     * <p>Decompiler note (CFR): WARNING - void declaration. Some locals are declared
     * {@code void} or read before assignment, and {@code loadOutPages.links.1.INSTANCE}
     * refers to a lambda class that is not part of this file; the body is not compilable
     * Java as shown.
     *
     * <p>Visible steps: load {@code portalUrl} with {@code options}; warn (but continue) when the
     * protocol status is not a success; parse, absolutize links, strip scripts and export;
     * select links with {@code options.getOutLinkSelector()}; normalize them into a
     * {@code LinkedHashSet}; keep the first {@code options.getTopLinks()} and take their
     * {@code getSpec()}; load all of them with item options that have parsing enabled.
     *
     * @param portalUrl URL of the portal page
     * @param options load options for the portal page; item options are derived from them
     * @return the result of {@code session.loadAll} for the selected links
     */
    @NotNull
    public final Collection<WebPage> loadOutPages(@NotNull String portalUrl, @NotNull LoadOptions options) {
        void $this$mapTo$iv$iv;
        Object object;
        Collection collection;
        void destination$iv;
        void $this$mapTo$iv;
        Intrinsics.checkNotNullParameter((Object)portalUrl, (String)"portalUrl");
        Intrinsics.checkNotNullParameter((Object)options, (String)"options");
        WebPage page = this.session.load(portalUrl, options);
        // A failed load is only logged; processing continues with whatever page was returned.
        if (!page.getProtocolStatus().isSuccess()) {
            this.logger.warn("Failed to load page | {}", (Object)portalUrl);
        }
        FeaturedDocument document = this.session.parse(page);
        document.absoluteLinks();
        document.stripScripts();
        Path path = this.session.export(document);
        this.logger.info("Portal page is exported to: file://" + path);
        // Select elements with the out-link selector; the mapper lambda is not visible here.
        Iterable iterable = FeaturedDocument.select$default((FeaturedDocument)document, (String)options.getOutLinkSelector(), (int)0, (int)0, (Function1)loadOutPages.links.1.INSTANCE, (int)6, null);
        boolean bl = false;
        // LinkedHashSet: de-duplicates the normalized URLs while keeping encounter order.
        Collection collection2 = new LinkedHashSet();
        boolean $i$f$mapTo22 = false;
        for (Object item$iv : $this$mapTo$iv) {
            Iterator it;
            String string = (String)item$iv;
            collection = destination$iv;
            boolean bl2 = false;
            PulsarSession pulsarSession = this.getSession();
            Intrinsics.checkNotNullExpressionValue((Object)it, (String)"it");
            object = PulsarSession.DefaultImpls.normalize$default((PulsarSession)pulsarSession, (String)((Object)it), (LoadOptions)options, (boolean)false, (int)4, null);
            collection.add(object);
        }
        // Keep at most getTopLinks() normalized URLs and reduce each to its spec.
        Iterable $this$map$iv = CollectionsKt.take((Iterable)((Iterable)destination$iv), (int)options.getTopLinks());
        boolean $i$f$map = false;
        Iterable $i$f$mapTo22 = $this$map$iv;
        Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
        boolean $i$f$mapTo = false;
        for (Object item$iv$iv : $this$mapTo$iv$iv) {
            void it;
            NormUrl normUrl = (NormUrl)item$iv$iv;
            collection = destination$iv$iv;
            boolean bl3 = false;
            object = it.getSpec();
            collection.add(object);
        }
        List links2 = (List)destination$iv$iv;
        this.logger.info("Total {} items to load", (Object)links2.size());
        // Item options are derived from the portal options, with parsing switched on.
        LoadOptions loadOptions = options.createItemOptions();
        boolean bl4 = false;
        boolean bl5 = false;
        LoadOptions $this$loadOutPages_u24lambda_u2d6 = loadOptions;
        boolean bl6 = false;
        $this$loadOutPages_u24lambda_u2d6.setParse(true);
        LoadOptions itemOptions = loadOptions;
        return this.session.loadAll(links2, itemOptions);
    }

    /**
     * Loads a portal page, arranges its links into anchor groups, processes the first group
     * and finally annotates and exports the portal document.
     *
     * <p>Decompiler note (CFR): several locals are read before assignment or have the wrong
     * declared type (for example the parse result stored in a {@code WebPage} variable);
     * the body is not compilable Java as shown.
     *
     * <p>For the first anchor group only: up to ten shuffled URLs are printed as a numbered
     * list, up to {@code options.getTopLinks()} URLs are loaded and parsed, and the results
     * are handed to {@code session.arrangeDocuments}.
     *
     * @param portalUrl URL of the portal page
     * @param args harvest arguments in string form
     */
    public final void arrangeDocument(@NotNull String portalUrl, @NotNull String args) {
        Intrinsics.checkNotNullParameter((Object)portalUrl, (String)"portalUrl");
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        // NOTE(review): taskName is computed but never read in this method.
        String taskName = AppPaths.fromUri$default((AppPaths)AppPaths.INSTANCE, (String)portalUrl, null, null, (int)6, null);
        // The parsed options are cast to HarvestOptions before normalization.
        HNormUrl normUrl = ScentSession.DefaultImpls.normalize$default(this.session, portalUrl, (HarvestOptions)PulsarSession.DefaultImpls.options$default((PulsarSession)this.session, (String)args, null, (int)2, null), false, 4, null);
        HarvestOptions options = normUrl.getHOptions();
        WebPage portalPage = this.session.load(normUrl);
        FeaturedDocument portalDocument = this.session.parse(portalPage);
        SortedSet<AnchorGroup> anchorGroups = this.session.arrangeLinks(normUrl, portalDocument);
        this.logger.info("------------------------------");
        // take(..., 1): only the first anchor group of the sorted set is processed.
        Iterable $this$forEach$iv = CollectionsKt.take((Iterable)anchorGroups, (int)1);
        boolean $i$f$forEach = false;
        for (Object element$iv : $this$forEach$iv) {
            WebPage webPage;
            WebPage it;
            Collection collection;
            Iterable $this$mapTo$iv$iv;
            Iterable $this$map$iv;
            String url;
            AnchorGroup it2 = (AnchorGroup)element$iv;
            boolean bl = false;
            // Print a 1-based numbered sample of at most ten shuffled URLs from the group.
            Iterable $this$forEachIndexed$iv = CollectionsKt.take((Iterable)CollectionsKt.shuffled((Iterable)it2.getUrlStrings()), (int)10);
            boolean $i$f$forEachIndexed = false;
            int index$iv22 = 0;
            for (Object item$iv : $this$forEachIndexed$iv) {
                int n = index$iv22++;
                boolean bl2 = false;
                if (n < 0) {
                    CollectionsKt.throwIndexOverflow();
                }
                String string = (String)item$iv;
                int i = n;
                boolean bl3 = false;
                // Presumably "url" stands for the current item ("string"); decompiler naming artifact.
                String string2 = 1 + i + ".\t" + url;
                boolean bl4 = false;
                System.out.println((Object)string2);
            }
            // Load at most getTopLinks() URLs of the group (unshuffled order) with the harvest options.
            $this$forEachIndexed$iv = CollectionsKt.take((Iterable)it2.getUrlStrings(), (int)options.getTopLinks());
            boolean $i$f$map = false;
            void index$iv22 = $this$map$iv;
            Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                url = (String)item$iv$iv;
                collection = destination$iv$iv;
                boolean bl5 = false;
                ScentSession scentSession = this.getSession();
                Intrinsics.checkNotNullExpressionValue((Object)it, (String)"it");
                webPage = scentSession.load((String)it, (LoadOptions)options);
                collection.add(webPage);
            }
            // Parse every loaded page with the same options.
            $this$map$iv = (List)destination$iv$iv;
            $i$f$map = false;
            $this$mapTo$iv$iv = $this$map$iv;
            destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                it = (WebPage)item$iv$iv;
                collection = destination$iv$iv;
                boolean bl6 = false;
                webPage = ScentSession.DefaultImpls.parse$default(this.getSession(), it, options, false, 4, null);
                collection.add(webPage);
            }
            List list = (List)destination$iv$iv;
            boolean bl7 = false;
            boolean bl8 = false;
            List it3 = list;
            boolean bl9 = false;
            // Hand the portal page and the parsed item results to the session.
            this.getSession().arrangeDocuments(normUrl, portalPage, it3);
        }
        // After the group loop: annotate the portal document, then export it under the name "portal".
        FeaturedDocument featuredDocument = portalDocument;
        boolean bl = false;
        boolean bl10 = false;
        FeaturedDocument it = featuredDocument;
        boolean bl11 = false;
        FullFeaturedDocumentKt.annotateNodes((FeaturedDocument)it, (HarvestOptions)options);
        bl = false;
        bl10 = false;
        it = featuredDocument;
        boolean bl12 = false;
        ScentSession.DefaultImpls.export$default(this.getSession(), it, "portal", false, 4, null);
    }

    /**
     * Convenience overload: turns {@code args} into harvest options through the session and
     * delegates to {@link #harvest(String, HarvestOptions)}.
     *
     * @param url the page URL to harvest
     * @param args harvest arguments in string form
     * @return the harvest result produced by the delegate
     */
    @NotNull
    public final HarvestResult harvest(@NotNull String url, @NotNull String args) {
        Intrinsics.checkNotNullParameter(url, "url");
        Intrinsics.checkNotNullParameter(args, "args");
        HarvestOptions harvestOptions = (HarvestOptions) PulsarSession.DefaultImpls.options$default((PulsarSession) this.session, args, null, 2, null);
        return this.harvest(url, harvestOptions);
    }

    /**
     * Harvests {@code url} with this crawler's own session.
     *
     * @param url the page URL to harvest
     * @param options harvest options
     * @return the harvest result produced by {@link #harvest(ScentSession, String, HarvestOptions)}
     */
    @NotNull
    public final HarvestResult harvest(@NotNull String url, @NotNull HarvestOptions options) {
        Intrinsics.checkNotNullParameter(url, "url");
        Intrinsics.checkNotNullParameter(options, "options");
        ScentSession ownSession = this.session;
        return this.harvest(ownSession, url, options);
    }

    /**
     * Convenience overload: turns {@code args} into harvest options through the session and
     * delegates to {@link #harvest(List, HarvestOptions)}.
     *
     * @param documents2 the documents to harvest
     * @param args harvest arguments in string form
     * @return the harvest result produced by the delegate
     */
    @NotNull
    public final HarvestResult harvest(@NotNull List<? extends FeaturedDocument> documents2, @NotNull String args) {
        Intrinsics.checkNotNullParameter(documents2, "documents");
        Intrinsics.checkNotNullParameter(args, "args");
        HarvestOptions harvestOptions = (HarvestOptions) PulsarSession.DefaultImpls.options$default((PulsarSession) this.session, args, null, 2, null);
        return this.harvest(documents2, harvestOptions);
    }

    /**
     * Harvests the given documents through the session and then writes a report for the result.
     *
     * @param documents2 the documents to harvest
     * @param options harvest options, also passed to the report step
     * @return the result returned by {@code session.harvest}
     */
    @NotNull
    public final HarvestResult harvest(@NotNull List<? extends FeaturedDocument> documents2, @NotNull HarvestOptions options) {
        Intrinsics.checkNotNullParameter(documents2, "documents");
        Intrinsics.checkNotNullParameter(options, "options");
        HarvestResult harvested = this.session.harvest(documents2, options);
        this.report(harvested, options);
        return harvested;
    }

    /**
     * Harvests {@code url} with the given session, blocking the calling thread until the
     * suspending {@code session.harvest} call completes, then writes a report for the result.
     *
     * <p>The anonymous class below is the decompiled form of a Kotlin suspend lambda passed
     * to {@code runBlocking}; parts of it (the {@code void $result} local, the
     * {@code create} body) are decompiler artifacts and not compilable Java as shown.
     *
     * @param session the session to harvest with
     * @param url the page URL to harvest
     * @param options harvest options, also passed to the report step
     * @return the result produced by {@code session.harvest}
     */
    @NotNull
    public final HarvestResult harvest(@NotNull ScentSession session, @NotNull String url, @NotNull HarvestOptions options) {
        Intrinsics.checkNotNullParameter((Object)session, (String)"session");
        Intrinsics.checkNotNullParameter((Object)url, (String)"url");
        Intrinsics.checkNotNullParameter((Object)options, (String)"options");
        HarvestResult result2 = (HarvestResult)BuildersKt.runBlocking$default(null, (Function2)((Function2)new Function2<CoroutineScope, Continuation<? super HarvestResult>, Object>(session, url, options, null){
            // State of the coroutine: 0 before the harvest call, 1 when resumed after it.
            int label;
            final /* synthetic */ ScentSession $session;
            final /* synthetic */ String $url;
            final /* synthetic */ HarvestOptions $options;
            {
                this.$session = $session;
                this.$url = $url;
                this.$options = $options;
                super(2, $completion);
            }

            /*
             * WARNING - void declaration
             * Enabled force condition propagation
             * Lifted jumps to return sites
             */
            @Nullable
            public final Object invokeSuspend(@NotNull Object object) {
                Object object2 = IntrinsicsKt.getCOROUTINE_SUSPENDED();
                switch (this.label) {
                    case 0: {
                        // First entry: start the harvest, passing this object as the continuation.
                        ResultKt.throwOnFailure((Object)object);
                        this.label = 1;
                        Object object3 = this.$session.harvest(this.$url, this.$options, (Continuation<? super HarvestResult>)((Continuation)this));
                        // Either the real result or the COROUTINE_SUSPENDED marker is returned as-is.
                        if (object3 != object2) return object3;
                        return object2;
                    }
                    case 1: {
                        // Resumed after suspension: rethrow a failure, otherwise return the result.
                        void $result;
                        ResultKt.throwOnFailure((Object)$result);
                        Object object3 = $result;
                        return object3;
                    }
                }
                throw new IllegalStateException("call to 'resume' before 'invoke' with coroutine");
            }

            @NotNull
            public final Continuation<Unit> create(@Nullable Object value, @NotNull Continuation<?> $completion) {
                return (Continuation)new /* invalid duplicate definition of identical inner class */;
            }

            @Nullable
            public final Object invoke(@NotNull CoroutineScope p1, @Nullable Continuation<? super HarvestResult> p2) {
                return (this.create(p1, p2)).invokeSuspend(Unit.INSTANCE);
            }
        }), (int)1, null);
        this.report(result2, options);
        return result2;
    }

    /**
     * Builds the tables of a harvest result, serializes them to JSON and writes the JSON to
     * {@code harvest/corpus/last-page-tables.json} below the application's report directory.
     *
     * <p>Reporting is best effort: any exception is logged as a warning and not rethrown.
     *
     * @param result2 the harvest result whose table group is reported
     * @param options harvest options passed to {@code session.buildAll}
     */
    private final void report(HarvestResult result2, HarvestOptions options) {
        try {
            this.session.buildAll(result2.getTableGroup(), options);
            String tablesJson = this.session.buildJson(result2.getTableGroup());
            Path reportFile = AppPaths.INSTANCE.getREPORT_DIR().resolve("harvest/corpus/last-page-tables.json");
            Path reportDir = reportFile.getParent();
            // No file attributes and no open options: same as passing empty arrays.
            Files.createDirectories(reportDir);
            Files.writeString(reportFile, tablesJson);
            this.logger.info("Harvest result: file://" + reportDir);
        } catch (Exception failure) {
            String message = ExceptionsKt.stringify$default(failure, "Failed to report harvest result - ", null, 2, null);
            this.logger.warn(message);
        }
    }

    /**
     * Marks this crawler as closed.
     *
     * <p>Only the closed flag is changed; nothing else is released by this method.
     * Calling it more than once has no further effect.
     */
    @Override
    public void close() {
        // Atomically flip false -> true; the outcome of the swap is not needed.
        closed.compareAndSet(false, true);
    }

    /**
     * Creates a crawler with the default context by delegating to the synthetic
     * constructor with bit 0 of the mask set.
     */
    public VerboseCrawler() {
        this((ScentContext) null, 1, (DefaultConstructorMarker) null);
    }
}

