/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.scent.analysis;

import ai.platon.pulsar.common.AppFiles;
import ai.platon.pulsar.common.DateTimes;
import ai.platon.pulsar.common.OpenMapTable;
import ai.platon.pulsar.common.Strings;
import ai.platon.pulsar.common.concurrent.ConcurrentExpiringLRUCache;
import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.common.urls.NormUrl;
import ai.platon.pulsar.common.urls.UrlUtils;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt;
import ai.platon.pulsar.dom.select.QueriesKt;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.scent.analysis.AutoMiner;
import ai.platon.scent.analysis.AutoMiningResult;
import ai.platon.scent.analysis.AutoMiningTask;
import ai.platon.scent.analysis.DocumentLoader;
import ai.platon.scent.analysis.HarvestTaskTracker;
import ai.platon.scent.analysis.corpus.AnalysablePageCorpus;
import ai.platon.scent.common.message.ScentMiscMessageWriter;
import ai.platon.scent.dom.HNormUrl;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.dom.nodes.AnchorGroup;
import ai.platon.scent.dom.nodes.FullFeaturedDocumentKt;
import ai.platon.scent.dom.nodes.NavigateAnchor;
import ai.platon.scent.dom.nodes.NavigateDocument;
import ai.platon.scent.dom.nodes.VisualDocument;
import ai.platon.scent.entities.AnchorGroupInfo;
import ai.platon.scent.entities.HarvestTableInfo;
import ai.platon.scent.entities.HarvestTaskStatus;
import ai.platon.scent.entities.PageTableGroup;
import ai.platon.scent.entities.PageTableKt;
import ai.platon.scent.entities.TableData;
import ai.platon.scent.ml.BasicNGramNodeEncoder;
import ai.platon.scent.ml.NodeDataFrame;
import ai.platon.scent.ml.NodePoint;
import ai.platon.scent.segment.NavigationLocator;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import kotlin.Metadata;
import kotlin.ResultKt;
import kotlin.TuplesKt;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.collections.SetsKt;
import kotlin.comparisons.ComparisonsKt;
import kotlin.coroutines.Continuation;
import kotlin.coroutines.intrinsics.IntrinsicsKt;
import kotlin.coroutines.jvm.internal.Boxing;
import kotlin.coroutines.jvm.internal.ContinuationImpl;
import kotlin.jdk7.AutoCloseableKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Ref;
import kotlin.jvm.internal.StringCompanionObject;
import kotlin.ranges.RangesKt;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlin.text.StringsKt;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000\u00c0\u0001\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\b\n\u0002\b\u0005\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u001c\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000b\n\u0002\b\u0002\n\u0002\u0010\u0002\n\u0002\b\u0003\n\u0002\u0010#\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0010\"\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0004\u0018\u0000 K2\u00020\u0001:\u0001KB\u001d\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u0012\u0006\u0010\u0006\u001a\u00020\u0007\u00a2\u0006\u0002\u0010\bJ\u0014\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\u00150\u00142\u0006\u0010\u0016\u001a\u00020\u0017J\u001c\u0010\u0018\u001a\b\u0012\u0004\u0012\u00020\u001a0\u00192\u0006\u0010\u001b\u001a\u00020\u001c2\u0006\u0010\u001d\u001a\u00020\u001eJ\"\u0010\u0018\u001a\b\u0012\u0004\u0012\u00020\u001a0\u00192\f\u0010\u001f\u001a\b\u0012\u0004\u0012\u00020\u001e0\u00142\u0006\u0010 \u001a\u00020!JI\u0010\"\u001a\u0004\u0018\u00010\u00172\u0006\u0010\u001b\u001a\u00020\u001c2\u0006\u0010#\u001a\u00020$2\f\u0010%\u001a\b\u0012\u0004\u0012\u00020\u001a0\u00192\u0006\u0010&\u001a\u00020!2\u0006\u0010'\u001a\u00020(2\u0006\u0010)\u001a\u00020*H\u0086@\u00f8\u0001\u0000\u00a2\u0006\u0002\u0010+J0\u0010,\u001a\u00020-2\f\u0010.\u001a\b\u0012\u0004\u0012\u00020\u001e0/2\u0006\u0010 
\u001a\u00020!2\u0012\u00100\u001a\u000e\u0012\u0004\u0012\u000202\u0012\u0004\u0012\u00020301J\u001c\u0010,\u001a\u00020-2\f\u00104\u001a\b\u0012\u0004\u0012\u0002020\u00142\u0006\u0010 \u001a\u00020!J.\u00105\u001a\u0002062\u0006\u00107\u001a\u00020$2\u0006\u00108\u001a\u00020\u001a2\f\u00109\u001a\b\u0012\u0004\u0012\u00020;0:2\u0006\u0010 \u001a\u00020!H\u0002J4\u0010<\u001a\u0002062\u0006\u0010=\u001a\u00020\u000e2\f\u0010>\u001a\b\u0012\u0004\u0012\u00020;0?2\u0006\u00108\u001a\u00020\u001a2\f\u0010.\u001a\b\u0012\u0004\u0012\u00020\u001e0\u0014H\u0002J\u000e\u0010@\u001a\u00020A2\u0006\u0010\u0016\u001a\u00020\u0017J \u0010B\u001a\u0002062\u0006\u0010\u0016\u001a\u00020\u00172\u0006\u0010C\u001a\u00020D2\u0006\u0010E\u001a\u00020AH\u0002J \u0010F\u001a\u0002062\u0006\u0010\u0016\u001a\u00020\u00172\u0006\u0010G\u001a\u00020H2\u0006\u0010C\u001a\u00020DH\u0002J\u0016\u0010I\u001a\u00020*2\u0006\u0010J\u001a\u00020\u00172\u0006\u0010\u0016\u001a\u00020*R\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\t\u0010\nR\u000e\u0010\u0006\u001a\u00020\u0007X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u000b\u001a\u00020\fX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0014\u0010\r\u001a\u00020\u000eX\u0086D\u00a2\u0006\b\n\u0000\u001a\u0004\b\u000f\u0010\u0010R\u000e\u0010\u0004\u001a\u00020\u0005X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0014\u0010\u0011\u001a\u00020\u000eX\u0086D\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0012\u0010\u0010\u0082\u0002\u0004\n\u0002\b\u0019\u00a8\u0006L"}, d2={"Lai/platon/scent/analysis/AutoMiner;", "", "activeHarvestTracker", "Lai/platon/scent/analysis/HarvestTaskTracker;", "messageWriter", "Lai/platon/scent/common/message/ScentMiscMessageWriter;", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/scent/analysis/HarvestTaskTracker;Lai/platon/scent/common/message/ScentMiscMessageWriter;Lai/platon/pulsar/common/config/ImmutableConfig;)V", "getActiveHarvestTracker", 
"()Lai/platon/scent/analysis/HarvestTaskTracker;", "logger", "Lorg/slf4j/Logger;", "maxNumShow", "", "getMaxNumShow", "()I", "minSamples", "getMinSamples", "arrangeDocuments", "", "Lai/platon/scent/dom/nodes/VisualDocument;", "task", "Lai/platon/scent/analysis/AutoMiningTask;", "arrangeLinks", "Ljava/util/SortedSet;", "Lai/platon/scent/dom/nodes/AnchorGroup;", "portalUrl", "Lai/platon/scent/dom/HNormUrl;", "doc", "Lai/platon/pulsar/dom/FeaturedDocument;", "docs", "options", "Lai/platon/scent/dom/HarvestOptions;", "create", "portalPage", "Lai/platon/pulsar/persist/WebPage;", "anchorGroups", "itemOptions", "documentLoader", "Lai/platon/scent/analysis/DocumentLoader;", "taskStatus", "Lai/platon/scent/entities/HarvestTaskStatus;", "(Lai/platon/scent/dom/HNormUrl;Lai/platon/pulsar/persist/WebPage;Ljava/util/SortedSet;Lai/platon/scent/dom/HarvestOptions;Lai/platon/scent/analysis/DocumentLoader;Lai/platon/scent/entities/HarvestTaskStatus;Lkotlin/coroutines/Continuation;)Ljava/lang/Object;", "encodeNodes", "Lai/platon/scent/ml/NodeDataFrame;", "documents", "", "filter", "Lkotlin/Function1;", "Lorg/jsoup/nodes/Node;", "", "nodes", "loadAnchorGroupTo", "", "page", "anchorGroup", "destination", "", "", "logAnchorAnalysisRound", "round", "urls", "", "mine", "Lai/platon/scent/entities/PageTableGroup;", "reportTableGroup", "start", "Ljava/time/Instant;", "tableGroup", "reportTableGroupIfNecessary", "result", "Lai/platon/scent/analysis/AutoMiningResult;", "updateTask", "miningTask", "Companion", "scent-auto-mining"})
public final class AutoMiner {
    /** Instance of the Kotlin companion object; the class metadata names a nested {@code Companion} type. */
    @NotNull
    public static final Companion Companion = new Companion(null);
    /** Tracker handed in by the constructor; exposed through {@link #getActiveHarvestTracker()}. */
    @NotNull
    private final HarvestTaskTracker activeHarvestTracker;
    /** Message writer handed in by the constructor. */
    @NotNull
    private final ScentMiscMessageWriter messageWriter;
    /** Immutable configuration handed in by the constructor. */
    @NotNull
    private final ImmutableConfig conf;
    /** SLF4J logger obtained for {@code AutoMiner.class} in the constructor. */
    @NotNull
    private final Logger logger;
    /** Fixed to 50 by the constructor. */
    private final int maxNumShow;
    /** Fixed to 10 by the constructor. */
    private final int minSamples;
    // NOTE(review): the (null, 0, 3, null) argument list looks like a Kotlin synthetic default-argument
    // constructor (values, bit mask, marker), i.e. both real parameters take their defaults - confirm
    // against ConcurrentExpiringLRUCache.
    @NotNull
    private static final ConcurrentExpiringLRUCache<String, VisualDocument> visualDocumentCache = new ConcurrentExpiringLRUCache(null, 0, 3, null);

    /**
     * Creates a miner bound to the given collaborators.
     *
     * <p>All three arguments are null-checked in declaration order before any field is written.
     * {@code maxNumShow} is fixed to 50 and {@code minSamples} to 10.
     *
     * @param activeHarvestTracker tracker stored as-is and exposed via {@link #getActiveHarvestTracker()}
     * @param messageWriter        message writer stored as-is
     * @param conf                 configuration stored as-is
     */
    public AutoMiner(@NotNull HarvestTaskTracker activeHarvestTracker, @NotNull ScentMiscMessageWriter messageWriter, @NotNull ImmutableConfig conf) {
        Intrinsics.checkNotNullParameter(activeHarvestTracker, "activeHarvestTracker");
        Intrinsics.checkNotNullParameter(messageWriter, "messageWriter");
        Intrinsics.checkNotNullParameter(conf, "conf");

        // Collaborators.
        this.activeHarvestTracker = activeHarvestTracker;
        this.messageWriter = messageWriter;
        this.conf = conf;

        // Class-level logger; the factory result is asserted non-null before it is stored.
        final Logger classLogger = LoggerFactory.getLogger(AutoMiner.class);
        Intrinsics.checkNotNull(classLogger);
        this.logger = classLogger;

        // Fixed tuning constants.
        this.maxNumShow = 50;
        this.minSamples = 10;
    }

    /**
     * @return the harvest task tracker supplied to the constructor
     */
    @NotNull
    public final HarvestTaskTracker getActiveHarvestTracker() {
        return activeHarvestTracker;
    }

    /**
     * @return the value of {@code maxNumShow}, which the constructor sets to 50
     */
    public final int getMaxNumShow() {
        return maxNumShow;
    }

    /**
     * @return the value of {@code minSamples}, which the constructor sets to 10
     */
    public final int getMinSamples() {
        return minSamples;
    }

    /**
     * Builds an {@link AutoMiningTask} for a portal page by trying its anchor groups one at a time.
     *
     * <p>CFR note: unable to fully structure code. This is the decompiled state machine of a Kotlin
     * {@code suspend} function and is not valid Java as written ({@code ** GOTO}, assignment to
     * {@code this}, undeclared locals). The signature uses {@code varN_N} parameter names while the
     * body refers to them as {@code portalUrl}, {@code portalPage}, {@code anchorGroups},
     * {@code itemOptions}, {@code documentLoader} and {@code taskStatus}.
     *
     * <p>Flow visible in the body:
     * <ol>
     *   <li>Logs a warning and returns {@code null} when the portal URL spec is internal.</li>
     *   <li>Iterates the anchor groups. Before each round the loop stops if the number of finished
     *       rounds has reached {@code topNAnchorGroups}, if the best document count has reached
     *       {@link #getMinSamples()}, or if the elapsed time has reached the two-minute timeout.</li>
     *   <li>Per round: collects URLs (optionally via {@code loadAnchorGroupTo}, then the first
     *       {@code topLinks} URL strings of the group), suspends on {@code documentLoader.loadAll},
     *       wraps the result in an {@code AutoMiningTask}, runs {@link #arrangeDocuments}, and keeps
     *       the task with the largest number of arranged documents.</li>
     *   <li>After the loop: warns if the timeout was hit, and when a task was selected stores on
     *       {@code taskStatus} the first {@code AnchorGroupInfo} whose path equals the selected
     *       group's path (or {@code null} if none matches).</li>
     * </ol>
     *
     * @return the selected task, {@code null} if the URL is internal or no round completed, or the
     *         {@code COROUTINE_SUSPENDED} marker while {@code loadAll} is pending
     */
    @Nullable
    public final Object create(@NotNull HNormUrl var1_1, @NotNull WebPage var2_2, @NotNull SortedSet<AnchorGroup> var3_3, @NotNull HarvestOptions var4_4, @NotNull DocumentLoader var5_5, @NotNull HarvestTaskStatus var6_6, @NotNull Continuation<? super AutoMiningTask> var7_7) {
        // Reuse the incoming continuation when it is this method's own state object and its sign bit
        // is set (invokeSuspend below sets it); otherwise allocate a fresh state object.
        if (!(var7_7 instanceof create.1)) ** GOTO lbl-1000
        var27_8 = var7_7;
        if ((var27_8.label & -2147483648) != 0) {
            // Clear the re-entry marker bit so that label is again the plain state index.
            var27_8.label -= -2147483648;
        } else lbl-1000:
        // 2 sources

        {
            // State object: L$n / I$n hold the locals that must survive the suspension point.
            $continuation = new ContinuationImpl(this, var7_7){
                Object L$0;
                Object L$1;
                Object L$2;
                Object L$3;
                Object L$4;
                Object L$5;
                Object L$6;
                Object L$7;
                Object L$8;
                Object L$9;
                Object L$10;
                Object L$11;
                Object L$12;
                int I$0;
                int I$1;
                int I$2;
                /* synthetic */ Object result;
                final /* synthetic */ AutoMiner this$0;
                int label;
                {
                    this.this$0 = this$0;
                    super($completion);
                }

                @Nullable
                public final Object invokeSuspend(@NotNull Object $result) {
                    // Store the resumed value, mark the state object as re-entering, and call create()
                    // again; the real arguments are restored from the L$n / I$n slots in case 1.
                    this.result = $result;
                    this.label |= Integer.MIN_VALUE;
                    return this.this$0.create(null, null, null, null, null, null, (Continuation<? super AutoMiningTask>)((Continuation)this));
                }
            };
        }
        $result = $continuation.result;
        var28_10 = IntrinsicsKt.getCOROUTINE_SUSPENDED();
        block0 : switch ($continuation.label) {
            // State 0: first entry.
            case 0: {
                ResultKt.throwOnFailure((Object)$result);
                if (UrlUtils.isInternal((String)portalUrl.getSpec())) {
                    this.logger.warn("Unexpected internal portal url");
                    return null;
                }
                startTime = Instant.now();
                portalOptions = portalUrl.getHOptions();
                var11_13 = false;
                documents = CollectionsKt.emptyList();
                topNAnchorGroups = portalUrl.getOptions().getTopNAnchorGroups();
                // Largest arranged-document count seen so far; `task` holds the task that produced it.
                maxSize = 0;
                task = new Ref.ObjectRef();
                round = 0;
                elapsed = Duration.ofSeconds(0L);
                timeout = Duration.ofMinutes(2L);
                it = anchorGroups.iterator();
lbl26:
                // 3 sources

                while (it.hasNext()) {
                    // var18_22 is the number of rounds already finished; round becomes 1-based.
                    var18_22 = round;
                    round = var18_22 + 1;
                    // Stop when the group limit, the sample target, or the time budget is reached.
                    if (var18_22 >= topNAnchorGroups || maxSize >= this.getMinSamples() || elapsed.compareTo(timeout) >= 0) break block0;
                    if (round > 1) {
                        var18_23 = new Object[]{Boxing.boxInt((int)round), Boxing.boxInt((int)documents.size()), Boxing.boxInt((int)this.getMinSamples())};
                        this.logger.warn("In round #{} found {} samples but at least {} are required, trying the next group", var18_23);
                    }
                    anchorGroup = (AnchorGroup)it.next();
                    var20_28 = false;
                    // Insertion-ordered set of URLs to load for this round.
                    urls = new LinkedHashSet<E>();
                    if (portalOptions.getLoadAnchorGroup()) {
                        Intrinsics.checkNotNullExpressionValue((Object)anchorGroup, (String)"anchorGroup");
                        this.loadAnchorGroupTo(portalPage, anchorGroup, urls, portalOptions);
                    }
                    // Add the first `topLinks` URL strings of the current anchor group.
                    SequencesKt.toCollection((Sequence)SequencesKt.take((Sequence)CollectionsKt.asSequence((Iterable)anchorGroup.getUrlStrings()), (int)portalOptions.getTopLinks()), (Collection)urls);
                    // Spill live locals into the state object before the suspension point.
                    $continuation.L$0 = this;
                    $continuation.L$1 = portalUrl;
                    $continuation.L$2 = portalPage;
                    $continuation.L$3 = itemOptions;
                    $continuation.L$4 = documentLoader;
                    $continuation.L$5 = taskStatus;
                    $continuation.L$6 = startTime;
                    $continuation.L$7 = portalOptions;
                    $continuation.L$8 = task;
                    $continuation.L$9 = timeout;
                    $continuation.L$10 = it;
                    $continuation.L$11 = anchorGroup;
                    $continuation.L$12 = urls;
                    $continuation.I$0 = topNAnchorGroups;
                    $continuation.I$1 = maxSize;
                    $continuation.I$2 = round;
                    $continuation.label = 1;
                    v0 = documentLoader.loadAll(portalUrl, urls, itemOptions, (Continuation<? super ArrayList<FeaturedDocument>>)$continuation);
                    if (v0 == var28_10) {
                        // loadAll suspended: propagate the marker; execution resumes in case 1.
                        return var28_10;
                    }
                    // loadAll completed synchronously: continue with its result.
                    ** GOTO lbl82
                }
                break;
            }
            // State 1: resumed after loadAll; restore the locals saved above.
            case 1: {
                round = $continuation.I$2;
                maxSize = $continuation.I$1;
                topNAnchorGroups = $continuation.I$0;
                urls = (Set)$continuation.L$12;
                anchorGroup = (AnchorGroup)$continuation.L$11;
                it = (Iterator<E>)$continuation.L$10;
                timeout = (Duration)$continuation.L$9;
                task = (Ref.ObjectRef)$continuation.L$8;
                portalOptions = (HarvestOptions)$continuation.L$7;
                startTime = (Instant)$continuation.L$6;
                taskStatus = (HarvestTaskStatus)$continuation.L$5;
                documentLoader = (DocumentLoader)$continuation.L$4;
                itemOptions = (HarvestOptions)$continuation.L$3;
                portalPage = (WebPage)$continuation.L$2;
                portalUrl = (HNormUrl)$continuation.L$1;
                this = (AutoMiner)$continuation.L$0;
                ResultKt.throwOnFailure((Object)$result);
                v0 = $result;
lbl82:
                // 2 sources

                featuredDocuments = (ArrayList)v0;
                this.logger.info("Total {} featured documents, {} urls", (Object)Boxing.boxInt((int)featuredDocuments.size()), (Object)Boxing.boxInt((int)urls.size()));
                task0 = new AutoMiningTask(portalUrl, portalPage, featuredDocuments, itemOptions);
                Intrinsics.checkNotNullExpressionValue((Object)anchorGroup, (String)"anchorGroup");
                task0.setAnchorGroup(anchorGroup);
                documents = this.arrangeDocuments(task0);
                // Keep this round's task only if it yields strictly more documents than the best so far.
                if (documents.size() > maxSize) {
                    maxSize = documents.size();
                    task.element = task0;
                }
                Intrinsics.checkNotNullExpressionValue((Object)startTime, (String)"startTime");
                elapsed = DateTimes.elapsedTime$default((DateTimes)DateTimes.INSTANCE, (Instant)startTime, null, (int)2, null);
                // Jump back to the loop head, logging the round first when INFO is enabled.
                if (!this.logger.isInfoEnabled()) ** GOTO lbl26
                this.logAnchorAnalysisRound(round, urls, anchorGroup, featuredDocuments);
                ** GOTO lbl26
            }
        }
        if (elapsed.compareTo(timeout) >= 0) {
            this.logger.warn("Harvest task takes long time({}) | {}", (Object)elapsed, (Object)portalUrl);
        }
        if (task.element != null) {
            // Inlined firstOrNull: find the status entry whose path matches the selected anchor group.
            // NOTE(review): $this$firstOrNull$iv is never assigned in this decompilation; presumably it
            // is the taskStatus.getAnchorGroups() value stored in `anchorGroup` just below - confirm.
            block15: {
                anchorGroup = taskStatus.getAnchorGroups();
                var24_30 = taskStatus;
                $i$f$firstOrNull = false;
                for (T element$iv : $this$firstOrNull$iv) {
                    it = (AnchorGroupInfo)element$iv;
                    $i$a$-firstOrNull-AutoMiner$create$2 = false;
                    if (!Intrinsics.areEqual((Object)it.getAnchorGroupPath(), (Object)((AutoMiningTask)task.element).getAnchorGroup().getPath())) continue;
                    v1 = element$iv;
                    break block15;
                }
                v1 = null;
            }
            var25_33 = v1;
            var24_30.setAnchorGroup((AnchorGroupInfo)var25_33);
        }
        return task.element;
        // Unreachable as decompiled; presumably the default branch of the label switch - confirm.
        throw new IllegalStateException("call to 'resume' before 'invoke' with coroutine");
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    public final SortedSet<AnchorGroup> arrangeLinks(@NotNull HNormUrl portalUrl, @NotNull FeaturedDocument doc) {
        Intrinsics.checkNotNullParameter((Object)portalUrl, (String)"portalUrl");
        Intrinsics.checkNotNullParameter((Object)doc, (String)"doc");
        if (UrlUtils.isInternal((String)portalUrl.getSpec())) {
            this.logger.warn("Unexpected internal portal url");
            return SetsKt.sortedSetOf((Object[])new AnchorGroup[0]);
        }
        HarvestOptions options = portalUrl.getHOptions();
        HarvestTaskStatus taskStatus = this.activeHarvestTracker.computeIfAbsent((NormUrl)portalUrl);
        CharSequence charSequence = options.getOutLinkSelector();
        boolean bl = false;
        if (!StringsKt.isBlank((CharSequence)charSequence)) {
            void $this$mapTo$iv$iv;
            String restrictCss = QueriesKt.appendSelectorIfMissing((String)options.getOutLinkSelector(), (String)"a");
            Object[] $this$map$iv = (Object[])FeaturedDocument.select$default((FeaturedDocument)doc, (String)restrictCss, (int)0, (int)0, (int)6, null);
            boolean $i$f$map = false;
            Object[] objectArray = $this$map$iv;
            Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                void it;
                Element element = (Element)item$iv$iv;
                Collection collection = destination$iv$iv;
                boolean bl2 = false;
                Intrinsics.checkNotNullExpressionValue((Object)it, (String)"it");
                NavigateAnchor navigateAnchor = new NavigateAnchor((Element)it, null, options.getIgnoreUrlQuery(), 2, null);
                collection.add(navigateAnchor);
            }
            List anchors = (List)destination$iv$iv;
            $this$map$iv = new AnchorGroup[]{new AnchorGroup(options.getOutLinkSelector(), anchors, null, null, 12, null)};
            return SetsKt.sortedSetOf((Object[])$this$map$iv);
        }
        NavigationLocator locator = new NavigationLocator(options);
        locator.locate(doc);
        Iterable $this$map$iv = locator.getAnchorGroups().values();
        Intrinsics.checkNotNullExpressionValue((Object)$this$map$iv, (String)"locator.anchorGroups.values()");
        SortedSet anchorGroups = CollectionsKt.toSortedSet((Iterable)$this$map$iv, (Comparator)ComparisonsKt.reverseOrder());
        if (taskStatus != null) {
            void $this$mapTo$iv;
            $this$map$iv = anchorGroups;
            Collection destination$iv = taskStatus.getAnchorGroups();
            boolean $i$f$mapTo = false;
            for (Object item$iv : $this$mapTo$iv) {
                void it;
                AnchorGroup anchorGroup = (AnchorGroup)item$iv;
                Collection collection = destination$iv;
                boolean bl3 = false;
                AnchorGroupInfo anchorGroupInfo = new AnchorGroupInfo(portalUrl.getConfiguredUrl(), options.getOutLinkSelector(), it.getPath(), it.getSize());
                collection.add(anchorGroupInfo);
            }
        }
        Object[] objectArray = (Object[])anchorGroups;
        boolean bl4 = false;
        if (!objectArray.isEmpty()) {
            objectArray = new Object[]{anchorGroups.size(), doc.getLocation(), NodeExtKt.getExportPaths((Document)doc.getDocument()).getPortal()};
            this.logger.info("Find {} anchor groups in page {} | file://{}", objectArray);
        } else {
            this.logger.warn("\n!!! No navigate url is found in document {} | file://{} !!!\n", (Object)doc.getLocation(), (Object)NodeExtKt.getExportPaths((Document)doc.getDocument()).getPortal());
        }
        return anchorGroups;
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    public final SortedSet<AnchorGroup> arrangeLinks(@NotNull List<? extends FeaturedDocument> docs, @NotNull HarvestOptions options) {
        Intrinsics.checkNotNullParameter(docs, (String)"docs");
        Intrinsics.checkNotNullParameter((Object)options, (String)"options");
        if (docs.isEmpty()) {
            return SetsKt.sortedSetOf((Object[])new AnchorGroup[0]);
        }
        CharSequence charSequence = options.getOutLinkSelector();
        boolean bl = false;
        if (!StringsKt.isBlank((CharSequence)charSequence)) {
            void $this$mapTo$iv$iv;
            Object[] $this$map$iv;
            Iterable list$iv$iv;
            FeaturedDocument it;
            void $this$flatMapTo$iv$iv;
            String restrictCss = QueriesKt.appendSelectorIfMissing((String)options.getOutLinkSelector(), (String)"a");
            Iterable $this$flatMap$iv = docs;
            boolean $i$f$flatMap = false;
            Iterable iterable = $this$flatMap$iv;
            Collection destination$iv$iv = new ArrayList();
            boolean $i$f$flatMapTo = false;
            for (Object element$iv$iv : $this$flatMapTo$iv$iv) {
                it = (FeaturedDocument)element$iv$iv;
                boolean bl2 = false;
                list$iv$iv = (Iterable)FeaturedDocument.select$default((FeaturedDocument)it, (String)restrictCss, (int)0, (int)0, (int)6, null);
                CollectionsKt.addAll((Collection)destination$iv$iv, (Iterable)list$iv$iv);
            }
            $this$flatMap$iv = (List)destination$iv$iv;
            boolean $i$f$map = false;
            $this$flatMapTo$iv$iv = $this$map$iv;
            destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                list$iv$iv = (Element)item$iv$iv;
                Collection collection = destination$iv$iv;
                boolean bl3 = false;
                Intrinsics.checkNotNullExpressionValue((Object)it, (String)"it");
                NavigateAnchor navigateAnchor = new NavigateAnchor((Element)it, null, options.getIgnoreUrlQuery(), 2, null);
                collection.add(navigateAnchor);
            }
            List anchors = (List)destination$iv$iv;
            $this$map$iv = new AnchorGroup[]{new AnchorGroup(options.getOutLinkSelector(), anchors, null, null, 12, null)};
            return SetsKt.sortedSetOf((Object[])$this$map$iv);
        }
        NavigationLocator locator = new NavigationLocator(options);
        locator.locate(docs);
        Collection $this$map$iv = locator.getAnchorGroups().values();
        Intrinsics.checkNotNullExpressionValue((Object)$this$map$iv, (String)"locator.anchorGroups.values()");
        SortedSet anchorGroups = CollectionsKt.toSortedSet((Iterable)$this$map$iv, (Comparator)ComparisonsKt.reverseOrder());
        FeaturedDocument doc = (FeaturedDocument)CollectionsKt.first(docs);
        Object[] objectArray = (Object[])anchorGroups;
        boolean bl4 = false;
        if (!objectArray.isEmpty()) {
            objectArray = new Object[]{anchorGroups.size(), docs.size(), doc.getLocation(), NodeExtKt.getExportPaths((Document)doc.getDocument()).getPortal()};
            this.logger.info("Find {} anchor groups in {} pages, portal {} | file://{}", objectArray);
        } else {
            objectArray = new Object[]{docs.size(), doc.getLocation(), NodeExtKt.getExportPaths((Document)doc.getDocument()).getPortal()};
            this.logger.warn("\n!!! No navigate url is found in {} pages, portal {} | file://{} !!!\n", objectArray);
        }
        return anchorGroups;
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    public final List<VisualDocument> arrangeDocuments(@NotNull AutoMiningTask task) {
        Iterable $this$associateTo$iv$iv;
        Object $this$arrangeDocuments_u24lambda_u2d6_u24lambda_u2d5;
        Object it;
        Intrinsics.checkNotNullParameter((Object)task, (String)"task");
        Iterable<FeaturedDocument> $this$forEach$iv = task.getDocuments();
        boolean $i$f$forEach = false;
        Iterator<FeaturedDocument> iterator = $this$forEach$iv.iterator();
        while (iterator.hasNext()) {
            FeaturedDocument element$iv;
            it = element$iv = iterator.next();
            boolean bl = false;
            NavigateDocument navigateDocument = new NavigateDocument(it);
            boolean bl2 = false;
            boolean bl3 = false;
            $this$arrangeDocuments_u24lambda_u2d6_u24lambda_u2d5 = navigateDocument;
            boolean bl4 = false;
            $this$arrangeDocuments_u24lambda_u2d6_u24lambda_u2d5.partition(task.getItemOptions());
        }
        Iterable<FeaturedDocument> $this$associate$iv = task.getDocuments();
        boolean $i$f$associate = false;
        int capacity$iv = RangesKt.coerceAtLeast((int)MapsKt.mapCapacity((int)CollectionsKt.collectionSizeOrDefault($this$associate$iv, (int)10)), (int)16);
        it = $this$associate$iv;
        Object destination$iv$iv = new LinkedHashMap(capacity$iv);
        boolean $i$f$associateTo = false;
        for (Object element$iv$iv : $this$associateTo$iv$iv) {
            $this$arrangeDocuments_u24lambda_u2d6_u24lambda_u2d5 = destination$iv$iv;
            FeaturedDocument it2 = (FeaturedDocument)element$iv$iv;
            boolean bl = false;
            it2 = TuplesKt.to((Object)it2.getBaseUri(), (Object)new VisualDocument(it2, task.getItemOptions()));
            bl = false;
            $this$arrangeDocuments_u24lambda_u2d6_u24lambda_u2d5.put(it2.getFirst(), it2.getSecond());
        }
        Map samples = destination$iv$iv;
        AnalysablePageCorpus corpus = new AnalysablePageCorpus(task.getPortalUrl(), samples, this.conf);
        corpus.arrange();
        task.setCorpus$scent_auto_mining(corpus);
        if (this.logger.isDebugEnabled()) {
            void $this$forEachIndexed$iv;
            FeaturedDocument it3;
            void $this$mapTo$iv$iv;
            Iterable $this$map$iv = corpus.getQualifiedDocuments$scent_auto_mining();
            boolean $i$f$map = false;
            $this$associateTo$iv$iv = $this$map$iv;
            destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                $this$arrangeDocuments_u24lambda_u2d6_u24lambda_u2d5 = (VisualDocument)item$iv$iv;
                Object object = destination$iv$iv;
                boolean bl = false;
                FeaturedDocument featuredDocument = it3.getFeaturedDocument();
                object.add(featuredDocument);
            }
            $this$map$iv = (List)destination$iv$iv;
            boolean $i$f$forEachIndexed = false;
            int index$iv = 0;
            for (Object item$iv : $this$forEachIndexed$iv) {
                void document;
                int n = index$iv++;
                boolean bl = false;
                if (n < 0) {
                    CollectionsKt.throwIndexOverflow();
                }
                it3 = (FeaturedDocument)item$iv;
                int i = n;
                boolean bl5 = false;
                FullFeaturedDocumentKt.annotateNodes((FeaturedDocument)document, (HarvestOptions)task.getItemOptions());
                AppFiles.INSTANCE.saveTo(document.getPrettyHtml(), NodeExtKt.getExportPaths((Document)document.getDocument()).getAnnotatedView(), true);
            }
        }
        return corpus.getQualifiedDocuments$scent_auto_mining();
    }

    /**
     * Collects every node accepted by {@code filter2} from the given documents and encodes them.
     *
     * <p>Each document's DOM is walked with {@link NodeTraversor#traverse}; a node is collected when
     * the filter returns {@code true} for it on the {@code head} callback. Nodes are gathered in
     * document order, then traversal order, and handed to {@link #encodeNodes(List, HarvestOptions)}.
     *
     * <p>The decompiled original instantiated the visitor with constructor arguments and an
     * initializer reading undefined names, which is not valid Java; the visitor below captures the
     * same two values directly.
     *
     * @param documents the documents whose nodes are considered
     * @param options   harvest options forwarded to the encoder
     * @param filter2   predicate deciding which nodes are encoded
     * @return the encoded data frame; the empty data frame when no node is accepted
     */
    @NotNull
    public final NodeDataFrame encodeNodes(@NotNull Iterable<? extends FeaturedDocument> documents, @NotNull HarvestOptions options, @NotNull Function1<? super Node, Boolean> filter2) {
        Intrinsics.checkNotNullParameter(documents, "documents");
        Intrinsics.checkNotNullParameter(options, "options");
        Intrinsics.checkNotNullParameter(filter2, "filter");

        final Function1<? super Node, Boolean> accept = filter2;
        final List<Node> accepted = new ArrayList<Node>();
        // Stateless apart from the shared destination list, so one visitor serves every document.
        final NodeVisitor collector = new NodeVisitor(){

            public final void head(@NotNull Node node, int depth) {
                Intrinsics.checkNotNullParameter(node, "node");
                if (accept.invoke(node).booleanValue()) {
                    accepted.add(node);
                }
            }
        };

        for (FeaturedDocument document : documents) {
            NodeTraversor.traverse(collector, (Node)document.getDocument());
        }
        return this.encodeNodes(accepted, options);
    }

    /**
     * Encodes the given nodes into a {@link NodeDataFrame}.
     *
     * <p>An empty input yields {@code NodeDataFrame.Companion.emptyDataFrame()}. Otherwise a
     * {@link BasicNGramNodeEncoder} configured with {@code options.getNGram()} encodes the nodes,
     * and the frame is built from the encoder's schema and the resulting points.
     *
     * @param nodes2  the nodes to encode
     * @param options harvest options; only {@code getNGram()} is read here
     * @return the encoded data frame, or the empty frame when there are no nodes
     */
    @NotNull
    public final NodeDataFrame encodeNodes(@NotNull List<? extends Node> nodes2, @NotNull HarvestOptions options) {
        Intrinsics.checkNotNullParameter(nodes2, (String)"nodes");
        Intrinsics.checkNotNullParameter((Object)options, (String)"options");
        if (!nodes2.isEmpty()) {
            // The trailing (0, 2, null) and (null, null, 12, null) arguments are the Kotlin
            // default-parameter markers emitted by the compiler; they are kept verbatim.
            BasicNGramNodeEncoder nGramEncoder = new BasicNGramNodeEncoder(options.getNGram(), 0, 2, null);
            ArrayList<NodePoint> encodedPoints = nGramEncoder.encode(nodes2);
            return new NodeDataFrame(nGramEncoder.getSchema(), encodedPoints, null, null, 12, null);
        }
        return NodeDataFrame.Companion.emptyDataFrame();
    }

    /**
     * Runs the analysis for a mining task and returns the resulting table group.
     *
     * <p>If the task has no corpus yet, {@code arrangeDocuments(task)} is invoked first. When the
     * corpus is still absent afterwards, {@code PageTableGroup.Companion.getEMPTY()} is returned.
     * Otherwise the corpus is analysed and then closed via {@code AutoCloseableKt.closeFinally},
     * whether or not the analysis threw; finally the outcome is reported through
     * {@code reportTableGroup}.
     *
     * <p>Decompiler note (CFR): "Removed try catching itself - possible behaviour change."
     *
     * @param task the mining task to process
     * @return the page table group of the analysis result, or the empty group if no corpus exists
     */
    @NotNull
    public final PageTableGroup mine(@NotNull AutoMiningTask task) {
        Intrinsics.checkNotNullParameter((Object)task, (String)"task");
        Instant start = Instant.now();
        if (task.getCorpus$scent_auto_mining() == null) {
            this.arrangeDocuments(task);
        }
        AnalysablePageCorpus corpus = task.getCorpus$scent_auto_mining();
        if (corpus == null) {
            return PageTableGroup.Companion.getEMPTY();
        }
        AutoMiningResult result;
        // Remember the primary failure so closeFinally can take it into account when closing.
        Throwable failure = null;
        try {
            result = corpus.analyse();
        }
        catch (Throwable t) {
            failure = t;
            throw t;
        }
        finally {
            AutoCloseableKt.closeFinally((AutoCloseable)corpus, (Throwable)failure);
        }
        Intrinsics.checkNotNullExpressionValue((Object)start, (String)"start");
        this.reportTableGroup(task, start, result.getPageTableGroup());
        return result.getPageTableGroup();
    }

    /**
     * Reports the table group of {@code result} when either the task's portal URL options have
     * the diagnose flag set or the logger has INFO enabled; otherwise does nothing.
     *
     * @param task   the mining task being reported
     * @param result the mining result whose page table group is reported
     * @param start  the instant passed on to {@code reportTableGroup}
     */
    private final void reportTableGroupIfNecessary(AutoMiningTask task, AutoMiningResult result, Instant start) {
        // The diagnose flag is checked first; the logger level is only consulted when it is off.
        if (!task.getPortalUrl().getHOptions().getDiagnose() && !this.logger.isInfoEnabled()) {
            return;
        }
        this.reportTableGroup(task, start, result.getPageTableGroup());
    }

    /*
     * WARNING - void declaration
     */
    @NotNull
    public final HarvestTaskStatus updateTask(@NotNull AutoMiningTask miningTask, @NotNull HarvestTaskStatus task) {
        int n;
        Intrinsics.checkNotNullParameter((Object)miningTask, (String)"miningTask");
        Intrinsics.checkNotNullParameter((Object)task, (String)"task");
        AnalysablePageCorpus analysablePageCorpus = miningTask.getCorpus$scent_auto_mining();
        if (analysablePageCorpus == null) {
            return task;
        }
        AnalysablePageCorpus corpus = analysablePageCorpus;
        AutoMiningResult autoMiningResult = miningTask.getMiningResult();
        if (autoMiningResult == null) {
            return task;
        }
        AutoMiningResult miningResult = autoMiningResult;
        PageTableGroup tableGroup = miningResult.getPageTableGroup();
        String configuredUrl = miningTask.getConfiguredUrl();
        Object object = task.getActualTask();
        HarvestTaskStatus activeTask = object == null ? task : object;
        activeTask.setNSuccessPages(miningTask.getNumSamples());
        activeTask.setNFailedPages(miningTask.getAnchorGroup().getSize() - miningTask.getNumSamples());
        activeTask.setNRecoverableDocuments(corpus.getQualifiedDocuments$scent_auto_mining().size());
        activeTask.setRecoverableConfidence(corpus.getRecoverableConfidence$scent_auto_mining());
        activeTask.setNTables(tableGroup.getSize());
        object = tableGroup.getTables();
        HarvestTaskStatus harvestTaskStatus = activeTask;
        boolean bl = false;
        int n2 = 0;
        Iterator iterator = object.iterator();
        while (iterator.hasNext()) {
            void it;
            Object t = iterator.next();
            OpenMapTable openMapTable = (OpenMapTable)t;
            n = n2;
            boolean bl2 = false;
            int n3 = it.getNumColumns();
            n2 = n + n3;
        }
        n = n2;
        harvestTaskStatus.setNFields(n);
        Iterable $this$forEach$iv = tableGroup.getTables();
        boolean $i$f$forEach = false;
        for (Object element$iv : $this$forEach$iv) {
            OpenMapTable table = (OpenMapTable)element$iv;
            boolean bl3 = false;
            TableData data = PageTableKt.getData((OpenMapTable)table);
            HarvestTableInfo tableInfo = new HarvestTableInfo(configuredUrl, 0, 0, data.getName(), null, table.getNumRows(), table.getNumColumns(), data.getDataTypeStatistics(), data.getClusterTaskStatus(), data.getClusterGroupMetrics(), 22, null);
            activeTask.getTableInfos().add(tableInfo);
        }
        return task;
    }

    /**
     * Logs a summary of a mining run: elapsed time since {@code start}, number of sampled
     * documents, number of natural components in the corpus and number of generated tables,
     * followed by the table dimensions or a warning when no table was generated.
     *
     * <p>Does nothing when the task has no corpus.
     *
     * @param task       the mining task that was analysed
     * @param start      the instant the run started
     * @param tableGroup the tables produced by the run
     */
    private final void reportTableGroup(AutoMiningTask task, Instant start, PageTableGroup tableGroup) {
        AnalysablePageCorpus corpus = task.getCorpus$scent_auto_mining();
        if (corpus != null) {
            Duration elapsed = Duration.between(start, Instant.now());
            int naturalComponentCount = corpus.getNaturalComponents$scent_auto_mining().size();
            StringBuilder summary = new StringBuilder();
            summary.append("Total ").append(elapsed);
            summary.append(" to analyze ").append(task.getNumSamples());
            summary.append(" documents, found ").append(naturalComponentCount);
            summary.append(" natural components, generated ").append(tableGroup.getSize());
            summary.append(" tables");
            this.logger.info(summary.toString());
            if (!tableGroup.isNotEmpty()) {
                this.logger.warn("!!! No table generated !!!");
            } else {
                // reportTableGroup.s.1.INSTANCE is the decompiler's rendering of a compiler-generated
                // lambda singleton; its body is not part of this file, so the reference is kept as is.
                String dimensions = CollectionsKt.joinToString$default((Iterable)tableGroup.getTables(), (CharSequence)", ", null, null, (int)0, null, (Function1)reportTableGroup.s.1.INSTANCE, (int)30, null);
                this.logger.info("Table dimensions (row x col): " + dimensions);
            }
        }
    }

    /**
     * Logs the outcome of one anchor-analysis round.
     *
     * <p>The summary line states the round number, the document/url/anchor counts and the anchor
     * group's id, path and score. With DEBUG enabled, the summary is followed by one line per
     * document (at most {@code maxNumShow} of them) giving its 1-based position, abbreviated
     * location, readable content size and annotated-view export path, and the whole text is
     * logged at DEBUG. Otherwise only the summary is logged at INFO.
     *
     * @param round       the round number shown in the message
     * @param urls        the urls of this round; only the size is used
     * @param anchorGroup the anchor group analysed in this round
     * @param documents   the documents found in this round
     */
    private final void logAnchorAnalysisRound(int round, Set<String> urls, AnchorGroup anchorGroup, List<? extends FeaturedDocument> documents) {
        int numShow = Math.min(this.maxNumShow, documents.size());
        String s2 = "Round #" + round + " find " + documents.size() + "/" + urls.size() + "/" + anchorGroup.getSize() + " documents/urls/anchors in group #" + anchorGroup.getId() + "[" + anchorGroup.getPath() + "] with score <" + anchorGroup.getScore() + ">";
        if (!this.logger.isDebugEnabled()) {
            this.logger.info(s2);
            return;
        }
        StringBuilder sb = new StringBuilder(s2);
        sb.append(":\n");
        List<FeaturedDocument> shown = CollectionsKt.take(documents, numShow);
        List<String> lines = new ArrayList<>(shown.size());
        int index = 0;
        for (FeaturedDocument doc : shown) {
            int j = index++;
            String line = String.format(
                    "%d.\t%s [%s]\t->\t file://%s",
                    j + 1,
                    StringUtils.abbreviateMiddle((String)doc.getLocation(), (String)"...", (int)100),
                    Strings.readableBytes((long)ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getContentBytes((Document)doc.getDocument())),
                    NodeExtKt.getExportPaths((Document)doc.getDocument()).getAnnotatedView());
            lines.add(line);
        }
        // logAnchorAnalysisRound.2.INSTANCE is the decompiler's rendering of a compiler-generated
        // lambda singleton whose body is not part of this file; the reference is kept unchanged.
        CollectionsKt.joinTo$default((Iterable)lines, (Appendable)sb, (CharSequence)"\n", null, null, (int)0, null, (Function1)logAnchorAnalysisRound.2.INSTANCE, (int)60, null);
        this.logger.debug(sb.toString());
    }

    /**
     * Adds to {@code destination} the keys of the page's vivid links whose value contains the
     * marker {@code "gn:" + anchorGroup.getId()}, taking at most
     * {@code options.getMaxLoadedAnchors()} of them.
     *
     * <p>Entries whose value lacks the marker, or whose key is {@code null}, are skipped and do
     * not count towards the limit.
     *
     * @param page        the page whose vivid links are scanned
     * @param anchorGroup the anchor group whose id forms the marker
     * @param destination the set receiving the matching link keys as strings
     * @param options     harvest options; only {@code getMaxLoadedAnchors()} is read here
     */
    private final void loadAnchorGroupTo(WebPage page, AnchorGroup anchorGroup, Set<String> destination, HarvestOptions options) {
        final String ident = "gn:" + anchorGroup.getId();
        Map<CharSequence, CharSequence> vividLinks = page.getVividLinks();
        Intrinsics.checkNotNullExpressionValue((Object)vividLinks, (String)"page.vividLinks");
        // Maps an entry to its key text when the value mentions this group, otherwise to null
        // so that mapNotNull drops it.
        Function1<Map.Entry<? extends CharSequence, ? extends CharSequence>, String> keyOfGroupMember = it -> {
            Intrinsics.checkNotNullParameter(it, (String)"it");
            CharSequence value = it.getValue();
            Intrinsics.checkNotNullExpressionValue((Object)value, (String)"it.value");
            if (!StringsKt.contains$default((CharSequence)value, (CharSequence)ident, (boolean)false, (int)2, null)) {
                return null;
            }
            CharSequence key = it.getKey();
            return key == null ? null : key.toString();
        };
        Sequence<String> matchingKeys = SequencesKt.mapNotNull(MapsKt.asSequence(vividLinks), keyOfGroupMember);
        SequencesKt.toCollection(SequencesKt.take(matchingKeys, (int)options.getMaxLoadedAnchors()), destination);
    }

    /**
     * Kotlin companion-object holder for {@code AutoMiner}, as emitted by the Kotlin compiler.
     *
     * <p>The class declares no members of its own here. Its {@code @Metadata} names a
     * {@code visualDocumentCache} property of type {@code ConcurrentExpiringLRUCache};
     * NOTE(review): the backing field is not visible in this chunk and presumably lives on the
     * enclosing class; confirm against the full file.
     */
    @Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000\u001a\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0000\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002R\u001a\u0010\u0003\u001a\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00060\u0004X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0007"}, d2={"Lai/platon/scent/analysis/AutoMiner$Companion;", "", "()V", "visualDocumentCache", "Lai/platon/pulsar/common/concurrent/ConcurrentExpiringLRUCache;", "", "Lai/platon/scent/dom/nodes/VisualDocument;", "scent-auto-mining"})
    public static final class Companion {
        // Private: the companion cannot be instantiated through this constructor from outside.
        private Companion() {
        }

        // Synthetic bridge constructor; the marker argument is ignored and it simply delegates
        // to the private constructor.
        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

