package ai.platon.scent.analysis;

import ai.platon.pulsar.common.AppFiles;
import ai.platon.pulsar.common.LogsKt;
import ai.platon.pulsar.common.OpenMapTable;
import ai.platon.pulsar.common.Strings;
import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.common.urls.UrlUtils;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt;
import ai.platon.pulsar.dom.select.DomQueriesKt;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.skeleton.common.urls.NormURL;
import ai.platon.scent.analysis.corpus.AnalysablePageCorpus;
import ai.platon.scent.analysis.corpus.FullFeaturedDocumentKt;
import ai.platon.scent.analysis.corpus.VisualDocument;
import ai.platon.scent.dom.HNormUrl;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.dom.nodes.AnchorGroup;
import ai.platon.scent.dom.nodes.NavigateAnchor;
import ai.platon.scent.dom.nodes.NavigateComponent;
import ai.platon.scent.dom.nodes.NavigateDocument;
import ai.platon.scent.entities.AnchorGroupInfo;
import ai.platon.scent.entities.HarvestTableInfo;
import ai.platon.scent.entities.HarvestTaskStatus;
import ai.platon.scent.entities.PageTableGroup;
import ai.platon.scent.entities.PageTableKt;
import ai.platon.scent.entities.TableData;
import ai.platon.scent.segment.NavigationLocator;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.concurrent.atomic.AtomicInteger;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.collections.SetsKt;
import kotlin.comparisons.ComparisonsKt;
import kotlin.jdk7.AutoCloseableKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Reflection;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.jvm.internal.StringCompanionObject;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlin.text.StringsKt;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;

/* compiled from: AutoMiner.kt */
@Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��´\u0001\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n��\n\u0002\u0010\b\n\u0002\b\u0005\n\u0002\u0010 \n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u0002\n\u0002\b\u0003\n\u0002\u0010#\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0010\"\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\u0018��2\u00020\u0001B\u000f\b\u0016\u0012\u0006\u0010\u0002\u001a\u00020\u0003¢\u0006\u0002\u0010\u0004B\u0007\b\u0016¢\u0006\u0002\u0010\u0005B\u0015\u0012\u0006\u0010\u0006\u001a\u00020\u0007\u0012\u0006\u0010\u0002\u001a\u00020\u0003¢\u0006\u0002\u0010\bJ\u0014\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\u00190\u00182\u0006\u0010\u001a\u001a\u00020\u001bJ\u001c\u0010\u001c\u001a\b\u0012\u0004\u0012\u00020\u001e0\u001d2\u0006\u0010\u001f\u001a\u00020 2\u0006\u0010!\u001a\u00020\u0019J\"\u0010\u001c\u001a\b\u0012\u0004\u0012\u00020\u001e0\u001d2\f\u0010\"\u001a\b\u0012\u0004\u0012\u00020\u00190\u00182\u0006\u0010#\u001a\u00020$JF\u0010%\u001a\u0004\u0018\u00010\u001b2\u0006\u0010&\u001a\u00020 2\u0006\u0010'\u001a\u00020(2\f\u0010)\u001a\b\u0012\u0004\u0012\u00020\u001e0\u001d2\u0006\u0010*\u001a\u00020$2\u0006\u0010+\u001a\u00020,2\u0006\u0010-\u001a\u00020.H\u0086@¢\u0006\u0002\u0010/J 
\u00100\u001a\u0002012\u0006\u00102\u001a\u00020\u00192\u0006\u0010#\u001a\u00020$2\u0006\u00103\u001a\u000204H\u0002J\u0016\u00105\u001a\b\u0012\u0004\u0012\u0002010\u00182\u0006\u0010\u001a\u001a\u00020\u001bH\u0002J.\u00106\u001a\u0002072\u0006\u00108\u001a\u00020(2\u0006\u00109\u001a\u00020\u001e2\f\u0010:\u001a\b\u0012\u0004\u0012\u00020<0;2\u0006\u0010#\u001a\u00020$H\u0002J4\u0010=\u001a\u0002072\u0006\u0010>\u001a\u00020\u00122\f\u0010?\u001a\b\u0012\u0004\u0012\u00020<0@2\u0006\u00109\u001a\u00020\u001e2\f\u0010A\u001a\b\u0012\u0004\u0012\u00020\u00190\u0018H\u0002J\u000e\u0010B\u001a\u00020C2\u0006\u0010\u001a\u001a\u00020\u001bJ \u0010D\u001a\u0002072\u0006\u0010\u001a\u001a\u00020\u001b2\u0006\u0010E\u001a\u00020F2\u0006\u0010G\u001a\u00020CH\u0002J \u0010H\u001a\u0002072\u0006\u0010\u001a\u001a\u00020\u001b2\u0006\u0010I\u001a\u00020J2\u0006\u0010E\u001a\u00020FH\u0002J\u0016\u0010K\u001a\u00020.2\u0006\u0010L\u001a\u00020\u001b2\u0006\u0010\u001a\u001a\u00020.R\u0011\u0010\u0006\u001a\u00020\u0007¢\u0006\b\n��\u001a\u0004\b\t\u0010\nR\u000e\u0010\u0002\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��R\u0011\u0010\u000b\u001a\u00020\f¢\u0006\b\n��\u001a\u0004\b\r\u0010\u000eR\u000e\u0010\u000f\u001a\u00020\u0010X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u0011\u001a\u00020\u0012X\u0086D¢\u0006\b\n��\u001a\u0004\b\u0013\u0010\u0014R\u0014\u0010\u0015\u001a\u00020\u0012X\u0086D¢\u0006\b\n��\u001a\u0004\b\u0016\u0010\u0014¨\u0006M"}, d2 = {"Lai/platon/scent/analysis/AutoMiner;", "", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/pulsar/common/config/ImmutableConfig;)V", "()V", "activeHarvestTracker", "Lai/platon/scent/analysis/HarvestTaskTracker;", "(Lai/platon/scent/analysis/HarvestTaskTracker;Lai/platon/pulsar/common/config/ImmutableConfig;)V", "getActiveHarvestTracker", "()Lai/platon/scent/analysis/HarvestTaskTracker;", "encoder", "Lai/platon/scent/analysis/DomNodesEncoder;", "getEncoder", 
"()Lai/platon/scent/analysis/DomNodesEncoder;", "logger", "Lorg/slf4j/Logger;", "maxNumShow", "", "getMaxNumShow", "()I", "minSamples", "getMinSamples", "arrangeDocuments", "", "Lai/platon/pulsar/dom/FeaturedDocument;", "task", "Lai/platon/scent/analysis/AutoMiningTask;", "arrangeLinks", "Ljava/util/SortedSet;", "Lai/platon/scent/dom/nodes/AnchorGroup;", "url", "Lai/platon/scent/dom/HNormUrl;", "doc", "docs", "options", "Lai/platon/scent/dom/HarvestOptions;", "createMiningTask", "portalUrl", "portalPage", "Lai/platon/pulsar/persist/WebPage;", "anchorGroups", "itemOptions", "documentLoader", "Lai/platon/scent/analysis/DocumentLoader;", "taskStatus", "Lai/platon/scent/entities/HarvestTaskStatus;", "(Lai/platon/scent/dom/HNormUrl;Lai/platon/pulsar/persist/WebPage;Ljava/util/SortedSet;Lai/platon/scent/dom/HarvestOptions;Lai/platon/scent/analysis/DocumentLoader;Lai/platon/scent/entities/HarvestTaskStatus;Lkotlin/coroutines/Continuation;)Ljava/lang/Object;", "createVisualDocument", "Lai/platon/scent/analysis/corpus/VisualDocument;", "document", "count", "Ljava/util/concurrent/atomic/AtomicInteger;", "createVisualDocuments", "loadAnchorGroupTo", "", "page", "anchorGroup", "destination", "", "", "logAnchorAnalysisRound", "round", "urls", "", "documents", "mine", "Lai/platon/scent/entities/PageTableGroup;", "reportTableGroup", "start", "Ljava/time/Instant;", "tableGroup", "reportTableGroupIfNecessary", "result", "Lai/platon/scent/analysis/AutoMiningResult;", "updateTask", "miningTask", "scent-auto-mining"})
@SourceDebugExtension({"SMAP\nAutoMiner.kt\nKotlin\n*S Kotlin\n*F\n+ 1 AutoMiner.kt\nai/platon/scent/analysis/AutoMiner\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 _Sequences.kt\nkotlin/sequences/SequencesKt___SequencesKt\n+ 4 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,419:1\n288#2,2:420\n1549#2:422\n1620#2,3:423\n1620#2,3:426\n1360#2:429\n1446#2,5:430\n1549#2:435\n1620#2,3:436\n1549#2:439\n1620#2,3:440\n1549#2:447\n1620#2,3:448\n1864#2,3:451\n1855#2,2:455\n1559#2:457\n1590#2,4:458\n739#3,4:443\n1#4:454\n*S KotlinDebug\n*F\n+ 1 AutoMiner.kt\nai/platon/scent/analysis/AutoMiner\n*L\n134#1:420,2\n157#1:422\n157#1:423,3\n168#1:426,3\n198#1:429\n198#1:430,5\n198#1:435\n198#1:436,3\n226#1:439\n226#1:440,3\n249#1:447\n249#1:448,3\n249#1:451,3\n342#1:455,2\n392#1:457\n392#1:458,4\n233#1:443,4\n*E\n"})
/* loaded from: input_file:ai/platon/scent/analysis/AutoMiner.class */
public final class AutoMiner {

    /** Tracker in which {@code arrangeLinks(HNormUrl, FeaturedDocument)} looks up the status of the portal url. */
    @NotNull
    private final HarvestTaskTracker activeHarvestTracker;

    /** Configuration handed to every {@link AnalysablePageCorpus} built by this miner. */
    @NotNull
    private final ImmutableConfig conf;

    /** Logger bound to the {@code AutoMiner} class. */
    @NotNull
    private final Logger logger;

    /** Encoder instance exposed through {@link #getEncoder()}. */
    @NotNull
    private final DomNodesEncoder encoder;

    /** Upper bound on the number of documents listed by the per-round debug log. */
    private final int maxNumShow;

    /** Fixed to 10 by the constructor; exposed through {@link #getMinSamples()}. */
    private final int minSamples;

    /**
     * Creates a miner that uses the given tracker and configuration.
     *
     * @param harvestTaskTracker tracker of active harvest tasks, must not be null
     * @param immutableConfig    configuration, must not be null
     */
    public AutoMiner(@NotNull HarvestTaskTracker harvestTaskTracker, @NotNull ImmutableConfig immutableConfig) {
        Intrinsics.checkNotNullParameter(harvestTaskTracker, "activeHarvestTracker");
        Intrinsics.checkNotNullParameter(immutableConfig, "conf");

        // Plain constants first, collaborators afterwards.
        this.maxNumShow = 50;
        this.minSamples = 10;

        this.activeHarvestTracker = harvestTaskTracker;
        this.conf = immutableConfig;
        this.logger = LogsKt.getLogger(Reflection.getOrCreateKotlinClass(AutoMiner.class));
        this.encoder = new DomNodesEncoder();
    }

    /** Returns the harvest task tracker this miner works with. */
    @NotNull
    public final HarvestTaskTracker getActiveHarvestTracker() {
        return activeHarvestTracker;
    }

    /** Returns the DOM nodes encoder created together with this miner. */
    @NotNull
    public final DomNodesEncoder getEncoder() {
        return encoder;
    }

    /** Returns the maximum number of documents listed in a single debug log entry. */
    public final int getMaxNumShow() {
        return maxNumShow;
    }

    /** Returns the value of the {@code minSamples} setting. */
    public final int getMinSamples() {
        return minSamples;
    }

    /**
     * Creates a miner with a new {@link HarvestTaskTracker} and the given configuration.
     *
     * @param immutableConfig configuration, must not be null
     */
    public AutoMiner(@NotNull ImmutableConfig immutableConfig) {
        // The delegated constructor performs the null check on the configuration.
        this(new HarvestTaskTracker(), immutableConfig);
    }

    /** Creates a miner with a new {@link HarvestTaskTracker} and a new {@link ImmutableConfig}. */
    public AutoMiner() {
        this(new ImmutableConfig());
    }

    /*
     * Placeholder for a method the decompiler could not reconstruct: the body below contains no
     * mining logic and unconditionally throws UnsupportedOperationException.
     *
     * NOTE(review): the trailing Continuation parameter and the Object return type suggest this is
     * a compiled Kotlin suspend function producing an AutoMiningTask - confirm against AutoMiner.kt
     * and restore the real implementation from there.
     */
    /* JADX WARN: Failed to find 'out' block for switch in B:7:0x0046. Please report as an issue. */
    /* JADX WARN: Removed duplicated region for block: B:16:0x00ba  */
    /* JADX WARN: Removed duplicated region for block: B:34:0x02b8  */
    /* JADX WARN: Removed duplicated region for block: B:37:0x02e6  */
    /* JADX WARN: Removed duplicated region for block: B:40:0x0302  */
    /* JADX WARN: Removed duplicated region for block: B:43:0x0319  */
    /* JADX WARN: Removed duplicated region for block: B:56:0x01e9  */
    /* JADX WARN: Removed duplicated region for block: B:57:0x037e  */
    /* JADX WARN: Removed duplicated region for block: B:8:0x005c  */
    /* JADX WARN: Unsupported multi-entry loop pattern (BACK_EDGE: B:36:0x02e3 -> B:14:0x00b0). Please report as a decompilation issue!!! */
    /* JADX WARN: Unsupported multi-entry loop pattern (BACK_EDGE: B:37:0x02e6 -> B:14:0x00b0). Please report as a decompilation issue!!! */
    @org.jetbrains.annotations.Nullable
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public final java.lang.Object createMiningTask(@org.jetbrains.annotations.NotNull ai.platon.scent.dom.HNormUrl r9, @org.jetbrains.annotations.NotNull ai.platon.pulsar.persist.WebPage r10, @org.jetbrains.annotations.NotNull java.util.SortedSet<ai.platon.scent.dom.nodes.AnchorGroup> r11, @org.jetbrains.annotations.NotNull ai.platon.scent.dom.HarvestOptions r12, @org.jetbrains.annotations.NotNull ai.platon.scent.analysis.DocumentLoader r13, @org.jetbrains.annotations.NotNull ai.platon.scent.entities.HarvestTaskStatus r14, @org.jetbrains.annotations.NotNull kotlin.coroutines.Continuation<? super ai.platon.scent.analysis.AutoMiningTask> r15) {
        /*
            Method dump skipped, instructions count: 905
            To view this dump add '--comments-level debug' option
        */
        // The only executable statement: signals that the real body is missing.
        throw new UnsupportedOperationException("Method not decompiled: ai.platon.scent.analysis.AutoMiner.createMiningTask(ai.platon.scent.dom.HNormUrl, ai.platon.pulsar.persist.WebPage, java.util.SortedSet, ai.platon.scent.dom.HarvestOptions, ai.platon.scent.analysis.DocumentLoader, ai.platon.scent.entities.HarvestTaskStatus, kotlin.coroutines.Continuation):java.lang.Object");
    }

    /**
     * Collects the anchor groups of a single portal document.
     *
     * <ul>
     *   <li>An internal portal url yields an empty set (a warning is logged).</li>
     *   <li>When the harvest options carry a non-blank out-link selector, every element matched by
     *       that selector (with {@code "a"} appended if missing) becomes one anchor of a single
     *       group.</li>
     *   <li>Otherwise a {@link NavigationLocator} locates the groups; they are sorted in reverse
     *       natural order and one {@link AnchorGroupInfo} per group is added to the tracked task
     *       status.</li>
     * </ul>
     *
     * @param hNormUrl         the portal url together with its harvest options
     * @param featuredDocument the parsed portal document
     * @return the anchor groups found, possibly empty
     */
    @NotNull
    public final SortedSet<AnchorGroup> arrangeLinks(@NotNull HNormUrl hNormUrl, @NotNull FeaturedDocument featuredDocument) {
        Intrinsics.checkNotNullParameter(hNormUrl, "url");
        Intrinsics.checkNotNullParameter(featuredDocument, "doc");

        if (UrlUtils.isInternal(hNormUrl.getSpec())) {
            logger.warn("Unexpected internal portal url");
            return SetsKt.sortedSetOf(new AnchorGroup[0]);
        }

        HarvestOptions options = hNormUrl.getHOptions();
        // The status is looked up before branching, so it also happens on the selector path.
        HarvestTaskStatus status = activeHarvestTracker.computeIfAbsent((NormURL) hNormUrl);

        boolean hasExplicitSelector = !StringsKt.isBlank(options.getOutLinkSelector());
        if (hasExplicitSelector) {
            String anchorSelector = DomQueriesKt.appendSelectorIfMissing(options.getOutLinkSelector(), "a");
            Iterable<Element> matches = FeaturedDocument.select$default(featuredDocument, anchorSelector, 0, 0, 6, (Object) null);
            List<NavigateAnchor> anchors = new ArrayList<NavigateAnchor>(CollectionsKt.collectionSizeOrDefault(matches, 10));
            for (Element match : matches) {
                Intrinsics.checkNotNull(match);
                anchors.add(new NavigateAnchor(match, (NavigateComponent) null, options.getIgnoreUrlQuery(), 2, (DefaultConstructorMarker) null));
            }
            AnchorGroup selectedGroup = new AnchorGroup(options.getOutLinkSelector(), anchors, (AnchorGroup) null, (NavigateComponent) null, 12, (DefaultConstructorMarker) null);
            return SetsKt.sortedSetOf(new AnchorGroup[]{selectedGroup});
        }

        NavigationLocator locator = new NavigationLocator(options);
        locator.locate(featuredDocument);
        Collection<AnchorGroup> located = locator.getAnchorGroups().values();
        Intrinsics.checkNotNullExpressionValue(located, "values(...)");
        SortedSet<AnchorGroup> groups = CollectionsKt.toSortedSet(located, ComparisonsKt.reverseOrder());

        // Record a summary of every located group in the tracked task status.
        List<AnchorGroupInfo> groupInfos = status.getAnchorGroups();
        for (AnchorGroup group : groups) {
            groupInfos.add(new AnchorGroupInfo(hNormUrl.getConfiguredUrl(), options.getOutLinkSelector(), group.getPath(), group.getSize()));
        }

        if (groups.isEmpty()) {
            logger.warn("\n!!! No navigate url is found in document {} | file://{} !!!\n", featuredDocument.getLocation(), NodeExtKt.getExportPaths(featuredDocument.getDocument()).getPortal());
        } else {
            logger.info("Find {} anchor groups in page {} | file://{}", new Object[]{Integer.valueOf(groups.size()), featuredDocument.getLocation(), NodeExtKt.getExportPaths(featuredDocument.getDocument()).getPortal()});
        }
        return groups;
    }

    /**
     * Collects the anchor groups of several documents at once.
     *
     * <p>An empty document list yields an empty set. With a blank out-link selector the groups are
     * located by a {@link NavigationLocator} over all documents and sorted in reverse natural
     * order; the first document is reported as the portal in the log. With a non-blank selector,
     * every element matched in any document (selector with {@code "a"} appended if missing) becomes
     * one anchor of a single group.
     *
     * @param list           the documents to inspect
     * @param harvestOptions the options providing the out-link selector and url-query handling
     * @return the anchor groups found, possibly empty
     */
    @NotNull
    public final SortedSet<AnchorGroup> arrangeLinks(@NotNull List<? extends FeaturedDocument> list, @NotNull HarvestOptions harvestOptions) {
        Intrinsics.checkNotNullParameter(list, "docs");
        Intrinsics.checkNotNullParameter(harvestOptions, "options");

        if (list.isEmpty()) {
            return SetsKt.sortedSetOf(new AnchorGroup[0]);
        }

        if (StringsKt.isBlank(harvestOptions.getOutLinkSelector())) {
            // No explicit selector: let the locator discover the navigation groups.
            NavigationLocator locator = new NavigationLocator(harvestOptions);
            locator.locate(list);
            Collection<AnchorGroup> located = locator.getAnchorGroups().values();
            Intrinsics.checkNotNullExpressionValue(located, "values(...)");
            SortedSet<AnchorGroup> groups = CollectionsKt.toSortedSet(located, ComparisonsKt.reverseOrder());

            // The list is known to be non-empty here.
            FeaturedDocument portal = list.get(0);
            if (groups.isEmpty()) {
                logger.warn("\n!!! No navigate url is found in {} pages, portal {} | file://{} !!!\n", new Object[]{Integer.valueOf(list.size()), portal.getLocation(), NodeExtKt.getExportPaths(portal.getDocument()).getPortal()});
            } else {
                logger.info("Find {} anchor groups in {} pages, portal {} | file://{}", new Object[]{Integer.valueOf(groups.size()), Integer.valueOf(list.size()), portal.getLocation(), NodeExtKt.getExportPaths(portal.getDocument()).getPortal()});
            }
            return groups;
        }

        // Explicit selector: gather the matching elements of every document, in document order.
        String anchorSelector = DomQueriesKt.appendSelectorIfMissing(harvestOptions.getOutLinkSelector(), "a");
        ArrayList<Element> matches = new ArrayList<Element>();
        for (FeaturedDocument document : list) {
            CollectionsKt.addAll(matches, FeaturedDocument.select$default(document, anchorSelector, 0, 0, 6, (Object) null));
        }

        List<NavigateAnchor> anchors = new ArrayList<NavigateAnchor>(matches.size());
        for (Element match : matches) {
            Intrinsics.checkNotNull(match);
            anchors.add(new NavigateAnchor(match, (NavigateComponent) null, harvestOptions.getIgnoreUrlQuery(), 2, (DefaultConstructorMarker) null));
        }

        AnchorGroup selectedGroup = new AnchorGroup(harvestOptions.getOutLinkSelector(), anchors, (AnchorGroup) null, (NavigateComponent) null, 12, (DefaultConstructorMarker) null);
        return SetsKt.sortedSetOf(new AnchorGroup[]{selectedGroup});
    }

    /**
     * Builds the visual documents of the task and returns the featured documents of those that
     * were found to be qualified, in the same order.
     *
     * @param autoMiningTask the task whose documents are arranged
     * @return a new list with one featured document per qualified visual document
     */
    @NotNull
    public final List<FeaturedDocument> arrangeDocuments(@NotNull AutoMiningTask autoMiningTask) {
        Intrinsics.checkNotNullParameter(autoMiningTask, "task");

        List<VisualDocument> qualified = createVisualDocuments(autoMiningTask);
        List<FeaturedDocument> featured = new ArrayList<FeaturedDocument>(qualified.size());
        for (VisualDocument visualDocument : qualified) {
            featured.add(visualDocument.getFeaturedDocument());
        }
        return featured;
    }

    /**
     * Creates one {@link VisualDocument} per document of the task, builds an
     * {@link AnalysablePageCorpus} from them, stores the corpus on the task and returns the
     * documents the corpus reports as qualified.
     *
     * <p>Visual documents are keyed by the normalized URI of their source document, falling back
     * to its base URI; a later document with the same key replaces an earlier one. When debug
     * logging is enabled, each qualified document is additionally annotated and its pretty HTML is
     * saved to its "annotated view" export path.
     *
     * @param autoMiningTask the task providing the documents, item options and portal url
     * @return the qualified visual documents of the newly created corpus
     */
    private final List<VisualDocument> createVisualDocuments(AutoMiningTask autoMiningTask) {
        logger.info("Creating visual documents");

        AtomicInteger counter = new AtomicInteger();
        Map<String, VisualDocument> visualDocuments = new LinkedHashMap<String, VisualDocument>(1000);

        // A Kotlin Sequence is not a java.lang.Iterable, so it has to be walked via its iterator.
        Sequence<FeaturedDocument> documents = autoMiningTask.getDocuments();
        Iterator<FeaturedDocument> remaining = documents.iterator();
        while (remaining.hasNext()) {
            FeaturedDocument document = remaining.next();
            String uri = document.getNormalizedURI();
            if (uri == null) {
                uri = document.getBaseURI();
            }
            visualDocuments.put(uri, createVisualDocument(document, autoMiningTask.getItemOptions(), counter));
        }
        logger.info("Total {} visual documents are created", Integer.valueOf(visualDocuments.size()));

        AnalysablePageCorpus corpus = new AnalysablePageCorpus(autoMiningTask.getPortalUrl(), visualDocuments, this.conf);
        corpus.calculateCorpusFeatures();
        corpus.findQualifiedDocuments();
        autoMiningTask.setCorpus$scent_auto_mining(corpus);

        if (logger.isDebugEnabled()) {
            // Debug aid only: export an annotated copy of every qualified document.
            for (VisualDocument qualified : corpus.getQualifiedDocuments$scent_auto_mining()) {
                FeaturedDocument featured = qualified.getFeaturedDocument();
                FullFeaturedDocumentKt.annotateNodes(featured, autoMiningTask.getItemOptions());
                AppFiles.INSTANCE.saveTo(featured.getPrettyHtml(), NodeExtKt.getExportPaths(featured.getDocument()).getAnnotatedView(), true);
            }
        }
        return corpus.getQualifiedDocuments$scent_auto_mining();
    }

    /**
     * Turns a featured document into a {@link VisualDocument} with calculated features.
     *
     * <p>Side effects on {@code featuredDocument}: if it has no normalized URI, a
     * {@code <link rel="normalizedURI">} pointing at its base URI is appended to the head; a
     * partitioned {@link NavigateDocument} is attached to it; scripts and styles are removed.
     *
     * <p>Progress is logged with a tiered interval: every 50 documents up to 500, every 100 up to
     * 1000 and every 1000 beyond that. (Previously the tiers had no effect because the final
     * "every 50" branch also matched whenever a stricter tier did not.)
     *
     * @param featuredDocument the source document, modified as described above
     * @param harvestOptions   options used for partitioning and for the visual document
     * @param atomicInteger    running counter of created documents, incremented by this call
     * @return the visual document built from {@code featuredDocument}
     */
    private final VisualDocument createVisualDocument(FeaturedDocument featuredDocument, HarvestOptions harvestOptions, AtomicInteger atomicInteger) {
        int ordinal = atomicInteger.incrementAndGet();

        int logInterval = ordinal > 1000 ? 1000 : (ordinal > 500 ? 100 : 50);
        if (ordinal % logInterval == 0) {
            this.logger.info("Creating {}-th visual document", Integer.valueOf(ordinal));
        }

        // NOTE(review): explicit GC request every 100 documents is kept from the original code;
        // it is only a hint to the JVM - confirm whether it is still wanted.
        if (ordinal % 100 == 0) {
            System.gc();
        }

        String uri = featuredDocument.getNormalizedURI();
        if (uri == null) {
            // No normalized URI recorded yet: fall back to the base URI and record it in the head.
            uri = featuredDocument.getBaseURI();
            featuredDocument.getDocument().head().appendElement("link").attr("rel", "normalizedURI").attr("href", uri);
        }

        NavigateDocument navigateDocument = new NavigateDocument(featuredDocument);
        navigateDocument.partition(harvestOptions);
        ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setNavigateDocument(featuredDocument.getDocument(), navigateDocument);

        VisualDocument visualDocument = new VisualDocument(uri, featuredDocument, harvestOptions);
        visualDocument.getFeaturedDocument().removeScripts();
        visualDocument.getFeaturedDocument().removeStyles();
        visualDocument.calculateFeatures();
        return visualDocument;
    }

    /**
     * Analyses the corpus of the task and returns the resulting table group.
     *
     * <p>If the task has no corpus yet, {@link #arrangeDocuments(AutoMiningTask)} is called first
     * to create one; if there is still none, the empty table group is returned. The corpus is
     * closed exactly once after the analysis, whether it succeeds or fails. When both the analysis
     * and the close fail, the analysis failure is propagated and the close failure is attached to
     * it as a suppressed exception.
     *
     * @param autoMiningTask the task to mine
     * @return the page table group produced by the analysis, or the empty group without a corpus
     */
    @NotNull
    public final PageTableGroup mine(@NotNull AutoMiningTask autoMiningTask) {
        Intrinsics.checkNotNullParameter(autoMiningTask, "task");
        Instant start = Instant.now();

        if (autoMiningTask.getCorpus$scent_auto_mining() == null) {
            arrangeDocuments(autoMiningTask);
        }
        AnalysablePageCorpus corpus = autoMiningTask.getCorpus$scent_auto_mining();
        if (corpus == null) {
            return PageTableGroup.Companion.getEMPTY();
        }

        AutoMiningResult result;
        Throwable failure = null;
        try {
            result = corpus.analyse();
        } catch (Throwable t) {
            // Remember the primary failure so that closing cannot mask it.
            failure = t;
            throw t;
        } finally {
            // With a non-null cause, closeFinally records a close() failure as suppressed.
            AutoCloseableKt.closeFinally(corpus, failure);
        }

        // Reporting happens after the corpus is closed, so a reporting error cannot re-close it.
        PageTableGroup tableGroup = result.getPageTableGroup();
        reportTableGroup(autoMiningTask, start, tableGroup);
        return tableGroup;
    }

    /**
     * Reports the table group of a mining result when the portal url's harvest options request
     * diagnosis or when info logging is enabled; does nothing otherwise.
     */
    private final void reportTableGroupIfNecessary(AutoMiningTask autoMiningTask, AutoMiningResult autoMiningResult, Instant instant) {
        boolean diagnosing = autoMiningTask.getPortalUrl().getHOptions().getDiagnose();
        if (!diagnosing && !logger.isInfoEnabled()) {
            return;
        }
        reportTableGroup(autoMiningTask, instant, autoMiningResult.getPageTableGroup());
    }

    /**
     * Copies the outcome of a mining task into a harvest task status.
     *
     * <p>Nothing is changed when the mining task has no corpus or no mining result. Otherwise the
     * counters and one {@link HarvestTableInfo} per generated table are written to the status'
     * actual task if it has one, or to the status itself.
     *
     * @param autoMiningTask    the mining task to read from
     * @param harvestTaskStatus the status to update
     * @return always {@code harvestTaskStatus}, the instance passed in
     */
    @NotNull
    public final HarvestTaskStatus updateTask(@NotNull AutoMiningTask autoMiningTask, @NotNull HarvestTaskStatus harvestTaskStatus) {
        Intrinsics.checkNotNullParameter(autoMiningTask, "miningTask");
        Intrinsics.checkNotNullParameter(harvestTaskStatus, "task");

        AnalysablePageCorpus corpus = autoMiningTask.getCorpus$scent_auto_mining();
        if (corpus == null) {
            return harvestTaskStatus;
        }
        AutoMiningResult miningResult = autoMiningTask.getMiningResult();
        if (miningResult == null) {
            return harvestTaskStatus;
        }

        PageTableGroup tableGroup = miningResult.getPageTableGroup();
        String configuredUrl = autoMiningTask.getConfiguredUrl();

        // Write to the actual task when there is one, otherwise to the given status.
        HarvestTaskStatus target = harvestTaskStatus.getActualTask();
        if (target == null) {
            target = harvestTaskStatus;
        }

        target.setNSuccessPages(autoMiningTask.getNumSamples());
        target.setNFailedPages(autoMiningTask.getAnchorGroup().getSize() - autoMiningTask.getNumSamples());
        target.setNRecoverableDocuments(corpus.getQualifiedDocuments$scent_auto_mining().size());
        target.setRecoverableConfidence(corpus.getRecoverableConfidence$scent_auto_mining());
        target.setNTables(tableGroup.getSize());

        // Total number of fields = sum of the column counts of all tables.
        int fieldCount = 0;
        for (OpenMapTable table : tableGroup.getTables()) {
            fieldCount += table.getNumColumns();
        }
        target.setNFields(fieldCount);

        for (OpenMapTable table : tableGroup.getTables()) {
            TableData tableData = PageTableKt.getData(table);
            HarvestTableInfo tableInfo = new HarvestTableInfo(configuredUrl, 0, 0, tableData.getName(), (String) null, table.getNumRows(), table.getNumColumns(), tableData.getDataTypeStatistics(), tableData.getClusterTaskStatus(), tableData.getClusterGroupMetrics(), 22, (DefaultConstructorMarker) null);
            target.getTableInfos().add(tableInfo);
        }
        return harvestTaskStatus;
    }

    /**
     * Logs a summary of a finished analysis: elapsed time since {@code instant}, number of
     * samples, natural components and tables, followed by the "rows x columns" dimensions of each
     * table, or a warning when no table was generated. Does nothing when the task has no corpus.
     */
    private final void reportTableGroup(AutoMiningTask autoMiningTask, Instant instant, PageTableGroup pageTableGroup) {
        AnalysablePageCorpus corpus = autoMiningTask.getCorpus$scent_auto_mining();
        if (corpus == null) {
            return;
        }

        String summary = "Total " + Duration.between(instant, Instant.now())
                + " to analyze " + autoMiningTask.getNumSamples()
                + " documents, found " + corpus.getNaturalComponents$scent_auto_mining().size()
                + " natural components, generated " + pageTableGroup.getSize() + " tables";
        logger.info(summary);

        if (pageTableGroup.isNotEmpty()) {
            // Join the per-table dimensions with ", ".
            StringBuilder dimensions = new StringBuilder();
            boolean first = true;
            for (OpenMapTable table : pageTableGroup.getTables()) {
                if (!first) {
                    dimensions.append(", ");
                }
                first = false;
                dimensions.append(table.getNumRows()).append(" x ").append(table.getNumColumns());
            }
            logger.info("Table dimensions (row x col): " + dimensions);
        } else {
            logger.warn("!!! No table generated !!!");
        }
    }

    /**
     * Logs the outcome of one anchor analysis round.
     *
     * <p>At info level only the one-line summary is written. When debug logging is enabled the
     * summary is instead written at debug level, followed by a numbered list of at most
     * {@code maxNumShow} documents with their abbreviated location, content size and annotated
     * view path.
     */
    private final void logAnchorAnalysisRound(int i, Set<String> set, AnchorGroup anchorGroup, List<? extends FeaturedDocument> list) {
        int shown = Math.min(this.maxNumShow, list.size());
        String summary = "Round #" + i + " find " + list.size() + "/" + set.size() + "/" + anchorGroup.getSize() + " documents/urls/anchors in group #" + anchorGroup.getId() + "[" + anchorGroup.getPath() + "] with score <" + anchorGroup.getScore() + ">";

        if (!logger.isDebugEnabled()) {
            logger.info(summary);
            return;
        }

        StringBuilder report = new StringBuilder(summary);
        report.append(":\n");
        int position = 0;
        for (FeaturedDocument document : list) {
            if (position >= shown) {
                break;
            }
            if (position > 0) {
                report.append("\n");
            }
            // Entries are numbered from 1.
            report.append(String.format("%d.\t%s [%s]\t->\t file://%s",
                    Integer.valueOf(position + 1),
                    StringUtils.abbreviateMiddle(document.getLocation(), "...", 100),
                    Strings.compactFormat(ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getContentBytes(document.getDocument())),
                    NodeExtKt.getExportPaths(document.getDocument()).getAnnotatedView()));
            position++;
        }
        logger.debug(report.toString());
    }

    /**
     * Adds to {@code set} the keys of the page's vivid links whose value contains the marker
     * {@code "gn:" + anchorGroup.getId()}, in map iteration order, stopping after
     * {@code harvestOptions.getMaxLoadedAnchors()} matching links. Entries with a null key are
     * skipped and do not count towards the limit.
     *
     * @throws IllegalArgumentException if the configured maximum is negative
     */
    private final void loadAnchorGroupTo(WebPage webPage, AnchorGroup anchorGroup, Set<String> set, HarvestOptions harvestOptions) {
        String groupMarker = "gn:" + anchorGroup.getId();
        Map<? extends CharSequence, ? extends CharSequence> vividLinks = webPage.getVividLinks();

        int limit = harvestOptions.getMaxLoadedAnchors();
        if (limit < 0) {
            throw new IllegalArgumentException("Requested element count " + limit + " is less than zero.");
        }
        if (limit == 0) {
            return;
        }

        int taken = 0;
        for (Map.Entry<? extends CharSequence, ? extends CharSequence> link : vividLinks.entrySet()) {
            if (taken >= limit) {
                break;
            }
            if (!StringsKt.contains$default(link.getValue(), groupMarker, false, 2, (Object) null)) {
                continue;
            }
            CharSequence url = link.getKey();
            if (url == null) {
                continue;
            }
            // Every matching link counts, even if the set already contained its key.
            set.add(url.toString());
            taken++;
        }
    }
}
