package ai.platon.scent.analysis.corpus;

import ai.platon.pulsar.common.ExceptionsKt;
import ai.platon.pulsar.common.Frequency;
import ai.platon.pulsar.common.FrequencyManager;
import ai.platon.pulsar.common.LangKt;
import ai.platon.pulsar.common.LogsKt;
import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.common.sql.ResultSetFormatter;
import ai.platon.pulsar.dom.nodes.NodesKt;
import ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt;
import ai.platon.pulsar.dom.select.DomQueriesKt;
import ai.platon.scent.common.Systems;
import ai.platon.scent.dom.HNormUrl;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.dom.features.defined.FF;
import ai.platon.scent.dom.nodes.node.ext.NodeCharactersKt;
import com.google.common.collect.Multiset;
import com.google.common.collect.TreeMultimap;
import java.sql.ResultSet;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;
import kotlin.Deprecated;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.comparisons.ComparisonsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.functions.Function2;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Reflection;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.jvm.internal.StringCompanionObject;
import kotlin.ranges.IntRange;
import kotlin.ranges.RangesKt;
import kotlin.sequences.SequencesKt;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.perf4j.slf4j.Slf4JStopWatch;
import org.slf4j.Logger;

/* compiled from: LimitedPageCorpus.kt */
@Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��²\u0001\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010%\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0010\b\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0010!\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010#\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0007\n\u0002\u0010\u0006\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0018\u0002\n��\n\u0002\u0010\u0002\n\u0002\b\u0013\n\u0002\u0010 \n\u0002\b\n\b��\u0018�� b2\u00020\u0001:\u0002bcB)\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0012\u0010\u0004\u001a\u000e\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\u00070\u0005\u0012\u0006\u0010\b\u001a\u00020\t¢\u0006\u0002\u0010\nJ\b\u0010E\u001a\u00020FH\u0002J\u0006\u0010G\u001a\u00020FJ\b\u0010H\u001a\u00020FH\u0002J\u0010\u0010H\u001a\u00020F2\u0006\u0010I\u001a\u00020\u0007H\u0002J\b\u0010J\u001a\u00020FH\u0002J\b\u0010K\u001a\u00020FH\u0002J\b\u0010L\u001a\u00020FH\u0002J\b\u0010M\u001a\u00020FH\u0002J\b\u0010N\u001a\u00020FH\u0002J\u0006\u0010O\u001a\u00020FJ\b\u0010P\u001a\u00020FH\u0002J\b\u0010Q\u001a\u00020FH\u0002J\b\u0010R\u001a\u00020FH\u0002J\b\u0010S\u001a\u00020FH\u0002J\b\u0010T\u001a\u00020FH\u0002J\b\u0010U\u001a\u00020FH\u0016J\b\u0010V\u001a\u00020FH\u0002J\b\u0010W\u001a\u00020FH\u0002J\b\u0010X\u001a\u00020FH\u0002J\f\u0010Y\u001a\b\u0012\u0004\u0012\u00020\u00070ZJ\b\u0010[\u001a\u00020FH\u0002J\b\u0010\\\u001a\u00020FH\u0002J\u0016\u0010]\u001a\u00020F2\f\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\r0\u0018H\u0002J\b\u0010^\u001a\u00020FH\u0002J\b\u0010_\u001a\u00020FH\u0002J\b\u0010`\u001a\u00020FH\u0002J\b\u0010a\u001a\u00020FH\u0002R\u001e\u0010\u000b\u001a\u0012\u0012\u0004\u0012\u00020\r\u0012\u0004\u0012\u00020\u000e0\fj\u0002`\u000fX\u0082\u0004¢\u0006\u0002\n��R\u001a\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u000e0\u0011X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b\u0012\u0010\u0013R\u001a\u0010\u0014\u001a\b\u0012\u0004\u0012\u00020\u00150\u0011X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b\u0016\u0010\u0013R\u000e\u0010\b\u001a\u00020\tX\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\r0\u0018X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u0019\u001a\b\u0012\u0004\u0012\u00020\u00060\u001aX\u0082\u0004¢\u0006\u0002\n��R\u001a\u0010\u001b\u001a\b\u0012\u0004\u0012\u00020\u00060\u001cX\u0080\u0004¢\u0006\b\n��\u001a\u0004\b\u001d\u0010\u001eR\u001e\u0010\u001f\u001a\u0012\u0012\u0004\u0012\u00020\r\u0012\u0004\u0012\u00020 0\fj\u0002`!X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\"\u001a\u00020#X\u0082\u0004¢\u0006\u0002\n��R\u001a\u0010$\u001a\b\u0012\u0004\u0012\u00020\u00150\u0011X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b%\u0010\u0013R\u0014\u0010&\u001a\u00020'8BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b(\u0010)R\u0011\u0010\u0002\u001a\u00020\u0003¢\u0006\b\n��\u001a\u0004\b*\u0010+R\u001a\u0010,\u001a\b\u0012\u0004\u0012\u00020\u00070\u0011X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b-\u0010\u0013R\u001a\u0010.\u001a\u00020/X\u0080\u000e¢\u0006\u000e\n��\u001a\u0004\b0\u00101\"\u0004\b2\u00103R\u001c\u00104\u001a\b\u0012\u0004\u0012\u0002050\u00188\u0002X\u0083\u0004¢\u0006\b\n��\u0012\u0004\b6\u00107R\u001e\u00108\u001a\u0012\u0012\u0004\u0012\u000205\u0012\u0004\u0012\u00020 0\fj\u0002`9X\u0082\u0004¢\u0006\u0002\n��R\u001a\u0010\u0004\u001a\u000e\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\u00070\u0005X\u0082\u0004¢\u0006\u0002\n��R\u0016\u0010:\u001a\n <*\u0004\u0018\u00010;0;X\u0082\u0004¢\u0006\u0002\n��R\u0010\u0010=\u001a\u0004\u0018\u00010>X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010?\u001a\u00020#X\u0082\u0004¢\u0006\u0002\n��R \u0010@\u001a\u000e\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\u00150\fX\u0080\u0004¢\u0006\b\n��\u001a\u0004\bA\u0010BR\u000e\u0010C\u001a\u00020DX\u0082\u0004¢\u0006\u0002\n��¨\u0006d"}, d2 = {"Lai/platon/scent/analysis/corpus/LimitedPageCorpus;", "Ljava/lang/AutoCloseable;", "portalUrl", "Lai/platon/scent/dom/HNormUrl;", "samples", "", "", "Lai/platon/scent/analysis/corpus/VisualDocument;", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/scent/dom/HNormUrl;Ljava/util/Map;Lai/platon/pulsar/common/config/ImmutableConfig;)V", "captionedElementIndexer", "Lcom/google/common/collect/TreeMultimap;", "", "Lorg/jsoup/nodes/Element;", "Lai/platon/scent/dom/nodes/IntElementIndexer;", "componentElements", "", "getComponentElements$scent_auto_mining", "()Ljava/util/List;", "components", "Lai/platon/scent/analysis/corpus/VisualComponent;", "getComponents$scent_auto_mining", "dff", "Lai/platon/pulsar/common/Frequency;", "documentFrequency", "Lai/platon/pulsar/common/FrequencyManager;", "documentUrls", "", "getDocumentUrls$scent_auto_mining", "()Ljava/util/Set;", "layoutLeftIndexer", "Lorg/jsoup/nodes/Node;", "Lai/platon/scent/dom/nodes/IntNodeIndexer;", "logger", "Lorg/slf4j/Logger;", "naturalComponents", "getNaturalComponents$scent_auto_mining", "options", "Lai/platon/scent/dom/HarvestOptions;", "getOptions", "()Lai/platon/scent/dom/HarvestOptions;", "getPortalUrl", "()Lai/platon/scent/dom/HNormUrl;", "qualifiedDocuments", "getQualifiedDocuments$scent_auto_mining", "recoverableConfidence", "", "getRecoverableConfidence$scent_auto_mining", "()D", "setRecoverableConfidence$scent_auto_mining", "(D)V", "regionalTileFrequency", "Lai/platon/scent/analysis/corpus/RegionalTile;", "getRegionalTileFrequency$annotations", "()V", "regionalTileNodeIndexer", "Lai/platon/scent/analysis/corpus/RegionalTileNodeIndexer;", "startTime", "Ljava/time/Instant;", "kotlin.jvm.PlatformType", "stopWatch", "Lorg/perf4j/slf4j/Slf4JStopWatch;", "taskLogger", "uniquePathComponents", "getUniquePathComponents$scent_auto_mining", "()Lcom/google/common/collect/TreeMultimap;", "unusedData", "Lai/platon/scent/analysis/corpus/LimitedPageCorpus$UnusedData;", "addLabeledDocuments", "", "analyse", "analysisVariables", "doc", "annotateNodes", "arrangeComponents", "assembleCaptionedElementIndexer", "assembleLayoutLeftIndexer", "assembleRegionalTextNodeIndex", "calculateCorpusFeatures", "calculateCorpusFeatures0", "calculateDocumentFrequency", "calculateMoreCorpusFeatures", "calculateTextNodeDocFrequency", "chooseQualifiedDocuments", "close", "divideDistricts", "findComponents", "findLayoutComponents", "findQualifiedDocuments", "", "nominalFeaturesToIndex", "reportComponents", "reportDff", "simplifyAnnotations", "styleFeaturesToIndex", "supplementComponents", "validateFeatures", "Companion", "UnusedData", "scent-auto-mining"})
@SourceDebugExtension({"SMAP\nLimitedPageCorpus.kt\nKotlin\n*S Kotlin\n*F\n+ 1 LimitedPageCorpus.kt\nai/platon/scent/analysis/corpus/LimitedPageCorpus\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 _Arrays.kt\nkotlin/collections/ArraysKt___ArraysKt\n+ 4 _Maps.kt\nkotlin/collections/MapsKt___MapsKt\n+ 5 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,920:1\n1855#2,2:921\n1855#2,2:923\n1446#2,5:925\n1446#2,5:930\n1620#2,3:935\n1855#2,2:938\n1864#2,3:944\n1864#2,3:951\n1855#2,2:954\n1855#2,2:956\n1620#2,3:959\n1855#2,2:962\n1611#2:965\n1855#2:966\n1620#2,3:967\n1856#2:972\n1612#2:973\n1855#2,2:974\n1855#2,2:976\n288#2,2:978\n1611#2:980\n1855#2:981\n1856#2:983\n1612#2:984\n1611#2:985\n1855#2:986\n1856#2:988\n1612#2:989\n1855#2,2:990\n1620#2,3:993\n1855#2,2:996\n1446#2,2:999\n1549#2:1001\n1620#2,3:1002\n1448#2,3:1005\n1179#2,2:1008\n1253#2,2:1010\n1620#2,3:1012\n1256#2:1015\n1271#2,2:1016\n1285#2,4:1018\n1855#2,2:1023\n1855#2,2:1026\n1549#2:1028\n1620#2,3:1029\n766#2:1032\n857#2,2:1033\n1360#2:1035\n1446#2,5:1036\n1620#2,3:1041\n1855#2,2:1044\n1855#2:1046\n1856#2:1051\n766#2:1052\n857#2,2:1053\n1855#2,2:1055\n766#2:1057\n857#2,2:1058\n1855#2,2:1060\n766#2:1062\n857#2,2:1063\n1855#2,2:1065\n11065#3:940\n11400#3,3:941\n11065#3:947\n11400#3,3:948\n215#4:958\n216#4:964\n215#4:992\n216#4:998\n215#4:1022\n216#4:1025\n125#4:1047\n152#4,3:1048\n1#5:970\n1#5:971\n1#5:982\n1#5:987\n*S KotlinDebug\n*F\n+ 1 LimitedPageCorpus.kt\nai/platon/scent/analysis/corpus/LimitedPageCorpus\n*L\n309#1:921,2\n379#1:923,2\n402#1:925,5\n404#1:930,5\n406#1:935,3\n410#1:938,2\n446#1:944,3\n486#1:951,3\n538#1:954,2\n568#1:956,2\n604#1:959,3\n608#1:962,2\n620#1:965\n620#1:966\n621#1:967,3\n620#1:972\n620#1:973\n630#1:974,2\n639#1:976,2\n653#1:978,2\n654#1:980\n654#1:981\n654#1:983\n654#1:984\n655#1:985\n655#1:986\n655#1:988\n655#1:989\n701#1:990,2\n723#1:993,3\n728#1:996,2\n749#1:999,2\n750#1:1001\n750#1:1002,3\n749#1:1005,3\n753#1:1008,2\n753#1:1010,2\n753#1:1012,3\n753#1:1015\n765#1:1016,2\n765#1:1018,4\n800#1:1023,2\n826#1:1026,2\n833#1:1028\n833#1:1029,3\n837#1:1032\n837#1:1033,2\n837#1:1035\n837#1:1036,5\n844#1:1041,3\n852#1:1044,2\n856#1:1046\n856#1:1051\n868#1:1052\n868#1:1053,2\n868#1:1055,2\n874#1:1057\n874#1:1058,2\n874#1:1060,2\n910#1:1062\n910#1:1063,2\n910#1:1065,2\n442#1:940\n442#1:941,3\n481#1:947\n481#1:948,3\n602#1:958\n602#1:964\n722#1:992\n722#1:998\n796#1:1022\n796#1:1025\n860#1:1047\n860#1:1048,3\n620#1:971\n654#1:982\n655#1:987\n*E\n"})
/* loaded from: input_file:ai/platon/scent/analysis/corpus/LimitedPageCorpus.class */
public final class LimitedPageCorpus implements AutoCloseable {

    @NotNull
    private final HNormUrl portalUrl;

    @NotNull
    private final Map<String, VisualDocument> samples;

    @NotNull
    private final ImmutableConfig conf;

    @NotNull
    private final Logger logger;

    @NotNull
    private final Logger taskLogger;
    private final Instant startTime;

    @Nullable
    private final Slf4JStopWatch stopWatch;

    @NotNull
    private final Set<String> documentUrls;

    @NotNull
    private final List<VisualDocument> qualifiedDocuments;

    @NotNull
    private final FrequencyManager<String> documentFrequency;

    @NotNull
    private final Frequency<Integer> dff;

    @NotNull
    private final TreeMultimap<RegionalTile, Node> regionalTileNodeIndexer;

    @NotNull
    private final Frequency<RegionalTile> regionalTileFrequency;

    @NotNull
    private final TreeMultimap<Integer, Element> captionedElementIndexer;

    @NotNull
    private final UnusedData unusedData;
    private double recoverableConfidence;

    @NotNull
    private final TreeMultimap<Integer, Node> layoutLeftIndexer;

    @NotNull
    private final List<VisualComponent> components;

    @NotNull
    private final TreeMultimap<String, VisualComponent> uniquePathComponents;

    @NotNull
    private final List<VisualComponent> naturalComponents;

    @NotNull
    private final List<Element> componentElements;

    @NotNull
    public static final Companion Companion = new Companion(null);

    @NotNull
    private static final LimitedPageCorpus EMPTY = new LimitedPageCorpus(HNormUrl.Companion.getNIL(), new LinkedHashMap(), ImmutableConfig.Companion.getEMPTY());

    /* compiled from: LimitedPageCorpus.kt */
    @Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��\u0014\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\b\u0086\u0003\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002R\u0011\u0010\u0003\u001a\u00020\u0004¢\u0006\b\n��\u001a\u0004\b\u0005\u0010\u0006¨\u0006\u0007"}, d2 = {"Lai/platon/scent/analysis/corpus/LimitedPageCorpus$Companion;", "", "()V", "EMPTY", "Lai/platon/scent/analysis/corpus/LimitedPageCorpus;", "getEMPTY", "()Lai/platon/scent/analysis/corpus/LimitedPageCorpus;", "scent-auto-mining"})
    /* loaded from: input_file:ai/platon/scent/analysis/corpus/LimitedPageCorpus$Companion.class */
    public static final class Companion {
        private Companion() {
        }

        @NotNull
        public final LimitedPageCorpus getEMPTY() {
            return LimitedPageCorpus.EMPTY;
        }

        public /* synthetic */ Companion(DefaultConstructorMarker defaultConstructorMarker) {
            this();
        }
    }

    /* compiled from: LimitedPageCorpus.kt */
    @Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��B\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010\b\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010\u0002\n��\b\u0002\u0018��2\u00020\u0001B\u0005¢\u0006\u0002\u0010\u0002J\u0006\u0010\u0016\u001a\u00020\u0017R!\u0010\u0003\u001a\u0012\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00060\u0004j\u0002`\u0007¢\u0006\b\n��\u001a\u0004\b\b\u0010\tR!\u0010\n\u001a\u0012\u0012\u0004\u0012\u00020\u000b\u0012\u0004\u0012\u00020\f0\u0004j\u0002`\r¢\u0006\b\n��\u001a\u0004\b\u000e\u0010\tR\u001d\u0010\u000f\u001a\u000e\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\u00110\u0010¢\u0006\b\n��\u001a\u0004\b\u0012\u0010\u0013R!\u0010\u0014\u001a\u0012\u0012\u0004\u0012\u00020\u000b\u0012\u0004\u0012\u00020\f0\u0004j\u0002`\r¢\u0006\b\n��\u001a\u0004\b\u0015\u0010\t¨\u0006\u0018"}, d2 = {"Lai/platon/scent/analysis/corpus/LimitedPageCorpus$UnusedData;", "", "()V", "categorizedTextNodeIndexer", "Lcom/google/common/collect/TreeMultimap;", "", "Lorg/jsoup/nodes/TextNode;", "Lai/platon/scent/dom/nodes/TextNodeIndexer;", "getCategorizedTextNodeIndexer", "()Lcom/google/common/collect/TreeMultimap;", "constantCaptionedElementIndexer", "", "Lorg/jsoup/nodes/Element;", "Lai/platon/scent/dom/nodes/IntElementIndexer;", "getConstantCaptionedElementIndexer", "orderedRegionalTextNodeIndexer", "Ljava/util/TreeMap;", "Lai/platon/scent/analysis/corpus/OrderedRegionalTileNode;", "getOrderedRegionalTextNodeIndexer", "()Ljava/util/TreeMap;", "pseudoConstantTextBlockIndexer", "getPseudoConstantTextBlockIndexer", "clear", "", "scent-auto-mining"})
    /* loaded from: input_file:ai/platon/scent/analysis/corpus/LimitedPageCorpus$UnusedData.class */
    private static final class UnusedData {

        @NotNull
        private final TreeMap<TextNode, OrderedRegionalTileNode> orderedRegionalTextNodeIndexer = new TreeMap<>(NodesKt.getNodePositionComparator());

        @NotNull
        private final TreeMultimap<Integer, Element> constantCaptionedElementIndexer;

        @NotNull
        private final TreeMultimap<Integer, Element> pseudoConstantTextBlockIndexer;

        @NotNull
        private final TreeMultimap<String, TextNode> categorizedTextNodeIndexer;

        public UnusedData() {
            TreeMultimap<Integer, Element> create = TreeMultimap.create(ComparisonsKt.naturalOrder(), NodesKt.getNodeComparator());
            Intrinsics.checkNotNullExpressionValue(create, "create(...)");
            this.constantCaptionedElementIndexer = create;
            TreeMultimap<Integer, Element> create2 = TreeMultimap.create(ComparisonsKt.naturalOrder(), NodesKt.getNodeComparator());
            Intrinsics.checkNotNullExpressionValue(create2, "create(...)");
            this.pseudoConstantTextBlockIndexer = create2;
            TreeMultimap<String, TextNode> create3 = TreeMultimap.create(ComparisonsKt.naturalOrder(), NodesKt.getNodeComparator());
            Intrinsics.checkNotNullExpressionValue(create3, "create(...)");
            this.categorizedTextNodeIndexer = create3;
        }

        @NotNull
        public final TreeMap<TextNode, OrderedRegionalTileNode> getOrderedRegionalTextNodeIndexer() {
            return this.orderedRegionalTextNodeIndexer;
        }

        @NotNull
        public final TreeMultimap<Integer, Element> getConstantCaptionedElementIndexer() {
            return this.constantCaptionedElementIndexer;
        }

        @NotNull
        public final TreeMultimap<Integer, Element> getPseudoConstantTextBlockIndexer() {
            return this.pseudoConstantTextBlockIndexer;
        }

        @NotNull
        public final TreeMultimap<String, TextNode> getCategorizedTextNodeIndexer() {
            return this.categorizedTextNodeIndexer;
        }

        public final void clear() {
            this.orderedRegionalTextNodeIndexer.clear();
            this.constantCaptionedElementIndexer.clear();
            this.pseudoConstantTextBlockIndexer.clear();
            this.categorizedTextNodeIndexer.clear();
        }
    }

    public LimitedPageCorpus(@NotNull HNormUrl hNormUrl, @NotNull Map<String, VisualDocument> map, @NotNull ImmutableConfig immutableConfig) {
        Intrinsics.checkNotNullParameter(hNormUrl, "portalUrl");
        Intrinsics.checkNotNullParameter(map, "samples");
        Intrinsics.checkNotNullParameter(immutableConfig, "conf");
        this.portalUrl = hNormUrl;
        this.samples = map;
        this.conf = immutableConfig;
        this.logger = LogsKt.getLogger(Reflection.getOrCreateKotlinClass(LimitedPageCorpus.class));
        this.taskLogger = LogsKt.getLogger(Reflection.getOrCreateKotlinClass(LimitedPageCorpus.class), ".Task");
        this.startTime = Instant.now();
        this.stopWatch = new Slf4JStopWatch();
        this.documentUrls = new LinkedHashSet();
        this.qualifiedDocuments = new ArrayList();
        this.documentFrequency = new FrequencyManager<>();
        this.dff = new Frequency<>((String) null, 1, (DefaultConstructorMarker) null);
        TreeMultimap<RegionalTile, Node> create = TreeMultimap.create(ComparisonsKt.naturalOrder(), NodesKt.getNodePositionComparator());
        Intrinsics.checkNotNullExpressionValue(create, "create(...)");
        this.regionalTileNodeIndexer = create;
        this.regionalTileFrequency = new Frequency<>((String) null, 1, (DefaultConstructorMarker) null);
        TreeMultimap<Integer, Element> create2 = TreeMultimap.create(ComparisonsKt.naturalOrder(), NodesKt.getNodeComparator());
        Intrinsics.checkNotNullExpressionValue(create2, "create(...)");
        this.captionedElementIndexer = create2;
        this.unusedData = new UnusedData();
        TreeMultimap<Integer, Node> create3 = TreeMultimap.create(ComparisonsKt.naturalOrder(), new Comparator() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$special$$inlined$compareBy$1
            @Override // java.util.Comparator
            public final int compare(T t, T t2) {
                return ComparisonsKt.compareValues(Integer.valueOf(NodeExtKt.getTop((Node) t)), Integer.valueOf(NodeExtKt.getTop((Node) t2)));
            }
        });
        Intrinsics.checkNotNullExpressionValue(create3, "create(...)");
        this.layoutLeftIndexer = create3;
        this.components = new ArrayList();
        TreeMultimap<String, VisualComponent> create4 = TreeMultimap.create();
        Intrinsics.checkNotNullExpressionValue(create4, "create(...)");
        this.uniquePathComponents = create4;
        this.naturalComponents = new ArrayList();
        this.componentElements = new ArrayList();
    }

    @NotNull
    public final HNormUrl getPortalUrl() {
        return this.portalUrl;
    }

    private final HarvestOptions getOptions() {
        return this.portalUrl.getHOptions();
    }

    @NotNull
    public final Set<String> getDocumentUrls$scent_auto_mining() {
        return this.documentUrls;
    }

    @NotNull
    public final List<VisualDocument> getQualifiedDocuments$scent_auto_mining() {
        return this.qualifiedDocuments;
    }

    @Deprecated(message = "Used to calculate node.district, but not used currently")
    private static /* synthetic */ void getRegionalTileFrequency$annotations() {
    }

    public final double getRecoverableConfidence$scent_auto_mining() {
        return this.recoverableConfidence;
    }

    public final void setRecoverableConfidence$scent_auto_mining(double d) {
        this.recoverableConfidence = d;
    }

    @NotNull
    public final List<VisualComponent> getComponents$scent_auto_mining() {
        return this.components;
    }

    @NotNull
    public final TreeMultimap<String, VisualComponent> getUniquePathComponents$scent_auto_mining() {
        return this.uniquePathComponents;
    }

    @NotNull
    public final List<VisualComponent> getNaturalComponents$scent_auto_mining() {
        return this.naturalComponents;
    }

    @NotNull
    public final List<Element> getComponentElements$scent_auto_mining() {
        return this.componentElements;
    }

    public final void calculateCorpusFeatures() {
        if (!this.qualifiedDocuments.isEmpty()) {
            return;
        }
        if (this.samples.size() < 5) {
            this.logger.warn("Too few samples: " + this.samples.size());
            return;
        }
        this.logger.info("Corpus feature calculation start - {}", Systems.INSTANCE.getHeapMessage());
        try {
            try {
                calculateCorpusFeatures0();
                this.logger.info("Corpus feature calculation finished - {}", Systems.INSTANCE.getHeapMessage());
                Slf4JStopWatch slf4JStopWatch = this.stopWatch;
                if (slf4JStopWatch != null) {
                    slf4JStopWatch.lap("detectRecoverableDocuments");
                }
            } catch (OutOfMemoryError e) {
                e.printStackTrace(System.err);
                this.logger.info("Corpus feature calculation finished - {}", Systems.INSTANCE.getHeapMessage());
            }
        } catch (Throwable th) {
            this.logger.info("Corpus feature calculation finished - {}", Systems.INSTANCE.getHeapMessage());
            throw th;
        }
    }

    @NotNull
    public final List<VisualDocument> findQualifiedDocuments() {
        if (this.qualifiedDocuments.isEmpty()) {
            chooseQualifiedDocuments();
        }
        return this.qualifiedDocuments;
    }

    public final void analyse() {
        this.logger.info("Analysis start - {}", Systems.INSTANCE.getHeapMessage());
        calculateMoreCorpusFeatures();
        if (!LangKt.alwaysFalse()) {
            findComponents();
            Slf4JStopWatch slf4JStopWatch = this.stopWatch;
            if (slf4JStopWatch != null) {
                slf4JStopWatch.lap("partition");
            }
        }
        if (this.taskLogger.isInfoEnabled()) {
            reportComponents();
        }
    }

    private final void calculateCorpusFeatures0() {
        if (this.samples.isEmpty()) {
            this.logger.warn("No sample in the corpus");
            return;
        }
        calculateDocumentFrequency();
        Slf4JStopWatch slf4JStopWatch = this.stopWatch;
        if (slf4JStopWatch != null) {
            slf4JStopWatch.lap("buildDocumentFrequency");
        }
        assembleRegionalTextNodeIndex();
        Slf4JStopWatch slf4JStopWatch2 = this.stopWatch;
        if (slf4JStopWatch2 != null) {
            slf4JStopWatch2.lap("buildRegionalTextNodeIndex");
        }
        calculateTextNodeDocFrequency();
        Slf4JStopWatch slf4JStopWatch3 = this.stopWatch;
        if (slf4JStopWatch3 != null) {
            slf4JStopWatch3.lap("calculateTextNodeDocFrequency");
        }
    }

    private final void calculateMoreCorpusFeatures() {
        if (this.qualifiedDocuments.isEmpty()) {
            return;
        }
        if (getOptions().getDiagnose() && getOptions().getNVerbose() > 0) {
            Iterator it = CollectionsKt.take(CollectionsKt.shuffled(new IntRange(0, this.qualifiedDocuments.size() - 1)), getOptions().getNVerbose()).iterator();
            while (it.hasNext()) {
                this.qualifiedDocuments.get(((Number) it.next()).intValue()).setVerbose(true);
            }
        }
        assembleLayoutLeftIndexer();
        assembleCaptionedElementIndexer();
        styleFeaturesToIndex();
        Slf4JStopWatch slf4JStopWatch = this.stopWatch;
        if (slf4JStopWatch != null) {
            slf4JStopWatch.lap("styleFeaturesToIndex");
        }
        nominalFeaturesToIndex();
        Slf4JStopWatch slf4JStopWatch2 = this.stopWatch;
        if (slf4JStopWatch2 != null) {
            slf4JStopWatch2.lap("nominalFeaturesToIndex");
        }
    }

    private final void findComponents() {
        if (this.qualifiedDocuments.isEmpty()) {
            this.logger.warn("No qualified documents, do not find any component");
            return;
        }
        Iterator<T> it = this.qualifiedDocuments.iterator();
        while (it.hasNext()) {
            ((VisualDocument) it.next()).findComponents();
        }
        Slf4JStopWatch slf4JStopWatch = this.stopWatch;
        if (slf4JStopWatch != null) {
            slf4JStopWatch.lap("findComponents");
        }
        findLayoutComponents();
        Slf4JStopWatch slf4JStopWatch2 = this.stopWatch;
        if (slf4JStopWatch2 != null) {
            slf4JStopWatch2.lap("findLayoutComponents");
        }
        arrangeComponents();
        Slf4JStopWatch slf4JStopWatch3 = this.stopWatch;
        if (slf4JStopWatch3 != null) {
            slf4JStopWatch3.lap("arrangeComponents");
        }
        List<VisualDocument> list = this.qualifiedDocuments;
        List<VisualComponent> list2 = this.components;
        Iterator<T> it2 = list.iterator();
        while (it2.hasNext()) {
            CollectionsKt.addAll(list2, ((VisualDocument) it2.next()).getComponents());
        }
        List<VisualDocument> list3 = this.qualifiedDocuments;
        List<VisualComponent> list4 = this.naturalComponents;
        Iterator<T> it3 = list3.iterator();
        while (it3.hasNext()) {
            CollectionsKt.addAll(list4, ((VisualDocument) it3.next()).getNaturalComponents());
        }
        List<VisualComponent> list5 = this.components;
        List<Element> list6 = this.componentElements;
        Iterator<T> it4 = list5.iterator();
        while (it4.hasNext()) {
            list6.add(((VisualComponent) it4.next()).getElement());
        }
    }

    @Override // java.lang.AutoCloseable
    public void close() {
        Iterator<T> it = this.samples.values().iterator();
        while (it.hasNext()) {
            VisualDocumentKt.setVisualDocument(((VisualDocument) it.next()).getDocument(), null);
        }
        this.documentUrls.clear();
        this.unusedData.clear();
        this.qualifiedDocuments.clear();
        this.documentFrequency.clear();
        this.regionalTileNodeIndexer.clear();
        this.regionalTileFrequency.clear();
        if (!this.captionedElementIndexer.isEmpty()) {
            throw new IllegalArgumentException("Failed requirement.".toString());
        }
        if (!this.layoutLeftIndexer.isEmpty()) {
            throw new IllegalArgumentException("Failed requirement.".toString());
        }
        this.components.clear();
        this.uniquePathComponents.clear();
        this.naturalComponents.clear();
        this.componentElements.clear();
    }

    private final void nominalFeaturesToIndex() {
        final FF[] ffArr = {FF.TAG, FF.NID, FF.NCS};
        final LimitedPageCorpus$nominalFeaturesToIndex$nominalExtractors$1 limitedPageCorpus$nominalFeaturesToIndex$nominalExtractors$1 = new Function1<Element, String[]>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$nominalFeaturesToIndex$nominalExtractors$1
            @NotNull
            public final String[] invoke(@NotNull Element element) {
                Intrinsics.checkNotNullParameter(element, "it");
                String tagName = element.tagName();
                Intrinsics.checkNotNullExpressionValue(tagName, "tagName(...)");
                String id = element.id();
                Intrinsics.checkNotNullExpressionValue(id, "id(...)");
                String className = element.className();
                Intrinsics.checkNotNullExpressionValue(className, "className(...)");
                return new String[]{tagName, id, className};
            }
        };
        ArrayList arrayList = new ArrayList(ffArr.length);
        for (FF ff : ffArr) {
            arrayList.add(this.documentFrequency.computeIfAbsent(ff.getAlias()).ordinalMap());
        }
        final ArrayList arrayList2 = arrayList;
        int i = 0;
        for (Object obj : this.qualifiedDocuments) {
            final int i2 = i;
            i++;
            if (i2 < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            VisualDocument visualDocument = (VisualDocument) obj;
            NodesKt.forEach(visualDocument.getBody(), true, new Function1<Node, Unit>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$nominalFeaturesToIndex$1$1
                /* JADX INFO: Access modifiers changed from: package-private */
                /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
                {
                    super(1);
                }

                public final void invoke(@NotNull Node node) {
                    Intrinsics.checkNotNullParameter(node, "it");
                    ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setPid(node, 1 + i2);
                }

                public /* bridge */ /* synthetic */ Object invoke(Object obj2) {
                    invoke((Node) obj2);
                    return Unit.INSTANCE;
                }
            });
            NodesKt.forEachMatching(visualDocument.getBody(), new Function1<Node, Boolean>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$nominalFeaturesToIndex$1$2
                @NotNull
                public final Boolean invoke(@NotNull Node node) {
                    Intrinsics.checkNotNullParameter(node, "it");
                    return Boolean.valueOf(NodeCharactersKt.isTile(node));
                }
            }, new Function1<Node, Unit>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$nominalFeaturesToIndex$1$3
                /* JADX INFO: Access modifiers changed from: package-private */
                /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
                /* JADX WARN: Multi-variable type inference failed */
                {
                    super(1);
                }

                public final void invoke(@NotNull Node node) {
                    Intrinsics.checkNotNullParameter(node, "node");
                    Object[] objArr = (Object[]) limitedPageCorpus$nominalFeaturesToIndex$nominalExtractors$1.invoke(NodeExtKt.getBestElement(node));
                    FF[] ffArr2 = ffArr;
                    List<Map<String, Integer>> list = arrayList2;
                    int i3 = 0;
                    for (Object obj2 : objArr) {
                        int i4 = i3;
                        i3++;
                        String str = (String) obj2;
                        if (!StringsKt.isBlank(str)) {
                            int key = ffArr2[i4].getKey();
                            Integer num = list.get(i4).get(str);
                            if (num != null) {
                                NodeExtKt.setFeature(NodeExtKt.getBestElement(node), key, 1 + num.intValue());
                            }
                        }
                    }
                }

                public /* bridge */ /* synthetic */ Object invoke(Object obj2) {
                    invoke((Node) obj2);
                    return Unit.INSTANCE;
                }
            });
        }
    }

    private final void styleFeaturesToIndex() {
        final FF[] ffArr = {FF.FTSZ, FF.COLR, FF.BCOLR};
        final Function1<Element, List<? extends String>> function1 = new Function1<Element, List<? extends String>>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$styleFeaturesToIndex$styleExtractors$1
            /* JADX INFO: Access modifiers changed from: package-private */
            /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
            {
                super(1);
            }

            @NotNull
            public final List<String> invoke(@NotNull Element element) {
                Intrinsics.checkNotNullParameter(element, "it");
                String attr = NodeExtKt.getBestElement((Node) element).attr("st");
                Intrinsics.checkNotNullExpressionValue(attr, "attr(...)");
                List<String> split = new Regex(";\\s*").split(attr, 0);
                List<String> list = split.size() == ffArr.length ? split : null;
                return list == null ? CollectionsKt.emptyList() : list;
            }
        };
        ArrayList arrayList = new ArrayList(ffArr.length);
        for (FF ff : ffArr) {
            arrayList.add(this.documentFrequency.computeIfAbsent(ff.getAlias()).ordinalMap());
        }
        final ArrayList arrayList2 = arrayList;
        int i = 0;
        for (Object obj : this.qualifiedDocuments) {
            int i2 = i;
            i++;
            if (i2 < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            NodesKt.forEachMatching(((VisualDocument) obj).getBody(), new Function1<Node, Boolean>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$styleFeaturesToIndex$1$1
                @NotNull
                public final Boolean invoke(@NotNull Node node) {
                    Intrinsics.checkNotNullParameter(node, "it");
                    return Boolean.valueOf(NodeCharactersKt.isTile(node));
                }
            }, new Function1<Node, Unit>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$styleFeaturesToIndex$1$2
                /* JADX INFO: Access modifiers changed from: package-private */
                /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
                /* JADX WARN: Multi-variable type inference failed */
                {
                    super(1);
                }

                public final void invoke(@NotNull Node node) {
                    Intrinsics.checkNotNullParameter(node, "node");
                    Iterable iterable = (Iterable) function1.invoke(NodeExtKt.getBestElement(node));
                    FF[] ffArr2 = ffArr;
                    List<Map<String, Integer>> list = arrayList2;
                    int i3 = 0;
                    for (Object obj2 : iterable) {
                        int i4 = i3;
                        i3++;
                        if (i4 < 0) {
                            CollectionsKt.throwIndexOverflow();
                        }
                        String str = (String) obj2;
                        int key = ffArr2[i4].getKey();
                        Integer num = list.get(i4).get(str);
                        if (num != null) {
                            NodeExtKt.setFeature(NodeExtKt.getBestElement(node), key, 1 + num.intValue());
                        }
                    }
                }

                public /* bridge */ /* synthetic */ Object invoke(Object obj2) {
                    invoke((Node) obj2);
                    return Unit.INSTANCE;
                }
            });
        }
    }

    private final void divideDistricts() {
    }

    private final void calculateDocumentFrequency() {
        for (VisualDocument visualDocument : this.samples.values()) {
            if (!(!visualDocument.getTemporaryInternalIndexers$scent_auto_mining().getTermFrequency$scent_auto_mining().isEmpty())) {
                throw new IllegalArgumentException("Doc.termFrequency is required to calculate document frequency".toString());
            }
            FrequencyManager<String> termFrequency$scent_auto_mining = visualDocument.getTemporaryInternalIndexers$scent_auto_mining().getTermFrequency$scent_auto_mining();
            Function2<String, Frequency<String>, Unit> function2 = new Function2<String, Frequency<String>, Unit>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$calculateDocumentFrequency$1$2
                /* JADX INFO: Access modifiers changed from: package-private */
                {
                    super(2);
                }

                public final void invoke(@NotNull String str, @NotNull Frequency<String> frequency) {
                    FrequencyManager frequencyManager;
                    Intrinsics.checkNotNullParameter(str, "name");
                    Intrinsics.checkNotNullParameter(frequency, "tf");
                    frequencyManager = LimitedPageCorpus.this.documentFrequency;
                    Frequency computeIfAbsent = frequencyManager.computeIfAbsent(str);
                    Iterator it = frequency.elementSet().iterator();
                    while (it.hasNext()) {
                        computeIfAbsent.add((String) it.next());
                    }
                }

                public /* bridge */ /* synthetic */ Object invoke(Object obj, Object obj2) {
                    invoke((String) obj, (Frequency<String>) obj2);
                    return Unit.INSTANCE;
                }
            };
            termFrequency$scent_auto_mining.forEach((v1, v2) -> {
                calculateDocumentFrequency$lambda$13$lambda$12(r1, v1, v2);
            });
        }
    }

    private final void assembleRegionalTextNodeIndex() {
        Iterator<VisualDocument> it = this.samples.values().iterator();
        while (it.hasNext()) {
            it.next().getTemporaryInternalIndexers$scent_auto_mining().moveRegionalTileNodeIndexerTo(this.regionalTileNodeIndexer);
        }
    }

    private final void addLabeledDocuments() {
        if (getOptions().getTrustSamples()) {
            this.qualifiedDocuments.addAll(this.samples.values());
            return;
        }
        for (VisualDocument visualDocument : this.samples.values()) {
            if (DomQueriesKt.any(visualDocument.getDocument(), new Function1<Node, Boolean>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$addLabeledDocuments$1$labeled$1
                @NotNull
                public final Boolean invoke(@NotNull Node node) {
                    Intrinsics.checkNotNullParameter(node, "it");
                    return Boolean.valueOf(!NodeExtKt.getMlLabels(node).isEmpty());
                }
            })) {
                this.qualifiedDocuments.add(visualDocument);
            }
        }
    }

    private final void calculateTextNodeDocFrequency() {
        String str;
        if (this.regionalTileNodeIndexer.isEmpty()) {
            this.logger.warn("No regional text node in the indexer, we can not calculate the text node doc frequency");
            return;
        }
        if (!(this.regionalTileNodeIndexer.size() > this.qualifiedDocuments.size())) {
            throw new IllegalArgumentException(("RegionalTileNodeIndexer size " + this.regionalTileNodeIndexer.size() + ", document size: " + this.qualifiedDocuments.size()).toString());
        }
        double min = Math.min(5.0d, 0.8d * this.samples.size());
        NavigableMap asMap = this.regionalTileNodeIndexer.asMap();
        Intrinsics.checkNotNullExpressionValue(asMap, "asMap(...)");
        for (Map.Entry entry : asMap.entrySet()) {
            RegionalTile regionalTile = (RegionalTile) entry.getKey();
            Collection<Node> collection = (Collection) entry.getValue();
            Intrinsics.checkNotNull(collection);
            Collection<Node> collection2 = collection;
            HashSet hashSet = new HashSet();
            for (Node node : collection2) {
                Intrinsics.checkNotNull(node);
                hashSet.add(NodeExtKt.getLocation(NodeExtKt.getOwnerDocument(node)));
            }
            int size = hashSet.size();
            regionalTile.setDf(size);
            for (Node node2 : collection) {
                Intrinsics.checkNotNull(node2);
                ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setTextDocFrequency(node2, size);
            }
            if (size >= min) {
                this.dff.add(Integer.valueOf(size));
            }
        }
        if (!this.dff.isEmpty()) {
            for (VisualDocument visualDocument : this.qualifiedDocuments) {
                ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setCorpusSize(visualDocument.getDocument(), this.qualifiedDocuments.size());
                Node ownerBody = visualDocument.getDocument().getExtension().getOwnerBody();
                if (ownerBody != null) {
                    ownerBody.attr("a-corpus-time", this.startTime.toString());
                }
                Node ownerBody2 = visualDocument.getDocument().getExtension().getOwnerBody();
                if (ownerBody2 != null) {
                    ownerBody2.attr("a-corpus-size", String.valueOf(this.qualifiedDocuments.size()));
                }
            }
            int intValue = ((Number) this.dff.getMode()).intValue();
            Collection<Node> values = this.regionalTileNodeIndexer.values();
            Intrinsics.checkNotNullExpressionValue(values, "values(...)");
            for (Node node3 : values) {
                Intrinsics.checkNotNull(node3);
                ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setTextDocFrequency(node3, ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getTextDocFrequency(node3) / intValue);
            }
            reportDff(this.dff);
            return;
        }
        this.logger.warn("!!! No regional texts shared by " + this.samples.size() + " sample documents (df rate: " + 4605380978949069210 + ") !!!");
        if (this.logger.isInfoEnabled()) {
            Collection values2 = this.regionalTileNodeIndexer.asMap().values();
            Intrinsics.checkNotNullExpressionValue(values2, "<get-values>(...)");
            Collection<Collection> collection3 = values2;
            TreeSet treeSet = new TreeSet(ComparisonsKt.reverseOrder());
            for (Collection collection4 : collection3) {
                Intrinsics.checkNotNull(collection4);
                Collection collection5 = collection4;
                HashSet hashSet2 = new HashSet();
                Iterator it = collection5.iterator();
                while (it.hasNext()) {
                    Node ownerDocument = ((Node) it.next()).ownerDocument();
                    if (ownerDocument != null) {
                        Intrinsics.checkNotNull(ownerDocument);
                        str = NodeExtKt.getLocation(ownerDocument);
                    } else {
                        str = null;
                    }
                    hashSet2.add(str);
                }
                Integer valueOf = Integer.valueOf(hashSet2.size());
                Integer num = valueOf.intValue() > 1 ? valueOf : null;
                if (num != null) {
                    treeSet.add(num);
                }
            }
            this.logger.info("Document frequency of regional texts: " + CollectionsKt.joinToString$default(treeSet, (CharSequence) null, (CharSequence) null, (CharSequence) null, 0, (CharSequence) null, new Function1<Integer, CharSequence>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$calculateTextNodeDocFrequency$s$1
                @NotNull
                public final CharSequence invoke(int i) {
                    return String.valueOf(i);
                }

                public /* bridge */ /* synthetic */ Object invoke(Object obj) {
                    return invoke(((Number) obj).intValue());
                }
            }, 31, (Object) null));
        }
    }

    private final void chooseQualifiedDocuments() {
        Object obj;
        String str;
        if (getOptions().getTrustSamples()) {
            this.qualifiedDocuments.addAll(this.samples.values());
        } else if (!this.dff.isEmpty()) {
            int intValue = ((Number) this.dff.getMode()).intValue();
            Collection values = this.regionalTileNodeIndexer.asMap().values();
            Intrinsics.checkNotNullExpressionValue(values, "<get-values>(...)");
            Iterator it = values.iterator();
            while (true) {
                if (!it.hasNext()) {
                    obj = null;
                    break;
                }
                Object next = it.next();
                if (((Collection) next).size() == intValue) {
                    obj = next;
                    break;
                }
            }
            Collection collection = (Collection) obj;
            if (collection != null) {
                Collection collection2 = collection;
                Set<String> set = this.documentUrls;
                Iterator it2 = collection2.iterator();
                while (it2.hasNext()) {
                    Node ownerDocument = ((Node) it2.next()).ownerDocument();
                    if (ownerDocument != null) {
                        Intrinsics.checkNotNull(ownerDocument);
                        str = NodeExtKt.getLocation(ownerDocument);
                    } else {
                        str = null;
                    }
                    if (str != null) {
                        set.add(str);
                    }
                }
            }
            Collection<VisualDocument> values2 = this.samples.values();
            List<VisualDocument> list = this.qualifiedDocuments;
            for (VisualDocument visualDocument : values2) {
                VisualDocument visualDocument2 = this.documentUrls.contains(NodeExtKt.getLocation(visualDocument.getDocument())) ? visualDocument : null;
                if (visualDocument2 != null) {
                    list.add(visualDocument2);
                }
            }
        }
        this.samples.clear();
    }

    private final void reportDff(Frequency<Integer> frequency) {
        Multiset.Entry mostEntry = frequency.getMostEntry();
        Integer num = (Integer) mostEntry.getElement();
        int count = mostEntry.getCount();
        double modePercentage = frequency.getModePercentage();
        this.recoverableConfidence = modePercentage;
        Logger logger = this.logger;
        Logger logger2 = logger.isInfoEnabled() ? logger : null;
        if (logger2 != null) {
            StringCompanionObject stringCompanionObject = StringCompanionObject.INSTANCE;
            Object[] objArr = {num, Integer.valueOf(count), Integer.valueOf(frequency.getTotalFrequency()), Double.valueOf(100 * modePercentage)};
            String format = String.format("Total %d documents share %d terms out of %d counted (%4.2f%%)", Arrays.copyOf(objArr, objArr.length));
            Intrinsics.checkNotNullExpressionValue(format, "format(...)");
            logger2.info(format);
        }
        if (!(!this.qualifiedDocuments.isEmpty())) {
            this.logger.warn("No recoverable documents after pre-processing !!");
            this.logger.info(Frequency.toReport$default(frequency, "Term frequency of df report, `dff = tf(df, <df>)`:\n", (String) null, 2, (Object) null));
            return;
        }
        Logger logger3 = this.logger;
        StringCompanionObject stringCompanionObject2 = StringCompanionObject.INSTANCE;
        Object[] objArr2 = {Integer.valueOf(this.qualifiedDocuments.size()), Integer.valueOf(this.samples.size()), Double.valueOf(100 * modePercentage)};
        String format2 = String.format("Find %d/%d recoverable documents with confidence %4.2f%%", Arrays.copyOf(objArr2, objArr2.length));
        Intrinsics.checkNotNullExpressionValue(format2, "format(...)");
        logger3.info(format2);
        this.logger.info(Frequency.toReport$default(frequency, "Term frequency of df report, `dff = tf(df, <df>)`:\n", (String) null, 2, (Object) null));
    }

    private final void assembleLayoutLeftIndexer() {
        Iterator<VisualDocument> it = this.qualifiedDocuments.iterator();
        while (it.hasNext()) {
            it.next().getTemporaryInternalIndexers$scent_auto_mining().moveLayoutLeftIndexerTo(this.layoutLeftIndexer);
        }
    }

    private final void assembleCaptionedElementIndexer() {
        for (VisualDocument visualDocument : this.qualifiedDocuments) {
            visualDocument.buildCaptionedElementIndexer$scent_auto_mining();
            visualDocument.getTemporaryInternalIndexers$scent_auto_mining().moveCaptionedElementIndexerTo(this.captionedElementIndexer);
        }
    }

    private final void findLayoutComponents() {
        if (this.qualifiedDocuments.isEmpty()) {
            return;
        }
        if (!(!this.layoutLeftIndexer.isEmpty())) {
            throw new IllegalArgumentException("LayoutLeftIndexer should not be empty.".toString());
        }
        NavigableMap asMap = this.layoutLeftIndexer.asMap();
        Intrinsics.checkNotNullExpressionValue(asMap, "asMap(...)");
        for (Map.Entry entry : asMap.entrySet()) {
            Integer num = (Integer) entry.getKey();
            Collection<Element> collection = (Collection) entry.getValue();
            Intrinsics.checkNotNull(collection);
            Collection<Node> collection2 = collection;
            Frequency frequency = (Collection) new Frequency((String) null, 1, (DefaultConstructorMarker) null);
            for (Node node : collection2) {
                Intrinsics.checkNotNull(node);
                frequency.add(ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getRelativePath(node));
            }
            String str = (String) frequency.getMostEntry().getElement();
            if (r0.getCount() / this.qualifiedDocuments.size() >= 0.8d) {
                for (Element element : collection) {
                    String str2 = "lay(" + num + ")";
                    Document ownerDocument = element.ownerDocument();
                    if (ownerDocument != null) {
                        Intrinsics.checkNotNull(ownerDocument);
                        VisualDocument visualDocument = VisualDocumentKt.getVisualDocument(ownerDocument);
                        if (visualDocument != null) {
                            Intrinsics.checkNotNull(element, "null cannot be cast to non-null type org.jsoup.nodes.Element");
                            Pair<VisualComponent, Boolean> addComponentIfAbsent = visualDocument.addComponentIfAbsent(element, VisualComponentType.LAYOUT, str2);
                            VisualComponent visualComponent = (VisualComponent) addComponentIfAbsent.component1();
                            boolean booleanValue = ((Boolean) addComponentIfAbsent.component2()).booleanValue();
                            if (visualComponent != null && booleanValue) {
                                Intrinsics.checkNotNull(str);
                                visualComponent.setUniquePath(str);
                                this.uniquePathComponents.put(str, visualComponent);
                            }
                        }
                    }
                }
            }
        }
        this.layoutLeftIndexer.clear();
    }

    private final void supplementComponents() {
        List<VisualDocument> list = this.qualifiedDocuments;
        HashSet hashSet = new HashSet();
        Iterator<T> it = list.iterator();
        while (it.hasNext()) {
            TreeSet<VisualComponent> components = ((VisualDocument) it.next()).getComponents();
            ArrayList arrayList = new ArrayList(CollectionsKt.collectionSizeOrDefault(components, 10));
            Iterator<T> it2 = components.iterator();
            while (it2.hasNext()) {
                arrayList.add(((VisualComponent) it2.next()).getRelativePath());
            }
            CollectionsKt.addAll(hashSet, arrayList);
        }
        HashSet hashSet2 = hashSet;
        List<VisualDocument> list2 = this.qualifiedDocuments;
        final LinkedHashMap linkedHashMap = new LinkedHashMap(RangesKt.coerceAtLeast(MapsKt.mapCapacity(CollectionsKt.collectionSizeOrDefault(list2, 10)), 16));
        for (VisualDocument visualDocument : list2) {
            String baseURI = visualDocument.getBaseURI();
            TreeSet<VisualComponent> components2 = visualDocument.getComponents();
            HashSet hashSet3 = new HashSet();
            Iterator<T> it3 = components2.iterator();
            while (it3.hasNext()) {
                hashSet3.add(((VisualComponent) it3.next()).getRelativePath());
            }
            Pair pair = TuplesKt.to(baseURI, hashSet3);
            linkedHashMap.put(pair.getFirst(), pair.getSecond());
        }
        this.logger.info("There are {} relative paths in {} qualified documents", Integer.valueOf(hashSet2.size()), Integer.valueOf(this.qualifiedDocuments.size()));
        HashSet hashSet4 = hashSet2;
        LinkedHashMap linkedHashMap2 = new LinkedHashMap(RangesKt.coerceAtLeast(MapsKt.mapCapacity(CollectionsKt.collectionSizeOrDefault(hashSet4, 10)), 16));
        for (Object obj : hashSet4) {
            final String str = (String) obj;
            linkedHashMap2.put(obj, SequencesKt.toList(SequencesKt.mapNotNull(SequencesKt.filter(CollectionsKt.asSequence(this.qualifiedDocuments), new Function1<VisualDocument, Boolean>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$supplementComponents$componentCandidateGroups$1$1
                /* JADX INFO: Access modifiers changed from: package-private */
                /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
                /* JADX WARN: Multi-variable type inference failed */
                {
                    super(1);
                }

                @NotNull
                public final Boolean invoke(@NotNull VisualDocument visualDocument2) {
                    Intrinsics.checkNotNullParameter(visualDocument2, "it");
                    HashSet<String> hashSet5 = linkedHashMap.get(visualDocument2.getBaseURI());
                    return Boolean.valueOf(hashSet5 != null ? !hashSet5.contains(str) : false);
                }
            }), new Function1<VisualDocument, Element>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$supplementComponents$componentCandidateGroups$1$2
                /* JADX INFO: Access modifiers changed from: package-private */
                /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
                {
                    super(1);
                }

                @Nullable
                public final Element invoke(@NotNull VisualDocument visualDocument2) {
                    Intrinsics.checkNotNullParameter(visualDocument2, "it");
                    return DomQueriesKt.selectFirstOrNull(visualDocument2.getDocument(), str);
                }
            })));
        }
        LinkedHashMap linkedHashMap3 = linkedHashMap2;
        this.logger.info("There are {} node groups across the corpus, grouped by relative path", Integer.valueOf(linkedHashMap3.size()));
        AtomicInteger atomicInteger = new AtomicInteger();
        for (Map.Entry entry : linkedHashMap3.entrySet()) {
            String str2 = (String) entry.getKey();
            List<Element> list3 = (List) entry.getValue();
            if (list3.size() / this.qualifiedDocuments.size() >= 0.8d) {
                for (Element element : list3) {
                    String str3 = "sup(" + ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getAlignedLeft((Node) element) + ")/" + ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getVcTiles((Node) element);
                    Document ownerDocument = element.ownerDocument();
                    if (ownerDocument != null) {
                        Intrinsics.checkNotNull(ownerDocument);
                        VisualDocument visualDocument2 = VisualDocumentKt.getVisualDocument(ownerDocument);
                        if (visualDocument2 != null) {
                            Pair<VisualComponent, Boolean> addComponentIfAbsent = visualDocument2.addComponentIfAbsent(element, VisualComponentType.SUPPLEMENT, str3);
                            VisualComponent visualComponent = (VisualComponent) addComponentIfAbsent.component1();
                            boolean booleanValue = ((Boolean) addComponentIfAbsent.component2()).booleanValue();
                            if (visualComponent != null) {
                                atomicInteger.incrementAndGet();
                                visualComponent.setUniquePath(str2);
                                this.uniquePathComponents.put(str2, visualComponent);
                            }
                            if (!booleanValue) {
                            }
                        }
                    }
                }
            }
        }
        this.logger.info("Supplemented {} components", atomicInteger);
    }

    private final void arrangeComponents() {
        for (VisualDocument visualDocument : this.qualifiedDocuments) {
            visualDocument.buildComponentTree();
            visualDocument.arrangeComponents();
        }
    }

    private final void reportComponents() {
        List<VisualDocument> list = this.qualifiedDocuments;
        ArrayList arrayList = new ArrayList(CollectionsKt.collectionSizeOrDefault(list, 10));
        Iterator<T> it = list.iterator();
        while (it.hasNext()) {
            arrayList.add(Integer.valueOf(((VisualDocument) it.next()).getComponents().size()));
        }
        String joinToString$default = CollectionsKt.joinToString$default(CollectionsKt.sortedDescending(arrayList), (CharSequence) null, (CharSequence) null, (CharSequence) null, 0, (CharSequence) null, (Function1) null, 63, (Object) null);
        this.logger.info("Recognized components in each document: " + joinToString$default);
        this.taskLogger.info("Recognized components in each document: " + joinToString$default);
        List<VisualDocument> list2 = this.qualifiedDocuments;
        ArrayList arrayList2 = new ArrayList();
        for (Object obj : list2) {
            if (((VisualDocument) obj).getVerbose()) {
                arrayList2.add(obj);
            }
        }
        ArrayList arrayList3 = arrayList2;
        ArrayList arrayList4 = new ArrayList();
        Iterator it2 = arrayList3.iterator();
        while (it2.hasNext()) {
            CollectionsKt.addAll(arrayList4, ((VisualDocument) it2.next()).getComponents());
        }
        ArrayList arrayList5 = arrayList4;
        ResultSet resultSet = VisualComponent.Companion.toResultSet(arrayList5, getOptions());
        StringBuilder sb = new StringBuilder("Show " + arrayList5.size() + " sample components:\n");
        new ResultSetFormatter(resultSet, false, true, 0, sb, 10, (DefaultConstructorMarker) null).format();
        this.taskLogger.info(sb.toString());
        if (!(!this.naturalComponents.isEmpty())) {
            this.logger.info("No natural components");
            return;
        }
        List<VisualComponent> list3 = this.naturalComponents;
        HashSet hashSet = new HashSet();
        Iterator<T> it3 = list3.iterator();
        while (it3.hasNext()) {
            hashSet.add(((VisualComponent) it3.next()).getName());
        }
        this.logger.info("Total {} natural components: {}", Integer.valueOf(this.naturalComponents.size()), hashSet);
    }

    private final void analysisVariables() {
        Iterator<T> it = this.qualifiedDocuments.iterator();
        while (it.hasNext()) {
            analysisVariables((VisualDocument) it.next());
        }
    }

    private final void analysisVariables(VisualDocument visualDocument) {
        for (VisualComponent visualComponent : visualDocument.getComponents()) {
            visualComponent.findVariables();
            if (visualDocument.getVerbose() && this.taskLogger.isDebugEnabled()) {
                NavigableMap asMap = visualComponent.getVariables().asMap();
                Intrinsics.checkNotNullExpressionValue(asMap, "asMap(...)");
                NavigableMap navigableMap = asMap;
                ArrayList arrayList = new ArrayList(navigableMap.size());
                for (Map.Entry entry : navigableMap.entrySet()) {
                    Node node = (Node) entry.getKey();
                    Collection collection = (Collection) entry.getValue();
                    String cleanText = NodeExtKt.getCleanText(node);
                    Intrinsics.checkNotNull(collection);
                    arrayList.add("<" + cleanText + " : " + CollectionsKt.joinToString$default(collection, (CharSequence) null, (CharSequence) null, (CharSequence) null, 0, (CharSequence) null, new Function1<Node, CharSequence>() { // from class: ai.platon.scent.analysis.corpus.LimitedPageCorpus$analysisVariables$2$1$1
                        @NotNull
                        public final CharSequence invoke(Node node2) {
                            return NodeExtKt.getCleanText(node2);
                        }
                    }, 31, (Object) null) + ">");
                }
                String joinToString$default = CollectionsKt.joinToString$default(arrayList, (CharSequence) null, (CharSequence) null, (CharSequence) null, 0, (CharSequence) null, (Function1) null, 63, (Object) null);
                String str = !StringsKt.isBlank(joinToString$default) ? joinToString$default : null;
                if (str != null) {
                    this.taskLogger.debug(str);
                }
            }
        }
    }

    private final void annotateNodes() {
        List<VisualDocument> list = this.qualifiedDocuments;
        ArrayList arrayList = new ArrayList();
        for (Object obj : list) {
            if (((VisualDocument) obj).getVerbose()) {
                arrayList.add(obj);
            }
        }
        Iterator it = arrayList.iterator();
        while (it.hasNext()) {
            FullFeaturedDocumentKt.annotateNodes(((VisualDocument) it.next()).getFeaturedDocument(), getOptions());
        }
    }

    private final void simplifyAnnotations() {
        List<VisualDocument> list = this.qualifiedDocuments;
        ArrayList arrayList = new ArrayList();
        for (Object obj : list) {
            if (((VisualDocument) obj).getVerbose()) {
                arrayList.add(obj);
            }
        }
        Iterator it = arrayList.iterator();
        while (it.hasNext()) {
            FullFeaturedDocumentKt.simplifyAnnotations(((VisualDocument) it.next()).getFeaturedDocument(), getOptions());
        }
    }

    private final void validateFeatures() {
        try {
            List<VisualDocument> list = this.qualifiedDocuments;
            ArrayList arrayList = new ArrayList();
            for (Object obj : list) {
                if (((VisualDocument) obj).getVerbose()) {
                    arrayList.add(obj);
                }
            }
            Iterator it = arrayList.iterator();
            while (it.hasNext()) {
                ((VisualDocument) it.next()).validateFeatures();
            }
        } catch (Exception e) {
            this.logger.error(ExceptionsKt.stringify$default(e, (String) null, (String) null, 3, (Object) null));
        }
    }

    private static final void calculateDocumentFrequency$lambda$13$lambda$12(Function2 function2, Object obj, Object obj2) {
        Intrinsics.checkNotNullParameter(function2, "$tmp0");
        function2.invoke(obj, obj2);
    }
}
