package ai.platon.scent.analysis.corpus;

import ai.platon.pulsar.common.AppPaths;
import ai.platon.pulsar.common.ExceptionsKt;
import ai.platon.pulsar.common.Frequency;
import ai.platon.pulsar.common.FrequencyManager;
import ai.platon.pulsar.common.LogsKt;
import ai.platon.pulsar.common.OpenMapTable;
import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.common.math.vectors.VectorsKt;
import ai.platon.pulsar.common.sql.ResultSetFormatter;
import ai.platon.pulsar.dom.nodes.NodesKt;
import ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt;
import ai.platon.pulsar.dom.select.QueriesKt;
import ai.platon.scent.analysis.AutoMiningResult;
import ai.platon.scent.analysis.corpus.AnalysablePageCorpus;
import ai.platon.scent.analysis.diagnosis.AutoMiningDiagnotor;
import ai.platon.scent.analysis.view.CorpusVisualizer;
import ai.platon.scent.common.ScentPaths;
import ai.platon.scent.common.Systems;
import ai.platon.scent.dom.HNormUrl;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.dom.features.defined.FF;
import ai.platon.scent.dom.nodes.FullFeaturedDocumentKt;
import ai.platon.scent.dom.nodes.OrderedRegionalTileNode;
import ai.platon.scent.dom.nodes.RegionalTile;
import ai.platon.scent.dom.nodes.VisualComponent;
import ai.platon.scent.dom.nodes.VisualComponentType;
import ai.platon.scent.dom.nodes.VisualDocument;
import ai.platon.scent.dom.nodes.node.ext.NodeCharactersKt;
import ai.platon.scent.entities.PageTableGroup;
import ai.platon.scent.entities.PageTableKt;
import ai.platon.scent.ml.Schema;
import ai.platon.scent.ml.semisupervised.NodeClusterRunner;
import ai.platon.scent.ml.unsupervised.TileClusterGroup;
import com.google.common.collect.Multiset;
import com.google.common.collect.TreeMultimap;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.FileAttribute;
import java.sql.ResultSet;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentSkipListSet;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.comparisons.ComparisonsKt;
import kotlin.io.CloseableKt;
import kotlin.jdk7.AutoCloseableKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Reflection;
import kotlin.jvm.internal.StringCompanionObject;
import kotlin.ranges.IntRange;
import kotlin.ranges.RangesKt;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.apache.commons.math3.linear.RealVector;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.apache.commons.math3.util.Precision;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.NodeFilter;
import org.jsoup.select.NodeTraversor;
import org.perf4j.slf4j.Slf4JStopWatch;
import org.slf4j.Logger;

/* compiled from: AnalysablePageCorpus.kt */
@Metadata(mv = {1, 5, 1}, k = 1, xi = 48, d1 = {"��\u008e\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010$\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0010\b\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010!\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010#\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\t\n\u0002\u0010\u0006\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010 \n\u0002\b\u0004\n\u0002\u0010\u000b\n\u0002\b\t\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0018\u0002\n\u0002\b\t\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u000b\n\u0002\u0018\u0002\n\u0002\b\u0005\b��\u0018�� 
\u0096\u00012\u00020\u0001:\u0004\u0096\u0001\u0097\u0001B)\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0012\u0010\u0004\u001a\u000e\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\u00070\u0005\u0012\u0006\u0010\b\u001a\u00020\t¢\u0006\u0002\u0010\nJ\b\u0010X\u001a\u00020YH\u0002J\u0006\u0010Z\u001a\u00020[J\b\u0010\\\u001a\u00020YH\u0002J\u0010\u0010\\\u001a\u00020Y2\u0006\u0010]\u001a\u00020\u0007H\u0002J\b\u0010^\u001a\u00020YH\u0002J\f\u0010_\u001a\b\u0012\u0004\u0012\u00020\u00070`J\b\u0010a\u001a\u00020YH\u0002J\b\u0010b\u001a\u00020YH\u0002J\u0012\u0010c\u001a\u00020Y2\b\b\u0002\u0010d\u001a\u00020eH\u0002J\b\u0010f\u001a\u00020YH\u0002J\b\u0010g\u001a\u00020YH\u0002J\b\u0010h\u001a\u00020YH\u0002J\b\u0010i\u001a\u00020YH\u0002J\b\u0010j\u001a\u00020YH\u0002J\b\u0010k\u001a\u00020YH\u0002J\b\u0010l\u001a\u00020YH\u0002J\b\u0010m\u001a\u00020YH\u0016J \u0010n\u001a\u00020o2\f\u0010p\u001a\b\u0012\u0004\u0012\u00020o0`2\b\b\u0002\u0010q\u001a\u00020\u0006H\u0002J\b\u0010r\u001a\u00020YH\u0002J\b\u0010s\u001a\u00020YH\u0002J\u0010\u0010t\u001a\u00020[2\u0006\u0010u\u001a\u00020vH\u0002J\n\u0010w\u001a\u0004\u0018\u00010NH\u0002J\u0010\u0010x\u001a\u00020Y2\u0006\u0010y\u001a\u00020[H\u0002J\u0010\u0010z\u001a\u00020\r2\u0006\u0010{\u001a\u00020(H\u0002J\b\u0010|\u001a\u00020YH\u0002J\u001f\u0010}\u001a\u00020Y2\f\u0010~\u001a\b\u0012\u0004\u0012\u00020\u00070`2\u0007\u0010\u007f\u001a\u00030\u0080\u0001H\u0002J'\u0010\u0081\u0001\u001a\u00020Y2\b\u0010\u0082\u0001\u001a\u00030\u0083\u00012\u0007\u0010\u007f\u001a\u00030\u0080\u00012\t\b\u0002\u0010\u0084\u0001\u001a\u00020\u0006H\u0002J/\u0010\u0085\u0001\u001a\u00020Y2\u0006\u0010{\u001a\u00020(2\b\u0010\u0086\u0001\u001a\u00030\u0087\u00012\u0007\u0010\u007f\u001a\u00030\u0080\u00012\t\b\u0002\u0010\u0084\u0001\u001a\u00020\u0006H\u0002J\u001e\u0010\u0088\u0001\u001a\b\u0012\u0004\u0012\u00020\u00070`2\r\u0010\u0089\u0001\u001a\b\u0012\u0004\u0012\u00020\u00070`H\u0002J\t\u0010\u008a\u0001\u001a\
u00020YH\u0002J\t\u0010\u008b\u0001\u001a\u00020YH\u0002J\t\u0010\u008c\u0001\u001a\u00020YH\u0002J\u0018\u0010\u008d\u0001\u001a\u00020Y2\r\u0010\u008e\u0001\u001a\b\u0012\u0004\u0012\u00020\r0JH\u0002J\t\u0010\u008f\u0001\u001a\u00020YH\u0002J\t\u0010\u0090\u0001\u001a\u00020YH\u0002J\t\u0010\u0091\u0001\u001a\u00020YH\u0002J\u001a\u0010\u0092\u0001\u001a\u00030\u0093\u00012\u0006\u0010u\u001a\u00020v2\u0006\u00100\u001a\u000201H\u0002J\t\u0010\u0094\u0001\u001a\u00020YH\u0002J\t\u0010\u0095\u0001\u001a\u00020YH\u0002R$\u0010\u000b\u001a\u0012\u0012\u0004\u0012\u00020\r\u0012\u0004\u0012\u00020\u000e0\fj\u0002`\u000fX\u0080\u0004¢\u0006\b\n��\u001a\u0004\b\u0010\u0010\u0011R$\u0010\u0012\u001a\u0012\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\u00130\fj\u0002`\u0014X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b\u0015\u0010\u0011R\u001a\u0010\u0016\u001a\b\u0012\u0004\u0012\u00020\u000e0\u0017X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b\u0018\u0010\u0019R\u001a\u0010\u001a\u001a\b\u0012\u0004\u0012\u00020\u001b0\u0017X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b\u001c\u0010\u0019R\u000e\u0010\b\u001a\u00020\tX\u0082\u0004¢\u0006\u0002\n��R$\u0010\u001d\u001a\u0012\u0012\u0004\u0012\u00020\r\u0012\u0004\u0012\u00020\u000e0\fj\u0002`\u000fX\u0080\u0004¢\u0006\b\n��\u001a\u0004\b\u001e\u0010\u0011R\u001a\u0010\u001f\u001a\b\u0012\u0004\u0012\u00020\u00060 X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b!\u0010\"R\u001a\u0010#\u001a\b\u0012\u0004\u0012\u00020\u00060$X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b%\u0010&R$\u0010'\u001a\u0012\u0012\u0004\u0012\u00020\r\u0012\u0004\u0012\u00020(0\fj\u0002`)X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b*\u0010\u0011R\u0016\u0010+\u001a\n -*\u0004\u0018\u00010,0,X\u0082\u0004¢\u0006\u0002\n��R\u001a\u0010.\u001a\b\u0012\u0004\u0012\u00020\u001b0\u0017X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b/\u0010\u0019R\u0014\u00100\u001a\u0002018BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b2\u00103R 
\u00104\u001a\u000e\u0012\u0004\u0012\u00020\u0013\u0012\u0004\u0012\u00020605X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b7\u00108R\u0011\u0010\u0002\u001a\u00020\u0003¢\u0006\b\n��\u001a\u0004\b9\u0010:R$\u0010;\u001a\u0012\u0012\u0004\u0012\u00020\r\u0012\u0004\u0012\u00020\u000e0\fj\u0002`\u000fX\u0080\u0004¢\u0006\b\n��\u001a\u0004\b<\u0010\u0011R\u001a\u0010=\u001a\b\u0012\u0004\u0012\u00020\u00070\u0017X\u0080\u0004¢\u0006\b\n��\u001a\u0004\b>\u0010\u0019R\u001a\u0010?\u001a\u00020@X\u0080\u000e¢\u0006\u000e\n��\u001a\u0004\bA\u0010B\"\u0004\bC\u0010DR$\u0010E\u001a\u0012\u0012\u0004\u0012\u00020F\u0012\u0004\u0012\u00020(0\fj\u0002`GX\u0080\u0004¢\u0006\b\n��\u001a\u0004\bH\u0010\u0011R\u001a\u0010I\u001a\b\u0012\u0004\u0012\u00020F0JX\u0080\u0004¢\u0006\b\n��\u001a\u0004\bK\u0010LR\u0019\u0010M\u001a\n -*\u0004\u0018\u00010N0N8F¢\u0006\u0006\u001a\u0004\bO\u0010PR\u001a\u0010\u0004\u001a\u000e\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\u00070\u0005X\u0082\u0004¢\u0006\u0002\n��R\u0016\u0010Q\u001a\n -*\u0004\u0018\u00010R0RX\u0082\u0004¢\u0006\u0002\n��R\u0010\u0010S\u001a\u0004\u0018\u00010TX\u0082\u0004¢\u0006\u0002\n��R\u0016\u0010U\u001a\n -*\u0004\u0018\u00010,0,X\u0082\u0004¢\u0006\u0002\n��R \u0010V\u001a\u000e\u0012\u0004\u0012\u00020\u0006\u0012\u0004\u0012\u00020\u001b0\fX\u0080\u0004¢\u0006\b\n��\u001a\u0004\bW\u0010\u0011¨\u0006\u0098\u0001"}, d2 = {"Lai/platon/scent/analysis/corpus/AnalysablePageCorpus;", "Ljava/lang/AutoCloseable;", "portalUrl", "Lai/platon/scent/dom/HNormUrl;", "samples", "", "", "Lai/platon/scent/dom/nodes/VisualDocument;", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/scent/dom/HNormUrl;Ljava/util/Map;Lai/platon/pulsar/common/config/ImmutableConfig;)V", "captionedElementIndexer", "Lcom/google/common/collect/TreeMultimap;", "", "Lorg/jsoup/nodes/Element;", "Lai/platon/scent/dom/nodes/IntElementIndexer;", "getCaptionedElementIndexer$scent_auto_mining", 
"()Lcom/google/common/collect/TreeMultimap;", "categorizedTextNodeIndexer", "Lorg/jsoup/nodes/TextNode;", "Lai/platon/scent/dom/nodes/TextNodeIndexer;", "getCategorizedTextNodeIndexer$scent_auto_mining", "componentElements", "", "getComponentElements$scent_auto_mining", "()Ljava/util/List;", "components", "Lai/platon/scent/dom/nodes/VisualComponent;", "getComponents$scent_auto_mining", "constantCaptionedElementIndexer", "getConstantCaptionedElementIndexer$scent_auto_mining", "documentFrequency", "Lai/platon/pulsar/common/FrequencyManager;", "getDocumentFrequency$scent_auto_mining", "()Lai/platon/pulsar/common/FrequencyManager;", "documentUrls", "", "getDocumentUrls$scent_auto_mining", "()Ljava/util/Set;", "layoutLeftIndexer", "Lorg/jsoup/nodes/Node;", "Lai/platon/pulsar/dom/IntNodeIndexer;", "getLayoutLeftIndexer$scent_auto_mining", "logger", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "naturalComponents", "getNaturalComponents$scent_auto_mining", "options", "Lai/platon/scent/dom/HarvestOptions;", "getOptions", "()Lai/platon/scent/dom/HarvestOptions;", "orderedRegionalTextNodeIndexer", "Ljava/util/TreeMap;", "Lai/platon/scent/dom/nodes/OrderedRegionalTileNode;", "getOrderedRegionalTextNodeIndexer$scent_auto_mining", "()Ljava/util/TreeMap;", "getPortalUrl", "()Lai/platon/scent/dom/HNormUrl;", "pseudoConstantTextBlockIndexer", "getPseudoConstantTextBlockIndexer$scent_auto_mining", "qualifiedDocuments", "getQualifiedDocuments$scent_auto_mining", "recoverableConfidence", "", "getRecoverableConfidence$scent_auto_mining", "()D", "setRecoverableConfidence$scent_auto_mining", "(D)V", "regionalTileNodeIndexer", "Lai/platon/scent/dom/nodes/RegionalTile;", "Lai/platon/scent/dom/nodes/RegionalTileNodeIndexer;", "getRegionalTileNodeIndexer$scent_auto_mining", "regionalTiles", "Lai/platon/pulsar/common/Frequency;", "getRegionalTiles$scent_auto_mining", "()Lai/platon/pulsar/common/Frequency;", "reportDirectory", "Ljava/nio/file/Path;", "getReportDirectory", 
"()Ljava/nio/file/Path;", "startTime", "Ljava/time/OffsetDateTime;", "stopWatch", "Lorg/perf4j/slf4j/Slf4JStopWatch;", "taskLogger", "uniquePathComponents", "getUniquePathComponents$scent_auto_mining", "addLabeledDocuments", "", "analyse", "Lai/platon/scent/analysis/AutoMiningResult;", "analysisVariables", "doc", "annotateNodes", "arrange", "", "arrangeComponents", "buildRegionalTextNodeIndex", "calculateAdvancedFeatures", "encodeOnly", "", "calculateCaptionDocFrequency", "calculateComputedStylesFeatures", "calculateGeneralCategoricalFeatures", "calculateGeneralDocFrequency", "calculateGeometricFeatures", "calculateTextNodeDocFrequency", "calculateTextualFeatures", "close", "combineTables", "Lai/platon/pulsar/common/OpenMapTable;", "tables", "uniqueKeyColumn", "detectRecoverableDocuments", "divideDistricts", "doAutoMining", "clusterer", "Lai/platon/scent/ml/semisupervised/NodeClusterRunner;", "doEncodeToDataset", "drawIfNecessary", "result", "encodeLabel", "node", "enhancePartition", "exportAllElements", "documents", "pw", "Ljava/io/PrintWriter;", "exportHeader", "schema", "Lai/platon/scent/ml/Schema;", "separator", "exportNode", "features", "Lorg/apache/commons/math3/linear/RealVector;", "filterByUris", "docs", "findLayoutComponents", "partition", "reportComponents", "reportDff", "dff", "simplifyAnnotations", "simplifyCaptions", "supplementPartition", "tabulate", "Lai/platon/scent/entities/PageTableGroup;", "traceCorpus", "validateFeatures", "Companion", "OrderingValue", "scent-auto-mining"})
/* loaded from: input_file:ai/platon/scent/analysis/corpus/AnalysablePageCorpus.class */
public final class AnalysablePageCorpus implements AutoCloseable {

    /** Normalized portal URL this corpus was built for; {@code getOptions()} reads the harvest options from it. */
    @NotNull
    private final HNormUrl portalUrl;

    /** Sample documents handed to the constructor, keyed by a String (presumably the document URL — TODO confirm). */
    @NotNull
    private final Map<String, VisualDocument> samples;

    /** Configuration handed to the constructor; forwarded to the {@code NodeClusterRunner} in {@code analyse()}. */
    @NotNull
    private final ImmutableConfig conf;
    /** General-purpose logger of this class. */
    private final Logger logger;
    /** Second logger created with the ".Task" suffix; its level gates {@code reportComponents()} and {@code traceCorpus()}. */
    private final Logger taskLogger;
    /** Wall-clock time captured when the corpus was constructed. */
    private final OffsetDateTime startTime;

    /** Lap timer; {@code null} unless debug logging was enabled when the corpus was constructed. */
    @Nullable
    private final Slf4JStopWatch stopWatch;

    /** Insertion-ordered set of Strings (document URLs, judging by the name); emptied by {@code close()}. */
    @NotNull
    private final Set<String> documentUrls;

    /** Documents returned by {@code arrange()} and consumed by {@code analyse()}; emptied by {@code close()}. */
    @NotNull
    private final List<VisualDocument> qualifiedDocuments;

    /** String-keyed frequency manager; how it is populated is not visible in this part of the file. */
    @NotNull
    private final FrequencyManager<String> documentFrequency;

    /** Nodes grouped by regional tile: keys in natural order, values ordered by {@code NodesKt.getNodePositionComparator()}. */
    @NotNull
    private final TreeMultimap<RegionalTile, Node> regionalTileNodeIndexer;

    /** Frequency counter over regional tiles. */
    @NotNull
    private final Frequency<RegionalTile> regionalTiles;

    /** Text nodes mapped to their ordered tile node, sorted by {@code NodesKt.getNodePositionComparator()}. */
    @NotNull
    private final TreeMap<TextNode, OrderedRegionalTileNode> orderedRegionalTextNodeIndexer;

    /** Elements grouped by an Integer key (meaning not visible here); values ordered by {@code NodesKt.getNodeComparator()}. */
    @NotNull
    private final TreeMultimap<Integer, Element> captionedElementIndexer;

    /** Same shape and ordering as {@code captionedElementIndexer}; by name it holds captions that are constant — TODO confirm. */
    @NotNull
    private final TreeMultimap<Integer, Element> constantCaptionedElementIndexer;

    /** Same shape and ordering as {@code captionedElementIndexer}; by name it holds pseudo-constant text blocks — TODO confirm. */
    @NotNull
    private final TreeMultimap<Integer, Element> pseudoConstantTextBlockIndexer;

    /** Text nodes grouped by a String category; values ordered by {@code NodesKt.getNodeComparator()}. */
    @NotNull
    private final TreeMultimap<String, TextNode> categorizedTextNodeIndexer;
    /** Mutable confidence value; no initializer, so it starts at 0.0. Units and range are not visible here. */
    private double recoverableConfidence;

    /**
     * Nodes grouped by an Integer key (the left coordinate, judging by the name) and ordered by their top coordinate.
     * NOTE(review): the value comparator looks only at {@code top}, so two nodes under the same key with an equal
     * top compare as equal and only one of them is kept by the multimap — confirm this is intended.
     */
    @NotNull
    private final TreeMultimap<Integer, Node> layoutLeftIndexer;

    /** Visual components collected for this corpus; emptied by {@code close()}. */
    @NotNull
    private final List<VisualComponent> components;

    /** Visual components grouped by a String key (a unique path, judging by the name); passed to the {@code NodeClusterRunner}. */
    @NotNull
    private final TreeMultimap<String, VisualComponent> uniquePathComponents;

    /** A further list of visual components ("natural" ones by name); how it is filled is not visible here. */
    @NotNull
    private final List<VisualComponent> naturalComponents;

    /** Elements handed to the {@code AutoMiningResult} built in {@code doAutoMining}. */
    @NotNull
    private final List<Element> componentElements;

    /** Kotlin companion object instance; exposes {@code getEMPTY()}. */
    @NotNull
    public static final Companion Companion = new Companion(null);

    /** Shared corpus built from the NIL portal URL, an empty sample map and the EMPTY configuration. */
    @NotNull
    private static final AnalysablePageCorpus EMPTY = new AnalysablePageCorpus(HNormUrl.Companion.getNIL(), MapsKt.emptyMap(), ImmutableConfig.Companion.getEMPTY());

    /* compiled from: AnalysablePageCorpus.kt */
    @Metadata(mv = {1, 5, 1}, k = 1, xi = 48, d1 = {"��\u0014\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\b\u0086\u0003\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002R\u0011\u0010\u0003\u001a\u00020\u0004¢\u0006\b\n��\u001a\u0004\b\u0005\u0010\u0006¨\u0006\u0007"}, d2 = {"Lai/platon/scent/analysis/corpus/AnalysablePageCorpus$Companion;", "", "()V", "EMPTY", "Lai/platon/scent/analysis/corpus/AnalysablePageCorpus;", "getEMPTY", "()Lai/platon/scent/analysis/corpus/AnalysablePageCorpus;", "scent-auto-mining"})
    /* loaded from: input_file:ai/platon/scent/analysis/corpus/AnalysablePageCorpus$Companion.class */
    public static final class Companion {
        // Kotlin companion object of AnalysablePageCorpus; its only visible purpose is to expose the shared EMPTY corpus.

        /** Not instantiable directly; the outer class creates the single instance through the synthetic constructor below. */
        private Companion() {
        }

        /**
         * Returns the shared corpus stored in the outer class's static {@code EMPTY} field.
         *
         * @return the same instance on every call
         */
        @NotNull
        public final AnalysablePageCorpus getEMPTY() {
            return AnalysablePageCorpus.EMPTY;
        }

        /** Compiler-generated bridge constructor; the marker argument is ignored. */
        public /* synthetic */ Companion(DefaultConstructorMarker defaultConstructorMarker) {
            this();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* compiled from: AnalysablePageCorpus.kt */
    @Metadata(mv = {1, 5, 1}, k = 1, xi = 48, d1 = {"��\u0018\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0010\b\n��\n\u0002\u0010\u000e\n\u0002\b\u0006\b\u0002\u0018��2\u00020\u0001B\u0017\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\b\u0010\u0004\u001a\u0004\u0018\u00010\u0005¢\u0006\u0002\u0010\u0006R\u0011\u0010\u0002\u001a\u00020\u0003¢\u0006\b\n��\u001a\u0004\b\u0007\u0010\bR\u0013\u0010\u0004\u001a\u0004\u0018\u00010\u0005¢\u0006\b\n��\u001a\u0004\b\t\u0010\n¨\u0006\u000b"}, d2 = {"Lai/platon/scent/analysis/corpus/AnalysablePageCorpus$OrderingValue;", "", "order", "", "value", "", "(ILjava/lang/String;)V", "getOrder", "()I", "getValue", "()Ljava/lang/String;", "scent-auto-mining"})
    /* loaded from: input_file:ai/platon/scent/analysis/corpus/AnalysablePageCorpus$OrderingValue.class */
    public static final class OrderingValue {
        // Immutable pair of an integer order and an optional string value.

        /** Integer order carried by this pair. */
        private final int order;

        /** String carried by this pair; may be {@code null}. */
        @Nullable
        private final String value;

        /**
         * Creates the pair.
         *
         * @param order the integer order
         * @param value the associated string, or {@code null}
         */
        public OrderingValue(int order, @Nullable String value) {
            this.order = order;
            this.value = value;
        }

        /**
         * @return the integer order given at construction
         */
        public final int getOrder() {
            return order;
        }

        /**
         * @return the string given at construction, possibly {@code null}
         */
        @Nullable
        public final String getValue() {
            return value;
        }
    }

    /**
     * Creates a corpus over the given sample documents and initializes all (initially empty) indexes.
     *
     * <p>The stop watch is only created when debug logging is enabled at construction time, so lap
     * timing stays off otherwise.
     *
     * @param hNormUrl the portal URL (reported as "portalUrl" in null-check failures)
     * @param map the sample documents (reported as "samples")
     * @param immutableConfig the configuration (reported as "conf")
     */
    public AnalysablePageCorpus(@NotNull HNormUrl hNormUrl, @NotNull Map<String, VisualDocument> map, @NotNull ImmutableConfig immutableConfig) {
        Intrinsics.checkNotNullParameter(hNormUrl, "portalUrl");
        Intrinsics.checkNotNullParameter(map, "samples");
        Intrinsics.checkNotNullParameter(immutableConfig, "conf");
        this.portalUrl = hNormUrl;
        this.samples = map;
        this.conf = immutableConfig;
        this.logger = LogsKt.getLogger(Reflection.getOrCreateKotlinClass(AnalysablePageCorpus.class));
        this.taskLogger = LogsKt.getLogger(Reflection.getOrCreateKotlinClass(AnalysablePageCorpus.class), ".Task");
        this.startTime = OffsetDateTime.now();
        this.stopWatch = this.logger.isDebugEnabled() ? new Slf4JStopWatch() : null;

        this.documentUrls = new LinkedHashSet<>();
        this.qualifiedDocuments = new ArrayList<>();
        this.documentFrequency = new FrequencyManager<>();

        // Tile indexes: keys in natural order, nodes ordered by document position.
        this.regionalTileNodeIndexer = TreeMultimap.create(ComparisonsKt.naturalOrder(), NodesKt.getNodePositionComparator());
        // Kotlin synthetic constructor: mask 1 selects the default value for the first (String) argument.
        this.regionalTiles = new Frequency<>((String) null, 1, (DefaultConstructorMarker) null);
        this.orderedRegionalTextNodeIndexer = new TreeMap<>(NodesKt.getNodePositionComparator());

        // Caption / text indexes: keys in natural order, values ordered by the shared node comparator.
        this.captionedElementIndexer = TreeMultimap.create(ComparisonsKt.naturalOrder(), NodesKt.getNodeComparator());
        this.constantCaptionedElementIndexer = TreeMultimap.create(ComparisonsKt.naturalOrder(), NodesKt.getNodeComparator());
        this.pseudoConstantTextBlockIndexer = TreeMultimap.create(ComparisonsKt.naturalOrder(), NodesKt.getNodeComparator());
        this.categorizedTextNodeIndexer = TreeMultimap.create(ComparisonsKt.naturalOrder(), NodesKt.getNodeComparator());

        // Layout index: values are ordered by their top coordinate only (Kotlin: compareBy { it.top }).
        this.layoutLeftIndexer = TreeMultimap.create(ComparisonsKt.naturalOrder(), new Comparator<Node>() {
            @Override
            public int compare(Node first, Node second) {
                Intrinsics.checkNotNullExpressionValue(first, "it");
                Intrinsics.checkNotNullExpressionValue(second, "it");
                return Integer.compare(NodeExtKt.getTop(first), NodeExtKt.getTop(second));
            }
        });

        this.components = new ArrayList<>();
        this.uniquePathComponents = TreeMultimap.create();
        this.naturalComponents = new ArrayList<>();
        this.componentElements = new ArrayList<>();
    }

    /**
     * Returns the portal URL this corpus was constructed with.
     *
     * @return the value passed as the first constructor argument
     */
    @NotNull
    public final HNormUrl getPortalUrl() {
        return this.portalUrl;
    }

    /**
     * Builds the report directory path for this corpus: the application report directory,
     * then "harvest/corpus", then a segment derived from the portal URL by {@code ScentPaths.fromHost}.
     *
     * <p>The path is only computed; nothing is created on disk here.
     *
     * @return the resolved path
     */
    public final Path getReportDirectory() {
        Path corpusRoot = AppPaths.INSTANCE.getREPORT_DIR().resolve("harvest/corpus");
        return corpusRoot.resolve(ScentPaths.INSTANCE.fromHost(this.portalUrl.getUrl()));
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns the harvest options attached to the portal URL.
     *
     * @return {@code portalUrl.getHOptions()}, read on every call
     */
    public final HarvestOptions getOptions() {
        return this.portalUrl.getHOptions();
    }

    /**
     * Returns the live, mutable set backing {@code documentUrls} (no defensive copy).
     * The {@code $scent_auto_mining} suffix matches the module name in the Kotlin metadata,
     * so this is presumably a Kotlin {@code internal} accessor.
     */
    @NotNull
    public final Set<String> getDocumentUrls$scent_auto_mining() {
        return this.documentUrls;
    }

    /**
     * Returns the live, mutable list of qualified documents (no defensive copy);
     * the same list is returned by {@code arrange()} and emptied by {@code close()}.
     */
    @NotNull
    public final List<VisualDocument> getQualifiedDocuments$scent_auto_mining() {
        return this.qualifiedDocuments;
    }

    /** Returns the String-keyed frequency manager owned by this corpus (the live instance). */
    @NotNull
    public final FrequencyManager<String> getDocumentFrequency$scent_auto_mining() {
        return this.documentFrequency;
    }

    /**
     * Returns the live multimap of nodes grouped by regional tile; values are ordered by
     * {@code NodesKt.getNodePositionComparator()} as set up in the constructor.
     */
    @NotNull
    public final TreeMultimap<RegionalTile, Node> getRegionalTileNodeIndexer$scent_auto_mining() {
        return this.regionalTileNodeIndexer;
    }

    /** Returns the live frequency counter over regional tiles. */
    @NotNull
    public final Frequency<RegionalTile> getRegionalTiles$scent_auto_mining() {
        return this.regionalTiles;
    }

    /**
     * Returns the live map from text nodes to their ordered tile node; keys are sorted by
     * {@code NodesKt.getNodePositionComparator()} as set up in the constructor.
     */
    @NotNull
    public final TreeMap<TextNode, OrderedRegionalTileNode> getOrderedRegionalTextNodeIndexer$scent_auto_mining() {
        return this.orderedRegionalTextNodeIndexer;
    }

    /**
     * Returns the live multimap of captioned elements grouped by an Integer key
     * (the key's meaning is not visible in this part of the file).
     */
    @NotNull
    public final TreeMultimap<Integer, Element> getCaptionedElementIndexer$scent_auto_mining() {
        return this.captionedElementIndexer;
    }

    /**
     * Returns the live multimap held in {@code constantCaptionedElementIndexer}
     * (Integer keys; the key's meaning is not visible in this part of the file).
     */
    @NotNull
    public final TreeMultimap<Integer, Element> getConstantCaptionedElementIndexer$scent_auto_mining() {
        return this.constantCaptionedElementIndexer;
    }

    /**
     * Returns the live multimap held in {@code pseudoConstantTextBlockIndexer}
     * (Integer keys; the key's meaning is not visible in this part of the file).
     */
    @NotNull
    public final TreeMultimap<Integer, Element> getPseudoConstantTextBlockIndexer$scent_auto_mining() {
        return this.pseudoConstantTextBlockIndexer;
    }

    /** Returns the live multimap of text nodes grouped by a String category. */
    @NotNull
    public final TreeMultimap<String, TextNode> getCategorizedTextNodeIndexer$scent_auto_mining() {
        return this.categorizedTextNodeIndexer;
    }

    /**
     * Returns the current value of {@code recoverableConfidence}; it is 0.0 until the setter is called.
     */
    public final double getRecoverableConfidence$scent_auto_mining() {
        return this.recoverableConfidence;
    }

    /**
     * Overwrites {@code recoverableConfidence}; the value is stored as given, without validation.
     *
     * @param d the new confidence value
     */
    public final void setRecoverableConfidence$scent_auto_mining(double d) {
        this.recoverableConfidence = d;
    }

    /**
     * Returns the live layout multimap: nodes grouped by an Integer key and, within a key,
     * ordered by their top coordinate (see the comparator installed in the constructor).
     */
    @NotNull
    public final TreeMultimap<Integer, Node> getLayoutLeftIndexer$scent_auto_mining() {
        return this.layoutLeftIndexer;
    }

    /** Returns the live, mutable list of visual components (no defensive copy). */
    @NotNull
    public final List<VisualComponent> getComponents$scent_auto_mining() {
        return this.components;
    }

    /**
     * Returns the live multimap of visual components grouped by a String key;
     * the same instance is handed to the {@code NodeClusterRunner} in {@code analyse()}.
     */
    @NotNull
    public final TreeMultimap<String, VisualComponent> getUniquePathComponents$scent_auto_mining() {
        return this.uniquePathComponents;
    }

    /** Returns the live, mutable list held in {@code naturalComponents} (no defensive copy). */
    @NotNull
    public final List<VisualComponent> getNaturalComponents$scent_auto_mining() {
        return this.naturalComponents;
    }

    /**
     * Returns the live, mutable list of component elements; the same list is passed to the
     * {@code AutoMiningResult} created in {@code doAutoMining}.
     */
    @NotNull
    public final List<Element> getComponentElements$scent_auto_mining() {
        return this.componentElements;
    }

    /**
     * Selects the documents that qualify for analysis.
     *
     * <p>If qualified documents were already collected they are returned as-is. With fewer than
     * five samples nothing is attempted and an empty list is returned. Otherwise
     * {@code detectRecoverableDocuments()} is run; an {@link OutOfMemoryError} raised by it is
     * logged and turned into an empty result instead of propagating.
     *
     * @return the live list of qualified documents, or an empty list when there are too few
     *         samples or the detection ran out of memory
     */
    @NotNull
    public final List<VisualDocument> arrange() {
        if (!this.qualifiedDocuments.isEmpty()) {
            return this.qualifiedDocuments;
        }
        int sampleCount = this.samples.size();
        if (sampleCount < 5) {
            this.logger.warn("Too few samples: {}", sampleCount);
            return CollectionsKt.emptyList();
        }
        this.logger.info("Arrangement start - {}", Systems.INSTANCE.getHeapMessage());
        try {
            detectRecoverableDocuments();
        } catch (OutOfMemoryError e) {
            // Deliberate best effort: report through the logger (not stderr) and give up on this corpus.
            this.logger.error("Out of memory while arranging the corpus", e);
            return CollectionsKt.emptyList();
        } finally {
            // Logged on every exit path: success, out-of-memory, or any other failure.
            this.logger.info("Arrangement finished - {}", Systems.INSTANCE.getHeapMessage());
        }
        if (this.stopWatch != null) {
            this.stopWatch.lap("detectRecoverableDocuments");
        }
        return this.qualifiedDocuments;
    }

    /**
     * Runs the full analysis: computes the advanced features, clusters the nodes with a
     * {@code NodeClusterRunner}, and optionally draws the result.
     *
     * <p>The runner is closed exactly once, before the "finished" message is logged and before
     * drawing, whether or not mining succeeds. If both mining and closing fail, the close failure
     * is attached to the mining failure as a suppressed exception.
     *
     * @return the mining result produced by {@code doAutoMining}
     */
    @NotNull
    public final AutoMiningResult analyse() {
        this.logger.info("Analysis start - {}", Systems.INSTANCE.getHeapMessage());
        // Equivalent to the Kotlin default argument: encodeOnly = false.
        calculateAdvancedFeatures(false);
        AutoMiningResult result;
        try (NodeClusterRunner runner = new NodeClusterRunner(this.qualifiedDocuments, this.uniquePathComponents, this.conf)) {
            result = doAutoMining(runner);
        }
        this.logger.info("Analysis finished - {}", Systems.INSTANCE.getHeapMessage());
        drawIfNecessary(result);
        return result;
    }

    /**
     * Releases everything this corpus holds: detaches each sample's visual document from its
     * underlying document and empties every index and list.
     *
     * <p>The sample map itself is left untouched; only the back-references are cleared.
     */
    @Override // java.lang.AutoCloseable
    public void close() {
        for (VisualDocument sample : this.samples.values()) {
            // Fully qualified on purpose: the imported NodeExtKt is the pulsar one, not the scent one.
            ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setVisualDocument(sample.getDocument(), (VisualDocument) null);
        }
        this.documentUrls.clear();
        this.qualifiedDocuments.clear();
        this.documentFrequency.clear();
        this.regionalTileNodeIndexer.clear();
        this.regionalTiles.clear();
        this.orderedRegionalTextNodeIndexer.clear();
        this.captionedElementIndexer.clear();
        this.constantCaptionedElementIndexer.clear();
        this.pseudoConstantTextBlockIndexer.clear();
        this.categorizedTextNodeIndexer.clear();
        this.layoutLeftIndexer.clear();
        this.components.clear();
        this.uniquePathComponents.clear();
        this.naturalComponents.clear();
        this.componentElements.clear();
    }

    /**
     * Draws the mining result with a {@code CorpusVisualizer} when the portal URL's harvest
     * options request drawing; does nothing otherwise.
     *
     * @param autoMiningResult the result to visualize
     */
    private final void drawIfNecessary(AutoMiningResult autoMiningResult) {
        boolean drawRequested = this.portalUrl.getHOptions().getDraw();
        if (!drawRequested) {
            return;
        }
        CorpusVisualizer visualizer = new CorpusVisualizer(autoMiningResult, this.qualifiedDocuments, true, true, true);
        visualizer.draw();
    }

    /**
     * Runs the feature-calculation pipeline over the qualified documents, recording a stop-watch
     * lap after each stage. Does nothing when there are no qualified documents.
     *
     * <p>When diagnosing with a positive verbose count, that many randomly chosen documents are
     * flagged verbose first. The partition stage is skipped when {@code z} is true.
     *
     * @param z the Kotlin {@code encodeOnly} flag; true skips the partition stage
     */
    private final void calculateAdvancedFeatures(boolean z) {
        if (this.qualifiedDocuments.isEmpty()) {
            return;
        }

        if (getOptions().getDiagnose() && getOptions().getNVerbose() > 0) {
            IntRange allIndices = new IntRange(0, this.qualifiedDocuments.size() - 1);
            List<Integer> verboseIndices = CollectionsKt.take(CollectionsKt.shuffled(allIndices), getOptions().getNVerbose());
            for (Integer index : verboseIndices) {
                this.qualifiedDocuments.get(index.intValue()).setVerbose(true);
            }
        }

        calculateGeometricFeatures();
        lapStopWatch("calculateGeometricFeatures");

        calculateTextualFeatures();
        lapStopWatch("calculateTextualFeatures");

        calculateCaptionDocFrequency();
        lapStopWatch("calculateCaptionDocFrequency");

        boolean encodeOnly = z;
        if (!encodeOnly) {
            partition();
            lapStopWatch("partition");
        }

        calculateGeneralCategoricalFeatures();
        lapStopWatch("calculateGeneralCategoricalFeatures");

        calculateComputedStylesFeatures();
        lapStopWatch("calculateComputedStylesFeatures");

        divideDistricts();
        lapStopWatch("divideDistricts");

        if (this.taskLogger.isInfoEnabled()) {
            reportComponents();
        }
    }

    /** Records a lap under the given tag when the stop watch exists (it is null unless debug logging was on). */
    private void lapStopWatch(String tag) {
        Slf4JStopWatch watch = this.stopWatch;
        if (watch != null) {
            watch.lap(tag);
        }
    }

    /**
     * Compiler-generated bridge for the Kotlin default argument of {@code calculateAdvancedFeatures}:
     * when bit 0 of {@code i} is set, the flag falls back to {@code false}; otherwise {@code z} is used.
     * The last argument is unused.
     */
    static /* synthetic */ void calculateAdvancedFeatures$default(AnalysablePageCorpus analysablePageCorpus, boolean z, int i, Object obj) {
        boolean useDefault = (i & 1) != 0;
        analysablePageCorpus.calculateAdvancedFeatures(useDefault ? false : z);
    }

    /**
     * Writes all elements of the qualified documents to a fresh temporary file named
     * {@code dataset-*.csv} and returns its path.
     *
     * <p>The writer is closed exactly once, whether or not the export succeeds.
     *
     * <p>NOTE(review): the writer uses the platform default charset, as the original did; confirm
     * whether readers of the dataset expect a fixed encoding. {@code Files.createTempFile} and the
     * {@code PrintWriter} constructor declare checked I/O exceptions that the Kotlin original
     * propagated without declaring them.
     *
     * @return the path of the temporary dataset file
     */
    private final Path doEncodeToDataset() {
        this.logger.info("Encoding {} documents", Integer.valueOf(this.qualifiedDocuments.size()));
        Path datasetFile = Files.createTempFile("dataset-", ".csv");
        try (PrintWriter writer = new PrintWriter(datasetFile.toFile())) {
            exportAllElements(getQualifiedDocuments$scent_auto_mining(), writer);
        }
        return datasetFile;
    }

    /**
     * Clusters the nodes with the given runner, tabulates the clusters into a result, and runs
     * the diagnosis when the harvest options ask for it.
     *
     * <p>The "cluster" lap is recorded before clustering starts. The corpus is traced only when
     * the task logger has trace enabled.
     *
     * @param nodeClusterRunner the runner that performs the clustering; not closed here
     * @return the assembled mining result
     */
    private final AutoMiningResult doAutoMining(NodeClusterRunner nodeClusterRunner) {
        if (this.stopWatch != null) {
            this.stopWatch.lap("cluster");
        }
        nodeClusterRunner.cluster();

        if (this.taskLogger.isTraceEnabled()) {
            traceCorpus();
        }

        PageTableGroup tables = tabulate(nodeClusterRunner, getOptions());
        AutoMiningResult result = new AutoMiningResult(this.portalUrl, this.samples, this.qualifiedDocuments, this.componentElements, tables);

        if (!getOptions().getDiagnose()) {
            return result;
        }

        AutoMiningDiagnotor diagnotor = new AutoMiningDiagnotor(this, nodeClusterRunner, result);
        this.logger.info("Start diagnosing ...");
        diagnotor.diagnose();
        this.logger.info("Diagnosing finished, reporting ...");
        diagnotor.report();
        return result;
    }

    /**
     * Writes one line per exported element of every given document to {@code printWriter}.
     *
     * <p>An element is exported only when its left edge is within [0, 500], its width within
     * [200, 1100], its top within [100, 500] and its bottom is greater than 200. Progress is
     * logged after every 100 documents.
     *
     * @param list the documents whose elements are visited
     * @param printWriter the sink receiving one line per exported element
     */
    private final void exportAllElements(List<VisualDocument> list, final PrintWriter printWriter) {
        // Stateless visitor (apart from the captured writer), so one instance serves all documents.
        final Function1<Element, Unit> exporter = new Function1<Element, Unit>() {
            public final void invoke(@NotNull Element element) {
                Intrinsics.checkNotNullParameter(element, "ele");
                int left = NodeExtKt.getLeft((Node) element);
                if (left < 0 || left > 500) {
                    return;
                }
                int width = NodeExtKt.getWidth((Node) element);
                if (width < 200 || width > 1100) {
                    return;
                }
                int top = NodeExtKt.getTop((Node) element);
                if (top < 100 || top > 500) {
                    return;
                }
                if (NodeExtKt.getBottom((Node) element) <= 200) {
                    return;
                }
                RealVector features = element.getExtension().getFeatures();
                Intrinsics.checkNotNullExpressionValue(features, "ele.extension.features");
                AnalysablePageCorpus.exportNode$default(AnalysablePageCorpus.this, (Node) element, features, printWriter, null, 8, null);
                printWriter.println();
            }

            public /* bridge */ /* synthetic */ Object invoke(Object obj) {
                invoke((Element) obj);
                return Unit.INSTANCE;
            }
        };

        int encoded = 0;
        for (VisualDocument document : list) {
            NodesKt.forEachElement$default(document.getDocument(), false, exporter, 1, (Object) null);
            encoded++;
            if (encoded % 100 == 0) {
                this.logger.info("Encoded {} documents", Integer.valueOf(encoded));
            }
        }
    }

    /**
     * Prints the header line: {@code Label} followed by the names of all schema columns.
     *
     * <p>The same separator {@code str} is used between {@code Label} and the first column and
     * between the column names, so the header matches the rows written by {@code exportNode}
     * for any separator, not only for the default {@code ","}.
     *
     * @param schema the schema whose column names form the header
     * @param printWriter the sink the header line is written to
     * @param str the field separator
     */
    private final void exportHeader(Schema schema, PrintWriter printWriter, String str) {
        Function1<Schema.Column, CharSequence> columnName = new Function1<Schema.Column, CharSequence>() {
            @NotNull
            public final CharSequence invoke(@NotNull Schema.Column column) {
                Intrinsics.checkNotNullParameter(column, "it");
                return column.getName();
            }
        };
        printWriter.print("Label");
        printWriter.print(str);
        // Mask 30 leaves prefix, postfix, limit and truncation at their defaults; the separator is explicit.
        printWriter.println(CollectionsKt.joinToString$default(schema.getColumns(), str, (CharSequence) null, (CharSequence) null, 0, (CharSequence) null, columnName, 30, (Object) null));
    }

    /**
     * Synthetic bridge that applies the default separator {@code ","} before delegating to
     * {@code exportHeader}.
     *
     * @param i bit mask of omitted arguments; bit 2 stands for {@code str}
     * @param obj unused marker argument
     */
    static /* synthetic */ void exportHeader$default(AnalysablePageCorpus analysablePageCorpus, Schema schema, PrintWriter printWriter, String str, int i, Object obj) {
        boolean separatorOmitted = (i & 4) != 0;
        String separator = separatorOmitted ? "," : str;
        analysablePageCorpus.exportHeader(schema, printWriter, separator);
    }

    /**
     * Prints the encoded label of {@code node} followed by every component of
     * {@code realVector}, each component preceded by the separator {@code str}.
     * No line terminator is written.
     *
     * @param node the node whose label is encoded with {@code encodeLabel}
     * @param realVector the feature vector to print
     * @param printWriter the sink
     * @param str the field separator
     */
    private final void exportNode(Node node, RealVector realVector, PrintWriter printWriter, String str) {
        printWriter.print(encodeLabel(node));
        // The dimension is read once, before the first component is printed.
        int dimension = realVector.getDimension();
        for (int index = 0; index < dimension; index++) {
            printWriter.print(str);
            printWriter.print(VectorsKt.get(realVector, index));
        }
    }

    /**
     * Synthetic bridge that applies the default separator {@code ","} before delegating to
     * {@code exportNode}.
     *
     * @param i bit mask of omitted arguments; bit 3 stands for {@code str}
     * @param obj unused marker argument
     */
    public static /* synthetic */ void exportNode$default(AnalysablePageCorpus analysablePageCorpus, Node node, RealVector realVector, PrintWriter printWriter, String str, int i, Object obj) {
        boolean separatorOmitted = (i & 8) != 0;
        String separator = separatorOmitted ? "," : str;
        analysablePageCorpus.exportNode(node, realVector, printWriter, separator);
    }

    /**
     * Maps the ML labels of a node to a numeric class code.
     *
     * @param node the node whose labels are inspected
     * @return 1 for {@code "Title"}, 2 for {@code "Meta"}, 3 for {@code "Content"}, otherwise 0;
     *         when several labels are present the first match in that order wins
     */
    private final int encodeLabel(Node node) {
        List labels = NodeExtKt.getMlLabels(node);
        int code = 0;
        if (labels.contains("Title")) {
            code = 1;
        } else if (labels.contains("Meta")) {
            code = 2;
        } else if (labels.contains("Content")) {
            code = 3;
        }
        return code;
    }

    /**
     * Builds one table per tile cluster group (highest score first), drops the empty ones,
     * prepends a table combining all remaining ones and wraps the list into a
     * {@link PageTableGroup} flagged as sorted.
     *
     * @param nodeClusterRunner supplies the tile cluster groups
     * @param harvestOptions supplies the cell type and is passed on to the table group
     * @return the table group; its first table is the combined table
     */
    private final PageTableGroup tabulate(NodeClusterRunner nodeClusterRunner, HarvestOptions harvestOptions) {
        // Descending by score: the arguments are swapped in the comparison.
        Comparator byScoreDescending = new Comparator() {
            @Override
            public final int compare(Object first, Object second) {
                return ComparisonsKt.compareValues(((TileClusterGroup) second).getScore(), ((TileClusterGroup) first).getScore());
            }
        };
        List rankedGroups = CollectionsKt.sortedWith(nodeClusterRunner.getTileClusterGroups$scent_auto_mining(), byScoreDescending);

        ArrayList<OpenMapTable> tables = new ArrayList<>();
        int position = 0;
        for (Object group : rankedGroups) {
            int index = position;
            position++;
            if (index < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            // Table idents are 1-based ranks.
            OpenMapTable table = new PageCorpusTabulator(this, (TileClusterGroup) group, index + 1, harvestOptions.getCellType()).tabulate();
            if (table.isNotEmpty()) {
                tables.add(table);
            }
        }

        // The combined table is computed from the per-group tables first, then put at the head.
        tables.add(0, combineTables$default(this, tables, null, 2, null));

        PageTableGroup group = new PageTableGroup(this.portalUrl, tables, harvestOptions);
        group.setSorted(true);
        return group;
    }

    /**
     * Merges the given tables into one table whose columns are the concatenation of all input
     * columns and whose rows are keyed by the string value found in the column named
     * {@code str} (compared case-insensitively) of each input row.
     *
     * <p>Cells of input rows sharing the same key are collected together and ordered by
     * {@code tableIndex * 10000 + cellIndex} before being written into the combined row.
     *
     * @param list the tables to combine
     * @param str the name of the key column looked up in every input table
     * @return the combined table, whose data is flagged as combined
     */
    private final OpenMapTable combineTables(List<OpenMapTable> list, String str) {
        int i;
        String obj;
        String obj2;
        // Step 1: flatten every column of every table into (table ident, column) pairs.
        ArrayList arrayList = new ArrayList();
        for (OpenMapTable openMapTable : list) {
            OpenMapTable.Column[] columns = openMapTable.getColumns();
            ArrayList arrayList2 = new ArrayList(columns.length);
            for (OpenMapTable.Column column : columns) {
                arrayList2.add(TuplesKt.to(Integer.valueOf(openMapTable.getIdent()), column));
            }
            CollectionsKt.addAll(arrayList, arrayList2);
        }
        ArrayList arrayList3 = arrayList;
        // Step 2: create the combined table with one column per collected pair.
        // NOTE(review): the second constructor argument (0) is presumably the initial row count — confirm.
        OpenMapTable openMapTable2 = new OpenMapTable(arrayList3.size(), 0);
        PageTableKt.getData(openMapTable2).setCombined(true);
        int i2 = 0;
        for (OpenMapTable.Column column2 : openMapTable2.getColumns()) {
            int i3 = i2;
            i2++;
            Pair pair = (Pair) arrayList3.get(i3);
            int intValue = ((Number) pair.component1()).intValue();
            OpenMapTable.Column column3 = (OpenMapTable.Column) pair.component2();
            String name = column3.getName();
            // Names already of the form T<n>C<n> are kept; all others get the "T<ident>" prefix.
            String str2 = new Regex("T\\d+C\\d+").matches(name) ? name : null;
            column2.setName(str2 == null ? "T" + intValue + column3.getName() : str2);
            PageTableKt.setData(column2, PageTableKt.getData(column3));
            column2.getAttributes().putAll(column3.getAttributes());
        }
        // Step 3: group the cells of all input rows by their key value, preserving first-seen key order.
        LinkedHashMap linkedHashMap = new LinkedHashMap();
        int i4 = 0;
        for (Object obj3 : list) {
            int i5 = i4;
            i4++;
            if (i5 < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            OpenMapTable openMapTable3 = (OpenMapTable) obj3;
            int numColumns = openMapTable3.getNumColumns();
            OpenMapTable.Column[] columns2 = openMapTable3.getColumns();
            // Find the index of the key column in this table; -1 when no column has that name.
            int i6 = 0;
            int length = columns2.length;
            while (true) {
                if (i6 >= length) {
                    i = -1;
                    break;
                }
                if (StringsKt.equals(columns2[i6].getName(), str, true)) {
                    i = i6;
                    break;
                }
                i6++;
            }
            int i7 = i;
            for (OpenMapTable.Row row : openMapTable3.getRows()) {
                if (row.getCells().length != numColumns) {
                    this.logger.warn("Row value count does not match column count | " + row.getCells().length + " <- " + numColumns);
                }
                // NOTE(review): i7 is -1 when the key column is missing; how row.get(-1) behaves is not visible here — confirm.
                OpenMapTable.Cell cell = row.get(i7);
                if (cell == null) {
                    obj = null;
                } else {
                    Object value = cell.getValue();
                    obj = value == null ? null : value.toString();
                }
                String str3 = obj;
                // Rows without a key value are skipped.
                if (str3 != null) {
                    // NOTE(review): r2 is a decompiler placeholder for a captured value
                    // (presumably the column count used as the list capacity) — confirm against the Kotlin source.
                    Object computeIfAbsent = linkedHashMap.computeIfAbsent(str3, (v1) -> {
                        return m5combineTables$lambda18$lambda17$lambda15(r2, v1);
                    });
                    Intrinsics.checkNotNullExpressionValue(computeIfAbsent, "rows.computeIfAbsent(key… ArrayList(columnCount) }");
                    ArrayList arrayList4 = (ArrayList) computeIfAbsent;
                    int i8 = 0;
                    for (OpenMapTable.Cell cell2 : row.getCells()) {
                        ArrayList arrayList5 = arrayList4;
                        int i9 = i8;
                        i8++;
                        // Ordering key: table position * 10000 + cell position within the row.
                        int i10 = (i5 * 10000) + i9;
                        if (cell2 == null) {
                            obj2 = null;
                        } else {
                            Object value2 = cell2.getValue();
                            obj2 = value2 == null ? null : value2.toString();
                        }
                        arrayList5.add(new OrderingValue(i10, obj2));
                    }
                }
            }
        }
        // Step 4: write one combined row per key, with the collected values sorted by their ordering key.
        for (Map.Entry entry : linkedHashMap.entrySet()) {
            String str4 = (String) entry.getKey();
            final ArrayList arrayList6 = (ArrayList) entry.getValue();
            openMapTable2.computeIfAbsent(str4, new Function1<OpenMapTable.Row, Unit>() { // from class: ai.platon.scent.analysis.corpus.AnalysablePageCorpus$combineTables$3$1
                /* JADX INFO: Access modifiers changed from: package-private */
                /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
                {
                    super(1);
                }

                public final void invoke(@NotNull OpenMapTable.Row row2) {
                    Intrinsics.checkNotNullParameter(row2, "row");
                    int i11 = 0;
                    for (Object obj4 : CollectionsKt.sortedWith(arrayList6, new Comparator() { // from class: ai.platon.scent.analysis.corpus.AnalysablePageCorpus$combineTables$3$1$invoke$$inlined$sortedBy$1
                        /* JADX WARN: Multi-variable type inference failed */
                        @Override // java.util.Comparator
                        public final int compare(T t, T t2) {
                            return ComparisonsKt.compareValues(Integer.valueOf(((AnalysablePageCorpus.OrderingValue) t).getOrder()), Integer.valueOf(((AnalysablePageCorpus.OrderingValue) t2).getOrder()));
                        }
                    })) {
                        int i12 = i11;
                        i11++;
                        if (i12 < 0) {
                            CollectionsKt.throwIndexOverflow();
                        }
                        // The position in the sorted list is used as the target column index.
                        row2.setValue(i12, ((AnalysablePageCorpus.OrderingValue) obj4).getValue());
                    }
                }

                public /* bridge */ /* synthetic */ Object invoke(Object obj4) {
                    invoke((OpenMapTable.Row) obj4);
                    return Unit.INSTANCE;
                }
            });
        }
        return openMapTable2;
    }

    /**
     * Synthetic bridge that applies the default key column name {@code "Url"} before
     * delegating to {@code combineTables}.
     *
     * @param i bit mask of omitted arguments; bit 1 stands for {@code str}
     * @param obj unused marker argument
     * @return the combined table produced by {@code combineTables}
     */
    static /* synthetic */ OpenMapTable combineTables$default(AnalysablePageCorpus analysablePageCorpus, List list, String str, int i, Object obj) {
        boolean keyColumnOmitted = (i & 2) != 0;
        String keyColumn = keyColumnOmitted ? "Url" : str;
        return analysablePageCorpus.combineTables(list, keyColumn);
    }

    /**
     * Validates features and annotates the corpus; does nothing unless trace logging is
     * enabled on the task logger.
     */
    private final void traceCorpus() {
        if (!this.taskLogger.isTraceEnabled()) {
            return;
        }
        validateFeatures();
        annotateNodes();
        simplifyAnnotations();
        simplifyCaptions();
    }

    /**
     * Runs the preprocessing passes over all sample documents and records a stopwatch lap
     * after each pass (when a stopwatch is set).
     *
     * <p>Passes, in order: categorical term frequency, component edges, regional tile node
     * index (per document), general document frequency, regional text node index, text node
     * document frequency, collinear nodes (per document). Logs a warning and returns
     * immediately when there are no samples.
     */
    private final void detectRecoverableDocuments() {
        if (this.samples.isEmpty()) {
            this.logger.warn("Sample is empty");
            return;
        }

        for (VisualDocument document : this.samples.values()) {
            document.calculateCategoricalTermFrequency();
        }
        // The field is re-read before every lap, as the stopwatch is nullable.
        Slf4JStopWatch watch = this.stopWatch;
        if (watch != null) {
            watch.lap("calculateCategoricalTermFrequency");
        }

        for (VisualDocument document : this.samples.values()) {
            document.calculateComponentEdges();
        }
        watch = this.stopWatch;
        if (watch != null) {
            watch.lap("calculateComponentEdges");
        }

        for (VisualDocument document : this.samples.values()) {
            document.buildRegionalTileNodeIndex();
        }
        watch = this.stopWatch;
        if (watch != null) {
            watch.lap("buildRegionalTileNodeIndex");
        }

        calculateGeneralDocFrequency();
        watch = this.stopWatch;
        if (watch != null) {
            watch.lap("calculateGeneralDocFrequency");
        }

        buildRegionalTextNodeIndex();
        watch = this.stopWatch;
        if (watch != null) {
            // Tag matches the pass that just ran, so it is not confused with the per-document tile index lap above.
            watch.lap("buildRegionalTextNodeIndex");
        }

        calculateTextNodeDocFrequency();
        watch = this.stopWatch;
        if (watch != null) {
            watch.lap("calculateTextNodeDocFrequency");
        }

        for (VisualDocument document : this.samples.values()) {
            document.calculateCollinearNodes();
        }
        watch = this.stopWatch;
        if (watch != null) {
            watch.lap("calculateCollinearNodes");
        }
    }

    /**
     * Keeps the documents whose location (URL) length is typical for the given list.
     *
     * <p>When the most frequent location length is shorter than 20 characters, only documents
     * with exactly that length are kept. Otherwise the documents whose location length lies
     * between the 25th and the 75th percentile (both inclusive, truncated to {@code int}) of all
     * location lengths are kept.
     *
     * @param list the candidate documents
     * @return a new list holding the retained documents in their original order
     */
    private final List<VisualDocument> filterByUris(List<VisualDocument> list) {
        Frequency<Integer> lengthFrequency = new Frequency<>((String) null, 1, (DefaultConstructorMarker) null);
        for (VisualDocument document : list) {
            lengthFrequency.add(Integer.valueOf(document.getLocation().length()));
        }
        Integer num = (Integer) lengthFrequency.getMostEntry().getElement();
        Intrinsics.checkNotNullExpressionValue(num, "length");
        int dominantLength = num.intValue();

        if (dominantLength < 20) {
            ArrayList<VisualDocument> sameLength = new ArrayList<>();
            for (VisualDocument document : list) {
                if (document.getLocation().length() == dominantLength) {
                    sameLength.add(document);
                }
            }
            return sameLength;
        }

        DescriptiveStatistics lengthStatistics = new DescriptiveStatistics();
        for (VisualDocument document : list) {
            lengthStatistics.addValue(document.getLocation().length());
        }
        // getPercentile expects a value on the 0-100 scale: 25 and 75 are the quartiles,
        // whereas 0.25 and 0.75 would both sit next to the minimum.
        int lowerBound = (int) lengthStatistics.getPercentile(25.0d);
        int upperBound = (int) lengthStatistics.getPercentile(75.0d);

        ArrayList<VisualDocument> withinQuartiles = new ArrayList<>();
        for (VisualDocument document : list) {
            int length = document.getLocation().length();
            if (lowerBound <= length && length <= upperBound) {
                withinQuartiles.add(document);
            }
        }
        return withinQuartiles;
    }

    /**
     * Assigns a 1-based page id to every node of each qualified document and encodes the
     * nominal values tag name, id and class name of every tile node as numeric features
     * ({@code FF.TAG}, {@code FF.NID}, {@code FF.NCS}).
     *
     * <p>A nominal value is encoded as {@code 1 + ordinal}, where the ordinal comes from the
     * document-frequency ordinal map of the respective feature alias. Blank values and values
     * without an ordinal are left untouched.
     */
    private final void calculateGeneralCategoricalFeatures() {
        final FF[] nominalFeatures = {FF.TAG, FF.NID, FF.NCS};

        // Extracts the nominal values in the same order as nominalFeatures.
        final Function1<Element, String[]> nominalExtractor = new Function1<Element, String[]>() {
            @NotNull
            public final String[] invoke(@NotNull Element element) {
                Intrinsics.checkNotNullParameter(element, "it");
                String tagName = element.tagName();
                Intrinsics.checkNotNullExpressionValue(tagName, "it.tagName()");
                String id = element.id();
                Intrinsics.checkNotNullExpressionValue(id, "it.id()");
                String className = element.className();
                Intrinsics.checkNotNullExpressionValue(className, "it.className()");
                return new String[]{tagName, id, className};
            }
        };

        // One ordinal map per nominal feature, index-aligned with nominalFeatures.
        final ArrayList ordinalMaps = new ArrayList(nominalFeatures.length);
        for (FF feature : nominalFeatures) {
            ordinalMaps.add(getDocumentFrequency$scent_auto_mining().computeIfAbsent(feature.getAlias()).ordinalMap());
        }

        final Function1<Node, Boolean> isTile = new Function1<Node, Boolean>() {
            @NotNull
            public final Boolean invoke(@NotNull Node node) {
                Intrinsics.checkNotNullParameter(node, "it");
                return Boolean.valueOf(NodeCharactersKt.isTile(node));
            }
        };

        final Function1<Node, Unit> encodeNominals = new Function1<Node, Unit>() {
            public final void invoke(@NotNull Node node) {
                Intrinsics.checkNotNullParameter(node, "node");
                String[] values = nominalExtractor.invoke(NodeExtKt.getBestElement(node));
                List<Map<String, Integer>> maps = ordinalMaps;
                for (int slot = 0; slot < values.length; slot++) {
                    String value = values[slot];
                    if (StringsKt.isBlank(value)) {
                        continue;
                    }
                    int key = nominalFeatures[slot].getKey();
                    Integer ordinal = maps.get(slot).get(value);
                    if (ordinal != null) {
                        NodeExtKt.setFeature(NodeExtKt.getBestElement(node), key, 1 + ordinal.intValue());
                    }
                }
            }

            public /* bridge */ /* synthetic */ Object invoke(Object obj) {
                invoke((Node) obj);
                return Unit.INSTANCE;
            }
        };

        int position = 0;
        for (VisualDocument document : this.qualifiedDocuments) {
            final int documentIndex = position;
            position++;
            if (documentIndex < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            // Page ids are 1-based positions within the qualified documents.
            NodesKt.forEach(document.getBody(), true, new Function1<Node, Unit>() {
                public final void invoke(@NotNull Node node) {
                    Intrinsics.checkNotNullParameter(node, "it");
                    ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setPid(node, 1 + documentIndex);
                }

                public /* bridge */ /* synthetic */ Object invoke(Object obj) {
                    invoke((Node) obj);
                    return Unit.INSTANCE;
                }
            });
            NodesKt.forEachMatching(document.getBody(), isTile, encodeNominals);
        }
    }

    /**
     * Encodes the computed-style values of every tile node of each qualified document as
     * numeric features ({@code FF.FTSZ}, {@code FF.COLR}, {@code FF.BCOLR}).
     *
     * <p>The values are read from the {@code st} attribute of the node's best element, split on
     * {@code "; "}; the attribute is ignored unless it yields exactly one part per feature.
     * A value is encoded as {@code 1 + ordinal}, where the ordinal comes from the
     * document-frequency ordinal map of the respective feature alias; values without an
     * ordinal are left untouched.
     */
    private final void calculateComputedStylesFeatures() {
        final FF[] styleFeatures = {FF.FTSZ, FF.COLR, FF.BCOLR};

        // Yields the style parts, or an empty list when the part count does not match the features.
        final Function1<Element, List<? extends String>> styleExtractor = new Function1<Element, List<? extends String>>() {
            @NotNull
            public final List<String> invoke(@NotNull Element element) {
                Intrinsics.checkNotNullParameter(element, "it");
                String attr = NodeExtKt.getBestElement((Node) element).attr("st");
                Intrinsics.checkNotNullExpressionValue(attr, "it.bestElement.attr(PULS…_ATTR_ELEMENT_NODE_STYLE)");
                List<String> parts = new Regex("; ").split(attr, 0);
                if (parts.size() != styleFeatures.length) {
                    return CollectionsKt.emptyList();
                }
                return parts;
            }
        };

        // One ordinal map per style feature, index-aligned with styleFeatures.
        final ArrayList ordinalMaps = new ArrayList(styleFeatures.length);
        for (FF feature : styleFeatures) {
            ordinalMaps.add(getDocumentFrequency$scent_auto_mining().computeIfAbsent(feature.getAlias()).ordinalMap());
        }

        final Function1<Node, Boolean> isTile = new Function1<Node, Boolean>() {
            @NotNull
            public final Boolean invoke(@NotNull Node node) {
                Intrinsics.checkNotNullParameter(node, "it");
                return Boolean.valueOf(NodeCharactersKt.isTile(node));
            }
        };

        final Function1<Node, Unit> encodeStyles = new Function1<Node, Unit>() {
            public final void invoke(@NotNull Node node) {
                Intrinsics.checkNotNullParameter(node, "node");
                Iterable values = (Iterable) styleExtractor.invoke(NodeExtKt.getBestElement(node));
                List<Map<String, Integer>> maps = ordinalMaps;
                int next = 0;
                for (Object item : values) {
                    int slot = next;
                    next++;
                    if (slot < 0) {
                        CollectionsKt.throwIndexOverflow();
                    }
                    String value = (String) item;
                    int key = styleFeatures[slot].getKey();
                    Integer ordinal = maps.get(slot).get(value);
                    if (ordinal == null) {
                        continue;
                    }
                    NodeExtKt.setFeature(NodeExtKt.getBestElement(node), key, 1 + ordinal.intValue());
                }
            }

            public /* bridge */ /* synthetic */ Object invoke(Object obj) {
                invoke((Node) obj);
                return Unit.INSTANCE;
            }
        };

        int position = 0;
        for (VisualDocument document : this.qualifiedDocuments) {
            // The index itself is not used; only the overflow guard of the indexed iteration is kept.
            int documentIndex = position;
            position++;
            if (documentIndex < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            NodesKt.forEachMatching(document.getBody(), isTile, encodeStyles);
        }
    }

    /**
     * Assigns a district number to nodes in two passes.
     *
     * <p>Pass 1: for every regional tile in the node index, the nodes flagged as "maybe locally
     * constant" receive the tile's ordinal (0 when the tile has no ordinal) as district.
     * Pass 2: within every component of every qualified document, the ordered tiles are
     * walked once; each tile that is not "maybe locally constant" receives the district of the
     * closest preceding one that is (0 when there is none yet).
     */
    private final void divideDistricts() {
        Map tileOrdinals = this.regionalTiles.ordinalMap();
        NavigableMap tileIndex = this.regionalTileNodeIndexer.asMap();
        Intrinsics.checkNotNullExpressionValue(tileIndex, "regionalTileNodeIndexer.asMap()");

        for (Object item : tileIndex.entrySet()) {
            Map.Entry entry = (Map.Entry) item;
            RegionalTile tile = (RegionalTile) entry.getKey();
            Collection nodes = (Collection) entry.getValue();
            Integer ordinal = (Integer) tileOrdinals.get(tile);
            int district = ordinal != null ? ordinal.intValue() : 0;
            Intrinsics.checkNotNullExpressionValue(nodes, "nodes");

            // Select first, then assign, so every flag is read before any district is written.
            ArrayList<Node> constantNodes = new ArrayList<>();
            for (Object candidate : nodes) {
                Node node = (Node) candidate;
                Intrinsics.checkNotNullExpressionValue(node, "it");
                if (ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getMaybeLocallyConstant(node)) {
                    constantNodes.add(node);
                }
            }
            for (Node node : constantNodes) {
                Intrinsics.checkNotNullExpressionValue(node, "it");
                ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setDistrict(node, district);
            }
        }

        for (VisualDocument document : this.qualifiedDocuments) {
            Iterator components = document.getComponents().iterator();
            while (components.hasNext()) {
                VisualComponent component = (VisualComponent) components.next();
                // District carried forward from the last locally-constant tile of this component.
                int currentDistrict = 0;
                for (Node tile : component.getOrderedTiles()) {
                    if (!ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getMaybeLocallyConstant(tile)) {
                        ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setDistrict(tile, currentDistrict);
                    } else {
                        currentDistrict = ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getDistrict(tile);
                    }
                }
            }
        }
    }

    /**
     * Visits every entry of the term frequency of every sample document and hands it to the
     * synthetic lambda {@code m6calculateGeneralDocFrequency$lambda41$lambda40}.
     *
     * <p>NOTE(review): the lambda body is not visible here; judging by the method name it
     * presumably accumulates corpus-wide document frequencies — confirm against the Kotlin source.
     */
    private final void calculateGeneralDocFrequency() {
        Iterator<T> it = this.samples.values().iterator();
        while (it.hasNext()) {
            // NOTE(review): r1 is a decompiler placeholder for a value captured by the lambda
            // (presumably this corpus) — confirm.
            ((VisualDocument) it.next()).getTermFrequency().forEach((v1, v2) -> {
                m6calculateGeneralDocFrequency$lambda41$lambda40(r1, v1, v2);
            });
        }
    }

    /**
     * Visits every (key, value) entry of the regional tile node index of every sample document
     * and hands it to the synthetic lambda {@code m7buildRegionalTextNodeIndex$lambda42}.
     *
     * <p>NOTE(review): the lambda body is not visible here; presumably it merges the
     * per-document entries into the corpus-level index — confirm against the Kotlin source.
     */
    private final void buildRegionalTextNodeIndex() {
        Iterator<VisualDocument> it = this.samples.values().iterator();
        while (it.hasNext()) {
            // NOTE(review): r1 is a decompiler placeholder for a value captured by the lambda
            // (presumably this corpus) — confirm.
            it.next().getRegionalTileNodeIndexer().asMap().forEach((v1, v2) -> {
                m7buildRegionalTextNodeIndex$lambda42(r1, v1, v2);
            });
        }
    }

    /**
     * Adds sample documents to the qualified documents.
     *
     * <p>When the options say the samples are trusted, all samples are added. Otherwise only
     * the samples whose document contains at least one node with a non-empty list of ML labels
     * are added.
     */
    private final void addLabeledDocuments() {
        if (getOptions().getTrustSamples()) {
            this.qualifiedDocuments.addAll(this.samples.values());
            return;
        }

        // Matches any node that carries at least one ML label.
        Function1<Node, Boolean> hasLabels = new Function1<Node, Boolean>() {
            @NotNull
            public final Boolean invoke(@NotNull Node node) {
                Intrinsics.checkNotNullParameter(node, "it");
                return Boolean.valueOf(!NodeExtKt.getMlLabels(node).isEmpty());
            }
        };

        for (VisualDocument document : this.samples.values()) {
            boolean labeled = QueriesKt.any(document.getDocument(), hasLabels);
            if (labeled) {
                getQualifiedDocuments$scent_auto_mining().add(document);
            }
        }
    }

    private final void calculateTextNodeDocFrequency() {
        Object obj;
        if (this.regionalTileNodeIndexer.isEmpty()) {
            this.logger.warn("No regional text node");
            return;
        }
        if (!(this.regionalTileNodeIndexer.size() > this.qualifiedDocuments.size())) {
            throw new IllegalArgumentException(("RegionalTileNodeIndexer size " + getRegionalTileNodeIndexer$scent_auto_mining().size() + ", document size: " + getQualifiedDocuments$scent_auto_mining().size()).toString());
        }
        Frequency<Integer> frequency = new Frequency<>((String) null, 1, (DefaultConstructorMarker) null);
        double min = Math.min(5.0d, 0.8d * this.samples.size());
        this.regionalTileNodeIndexer.asMap().forEach((v2, v3) -> {
            m8calculateTextNodeDocFrequency$lambda47(r1, r2, v2, v3);
        });
        if (frequency.isEmpty()) {
            this.logger.warn("!!! No regional texts shared by " + this.samples.size() + " sample documents (df rate: " + 4605380978949069210 + ") !!!");
            if (this.logger.isInfoEnabled()) {
                Collection values = this.regionalTileNodeIndexer.asMap().values();
                Intrinsics.checkNotNullExpressionValue(values, "regionalTileNodeIndexer.asMap().values");
                Collection<Collection> collection = values;
                TreeSet treeSet = new TreeSet(ComparisonsKt.reverseOrder());
                for (Collection collection2 : collection) {
                    Intrinsics.checkNotNullExpressionValue(collection2, "it");
                    Collection<Node> collection3 = collection2;
                    HashSet hashSet = new HashSet();
                    for (Node node : collection3) {
                        Intrinsics.checkNotNullExpressionValue(node, "it");
                        hashSet.add(NodeExtKt.getLocation(NodeExtKt.getOwnerDocument(node)));
                    }
                    Integer valueOf = Integer.valueOf(hashSet.size());
                    Integer num = valueOf.intValue() > 1 ? valueOf : null;
                    if (num != null) {
                        treeSet.add(num);
                    }
                }
                this.logger.info("Document frequency of regional texts: " + CollectionsKt.joinToString$default(treeSet, (CharSequence) null, (CharSequence) null, (CharSequence) null, 0, (CharSequence) null, new Function1<Integer, CharSequence>() { // from class: ai.platon.scent.analysis.corpus.AnalysablePageCorpus$calculateTextNodeDocFrequency$s$1
                    @NotNull
                    public final CharSequence invoke(int i) {
                        return String.valueOf(i);
                    }

                    public /* bridge */ /* synthetic */ Object invoke(Object obj2) {
                        return invoke(((Number) obj2).intValue());
                    }
                }, 31, (Object) null));
                return;
            }
            return;
        }
        int intValue = ((Number) frequency.getMode()).intValue();
        if (getOptions().getTrustSamples()) {
            this.qualifiedDocuments.addAll(this.samples.values());
        } else {
            Collection values2 = this.regionalTileNodeIndexer.asMap().values();
            Intrinsics.checkNotNullExpressionValue(values2, "regionalTileNodeIndexer.asMap().values");
            Iterator it = values2.iterator();
            while (true) {
                if (!it.hasNext()) {
                    obj = null;
                    break;
                }
                Object next = it.next();
                if (((Collection) next).size() == intValue) {
                    obj = next;
                    break;
                }
            }
            Collection collection4 = (Collection) obj;
            if (collection4 != null) {
                Collection<Node> collection5 = collection4;
                Set<String> set = this.documentUrls;
                for (Node node2 : collection5) {
                    Intrinsics.checkNotNullExpressionValue(node2, "it");
                    set.add(NodeExtKt.getLocation(NodeExtKt.getOwnerDocument(node2)));
                }
            }
            Collection<VisualDocument> values3 = this.samples.values();
            List<VisualDocument> list = this.qualifiedDocuments;
            for (VisualDocument visualDocument : values3) {
                VisualDocument visualDocument2 = getDocumentUrls$scent_auto_mining().contains(NodeExtKt.getLocation(visualDocument.getDocument())) ? visualDocument : null;
                if (visualDocument2 != null) {
                    list.add(visualDocument2);
                }
            }
        }
        for (VisualDocument visualDocument3 : this.qualifiedDocuments) {
            ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setCorpusSize(visualDocument3.getDocument(), getQualifiedDocuments$scent_auto_mining().size());
            Node ownerBody = visualDocument3.getDocument().getExtension().getOwnerBody();
            if (ownerBody != null) {
                ownerBody.attr("a-corpus-time", this.startTime.toString());
            }
            Node ownerBody2 = visualDocument3.getDocument().getExtension().getOwnerBody();
            if (ownerBody2 != null) {
                ownerBody2.attr("a-corpus-size", String.valueOf(getQualifiedDocuments$scent_auto_mining().size()));
            }
        }
        Collection<Node> values4 = this.regionalTileNodeIndexer.values();
        Intrinsics.checkNotNullExpressionValue(values4, "regionalTileNodeIndexer.values()");
        for (Node node3 : values4) {
            Intrinsics.checkNotNullExpressionValue(node3, "it");
            ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setTextDocFrequency(node3, ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getTextDocFrequency(node3) / intValue);
        }
        reportDff(frequency);
    }

    /**
     * Logs a summary of the given frequency table and records its mode percentage as the
     * recoverable confidence of this corpus.
     *
     * <p>The mode percentage is stored in {@code recoverableConfidence} before anything is logged.
     * When no qualified documents exist a warning is logged instead of the "recoverable documents"
     * line; the textual frequency report is logged in both cases.
     *
     * @param frequency the frequency table to report on
     */
    private final void reportDff(Frequency<Integer> frequency) {
        Multiset.Entry modeEntry = frequency.getMostEntry();
        Integer modeValue = (Integer) modeEntry.getElement();
        int modeCount = modeEntry.getCount();
        double confidence = frequency.getModePercentage();
        this.recoverableConfidence = confidence;

        if (this.logger.isInfoEnabled()) {
            this.logger.info(String.format(
                    "Total %d documents share %d terms out of %d counted (%4.2f%%)",
                    modeValue, modeCount, frequency.getTotalFrequency(), 100 * confidence));
        }

        if (this.qualifiedDocuments.isEmpty()) {
            this.logger.warn("No recoverable documents after pre-processing !!");
        } else {
            this.logger.info(String.format(
                    "Find %d/%d recoverable documents with confidence %4.2f%%",
                    this.qualifiedDocuments.size(), this.samples.size(), 100 * confidence));
        }

        // The detailed report is emitted regardless of whether any document qualified.
        this.logger.info(Frequency.toReport$default(frequency, "Term frequency of df report, `dff = tf(df, <df>)`:\n", (String) null, 2, (Object) null));
    }

    /**
     * Merges the layout-left indexer of every qualified document into this corpus'
     * {@code layoutLeftIndexer}.
     */
    private final void calculateGeometricFeatures() {
        for (VisualDocument document : this.qualifiedDocuments) {
            this.layoutLeftIndexer.putAll(document.getLayoutLeftIndexer());
        }
    }

    /**
     * Derives text-based features from the regional tile nodes of every qualified document.
     *
     * <p>For each non-blank {@link TextNode}, runs of whitespace in its clean text are replaced by
     * {@code "_"}. If the node is locally constant, that word is added as a caption word to each of
     * its ancestors and the ancestor's constant-text-node counter is incremented. Otherwise the
     * node is put into the categorized text node indexer when its text document frequency equals
     * one of 1/5, 1/4, 1/3 or 1/2 (compared with {@code Precision.equals(a, b, 4)}).
     *
     * <p>Finally, all keys of the regional tile node indexer are added to {@code regionalTiles}.
     */
    private final void calculateTextualFeatures() {
        // Loop-invariant: compile the pattern and build the candidate table once instead of
        // once per text node.
        final Regex whitespaceRun = new Regex("\\s+");
        final double[] categoricalFrequencies = {0.2d, 0.25d, 0.3333333333333333d, 0.5d};

        for (VisualDocument doc : this.qualifiedDocuments) {
            Collection<Node> values = doc.getRegionalTileNodeIndexer().values();
            Intrinsics.checkNotNullExpressionValue(values, "doc.regionalTileNodeIndexer.values()");
            for (Node node : values) {
                if (!(node instanceof TextNode)) {
                    continue;
                }
                String cleanText = NodeExtKt.getCleanText(node);
                if (StringsKt.isBlank(cleanText)) {
                    continue;
                }
                final String word = whitespaceRun.replace(cleanText, "_");

                if (ai.platon.scent.dom.nodes.node.ext.NodeExtKt.isLocallyConstant(node)) {
                    NodesKt.forEachAncestor(node, (Element ancestor) -> {
                        Intrinsics.checkNotNullParameter(ancestor, "ancestor");
                        NodeExtKt.addCaptionWord((Node) ancestor, word);
                        ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setNumConstTextNodes((Node) ancestor, ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getNumConstTextNodes((Node) ancestor) + 1);
                        return Unit.INSTANCE;
                    });
                    continue;
                }

                double textDocFrequency = ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getTextDocFrequency(node);
                for (double candidate : categoricalFrequencies) {
                    if (Precision.equals(textDocFrequency, candidate, 4)) {
                        getCategorizedTextNodeIndexer$scent_auto_mining().put(word, node);
                        break;
                    }
                }
            }
        }

        NavigableSet keySet = this.regionalTileNodeIndexer.keySet();
        Intrinsics.checkNotNullExpressionValue(keySet, "regionalTileNodeIndexer.keySet()");
        this.regionalTiles.addAll(keySet);
    }

    /**
     * Indexes captioned elements of all qualified documents and assigns each of them a caption
     * document frequency.
     *
     * <p>Phase 1 walks every qualified document body. Non-element nodes are passed over, dense
     * text blocks are skipped together with their subtree, and every element that has a caption is
     * indexed under the hash code of {@code caption + ";" + name}.
     *
     * <p>Phase 2 sets, for every indexed node, its caption document frequency to the ratio of the
     * number of nodes sharing the key to the number of qualified documents. Nodes whose frequency
     * equals 1.0 are added to the constant captioned element indexer; whenever a node is a
     * pseudo-constant text block, the whole group is added to the pseudo-constant text block
     * indexer.
     *
     * <p>The ratio is computed in floating point. The previous integer division truncated it to
     * 0, 1, 2, ..., which also classified groups with between N and 2N-1 members (N = number of
     * qualified documents) as "constant".
     */
    private final void calculateCaptionDocFrequency() {
        final NodeFilter captionCollector = new NodeFilter() {
            @NotNull
            public NodeFilter.FilterResult head(@NotNull Node node, int i) {
                Intrinsics.checkNotNullParameter(node, "node");
                if (!(node instanceof Element)) {
                    return NodeFilter.FilterResult.CONTINUE;
                }
                HarvestOptions options = AnalysablePageCorpus.this.getOptions();
                if (NodeCharactersKt.isDenseTextBlock$default(node, options, 0, 2, (Object) null)) {
                    return NodeFilter.FilterResult.SKIP_ENTIRELY;
                }
                if (NodeExtKt.hasCaption(node)) {
                    AnalysablePageCorpus.this.getCaptionedElementIndexer$scent_auto_mining().put(Integer.valueOf((NodeExtKt.getCaption(node) + ";" + NodeExtKt.getName(node)).hashCode()), node);
                }
                return NodeFilter.FilterResult.CONTINUE;
            }
        };
        for (VisualDocument doc : this.qualifiedDocuments) {
            NodeTraversor.filter(captionCollector, doc.getBody());
        }

        NavigableMap asMap = this.captionedElementIndexer.asMap();
        Intrinsics.checkNotNullExpressionValue(asMap, "captionedElementIndexer.asMap()");
        // Held as a double so the division below is not truncated.
        final double corpusSize = getQualifiedDocuments$scent_auto_mining().size();
        for (Object item : asMap.entrySet()) {
            Map.Entry entry = (Map.Entry) item;
            Integer key = (Integer) entry.getKey();
            Collection<Node> nodes = (Collection) entry.getValue();
            Intrinsics.checkNotNullExpressionValue(nodes, "nodes");
            final double captionDocFrequency = nodes.size() / corpusSize;
            for (Node node : nodes) {
                Intrinsics.checkNotNullExpressionValue(node, "node");
                ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setCaptionDocFrequency(node, captionDocFrequency);
                if (Precision.equals(ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getCaptionDocFrequency(node), 1.0d)) {
                    getConstantCaptionedElementIndexer$scent_auto_mining().put(key, node);
                }
                if (NodeCharactersKt.isPseudoConstantTextBlock(node, getOptions())) {
                    getPseudoConstantTextBlockIndexer$scent_auto_mining().putAll(key, nodes);
                }
            }
        }
    }

    /**
     * Partitions every qualified document into visual components and collects the results.
     *
     * <p>Does nothing when there are no qualified documents. Otherwise each document is
     * partitioned, the supplement/layout/arrange passes are run in that order, and then the
     * components, natural components and component elements are gathered into the corpus-level
     * lists.
     */
    private final void partition() {
        if (this.qualifiedDocuments.isEmpty()) {
            return;
        }

        for (VisualDocument doc : this.qualifiedDocuments) {
            doc.partition();
        }

        supplementPartition();
        findLayoutComponents();
        arrangeComponents();

        for (VisualDocument doc : this.qualifiedDocuments) {
            this.components.addAll(doc.getComponents());
        }
        for (VisualDocument doc : this.qualifiedDocuments) {
            CollectionsKt.addAll(this.naturalComponents, doc.getNaturalComponents());
        }
        for (VisualComponent component : this.components) {
            this.componentElements.add(component.getElement());
        }
    }

    /**
     * Registers LAYOUT components for groups of elements that share a left alignment.
     *
     * <p>For every key of the layout-left indexer, the relative paths of the indexed elements are
     * counted. When the most frequent path occurs in at least 80% of the qualified documents
     * (count divided by the number of qualified documents), every element of the group is offered
     * to its owning visual document as a {@code LAYOUT} component named {@code lay(<key>)}. Newly
     * added components get the most frequent path as their unique path and are recorded in the
     * unique-path component index.
     *
     * <p>The ratio is computed in floating point; with integer division the 0.8 threshold could
     * only be met when the count reached the full number of qualified documents.
     */
    private final void findLayoutComponents() {
        NavigableMap asMap = this.layoutLeftIndexer.asMap();
        Intrinsics.checkNotNullExpressionValue(asMap, "layoutLeftIndexer.asMap()");
        for (Object item : asMap.entrySet()) {
            Map.Entry entry = (Map.Entry) item;
            Integer left = (Integer) entry.getKey();
            Collection<Element> elements = (Collection) entry.getValue();
            Intrinsics.checkNotNullExpressionValue(elements, "nodes");

            Frequency<String> pathFrequency = new Frequency<>((String) null, 1, (DefaultConstructorMarker) null);
            for (Element element : elements) {
                Intrinsics.checkNotNullExpressionValue(element, "it");
                pathFrequency.add(ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getRelativePath(element));
            }

            // Read the mode entry once and use it for both the path and its count.
            Multiset.Entry mostEntry = pathFrequency.getMostEntry();
            String relativePath = (String) mostEntry.getElement();
            double share = (double) mostEntry.getCount() / getQualifiedDocuments$scent_auto_mining().size();
            if (!(share >= 0.8d)) {
                continue;
            }

            String componentName = "lay(" + left + ")";
            for (Element element : elements) {
                Intrinsics.checkNotNullExpressionValue(element, "it");
                VisualDocument visualDocument = ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getVisualDocument(NodeExtKt.getOwnerDocument(element));
                if (visualDocument == null) {
                    continue;
                }
                Pair added = visualDocument.addComponentIfAbsent(element, VisualComponentType.LAYOUT, componentName);
                VisualComponent component = (VisualComponent) added.component1();
                boolean isNew = ((Boolean) added.component2()).booleanValue();
                // Only components created by this call receive the unique path.
                if (component != null && isNew) {
                    Intrinsics.checkNotNullExpressionValue(relativePath, "relativePath");
                    component.setUniquePath(relativePath);
                    getUniquePathComponents$scent_auto_mining().put(relativePath, component);
                }
            }
        }
    }

    /**
     * Currently a no-op: the body is empty, so calling this method has no effect.
     */
    private final void enhancePartition() {
    }

    /**
     * Adds SUPPLEMENT components for relative paths that resolve in most qualified documents.
     *
     * <p>The relative paths of all components already found in the qualified documents are
     * collected. Each path is then resolved with {@code selectFirst} against every qualified
     * document. When a path resolves in at least 80% of the qualified documents, every matched
     * element is offered to its owning visual document as a {@code SUPPLEMENT} component named
     * {@code sup(<alignedLeft>)/<vcTiles>}; the returned component (new or pre-existing) gets the
     * path as its unique path and is recorded in the unique-path component index.
     *
     * <p>The ratio is computed in floating point. With integer division the result was 0 unless
     * the path resolved in every document, so the 0.8 threshold behaved like 1.0.
     */
    private final void supplementPartition() {
        // Phase 1: gather the distinct relative paths of the existing components.
        Set<String> relativePaths = new HashSet<>();
        for (VisualDocument doc : this.qualifiedDocuments) {
            for (VisualComponent component : doc.getComponents()) {
                relativePaths.add(component.getRelativePath());
            }
        }

        // Phase 2: resolve every path in every document before any component is added.
        Map<String, List<Element>> matchesByPath = new LinkedHashMap<>();
        for (String path : relativePaths) {
            List<Element> matches = new ArrayList<>();
            for (VisualDocument doc : getQualifiedDocuments$scent_auto_mining()) {
                Element first = doc.getDocument().selectFirst(path);
                if (first != null) {
                    matches.add(first);
                }
            }
            matchesByPath.put(path, matches);
        }

        // Phase 3: register supplement components for sufficiently common paths.
        for (Map.Entry<String, List<Element>> entry : matchesByPath.entrySet()) {
            String path = entry.getKey();
            List<Element> matches = entry.getValue();
            double share = (double) matches.size() / getQualifiedDocuments$scent_auto_mining().size();
            if (!(share >= 0.8d)) {
                continue;
            }
            for (Element element : matches) {
                Intrinsics.checkNotNullExpressionValue(element, "it");
                String componentName = "sup(" + ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getAlignedLeft(element) + ")/" + ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getVcTiles(element);
                VisualDocument visualDocument = ai.platon.scent.dom.nodes.node.ext.NodeExtKt.getVisualDocument(NodeExtKt.getOwnerDocument(element));
                if (visualDocument == null) {
                    continue;
                }
                Pair added = visualDocument.addComponentIfAbsent(element, VisualComponentType.SUPPLEMENT, componentName);
                VisualComponent component = (VisualComponent) added.component1();
                // The unique path is set whether or not the component was newly created.
                if (component != null) {
                    component.setUniquePath(path);
                    getUniquePathComponents$scent_auto_mining().put(path, component);
                }
            }
        }
    }

    /**
     * Builds the component tree of every qualified document and then arranges its components.
     */
    private final void arrangeComponents() {
        Iterator<VisualDocument> documents = this.qualifiedDocuments.iterator();
        while (documents.hasNext()) {
            VisualDocument document = documents.next();
            document.buildComponentTree();
            document.arrangeComponents();
        }
    }

    /**
     * Logs an overview of the components recognized in the qualified documents.
     *
     * <p>Three things are reported: the per-document component counts in descending order (to both
     * loggers), a formatted table of the components of all verbose documents (to the task logger),
     * and the distinct names of the natural components, or a note that there are none.
     */
    private final void reportComponents() {
        List<Integer> componentCounts = new ArrayList<>(this.qualifiedDocuments.size());
        for (VisualDocument doc : this.qualifiedDocuments) {
            componentCounts.add(Integer.valueOf(doc.getComponents().size()));
        }
        String countsText = CollectionsKt.joinToString$default(CollectionsKt.sortedDescending(componentCounts), (CharSequence) null, (CharSequence) null, (CharSequence) null, 0, (CharSequence) null, (Function1) null, 63, (Object) null);
        this.logger.info("Recognized components in each document: " + countsText);
        this.taskLogger.info("Recognized components in each document: " + countsText);

        // Only documents flagged as verbose contribute to the sample table.
        List<VisualComponent> sampleComponents = new ArrayList<>();
        for (VisualDocument doc : this.qualifiedDocuments) {
            if (doc.getVerbose()) {
                sampleComponents.addAll(doc.getComponents());
            }
        }
        ResultSet resultSet = VisualComponent.Companion.toResultSet(sampleComponents, getOptions());
        StringBuilder table = new StringBuilder("Show " + sampleComponents.size() + " sample components:\n");
        new ResultSetFormatter(resultSet, false, true, false, 0, table, 26, (DefaultConstructorMarker) null).format();
        this.taskLogger.info(table.toString());

        if (this.naturalComponents.isEmpty()) {
            this.logger.info("No natural components");
            return;
        }

        HashSet<Object> naturalNames = new HashSet<>();
        for (VisualComponent component : this.naturalComponents) {
            naturalNames.add(component.getName());
        }
        this.logger.info("Total {} natural components: {}", Integer.valueOf(this.naturalComponents.size()), naturalNames);
    }

    /**
     * Runs the per-document variable analysis on every qualified document.
     */
    private final void analysisVariables() {
        for (VisualDocument document : this.qualifiedDocuments) {
            analysisVariables(document);
        }
    }

    /**
     * Finds the variables of every component of the given document and, for verbose documents
     * with debug logging enabled, logs them as {@code <name : value, value, ...>} groups.
     *
     * @param visualDocument the document whose components are analysed
     */
    private final void analysisVariables(VisualDocument visualDocument) {
        for (VisualComponent component : visualDocument.getComponents()) {
            component.findVariables();

            // Everything below is diagnostics only.
            if (!visualDocument.getVerbose() || !this.taskLogger.isDebugEnabled()) {
                continue;
            }

            NavigableMap variables = component.getVariables().asMap();
            Intrinsics.checkNotNullExpressionValue(variables, "component.variables.asMap()");
            ArrayList descriptions = new ArrayList(variables.size());
            for (Object item : variables.entrySet()) {
                Map.Entry entry = (Map.Entry) item;
                Node nameNode = (Node) entry.getKey();
                Collection valueNodes = (Collection) entry.getValue();
                String name = NodeExtKt.getCleanText(nameNode);
                Intrinsics.checkNotNullExpressionValue(valueNodes, "valueNodes");
                String values = CollectionsKt.joinToString$default(valueNodes, (CharSequence) null, (CharSequence) null, (CharSequence) null, 0, (CharSequence) null, new Function1<Node, CharSequence>() { // from class: ai.platon.scent.analysis.corpus.AnalysablePageCorpus$analysisVariables$2$1$1
                    @NotNull
                    public final CharSequence invoke(Node node2) {
                        return NodeExtKt.getCleanText(node2);
                    }
                }, 31, (Object) null);
                descriptions.add("<" + name + " : " + values + ">");
            }

            String message = CollectionsKt.joinToString$default(descriptions, (CharSequence) null, (CharSequence) null, (CharSequence) null, 0, (CharSequence) null, (Function1) null, 63, (Object) null);
            if (!StringsKt.isBlank(message)) {
                this.taskLogger.debug(message);
            }
        }
    }

    /**
     * Annotates the nodes of the featured document of every verbose qualified document.
     */
    private final void annotateNodes() {
        for (VisualDocument document : this.qualifiedDocuments) {
            if (document.getVerbose()) {
                FullFeaturedDocumentKt.annotateNodes(document.getFeaturedDocument(), getOptions());
            }
        }
    }

    /**
     * Simplifies the annotations of the featured document of every verbose qualified document.
     */
    private final void simplifyAnnotations() {
        for (VisualDocument document : this.qualifiedDocuments) {
            if (document.getVerbose()) {
                FullFeaturedDocumentKt.simplifyAnnotations(document.getFeaturedDocument(), getOptions());
            }
        }
    }

    /**
     * Removes redundant captions from ancestors of captioned elements.
     *
     * <p>All values of the captioned element indexer are visited in order of descending depth.
     * For a node that equals its own owner body, the {@code "a-caption"} variable is removed.
     * For a node without an {@code "a-caption-ref"} variable, every ancestor that does not yet
     * have {@code "a-caption-ref"} and carries the same caption as the node has its caption
     * cleared and gets {@code "a-caption-ref"} set to the node's sequence.
     */
    private final void simplifyCaptions() {
        Collection values = this.captionedElementIndexer.values();
        Intrinsics.checkNotNullExpressionValue(values, "captionedElementIndexer.values()");
        // The comparator compares the depth of t2 against the depth of t, i.e. deepest first.
        for (final Node node : CollectionsKt.sortedWith(values, new Comparator() { // from class: ai.platon.scent.analysis.corpus.AnalysablePageCorpus$simplifyCaptions$$inlined$sortedByDescending$1
            @Override // java.util.Comparator
            public final int compare(T t, T t2) {
                Node node2 = (Element) t2;
                Intrinsics.checkNotNullExpressionValue(node2, "it");
                Integer valueOf = Integer.valueOf(NodeExtKt.getDepth(node2));
                Node node3 = (Element) t;
                Intrinsics.checkNotNullExpressionValue(node3, "it");
                return ComparisonsKt.compareValues(valueOf, Integer.valueOf(NodeExtKt.getDepth(node3)));
            }
        })) {
            // The owner body itself does not keep an "a-caption" variable.
            if (Intrinsics.areEqual(node, node.getExtension().getOwnerBody())) {
                Intrinsics.checkNotNullExpressionValue(node, "node");
                NodeExtKt.removeVariable(node, "a-caption");
            }
            Intrinsics.checkNotNullExpressionValue(node, "node");
            // Nodes already marked with "a-caption-ref" are not propagated to their ancestors.
            if (!NodeExtKt.hasVariable(node, "a-caption-ref")) {
                NodesKt.forEachAncestor(node, new Function1<Element, Unit>() { // from class: ai.platon.scent.analysis.corpus.AnalysablePageCorpus$simplifyCaptions$2$1
                    /* JADX INFO: Access modifiers changed from: package-private */
                    /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
                    {
                        super(1);
                    }

                    public final void invoke(@NotNull Element element) {
                        Intrinsics.checkNotNullParameter(element, "it");
                        // Ancestors that already reference a caption are left untouched.
                        if (NodeExtKt.hasVariable((Node) element, "a-caption-ref")) {
                            return;
                        }
                        Node node2 = node;
                        Intrinsics.checkNotNullExpressionValue(node2, "node");
                        // Same caption as the descendant: clear it here and store the
                        // descendant's sequence under "a-caption-ref" instead.
                        if (Intrinsics.areEqual(NodeExtKt.getCaption(node2), NodeExtKt.getCaption((Node) element))) {
                            NodeExtKt.clearCaption((Node) element);
                            Node node3 = node;
                            Intrinsics.checkNotNullExpressionValue(node3, "node");
                            NodeExtKt.setVariable((Node) element, "a-caption-ref", Integer.valueOf(NodeExtKt.getSequence(node3)));
                        }
                    }

                    public /* bridge */ /* synthetic */ Object invoke(Object obj) {
                        invoke((Element) obj);
                        return Unit.INSTANCE;
                    }
                });
            }
        }
    }

    /**
     * Runs sanity checks on the qualified documents.
     *
     * <p>First, a warning is logged for every document whose recorded thread-id set has more than
     * one entry. Then {@code validateFeatures()} is invoked on every verbose document. Any
     * {@link Exception} raised along the way is logged at error level and not propagated.
     */
    private final void validateFeatures() {
        try {
            for (VisualDocument document : this.qualifiedDocuments) {
                ConcurrentSkipListSet threadIds = NodeExtKt.getThreadIds(document.getDocument());
                if (threadIds.size() > 1) {
                    this.logger.warn("Only one thread is allowed to access a document, actual {}, {}", Integer.valueOf(threadIds.size()), threadIds);
                }
            }

            for (VisualDocument document : this.qualifiedDocuments) {
                if (document.getVerbose()) {
                    document.validateFeatures();
                }
            }
        } catch (Exception e) {
            this.logger.error(ExceptionsKt.stringify$default(e, (String) null, (String) null, 3, (Object) null));
        }
    }

    /* renamed from: combineTables$lambda-18$lambda-17$lambda-15, reason: not valid java name */
    /**
     * Creates an empty list with the given initial capacity.
     *
     * @param i   initial capacity of the new list
     * @param str must not be null; otherwise unused
     * @return a new, empty {@link ArrayList}
     */
    private static final ArrayList m5combineTables$lambda18$lambda17$lambda15(int i, String str) {
        Intrinsics.checkNotNullParameter(str, "it");
        ArrayList emptyColumn = new ArrayList(i);
        return emptyColumn;
    }

    /* renamed from: calculateGeneralDocFrequency$lambda-41$lambda-40, reason: not valid java name */
    /**
     * Adds each distinct element of {@code frequency} once to the corpus-level document frequency
     * table registered under {@code str}, creating that table if absent.
     *
     * @param analysablePageCorpus the corpus whose document frequency table is updated
     * @param str                  the name under which the table is looked up
     * @param frequency            the frequency table whose element set is added
     */
    private static final void m6calculateGeneralDocFrequency$lambda41$lambda40(AnalysablePageCorpus analysablePageCorpus, String str, Frequency frequency) {
        Intrinsics.checkNotNullParameter(analysablePageCorpus, "this$0");
        Intrinsics.checkNotNullParameter(str, "name");
        Intrinsics.checkNotNullParameter(frequency, "tf");
        Frequency documentFrequency = analysablePageCorpus.getDocumentFrequency$scent_auto_mining().computeIfAbsent(str);
        for (Object term : frequency.elementSet()) {
            documentFrequency.add((String) term);
        }
    }

    /* renamed from: buildRegionalTextNodeIndex$lambda-42, reason: not valid java name */
    /**
     * Copies one tile's nodes into the corpus-level regional tile node indexer.
     *
     * @param analysablePageCorpus the corpus whose indexer receives the nodes
     * @param regionalTile         the tile used as the key
     * @param collection           the nodes indexed under the tile
     * @throws IllegalArgumentException if the tile's {@code tf} differs from the number of nodes
     */
    private static final void m7buildRegionalTextNodeIndex$lambda42(AnalysablePageCorpus analysablePageCorpus, RegionalTile regionalTile, Collection collection) {
        Intrinsics.checkNotNullParameter(analysablePageCorpus, "this$0");
        boolean tfMatchesNodeCount = regionalTile.getTf() == ((double) collection.size());
        if (tfMatchesNodeCount) {
            analysablePageCorpus.getRegionalTileNodeIndexer$scent_auto_mining().putAll(regionalTile, collection);
            return;
        }
        throw new IllegalArgumentException("Failed requirement.");
    }

    /* renamed from: calculateTextNodeDocFrequency$lambda-47, reason: not valid java name */
    /**
     * Computes the document frequency of one regional tile and propagates it to its nodes.
     *
     * <p>The document frequency is the number of distinct owner-document locations among the
     * given nodes. It is stored on the tile, written to every node as its text document frequency,
     * and added to {@code frequency} when it is at least {@code d}.
     *
     * @param d            minimum document frequency for the value to be counted in {@code frequency}
     * @param frequency    frequency table that receives qualifying document frequencies
     * @param regionalTile the tile whose {@code df} is set
     * @param collection   the nodes indexed under the tile
     */
    private static final void m8calculateTextNodeDocFrequency$lambda47(double d, Frequency frequency, RegionalTile regionalTile, Collection collection) {
        Intrinsics.checkNotNullParameter(frequency, "$dff");
        Intrinsics.checkNotNullExpressionValue(collection, "nodes");

        HashSet<Object> ownerLocations = new HashSet<>();
        for (Object item : collection) {
            Node node = (Node) item;
            Intrinsics.checkNotNullExpressionValue(node, "it");
            ownerLocations.add(NodeExtKt.getLocation(NodeExtKt.getOwnerDocument(node)));
        }

        final int docFrequency = ownerLocations.size();
        regionalTile.setDf(docFrequency);

        for (Object item : collection) {
            Node node = (Node) item;
            Intrinsics.checkNotNullExpressionValue(node, "it");
            ai.platon.scent.dom.nodes.node.ext.NodeExtKt.setTextDocFrequency(node, docFrequency);
        }

        if (docFrequency >= d) {
            frequency.add(Integer.valueOf(docFrequency));
        }
    }
}
