package ai.platon.scent.analysis.corpus;

import ai.platon.pulsar.common.LangKt;
import ai.platon.pulsar.common.LogsKt;
import ai.platon.pulsar.common.OpenMapTable;
import ai.platon.pulsar.common.ScoreVector;
import ai.platon.pulsar.dom.select.QueriesKt;
import ai.platon.scent.analysis.MLUtilsKt;
import ai.platon.scent.common.options.CellType;
import ai.platon.scent.dom.HNormUrl;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.dom.nodes.HyperPath;
import ai.platon.scent.dom.nodes.HyperPathBuilder;
import ai.platon.scent.dom.nodes.node.ext.NodeExtKt;
import ai.platon.scent.entities.ClusterTaskStatus;
import ai.platon.scent.entities.DataTypeStatistics;
import ai.platon.scent.entities.NodeClusterGroupMetrics;
import ai.platon.scent.entities.PageTableKt;
import ai.platon.scent.entities.RowData;
import ai.platon.scent.entities.TableData;
import ai.platon.scent.ml.NodePoint;
import ai.platon.scent.ml.unsupervised.TileCluster;
import ai.platon.scent.ml.unsupervised.TileClusterGroup;
import com.google.common.base.Strings;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import kotlin.Lazy;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.Unit;
import kotlin.collections.ArraysKt;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.comparisons.ComparisonsKt;
import kotlin.jvm.functions.Function0;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Ref;
import kotlin.jvm.internal.Reflection;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.jvm.internal.StringCompanionObject;
import kotlin.ranges.RangesKt;
import kotlin.sequences.SequencesKt;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.slf4j.Logger;

/* compiled from: PageCorpusTabulator.kt */
@Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��\u0084\u0001\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\b\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u000e\n\u0002\b\u0006\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010\u0006\n\u0002\b\u0004\n\u0002\u0010\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\t\n\u0002\u0010\u000b\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\t\n\u0002\u0018\u0002\n\u0002\b\u0004\b��\u0018��2\u00020\u0001B'\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u0012\u0006\u0010\u0006\u001a\u00020\u0007\u0012\b\b\u0002\u0010\b\u001a\u00020\t¢\u0006\u0002\u0010\nJ\u0010\u0010(\u001a\u00020)2\u0006\u0010*\u001a\u00020+H\u0002J\u0010\u0010,\u001a\u00020)2\u0006\u0010*\u001a\u00020+H\u0002J\u0010\u0010-\u001a\u00020\f2\u0006\u0010.\u001a\u00020\fH\u0002J\u0010\u0010/\u001a\u00020)2\u0006\u0010*\u001a\u00020+H\u0002J\u0010\u00100\u001a\u00020)2\u0006\u0010*\u001a\u00020+H\u0002J,\u00101\u001a\u00020\f2\u0006\u0010.\u001a\u00020\f2\u0006\u00102\u001a\u00020\f2\b\b\u0002\u00103\u001a\u00020\u00072\b\b\u0002\u00104\u001a\u000205H\u0002J\u000e\u00106\u001a\b\u0012\u0004\u0012\u00020\u001e0\u001dH\u0002J 
\u00107\u001a\u00020\f2\u0006\u0010*\u001a\u00020+2\u0006\u00108\u001a\u00020\f2\u0006\u00109\u001a\u00020:H\u0002J\u0010\u0010;\u001a\u00020)2\u0006\u0010*\u001a\u00020+H\u0002J\u0010\u0010<\u001a\u00020)2\u0006\u0010*\u001a\u00020+H\u0002J\u001a\u0010=\u001a\u00020\f2\b\u0010>\u001a\u0004\u0018\u00010\u00012\u0006\u0010?\u001a\u00020@H\u0002J\u001a\u0010A\u001a\u00020\f2\b\u0010>\u001a\u0004\u0018\u00010\u00012\u0006\u0010?\u001a\u00020@H\u0002J\u001a\u0010B\u001a\u00020\f2\b\u0010>\u001a\u0004\u0018\u00010\u00012\u0006\u0010?\u001a\u00020@H\u0002J\u0010\u0010C\u001a\u00020)2\u0006\u0010*\u001a\u00020+H\u0002J\u001c\u0010D\u001a\u00020\f2\b\u0010?\u001a\u0004\u0018\u00010@2\b\b\u0002\u0010E\u001a\u00020\fH\u0002J\u0006\u0010F\u001a\u00020+J\u0018\u0010G\u001a\u00020)2\u0006\u0010H\u001a\u00020\u001e2\u0006\u0010I\u001a\u00020JH\u0002J\u0010\u0010K\u001a\u00020)2\u0006\u0010I\u001a\u00020JH\u0002J\u0010\u0010L\u001a\u00020)2\u0006\u0010I\u001a\u00020JH\u0002J\b\u0010M\u001a\u00020)H\u0002R\u000e\u0010\b\u001a\u00020\tX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0004\u001a\u00020\u0005X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u000b\u001a\u00020\f8BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b\r\u0010\u000eR\u000e\u0010\u0002\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u000f\u001a\u00020\u00078BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b\u0010\u0010\u0011R\u000e\u0010\u0006\u001a\u00020\u0007X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0012\u001a\u00020\u0013X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u0014\u001a\u00020\u00158BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b\u0016\u0010\u0017R\u0014\u0010\u0018\u001a\u00020\u00198BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b\u001a\u0010\u001bR!\u0010\u001c\u001a\b\u0012\u0004\u0012\u00020\u001e0\u001d8BX\u0082\u0084\u0002¢\u0006\f\n\u0004\b!\u0010\"\u001a\u0004\b\u001f\u0010 R\u000e\u0010#\u001a\u00020$X\u0082D¢\u0006\u0002\n��R\u0014\u0010%\u001a\u00020$8BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b&\u0010'¨\u0006N"}, 
d2 = {"Lai/platon/scent/analysis/corpus/PageCorpusTabulator;", "", "corpus", "Lai/platon/scent/analysis/corpus/AnalysablePageCorpus;", "clusterGroup", "Lai/platon/scent/ml/unsupervised/TileClusterGroup;", "groupIndex", "", "cellType", "Lai/platon/scent/common/options/CellType;", "(Lai/platon/scent/analysis/corpus/AnalysablePageCorpus;Lai/platon/scent/ml/unsupervised/TileClusterGroup;ILai/platon/scent/common/options/CellType;)V", "command", "", "getCommand", "()Ljava/lang/String;", "documentSize", "getDocumentSize", "()I", "logger", "Lorg/slf4j/Logger;", "options", "Lai/platon/scent/dom/HarvestOptions;", "getOptions", "()Lai/platon/scent/dom/HarvestOptions;", "portalUrl", "Lai/platon/scent/dom/HNormUrl;", "getPortalUrl", "()Lai/platon/scent/dom/HNormUrl;", "qualifiedClusters", "", "Lai/platon/scent/ml/unsupervised/TileCluster;", "getQualifiedClusters", "()Ljava/util/List;", "qualifiedClusters$delegate", "Lkotlin/Lazy;", "qualifiedRate", "", "qualifiedSize", "getQualifiedSize", "()D", "buildDatabaseRows", "", "table", "Lai/platon/pulsar/common/OpenMapTable;", "buildHtmlRows", "buildLinkCell", "location", "buildNodeRows", "buildPlainTextRows", "buildTitleCell", "title", "displayLength", "textOnly", "", "computeQualifiedClusters", "createXSQL", "url", "restrictPath", "Lai/platon/scent/dom/nodes/HyperPath;", "fillRowCells", "fillSpecialRowCells", "getRichText", "lastValue", "node", "Lorg/jsoup/nodes/Node;", "getSlimHtml", "getText", "initRows", "sniffTitle", "titleSuffix", "tabulate", "updateColumnAttributes", "cluster", "column", "Lai/platon/pulsar/common/OpenMapTable$Column;", "updateTitleColumnAttributes", "updateURLColumnAttributes", "validateClusterOrder", "scent-auto-mining"})
@SourceDebugExtension({"SMAP\nPageCorpusTabulator.kt\nKotlin\n*S Kotlin\n*F\n+ 1 PageCorpusTabulator.kt\nai/platon/scent/analysis/corpus/PageCorpusTabulator\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 _Arrays.kt\nkotlin/collections/ArraysKt___ArraysKt\n+ 4 fake.kt\nkotlin/jvm/internal/FakeKt\n+ 5 _Maps.kt\nkotlin/collections/MapsKt___MapsKt\n*L\n1#1,469:1\n1655#2,8:470\n1549#2:478\n1620#2,3:479\n288#2,2:482\n1360#2:484\n1446#2,5:485\n1864#2,3:490\n1855#2,2:496\n1620#2,3:499\n1620#2,3:502\n2333#2,14:505\n1194#2,2:519\n1222#2,4:521\n1864#2,2:525\n1855#2,2:528\n1866#2:531\n1864#2,2:532\n1855#2,2:534\n1866#2:536\n1864#2,2:537\n1855#2,2:539\n1866#2:541\n1864#2,2:542\n1855#2,2:544\n1866#2:546\n1864#2,2:547\n1855#2,2:549\n1866#2:551\n766#2:552\n857#2,2:553\n1045#2:555\n3792#3:493\n4307#3,2:494\n1#4:498\n215#5:527\n216#5:530\n*S KotlinDebug\n*F\n+ 1 PageCorpusTabulator.kt\nai/platon/scent/analysis/corpus/PageCorpusTabulator\n*L\n76#1:470,8\n77#1:478\n77#1:479,3\n79#1:482,2\n90#1:484\n90#1:485,5\n125#1:490,3\n164#1:496,2\n221#1:499,3\n222#1:502,3\n246#1:505,14\n271#1:519,2\n271#1:521,4\n274#1:525,2\n292#1:528,2\n274#1:531\n326#1:532,2\n327#1:534,2\n326#1:536\n352#1:537,2\n354#1:539,2\n352#1:541\n364#1:542,2\n366#1:544,2\n364#1:546\n376#1:547,2\n378#1:549,2\n376#1:551\n464#1:552\n464#1:553,2\n466#1:555\n147#1:493\n147#1:494,2\n275#1:527\n275#1:530\n*E\n"})
/* loaded from: input_file:ai/platon/scent/analysis/corpus/PageCorpusTabulator.class */
public final class PageCorpusTabulator {

    /** Corpus that supplies the portal URL, the qualified documents, the component elements and the document URLs. */
    @NotNull
    private final AnalysablePageCorpus corpus;

    /** Cluster group being tabulated; its clusters, score and metrics are copied into the table data by tabulate(). */
    @NotNull
    private final TileClusterGroup clusterGroup;
    /** Passed as the second constructor argument of the OpenMapTable created by tabulate(). */
    private final int groupIndex;

    /** Selects the cell rendering branch in fillRowCells() and fillSpecialRowCells(). */
    @NotNull
    private final CellType cellType;

    @NotNull
    private final Logger logger;
    /** Factor applied to the document count in getQualifiedSize(); the constructor sets it to 0.5. */
    private final double qualifiedRate;

    /** Lazy holder wired in the constructor; its value is produced by computeQualifiedClusters(). */
    @NotNull
    private final Lazy qualifiedClusters$delegate;

    /* compiled from: PageCorpusTabulator.kt */
    /*
     * Compiler-generated lookup table used by the switch statements on CellType in this class.
     * It maps CellType.ordinal() to a small case number: DATABASE -> 1, PLAIN_TEXT -> 2, SLIM_HTML -> 3.
     * Any other constant keeps the array default of 0 and therefore falls into the `default` branch.
     */
    @Metadata(mv = {1, 9, 0}, k = 3, xi = 48)
    /* loaded from: input_file:ai/platon/scent/analysis/corpus/PageCorpusTabulator$WhenMappings.class */
    public /* synthetic */ class WhenMappings {
        public static final /* synthetic */ int[] $EnumSwitchMapping$0;

        static {
            int[] iArr = new int[CellType.values().length];
            // Each assignment is guarded separately so that a constant missing at link time
            // (NoSuchFieldError) only leaves its own slot at 0 instead of aborting class initialization.
            try {
                iArr[CellType.DATABASE.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                iArr[CellType.PLAIN_TEXT.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
            try {
                iArr[CellType.SLIM_HTML.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
            }
            $EnumSwitchMapping$0 = iArr;
        }
    }

    /**
     * Creates a tabulator for one cluster group of a page corpus.
     *
     * @param analysablePageCorpus the corpus the table is built from; must not be null
     * @param tileClusterGroup the cluster group to tabulate; must not be null
     * @param i the group index handed to the created table
     * @param cellType the cell rendering mode; must not be null
     */
    public PageCorpusTabulator(@NotNull AnalysablePageCorpus analysablePageCorpus, @NotNull TileClusterGroup tileClusterGroup, int i, @NotNull CellType cellType) {
        // Null checks run first and in parameter order, before any state is assigned.
        Intrinsics.checkNotNullParameter(analysablePageCorpus, "corpus");
        Intrinsics.checkNotNullParameter(tileClusterGroup, "clusterGroup");
        Intrinsics.checkNotNullParameter(cellType, "cellType");

        this.corpus = analysablePageCorpus;
        this.clusterGroup = tileClusterGroup;
        this.groupIndex = i;
        this.cellType = cellType;
        this.qualifiedRate = 0.5d;
        this.logger = LogsKt.getLogger(Reflection.getOrCreateKotlinClass(PageCorpusTabulator.class));

        // The qualified clusters are computed on first access through the lazy holder.
        this.qualifiedClusters$delegate = LangKt.usfLazy(() -> computeQualifiedClusters());
    }

    /**
     * Synthetic constructor for Kotlin default arguments: when bit 8 of {@code i2} is set,
     * the cell type argument is ignored and {@link CellType#SLIM_HTML} is used instead.
     */
    public /* synthetic */ PageCorpusTabulator(AnalysablePageCorpus analysablePageCorpus, TileClusterGroup tileClusterGroup, int i, CellType cellType, int i2, DefaultConstructorMarker defaultConstructorMarker) {
        this(analysablePageCorpus, tileClusterGroup, i, (i2 & 8) == 0 ? cellType : CellType.SLIM_HTML);
    }

    /** Returns the portal URL of the corpus. */
    private final HNormUrl getPortalUrl() {
        final HNormUrl portal = this.corpus.getPortalUrl();
        return portal;
    }

    /**
     * Returns the options attached to the portal URL.
     * Fails through {@code Intrinsics.checkNotNull} when the portal URL carries no options.
     */
    private final HarvestOptions getOptions() {
        final HNormUrl portal = getPortalUrl();
        final HarvestOptions harvestOptions = portal.getOptions();
        Intrinsics.checkNotNull(harvestOptions, "null cannot be cast to non-null type ai.platon.scent.dom.HarvestOptions");
        return harvestOptions;
    }

    /** Returns the portal URL and the option arguments joined by a single space. */
    private final String getCommand() {
        StringBuilder command = new StringBuilder();
        command.append(getPortalUrl());
        command.append(" ");
        command.append(getOptions().getArgs());
        return command.toString();
    }

    /** Returns the number of qualified documents in the corpus. */
    private final int getDocumentSize() {
        final int qualifiedDocumentCount = this.corpus.getQualifiedDocuments$scent_auto_mining().size();
        return qualifiedDocumentCount;
    }

    /** Returns {@code qualifiedRate * documentSize}, but never less than 2.0. */
    private final double getQualifiedSize() {
        final double scaledDocumentCount = this.qualifiedRate * getDocumentSize();
        return Math.max(2.0d, scaledDocumentCount);
    }

    /** Returns the value of the lazy qualified-cluster holder, computing it on first access. */
    @SuppressWarnings("unchecked") // the holder is raw; the constructor wires it to computeQualifiedClusters()
    private final List<TileCluster> getQualifiedClusters() {
        final Object lazyValue = this.qualifiedClusters$delegate.getValue();
        return (List<TileCluster>) lazyValue;
    }

    /**
     * Builds the table for this cluster group.
     *
     * <p>The table has one title column, one column per qualified cluster and one URL column, in that
     * order. An empty table is returned when the corpus has no qualified documents or the cluster group
     * is empty or constant; a table without rows is returned when there are no qualified clusters.
     *
     * @return the populated table
     * @throws IllegalArgumentException if the two components used to detect the common title suffix
     *         report the same pid
     */
    @NotNull
    @SuppressWarnings({"rawtypes", "unchecked"}) // recognizedComponents stays raw: the element types of the selector pairs are not visible here
    public final OpenMapTable tabulate() {
        if (this.corpus.getQualifiedDocuments$scent_auto_mining().isEmpty() || this.clusterGroup.isEmpty() || this.clusterGroup.isConstant()) {
            return new OpenMapTable(0, 0, 2, (DefaultConstructorMarker) null);
        }
        // Layout: [title] [cluster 0 .. cluster n-1] [URL]
        int numColumns = 1 + getQualifiedClusters().size() + 1;
        ScoreVector score = this.clusterGroup.getScore();
        OpenMapTable openMapTable = new OpenMapTable(numColumns, this.groupIndex);
        if (getQualifiedClusters().isEmpty()) {
            return openMapTable;
        }
        if (this.logger.isDebugEnabled()) {
            validateClusterOrder();
        }

        // Keep the first component element for every distinct selector, preserving encounter order.
        List<Element> componentElements = this.corpus.getComponentElements$scent_auto_mining();
        HashSet<Object> seenSelectors = new HashSet<>();
        ArrayList<Element> uniqueComponents = new ArrayList<>();
        for (Element component : componentElements) {
            if (seenSelectors.add(NodeExtKt.getPath(component).getSelector2())) {
                uniqueComponents.add(component);
            }
        }
        ArrayList recognizedComponents = new ArrayList(CollectionsKt.collectionSizeOrDefault(uniqueComponents, 10));
        for (Element component : uniqueComponents) {
            recognizedComponents.add(TuplesKt.to(NodeExtKt.getPath(component).getSelector2(), ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getUniqueName(component)));
        }

        // Find a component of the first cluster whose document title differs from the first component's
        // title; the common suffix of the two titles is treated as the shared title suffix.
        TileCluster firstCluster = (TileCluster) CollectionsKt.first(getQualifiedClusters());
        Element firstComponent = (Element) CollectionsKt.first(firstCluster.getComponents());
        String firstTitle = ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getOwnerDocument(firstComponent).title();
        Element differingComponent = null;
        for (Element candidate : firstCluster.getComponents()) {
            if (!Intrinsics.areEqual(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getOwnerDocument(candidate).title(), firstTitle)) {
                differingComponent = candidate;
                break;
            }
        }
        String titleSuffix = "";
        if (differingComponent != null) {
            if (!(NodeExtKt.getPid(firstComponent) != NodeExtKt.getPid(differingComponent))) {
                throw new IllegalArgumentException("Failed requirement.".toString());
            }
            String commonSuffix = Strings.commonSuffix(sniffTitle$default(this, firstComponent, null, 2, null), sniffTitle$default(this, differingComponent, null, 2, null));
            Intrinsics.checkNotNullExpressionValue(commonSuffix, "commonSuffix(...)");
            titleSuffix = StringsKt.trim(commonSuffix).toString();
        }

        // The restrict path is built from the components of all qualified clusters.
        ArrayList<Element> allComponents = new ArrayList<>();
        for (TileCluster cluster : getQualifiedClusters()) {
            CollectionsKt.addAll(allComponents, cluster.getComponents());
        }
        HyperPath build = new HyperPathBuilder(allComponents).build();

        String path = build.getPath();
        int alignedTop = NodeExtKt.getAlignedTop(firstComponent);
        String name = ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getName(firstComponent);
        int documentSize = getDocumentSize();
        String scoreVector = score.toString();
        int numClusters = this.clusterGroup.getClusters().size();
        int numQualifiedClusters = getQualifiedClusters().size();
        double distortion = this.clusterGroup.getDistortion();
        DataTypeStatistics dataTypeStatistics = this.clusterGroup.getDataTypeStatistics();
        ClusterTaskStatus clusterTaskStatus = this.clusterGroup.getClusterTaskStatus();
        NodeClusterGroupMetrics metrics = this.clusterGroup.getMetrics();
        double microP = this.clusterGroup.getMicroP();
        double microR = this.clusterGroup.getMicroR();
        double microF1 = this.clusterGroup.getMicroF1();
        double macroP = this.clusterGroup.getMacroP();
        double macroR = this.clusterGroup.getMacroR();
        double macroF1 = this.clusterGroup.getMacroF1();
        int numFineFields = this.clusterGroup.getNumFineFields();
        double fineFieldRate = this.clusterGroup.getFineFieldRate();
        Intrinsics.checkNotNull(scoreVector);
        PageTableKt.setData(openMapTable, new TableData(false, path, (String) null, alignedTop, name, documentSize, (List) null, scoreVector, numClusters, numQualifiedClusters, titleSuffix, distortion, dataTypeStatistics, clusterTaskStatus, metrics, microP, microR, microF1, macroP, macroR, macroF1, numFineFields, fineFieldRate, (List) null, 8388677, (DefaultConstructorMarker) null));
        PageTableKt.getData(openMapTable).getUrls().addAll(this.corpus.getDocumentUrls$scent_auto_mining());
        PageTableKt.getData(openMapTable).getRecognizedComponents().addAll(recognizedComponents);

        updateTitleColumnAttributes((OpenMapTable.Column) ArraysKt.first(openMapTable.getColumns()));
        // Cluster columns start at index 1: index 0 is the title column and the last index is the URL
        // column. Indexing with the bare cluster index would overwrite the title column's attributes
        // and leave the last cluster column without any.
        int clusterIndex = 0;
        for (TileCluster cluster : getQualifiedClusters()) {
            updateColumnAttributes(cluster, openMapTable.getColumns()[1 + clusterIndex]);
            clusterIndex++;
        }
        updateURLColumnAttributes((OpenMapTable.Column) ArraysKt.last(openMapTable.getColumns()));

        // Fall back to a placeholder URL when the corpus reports no document URL.
        String sampleUrl = (String) CollectionsKt.firstOrNull(this.corpus.getDocumentUrls$scent_auto_mining());
        if (sampleUrl == null) {
            sampleUrl = "http://example.com";
        }
        PageTableKt.getData(openMapTable).setXsql(createXSQL(openMapTable, sampleUrl, build));

        initRows(openMapTable);
        fillRowCells(openMapTable);
        fillSpecialRowCells(openMapTable);
        return openMapTable;
    }

    /**
     * Builds the X-SQL statement that selects one {@code dom_first_text} expression per column that
     * has a non-blank hyper path, loading from the given URL restricted to the given path.
     *
     * <p>Single quotes in the URL and in the paths are doubled so that they cannot terminate the SQL
     * string literal they are embedded in.
     *
     * @param openMapTable the table whose columns are selected
     * @param str the URL passed to {@code load_and_select}
     * @param hyperPath the restrict path passed to {@code load_and_select}
     * @return the statement text, terminated by a newline
     */
    private final String createXSQL(OpenMapTable openMapTable, String str, HyperPath hyperPath) {
        StringBuilder sql = new StringBuilder();
        sql.append("select ").append('\n');

        boolean firstColumn = true;
        for (OpenMapTable.Column column : openMapTable.getColumns()) {
            if (StringsKt.isBlank(PageTableKt.getData(column).getHyperPath())) {
                continue;
            }
            String columnName = column.getName();
            String columnPath = PageTableKt.getData(column).getHyperPath();
            if (!firstColumn) {
                sql.append(",\n");
            }
            firstColumn = false;
            sql.append("    dom_first_text(dom, '")
                    .append(columnPath.replace("'", "''"))
                    .append("') as `")
                    .append(columnName)
                    .append("`");
        }

        sql.append("\nfrom load_and_select('")
                .append(String.valueOf(str).replace("'", "''"))
                .append("', '")
                .append(String.valueOf(hyperPath).replace("'", "''"))
                .append("');")
                .append('\n');

        String statement = sql.toString();
        Intrinsics.checkNotNullExpressionValue(statement, "toString(...)");
        return statement;
    }

    /**
     * Names the first and last columns "Title" and "URL", then fills those two cells in every row.
     * The title cell is built from the row's location and its sniffed title; the URL cell is a link
     * cell, the bare location, or an HTML anchor, depending on the cell type.
     */
    private final void fillSpecialRowCells(OpenMapTable openMapTable) {
        OpenMapTable.Column titleColumn = (OpenMapTable.Column) ArraysKt.first(openMapTable.getColumns());
        titleColumn.setName("Title");
        OpenMapTable.Column urlColumn = (OpenMapTable.Column) ArraysKt.last(openMapTable.getColumns());
        urlColumn.setName("URL");

        final String titleSuffix = PageTableKt.getData(openMapTable).getTitleSuffix();
        final boolean textOnly = CellType.PLAIN_TEXT == this.cellType;

        for (OpenMapTable.Row row : openMapTable.getRows()) {
            // Column 0: the title with the shared suffix stripped.
            String sniffed = sniffTitle((Node) PageTableKt.getData(row).getComponent(), titleSuffix);
            PageTableKt.getData(row).setTitle(sniffed);
            String titleCell = buildTitleCell$default(this, PageTableKt.getData(row).getLocation(), PageTableKt.getData(row).getTitle(), 0, textOnly, 4, null);
            row.setValue(0, titleCell);

            // Last column: the page URL in the representation matching the cell type.
            final int urlIndex = openMapTable.getNumColumns() - 1;
            if (this.cellType == CellType.DATABASE) {
                row.setValue(urlIndex, buildLinkCell(PageTableKt.getData(row).getLocation()));
            } else if (this.cellType == CellType.PLAIN_TEXT) {
                row.setValue(urlIndex, PageTableKt.getData(row).getLocation());
            } else {
                row.setValue(urlIndex, "<a href='" + PageTableKt.getData(row).getLocation() + "' target='_blank' class='_hidden'>detail&gt;</a>");
            }
        }
    }

    /**
     * Checks that consecutive qualified clusters are in non-decreasing geo order, judged by the first
     * point of each cluster. Each violation is logged at WARN level: the names and geo orders of the
     * two nodes, followed by one line per node with its aligned center Y, x, width and height.
     */
    private final void validateClusterOrder() {
        for (Pair<TileCluster, TileCluster> neighbours : CollectionsKt.zipWithNext(getQualifiedClusters())) {
            Node firstNode = ((NodePoint) CollectionsKt.first(neighbours.getFirst().getPoints())).getNode();
            Node secondNode = ((NodePoint) CollectionsKt.first(neighbours.getSecond().getPoints())).getNode();
            double firstOrder = NodeExtKt.getGeoOrder(firstNode);
            double secondOrder = NodeExtKt.getGeoOrder(secondNode);
            if (firstOrder > secondOrder) {
                this.logger.warn("Unexpected cluster order");
                // Report both nodes symmetrically: name followed by geo order.
                this.logger.warn(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getUniqueName(firstNode) + " (" + firstOrder + ")\t\t"
                        + ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getUniqueName(secondNode) + " (" + secondOrder + ")");
                this.logger.warn(String.format("%10d%10d%10d%10d",
                        Integer.valueOf(NodeExtKt.getAlignedCenterY(firstNode)),
                        Integer.valueOf(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getX(firstNode)),
                        Integer.valueOf(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getWidth(firstNode)),
                        Integer.valueOf(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getHeight(firstNode))));
                this.logger.warn(String.format("%10d%10d%10d%10d",
                        Integer.valueOf(NodeExtKt.getAlignedCenterY(secondNode)),
                        Integer.valueOf(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getX(secondNode)),
                        Integer.valueOf(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getWidth(secondNode)),
                        Integer.valueOf(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getHeight(secondNode))));
            }
        }
    }

    /**
     * Derives a title for the document that owns {@code node}.
     *
     * <p>Returns "[Title]" when the node is null or has no owner document. Otherwise the first
     * available of the following is taken and {@code str} is removed from its end: the non-blank
     * document title, the first text selected by "title", the first text selected by "h1", "[Title]".
     *
     * @param node a node of the document, may be null
     * @param str the suffix to strip from the title
     */
    private final String sniffTitle(Node node, String str) {
        Document document = (node == null) ? null : node.ownerDocument();
        if (document == null) {
            return "[Title]";
        }

        String documentTitle = document.title();
        Intrinsics.checkNotNull(documentTitle);

        String candidate;
        if (StringsKt.isBlank(documentTitle)) {
            candidate = QueriesKt.selectFirstTextOrNull(document, "title");
            if (candidate == null) {
                candidate = QueriesKt.selectFirstTextOrNull(document, "h1");
            }
            if (candidate == null) {
                candidate = "[Title]";
            }
        } else {
            candidate = documentTitle;
        }
        return StringsKt.removeSuffix(candidate, str);
    }

    /**
     * Synthetic bridge for the default argument of sniffTitle: when bit 2 of {@code i} is set,
     * the suffix argument is replaced by the empty string.
     */
    static /* synthetic */ String sniffTitle$default(PageCorpusTabulator pageCorpusTabulator, Node node, String str, int i, Object obj) {
        final boolean useDefaultSuffix = (i & 2) != 0;
        return pageCorpusTabulator.sniffTitle(node, useDefaultSuffix ? "" : str);
    }

    /**
     * Marks the column as the title column and records an average title length: the summed lengths of
     * the titles sniffed from the points of the first qualified cluster, divided by the qualified size.
     */
    private final void updateTitleColumnAttributes(OpenMapTable.Column column) {
        TileCluster firstCluster = (TileCluster) CollectionsKt.first(getQualifiedClusters());
        int totalTitleLength = 0;
        for (NodePoint point : firstCluster.getPoints()) {
            // An empty suffix is what the default argument of sniffTitle resolves to.
            totalTitleLength += sniffTitle(point.getNode(), "").length();
        }
        double averageLength = totalTitleLength / getQualifiedSize();
        PageTableKt.getData(column).setDisplay("title");
        PageTableKt.getData(column).setAveLen(averageLength);
    }

    /** Sets the display label of the given column to "URL". */
    private final void updateURLColumnAttributes(OpenMapTable.Column column) {
        final String display = "URL";
        PageTableKt.getData(column).setDisplay(display);
    }

    /*
     * NOTE(review): the decompiler could not reconstruct this method. The body below is a placeholder
     * that always throws; the real logic exists only in the compiled class and must be recovered from
     * the original PageCorpusTabulator.kt or a bytecode dump. tabulate() calls this method once per
     * qualified cluster, passing the cluster and a column of the table.
     */
    /* JADX WARN: Code restructure failed: missing block: B:14:0x0165, code lost:
    
        if (r1 == null) goto L15;
     */
    /* JADX WARN: Code restructure failed: missing block: B:20:0x019c, code lost:
    
        if (r1 == null) goto L23;
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    private final void updateColumnAttributes(ai.platon.scent.ml.unsupervised.TileCluster r7, ai.platon.pulsar.common.OpenMapTable.Column r8) {
        /*
            Method dump skipped, instructions count: 885
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: ai.platon.scent.analysis.corpus.PageCorpusTabulator.updateColumnAttributes(ai.platon.scent.ml.unsupervised.TileCluster, ai.platon.pulsar.common.OpenMapTable$Column):void");
    }

    /**
     * Creates the table rows and fills their row data.
     *
     * <p>Qualified documents are indexed by the pid of their body. For every page entry of every
     * qualified cluster the owning document is looked up by pid; pids without a document are collected
     * and reported in a single warning at the end. For each point that has an owner component, a row
     * keyed by the document location is created if absent and initialized with the component, pid,
     * URLs, title and export paths.
     *
     * <p>NOTE(review): rows are keyed by location here, while buildNodeRows looks rows up by the
     * normalized URI — confirm the two keys always coincide.
     */
    private final void initRows(final OpenMapTable openMapTable) {
        List<VisualDocument> qualifiedDocuments = this.corpus.getQualifiedDocuments$scent_auto_mining();
        int capacity = RangesKt.coerceAtLeast(MapsKt.mapCapacity(CollectionsKt.collectionSizeOrDefault(qualifiedDocuments, 10)), 16);
        LinkedHashMap<Integer, VisualDocument> documentsByPid = new LinkedHashMap<>(capacity);
        for (VisualDocument qualified : qualifiedDocuments) {
            documentsByPid.put(Integer.valueOf(NodeExtKt.getPid(qualified.getBody())), qualified);
        }

        LinkedHashSet<Integer> orphanPids = new LinkedHashSet<>();
        for (TileCluster cluster : getQualifiedClusters()) {
            for (Map.Entry<Integer, List<NodePoint>> pageEntry : cluster.getPagePoints().entrySet()) {
                final int pid = pageEntry.getKey().intValue();
                List<NodePoint> points = pageEntry.getValue();
                if (orphanPids.contains(Integer.valueOf(pid))) {
                    continue;
                }

                VisualDocument owner = documentsByPid.get(Integer.valueOf(pid));
                final Document document = (owner == null) ? null : owner.getDocument();
                if (document == null) {
                    orphanPids.add(Integer.valueOf(pid));
                    continue;
                }

                final String normalizedURI = ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getNormalizedURI(document);
                if (normalizedURI == null) {
                    continue;
                }

                // Use the document's base URI when its location does not start with "http".
                String rawLocation = ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getLocation((Node) document);
                final String location;
                if (rawLocation.startsWith("http")) {
                    location = rawLocation;
                } else {
                    String baseUri = document.baseUri();
                    Intrinsics.checkNotNullExpressionValue(baseUri, "baseUri(...)");
                    location = baseUri;
                }

                for (NodePoint point : points) {
                    final Element ownerComponent = NodeExtKt.getOwnerComponent(point.getNode());
                    if (ownerComponent == null) {
                        continue;
                    }
                    openMapTable.computeIfAbsent(location, row -> {
                        Intrinsics.checkNotNullParameter(row, "row");
                        PageTableKt.getData(row).setOwnerTable(openMapTable);
                        PageTableKt.getData(row).setComponent(ownerComponent);
                        PageTableKt.getData(row).setPid(pid);
                        PageTableKt.getData(row).setNormalizedUrl(normalizedURI);
                        PageTableKt.getData(row).setLocation(location);
                        RowData rowData = PageTableKt.getData(row);
                        String title = document.title();
                        Intrinsics.checkNotNullExpressionValue(title, "title(...)");
                        rowData.setTitle(title);
                        PageTableKt.getData(row).setExportPath(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getExportPaths(document).getAnnotatedView().toString());
                        PageTableKt.getData(row).setTilePath(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getExportPaths(document).getTileView().toString());
                        PageTableKt.getData(row).setEntityPath(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getExportPaths(document).getEntityView().toString());
                        return Unit.INSTANCE;
                    });
                }
            }
        }

        if (!orphanPids.isEmpty()) {
            this.logger.warn("There {} nodes can not find owner document", Integer.valueOf(orphanPids.size()));
        }
    }

    /**
     * Populates the cells of the given table using the builder that matches
     * this tabulator's {@code cellType}.
     *
     * <p>The cell type is translated through the compiler-generated
     * {@code WhenMappings} table: mapped values 1, 2 and 3 select the database,
     * plain-text and HTML builders respectively; any other value falls back to
     * the node builder.
     *
     * @param openMapTable the table whose rows are filled
     */
    private final void fillRowCells(OpenMapTable openMapTable) {
        int mappedType = WhenMappings.$EnumSwitchMapping$0[this.cellType.ordinal()];
        if (mappedType == 1) {
            buildDatabaseRows(openMapTable);
        } else if (mappedType == 2) {
            buildPlainTextRows(openMapTable);
        } else if (mappedType == 3) {
            buildHtmlRows(openMapTable);
        } else {
            buildNodeRows(openMapTable);
        }
    }

    /**
     * Fills the table with one node-valued cell per point of every qualified cluster.
     *
     * <p>The zero-based position of a cluster in {@code getQualifiedClusters()} is used
     * as the column index. The row is looked up (and created when absent) by the
     * normalized URI of the node's owner document; points whose document has no
     * normalized URI are skipped.
     *
     * <p>Each cell carries an {@code MLUtilsKt.ML_VI} attribute holding the node's
     * x, y, width and height separated by spaces. When the node has a non-blank
     * {@code "L"} attribute it is copied onto the cell; otherwise, when the cluster's
     * ML label is non-blank, the cell is marked {@code disabled}.
     *
     * @param openMapTable the table to fill
     */
    private final void buildNodeRows(OpenMapTable openMapTable) {
        int column = -1;
        for (Object element : getQualifiedClusters()) {
            column++;
            if (column < 0) {
                // Mirrors Kotlin's indexed-iteration guard against int overflow.
                CollectionsKt.throwIndexOverflow();
            }
            TileCluster cluster = (TileCluster) element;
            for (NodePoint point : cluster.getPoints()) {
                String uri = ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getNormalizedURI(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getOwnerDocument(point.getNode()));
                if (uri == null) {
                    continue;
                }
                OpenMapTable.Row targetRow = openMapTable.computeIfAbsent(uri);
                Node pointNode = point.getNode();
                OpenMapTable.Cell nodeCell = new OpenMapTable.Cell(column, pointNode);

                String geometry = String.format(
                        "%s %s %s %s",
                        Integer.valueOf(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getX(pointNode)),
                        Integer.valueOf(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getY(pointNode)),
                        Integer.valueOf(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getWidth(pointNode)),
                        Integer.valueOf(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getHeight(pointNode)));
                nodeCell.getAttributes().put(MLUtilsKt.ML_VI, geometry);

                String clusterLabel = cluster.getMlLabel();
                String labelAttr = pointNode.attr("L");
                Intrinsics.checkNotNullExpressionValue(labelAttr, "attr(...)");
                if (StringsKt.isBlank(labelAttr)) {
                    if (!StringsKt.isBlank(clusterLabel)) {
                        nodeCell.getAttributes().put("disabled", "disabled");
                    }
                } else {
                    nodeCell.getAttributes().put("L", labelAttr);
                }
                targetRow.set(column, nodeCell);
            }
        }
    }

    /**
     * Fills existing rows with plain-text cells, one column per qualified cluster.
     *
     * <p>The column index is the cluster's zero-based position in
     * {@code getQualifiedClusters()} plus one. Rows are looked up by the normalized URI
     * of each node's owner document; points without a normalized URI, or without an
     * existing row, are skipped. The new cell value is produced by
     * {@code getText} from the value already stored in that column and the node.
     *
     * @param openMapTable the table whose rows are filled
     */
    private final void buildPlainTextRows(OpenMapTable openMapTable) {
        int clusterIndex = -1;
        for (Object element : getQualifiedClusters()) {
            clusterIndex++;
            if (clusterIndex < 0) {
                // Mirrors Kotlin's indexed-iteration guard against int overflow.
                CollectionsKt.throwIndexOverflow();
            }
            TileCluster cluster = (TileCluster) element;
            int column = clusterIndex + 1;
            for (NodePoint point : cluster.getPoints()) {
                String uri = ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getNormalizedURI(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getOwnerDocument(point.getNode()));
                if (uri == null) {
                    continue;
                }
                OpenMapTable.Row targetRow = openMapTable.get(uri);
                if (targetRow == null) {
                    continue;
                }
                String text = getText(targetRow.get(column), point.getNode());
                targetRow.set(column, new OpenMapTable.Cell(column, text));
            }
        }
    }

    /**
     * Fills existing rows with text cells for the database cell type.
     *
     * <p>For every qualified cluster (column index = zero-based cluster position plus
     * one) and every point in it, the row keyed by the normalized URI of the node's
     * owner document receives a cell whose value comes from {@code getText}, given the
     * value already stored in that column and the node. Points with no normalized
     * URI or no existing row are skipped.
     *
     * @param openMapTable the table whose rows are filled
     */
    private final void buildDatabaseRows(OpenMapTable openMapTable) {
        int clusterIndex = -1;
        for (Object element : getQualifiedClusters()) {
            clusterIndex++;
            if (clusterIndex < 0) {
                // Mirrors Kotlin's indexed-iteration guard against int overflow.
                CollectionsKt.throwIndexOverflow();
            }
            TileCluster cluster = (TileCluster) element;
            int column = clusterIndex + 1;
            for (NodePoint point : cluster.getPoints()) {
                String uri = ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getNormalizedURI(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getOwnerDocument(point.getNode()));
                OpenMapTable.Row targetRow = (uri == null) ? null : openMapTable.get(uri);
                if (targetRow != null) {
                    String text = getText(targetRow.get(column), point.getNode());
                    OpenMapTable.Cell textCell = new OpenMapTable.Cell(column, text);
                    targetRow.set(column, textCell);
                }
            }
        }
    }

    /**
     * Fills existing rows with HTML cells, one column per qualified cluster.
     *
     * <p>The column index is the cluster's zero-based position plus one. For each
     * point, the row keyed by the normalized URI of the node's owner document gets a
     * cell whose value is built by {@code getSlimHtml} from the value already stored
     * in that column and the node. When {@code getOptions().getShowTip()} is true, the
     * cell tip is set to the node's geo order formatted with {@code "%.3f"}.
     * Points with no normalized URI or no existing row are skipped.
     *
     * @param openMapTable the table whose rows are filled
     */
    private final void buildHtmlRows(OpenMapTable openMapTable) {
        int clusterIndex = -1;
        for (Object element : getQualifiedClusters()) {
            clusterIndex++;
            if (clusterIndex < 0) {
                // Mirrors Kotlin's indexed-iteration guard against int overflow.
                CollectionsKt.throwIndexOverflow();
            }
            TileCluster cluster = (TileCluster) element;
            int column = clusterIndex + 1;
            for (NodePoint point : cluster.getPoints()) {
                String uri = ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getNormalizedURI(ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getOwnerDocument(point.getNode()));
                if (uri == null) {
                    continue;
                }
                OpenMapTable.Row targetRow = openMapTable.get(uri);
                if (targetRow == null) {
                    continue;
                }
                String html = getSlimHtml(targetRow.get(column), point.getNode());
                OpenMapTable.Cell htmlCell = new OpenMapTable.Cell(column, html);
                if (getOptions().getShowTip()) {
                    String tip = String.format("%.3f", Double.valueOf(NodeExtKt.getGeoOrder(point.getNode())));
                    PageTableKt.setTip(htmlCell, tip);
                }
                targetRow.set(column, htmlCell);
            }
        }
    }

    /**
     * Builds the cell content for a link.
     *
     * <p>Values of at most 80 characters are returned unchanged. Longer values are
     * wrapped in an anchor that opens in a new tab, with the visible text abbreviated
     * in the middle to 80 characters. Both the {@code href} value and the visible text
     * are HTML-escaped so that quotes or angle brackets in the URL cannot break out of
     * the generated markup.
     *
     * @param str the link target
     * @return the unchanged value, or an HTML anchor for long values
     */
    private final String buildLinkCell(String str) {
        if (str.length() <= 80) {
            return str;
        }
        String visibleText = StringUtils.abbreviateMiddle(str, "..", 80);
        return "<a href='" + escapeHtmlForCell(str) + "' target='_blank'>" + escapeHtmlForCell(visibleText) + "</a>";
    }

    /**
     * Builds the cell content for a page title.
     *
     * <p>The title is abbreviated to {@code i} characters; a blank result is replaced
     * by the placeholder {@code "[Title]"}. When {@code z} is true the abbreviated text
     * is returned as is. Otherwise it is wrapped in an anchor pointing to {@code str}
     * whose {@code title} attribute carries the full title; the URL, the full title and
     * the visible text are HTML-escaped because they are embedded in markup.
     *
     * @param str  the URL used as the anchor target
     * @param str2 the full title
     * @param i    the maximum width passed to {@code StringUtils.abbreviate}
     * @param z    true to return plain abbreviated text, false to return an anchor
     * @return the abbreviated title, optionally wrapped in an anchor
     */
    private final String buildTitleCell(String str, String str2, int i, boolean z) {
        String abbreviated = StringUtils.abbreviate(str2, i);
        Intrinsics.checkNotNull(abbreviated);
        if (StringsKt.isBlank(abbreviated)) {
            abbreviated = "[Title]";
        }
        if (z) {
            return abbreviated;
        }
        // Escape after abbreviating so that an entity is never cut in half.
        return "<a href='" + escapeHtmlForCell(str) + "' title='" + escapeHtmlForCell(str2) + "' target='_blank'>"
                + escapeHtmlForCell(abbreviated) + "</a>";
    }

    /**
     * Escapes the characters that are significant in HTML text and in quoted
     * attribute values: {@code & < > " '}.
     */
    private static String escapeHtmlForCell(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int k = 0; k < text.length(); k++) {
            char c = text.charAt(k);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
                    break;
            }
        }
        return escaped.toString();
    }

    /**
     * Compiler-generated bridge that applies the default arguments of
     * {@code buildTitleCell}.
     *
     * <p>{@code i2} is a bit mask of omitted arguments: bit value 4 replaces
     * {@code i} with 150, and bit value 8 replaces {@code z} with {@code true}.
     * The {@code obj} parameter is not used.
     */
    static /* synthetic */ String buildTitleCell$default(PageCorpusTabulator pageCorpusTabulator, String str, String str2, int i, boolean z, int i2, Object obj) {
        int effectiveWidth = (i2 & 4) != 0 ? 150 : i;
        boolean effectiveFlag = (i2 & 8) != 0 || z;
        return pageCorpusTabulator.buildTitleCell(str, str2, effectiveWidth, effectiveFlag);
    }

    /**
     * Produces the text value of a cell from its previous value and a node.
     *
     * <p>The node contributes its {@code abs:src} attribute when it is an image, its
     * {@code abs:href} attribute when it is an anchor, and its clean text otherwise.
     * When {@code obj} is non-null and its trimmed string form is not blank, the
     * result is that trimmed form followed by a tab, a newline and the node's
     * contribution; otherwise only the node's contribution is returned.
     *
     * @param obj  the value previously stored in the cell, may be null
     * @param node the node supplying the new content
     * @return the combined text
     */
    private final String getText(Object obj, Node node) {
        String previous = "";
        if (obj != null) {
            previous = StringsKt.trim(obj.toString()).toString() + "\t";
        }

        String content;
        if (ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.isImage(node)) {
            content = node.attr("abs:src");
        } else if (ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.isAnchor(node)) {
            content = node.attr("abs:href");
        } else {
            content = ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getCleanText(node);
        }
        Intrinsics.checkNotNull(content);

        if (StringsKt.isBlank(previous)) {
            return content;
        }
        return previous + "\n" + content;
    }

    /**
     * Produces a lightly marked-up value of a cell from its previous value and a node.
     *
     * <p>Images and anchors contribute their slim HTML. Numeric nodes contribute their
     * clean text wrapped in {@code <em class='numeric'>}, money-like nodes in
     * {@code <em class='money'>}; all other nodes contribute their clean text. The
     * checks are applied in that order. When {@code obj} is non-null and its trimmed
     * string form is not blank, that form plus a tab and a newline is prepended.
     *
     * @param obj  the value previously stored in the cell, may be null
     * @param node the node supplying the new content
     * @return the combined rich text
     */
    private final String getRichText(Object obj, Node node) {
        String previous = "";
        if (obj != null) {
            previous = StringsKt.trim(obj.toString()).toString() + "\t";
        }

        String content;
        if (ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.isImage(node) || ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.isAnchor(node)) {
            content = ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getSlimHtml(node);
        } else if (ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.isNumeric(node)) {
            content = "<em class='numeric'>" + ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getCleanText(node) + "</em>";
        } else if (ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.isMoneyLike(node)) {
            content = "<em class='money'>" + ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getCleanText(node) + "</em>";
        } else {
            content = ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getCleanText(node);
        }

        if (StringsKt.isBlank(previous)) {
            return content;
        }
        return previous + "\n" + content;
    }

    /**
     * Produces the HTML value of a cell from its previous value and a node.
     *
     * <p>Returns an empty string when {@code getText} yields a blank value for the
     * same arguments. Otherwise the trimmed previous value (if any) and the node's
     * slim HTML are joined with a newline and split into lines. The first three lines
     * stay visible; any remaining lines are wrapped in
     * {@code <div class='hidden'>...</div>} and appended.
     *
     * @param obj  the value previously stored in the cell, may be null
     * @param node the node supplying the new content
     * @return the HTML for the cell, or an empty string
     */
    private final String getSlimHtml(Object obj, Node node) {
        if (StringsKt.isBlank(getText(obj, node))) {
            return "";
        }

        String previous = "";
        if (obj != null) {
            previous = StringsKt.trim(obj.toString()).toString() + "\n";
        }
        String combined = previous + ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt.getSlimHtml(node);

        List<String> lines = new Regex("\n").split(combined, 0);
        String visibleParts = String.join("\n", CollectionsKt.take(lines, 3));
        String hiddenParts = String.join("\n", CollectionsKt.drop(lines, 3));
        if (hiddenParts.isEmpty()) {
            return visibleParts;
        }
        return visibleParts + "<div class='hidden'>" + hiddenParts + "</div>";
    }

    /**
     * Selects the clusters of the cluster group that qualify for tabulation and
     * returns them sorted by ascending {@code getOrder()}.
     *
     * <p>A cluster qualifies when its number of points is at least
     * {@code getQualifiedSize()} and, unless {@code getOptions().getShowImage()} is
     * true, it is not an image cluster.
     *
     * @return a new list of the qualified clusters in ascending order
     */
    /* JADX INFO: Access modifiers changed from: private */
    public final List<TileCluster> computeQualifiedClusters() {
        List<TileCluster> qualified = new ArrayList<TileCluster>();
        for (TileCluster cluster : this.clusterGroup.getClusters()) {
            boolean largeEnough = ((double) cluster.getPoints().size()) >= getQualifiedSize();
            if (largeEnough && (getOptions().getShowImage() || !cluster.isImage())) {
                qualified.add(cluster);
            }
        }
        // Typed comparator: the decompiled raw Comparator referred to an undeclared
        // type variable. Double.compare yields the same ordering as comparing the
        // boxed values with Kotlin's compareValues.
        return CollectionsKt.sortedWith(qualified, new Comparator<TileCluster>() {
            @Override
            public int compare(TileCluster first, TileCluster second) {
                return Double.compare(first.getOrder(), second.getOrder());
            }
        });
    }
}
