/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.scent.segment;

import ai.platon.pulsar.common.config.ImmutableConfig;
import ai.platon.pulsar.common.config.Parameterized;
import ai.platon.pulsar.common.config.Params;
import ai.platon.pulsar.common.math.vectors.VectorsKt;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.dom.features.NodeFeature;
import ai.platon.pulsar.dom.features.defined.DefinedFeaturesKt;
import ai.platon.pulsar.dom.features.defined.F;
import ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt;
import ai.platon.scent.segment.BlockSummarizerKt;
import com.google.common.collect.Lists;
import com.google.common.collect.TreeMultimap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.NavigableSet;
import kotlin.Metadata;
import kotlin.NotImplementedError;
import kotlin.collections.ArraysKt;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import org.apache.commons.math3.linear.RealVector;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.perf4j.slf4j.Slf4JStopWatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000\u0080\u0001\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\u0011\n\u0002\u0010\u000e\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\b\n\u0002\b\u0007\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010\u000b\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0003\u0018\u0000 ;2\u00020\u0001:\u0001;B\u0015\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\u0002\u0010\u0006J0\u0010'\u001a\u0012\u0012\u0004\u0012\u00020)0(j\b\u0012\u0004\u0012\u00020)`*2\u0016\u0010+\u001a\u0012\u0012\u0004\u0012\u00020\u0013\u0012\u0004\u0012\u00020-0,j\u0002`.H\u0002J\u001e\u0010/\u001a\u0012\u0012\u0004\u0012\u00020)0(j\b\u0012\u0004\u0012\u00020)`*2\u0006\u00100\u001a\u00020\u0013J\b\u00101\u001a\u000202H\u0016J \u00103\u001a\u00020\u001f2\u0006\u00104\u001a\u00020)2\u0006\u00105\u001a\u00020\u00132\u0006\u0010\u0012\u001a\u00020\u0013H\u0002J#\u00106\u001a\u0002072\u0006\u00104\u001a\u00020)2\f\u00108\u001a\b\u0012\u0004\u0012\u0002090\nH\u0002\u00a2\u0006\u0002\u0010:R\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0007\u0010\bR\u0019\u0010\t\u001a\b\u0012\u0004\u0012\u00020\u000b0\n\u00a2\u0006\n\n\u0002\u0010\u000e\u001a\u0004\b\f\u0010\rR\u0016\u0010\u000f\u001a\n 
\u0011*\u0004\u0018\u00010\u00100\u0010X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0012\u001a\u00020\u0013X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0014\u001a\u00020\u0013X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u001a\u0010\u0015\u001a\u00020\u000bX\u0086\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u0016\u0010\u0017\"\u0004\b\u0018\u0010\u0019R\u0017\u0010\u001a\u001a\b\u0012\u0004\u0012\u00020\u000b0\u001b\u00a2\u0006\b\n\u0000\u001a\u0004\b\u001c\u0010\u001dR\u0011\u0010\u001e\u001a\u00020\u001f\u00a2\u0006\b\n\u0000\u001a\u0004\b \u0010!R\u0010\u0010\"\u001a\u0004\u0018\u00010#X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0011\u0010$\u001a\u00020\u000b8F\u00a2\u0006\u0006\u001a\u0004\b%\u0010\u0017R\u000e\u0010&\u001a\u00020\u0013X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006<"}, d2={"Lai/platon/scent/segment/BlockSummarizer;", "Lai/platon/pulsar/common/config/Parameterized;", "doc", "Lai/platon/pulsar/dom/FeaturedDocument;", "conf", "Lai/platon/pulsar/common/config/ImmutableConfig;", "(Lai/platon/pulsar/dom/FeaturedDocument;Lai/platon/pulsar/common/config/ImmutableConfig;)V", "getDoc", "()Lai/platon/pulsar/dom/FeaturedDocument;", "featureNames", "", "", "getFeatureNames", "()[Ljava/lang/String;", "[Ljava/lang/String;", "log", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "maxSampleSize", "", "numMinItem", "reportHeader", "getReportHeader", "()Ljava/lang/String;", "setReportHeader", "(Ljava/lang/String;)V", "reportRows", "Ljava/util/LinkedHashSet;", "getReportRows", "()Ljava/util/LinkedHashSet;", "reportSummary", "", "getReportSummary", "()Z", "stopWatch", "Lorg/perf4j/slf4j/Slf4JStopWatch;", "summaryString", "getSummaryString", "varianceThreshold", "calculateSummary", "Ljava/util/ArrayList;", "Lorg/jsoup/nodes/Element;", "Lkotlin/collections/ArrayList;", "candidateBlocks", "Lcom/google/common/collect/TreeMultimap;", "Lorg/jsoup/nodes/Node;", "Lai/platon/scent/dom/nodes/IntNodeIndexer;", "findListLikeBlocks", "feature", "getParams", 
"Lai/platon/pulsar/common/config/Params;", "probablyCandidate", "root", "minDepth", "updateSummary", "", "summary", "Lorg/apache/commons/math3/stat/descriptive/SummaryStatistics;", "(Lorg/jsoup/nodes/Element;[Lorg/apache/commons/math3/stat/descriptive/SummaryStatistics;)V", "Companion", "scent-auto-mining"})
@SourceDebugExtension(value={"SMAP\nBlockSummarizer.kt\nKotlin\n*S Kotlin\n*F\n+ 1 BlockSummarizer.kt\nai/platon/scent/segment/BlockSummarizer\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,196:1\n1855#2,2:197\n*S KotlinDebug\n*F\n+ 1 BlockSummarizer.kt\nai/platon/scent/segment/BlockSummarizer\n*L\n98#1:197,2\n*E\n"})
public final class BlockSummarizer
implements Parameterized {
    // Instance exposing the companion accessors (getDefaultFeatureVector, getReportColumnSeparator).
    @NotNull
    public static final Companion Companion = new Companion(null);
    // The document passed to the constructor; its body depth is the baseline used by calculateSummary.
    @NotNull
    private final FeaturedDocument doc;
    // Logger for this class; also decides whether a stop watch is created.
    private final Logger log;
    // Config "scent.children.summary.item.min" (default 3): calculateSummary stops at the first key below this.
    private final int numMinItem;
    // Config "scent.children.summary.sample.max" (default 100): upper bound used to cut sampling short.
    private final int maxSampleSize;
    // Config "scent.children.summary.features"; defaults to a copy of defaultFeatureVector.
    @NotNull
    private final String[] featureNames;
    // Config "scent.children.summary.threshold" (default 3): a block is accepted when its mean variance is below this.
    private final int varianceThreshold;
    // Non-null only when debug logging is enabled at construction time; not read anywhere in the visible code.
    @Nullable
    private final Slf4JStopWatch stopWatch;
    // Config "scent.children.summary.report" (default false): enables the header/line report helpers.
    private final boolean reportSummary;
    // Starts as the empty string; prepended to the joined rows by getSummaryString.
    @NotNull
    private String reportHeader;
    // Insertion-ordered, de-duplicated report lines.
    // NOTE(review): presumably filled by the BlockSummarizerKt report helpers - confirm there.
    @NotNull
    private final LinkedHashSet<String> reportRows;
    // Aliases of F.TN, F.CH, F.A and F.IMG, in that order (see the static initializer).
    @NotNull
    private static final String[] defaultFeatureVector;
    // Tag names ("form", "style") that probablyCandidate always rejects.
    private static final ArrayList<String> invalidTags;
    // Separator handed to the report helpers; initialised to "^^".
    @NotNull
    private static final String reportColumnSeparator;

    /**
     * Creates a summarizer bound to {@code doc} and reads every tuning value from {@code conf}.
     *
     * <p>Configuration keys and their fallbacks:
     * <ul>
     *   <li>{@code scent.children.summary.item.min} - 3</li>
     *   <li>{@code scent.children.summary.sample.max} - 100</li>
     *   <li>{@code scent.children.summary.features} - a copy of the default feature vector</li>
     *   <li>{@code scent.children.summary.threshold} - 3</li>
     *   <li>{@code scent.children.summary.report} - {@code false}</li>
     * </ul>
     *
     * @param doc  the document to analyse; must not be null
     * @param conf the configuration to read from; must not be null
     */
    public BlockSummarizer(@NotNull FeaturedDocument doc, @NotNull ImmutableConfig conf) {
        Intrinsics.checkNotNullParameter(doc, "doc");
        Intrinsics.checkNotNullParameter(conf, "conf");

        this.doc = doc;
        this.log = LoggerFactory.getLogger(BlockSummarizer.class);

        this.numMinItem = conf.getInt("scent.children.summary.item.min", 3);
        this.maxSampleSize = conf.getInt("scent.children.summary.sample.max", 100);

        // Hand the config a private copy so the shared default array cannot be modified through it.
        String[] configuredFeatures = conf.getStrings("scent.children.summary.features", defaultFeatureVector.clone());
        Intrinsics.checkNotNullExpressionValue(configuredFeatures, "getStrings(...)");
        this.featureNames = configuredFeatures;

        this.varianceThreshold = conf.getInt("scent.children.summary.threshold", 3);

        // A stop watch is only allocated when debug logging is on.
        if (this.log.isDebugEnabled()) {
            this.stopWatch = new Slf4JStopWatch();
        } else {
            this.stopWatch = null;
        }

        this.reportSummary = conf.getBoolean("scent.children.summary.report", false);
        this.reportHeader = "";
        this.reportRows = new LinkedHashSet<>();
    }

    /**
     * @return the document this summarizer was constructed with
     */
    @NotNull
    public final FeaturedDocument getDoc() {
        return doc;
    }

    /**
     * @return the feature names being summarised; this is the internal array, not a copy
     */
    @NotNull
    public final String[] getFeatureNames() {
        return featureNames;
    }

    /**
     * @return whether report generation was enabled in the configuration
     */
    public final boolean getReportSummary() {
        return reportSummary;
    }

    /**
     * @return the current report header (the empty string until one is set)
     */
    @NotNull
    public final String getReportHeader() {
        return reportHeader;
    }

    /**
     * Replaces the report header.
     *
     * @param string the new header; must not be null
     */
    public final void setReportHeader(@NotNull String string) {
        Intrinsics.checkNotNullParameter(string, "<set-?>");
        reportHeader = string;
    }

    /**
     * @return the live, insertion-ordered set of report rows (not a copy)
     */
    @NotNull
    public final LinkedHashSet<String> getReportRows() {
        return reportRows;
    }

    /**
     * Renders the report as a single string: the header immediately followed by
     * all report rows joined with {@code ", "}.
     *
     * @return the header concatenated with the comma-separated rows
     */
    @NotNull
    public final String getSummaryString() {
        StringBuilder report = new StringBuilder(this.reportHeader);
        // Same output as Kotlin's joinTo with all defaults: ", " separator, no prefix/postfix, no limit.
        report.append(String.join(", ", this.reportRows));
        String rendered = report.toString();
        Intrinsics.checkNotNullExpressionValue(rendered, "toString(...)");
        return rendered;
    }

    /**
     * Exposes the effective settings of this summarizer as name/value pairs.
     *
     * @return params holding {@code numMinItem}, {@code maxSampleSize}, {@code reportSummary},
     *         {@code varianceThreshold} and the {@code ", "}-joined {@code featureNames}
     */
    @NotNull
    public Params getParams() {
        // Same output as Kotlin's joinToString with all defaults.
        String joinedFeatureNames = String.join(", ", this.featureNames);
        Object[] remainingPairs = new Object[]{
            "maxSampleSize", this.maxSampleSize,
            "reportSummary", this.reportSummary,
            "varianceThreshold", this.varianceThreshold,
            "featureNames", joinedFeatureNames
        };
        Params result = Params.of("numMinItem", Integer.valueOf(this.numMinItem), remainingPairs);
        Intrinsics.checkNotNullExpressionValue(result, "of(...)");
        return result;
    }

    /**
     * Placeholder that is not implemented yet.
     *
     * @param feature unused
     * @return never returns normally
     * @throws NotImplementedError always
     */
    @NotNull
    public final ArrayList<Element> findListLikeBlocks(int feature) {
        throw new NotImplementedError("An operation is not implemented: Not implemented");
    }

    /**
     * Scans the candidate blocks and returns those whose children look alike, i.e. whose
     * mean per-feature variance across direct children is below {@code varianceThreshold}.
     *
     * <p>Keys are visited in the multimap's own order. The scan stops at the first key that
     * is smaller than {@code numMinItem}, or once more than {@code maxSampleSize} entries
     * belong to the keys already visited.
     * NOTE(review): stopping at the first small key only makes sense if the multimap yields
     * larger keys first - presumably it is built with a descending key comparator; confirm.
     *
     * <p>Each distinct key is visited once. Iterating {@code keys()} instead would repeat a
     * key once per mapped value, re-processing the same nodes and adding accepted elements
     * to the result several times.
     *
     * <p>When {@code reportSummary} is set, the report header and one report line per
     * inspected candidate are produced through the {@code BlockSummarizerKt} helpers.
     *
     * @param candidateBlocks nodes grouped under an integer key
     * @return the accepted elements, each at most once, in visiting order
     */
    private final ArrayList<Element> calculateSummary(TreeMultimap<Integer, Node> candidateBlocks) {
        ArrayList<Element> listLikeBlocks = new ArrayList<Element>(candidateBlocks.size());
        int minDepth = NodeExtKt.getDepth((Node)this.doc.getBody()) + 1;

        // One accumulator per feature, plus a trailing slot for the variances of all features.
        int featureCount = this.featureNames.length;
        SummaryStatistics[] summary = new SummaryStatistics[featureCount + 1];
        for (int i = 0; i < summary.length; ++i) {
            summary[i] = new SummaryStatistics();
        }

        if (this.reportSummary) {
            BlockSummarizerKt.access$buildLogHeader(this, reportColumnSeparator);
        }

        // Number of entries under the keys visited so far; compared against maxSampleSize.
        int sampledEntries = 0;
        for (Integer key : candidateBlocks.keySet()) {
            Intrinsics.checkNotNull(key);
            if (key < this.numMinItem || sampledEntries > this.maxSampleSize) {
                break;
            }

            NavigableSet<Node> nodes = candidateBlocks.get(key);
            Intrinsics.checkNotNullExpressionValue(nodes, "get(...)");
            sampledEntries += nodes.size();

            for (Node node : nodes) {
                if (!(node instanceof Element)) {
                    continue;
                }
                Element element = (Element)node;
                if (!this.probablyCandidate(element, minDepth, this.maxSampleSize)) {
                    continue;
                }

                this.updateSummary(element, summary);
                double meanOfVariance = summary[featureCount].getMean();
                if (meanOfVariance < (double)this.varianceThreshold) {
                    listLikeBlocks.add(element);
                }
                if (this.reportSummary) {
                    BlockSummarizerKt.access$buildLogLine(this, element, summary, reportColumnSeparator);
                }
            }
        }
        return listLikeBlocks;
    }

    /**
     * Recomputes {@code summary} for the direct children of {@code root}.
     *
     * <p>All accumulators are cleared first. Slot {@code i} (for {@code i < featureNames.length})
     * then receives the value of feature {@code i} for every child, and the slot at index
     * {@code featureNames.length} receives the variance of each of those per-feature slots.
     *
     * @param root    the element whose children are summarised
     * @param summary accumulators to reset and refill, updated in place
     */
    private final void updateSummary(Element root, SummaryStatistics[] summary) {
        for (SummaryStatistics accumulator : summary) {
            accumulator.clear();
        }

        for (Element child : root.children()) {
            for (int f = 0; f < this.featureNames.length; ++f) {
                SummaryStatistics featureStats = summary[f];
                String featureName = this.featureNames[f];
                Intrinsics.checkNotNull(child);
                featureStats.addValue(NodeFeature.Companion.getValue(featureName, (Node)child));
            }
        }

        // Fold the per-feature variances into the trailing slot.
        for (int f = 0; f < this.featureNames.length; ++f) {
            double variance = summary[f].getVariance();
            summary[this.featureNames.length].addValue(variance);
        }
    }

    /**
     * Cheap pre-filter deciding whether {@code root} is worth summarising.
     *
     * <p>Rejects elements shallower than {@code minDepth} and elements whose tag is in
     * {@code invalidTags}. Otherwise the decision is based on the IMG, CH, A and TN entries
     * of the element's feature vector.
     * NOTE(review): presumably these are image, character, anchor and text-node counts -
     * confirm against the feature definitions.
     *
     * @param root          the element to test
     * @param minDepth      minimum node depth an element must have
     * @param maxSampleSize not used by this method
     * @return {@code true} if the element should be inspected further
     */
    private final boolean probablyCandidate(Element root, int minDepth, int maxSampleSize) {
        boolean tooShallow = NodeExtKt.getDepth((Node)root) < minDepth;
        if (tooShallow || invalidTags.contains(root.tagName())) {
            return false;
        }

        RealVector features = root.getExtension().getFeatures();
        Intrinsics.checkNotNullExpressionValue(features, "getFeatures(...)");
        double images = VectorsKt.get(features, DefinedFeaturesKt.IMG);
        double chars = VectorsKt.get(features, DefinedFeaturesKt.CH);
        double anchors = VectorsKt.get(features, DefinedFeaturesKt.A);
        double textNodes = VectorsKt.get(features, DefinedFeaturesKt.TN);

        // Enough images or anchors is sufficient on its own.
        if (images >= 3.0 || anchors >= 3.0) {
            return true;
        }
        // Too little content overall.
        if (images + textNodes + anchors <= 3.0) {
            return false;
        }
        if (images != 0.0) {
            return true;
        }
        // The comparisons stay negated so that a NaN ratio is handled exactly as before.
        double charsPerTextNode = chars / textNodes;
        return !(charsPerTextNode <= 1.0) && !(charsPerTextNode >= 50.0);
    }

    // Initialises the class-level defaults: the default feature aliases (TN, CH, A, IMG),
    // the tags that are never candidates, and the report column separator.
    static {
        defaultFeatureVector = new String[]{
            F.TN.getAlias(),
            F.CH.getAlias(),
            F.A.getAlias(),
            F.IMG.getAlias()
        };
        invalidTags = Lists.newArrayList("form", "style");
        reportColumnSeparator = "^^";
    }

    @Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000 \n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0010\u0011\n\u0002\u0010\u000e\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0005\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002R\u0019\u0010\u0003\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004\u00a2\u0006\n\n\u0002\u0010\b\u001a\u0004\b\u0006\u0010\u0007R2\u0010\t\u001a&\u0012\f\u0012\n \u000b*\u0004\u0018\u00010\u00050\u0005 \u000b*\u0012\u0012\f\u0012\n \u000b*\u0004\u0018\u00010\u00050\u0005\u0018\u00010\n0\nX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0014\u0010\f\u001a\u00020\u0005X\u0086D\u00a2\u0006\b\n\u0000\u001a\u0004\b\r\u0010\u000e\u00a8\u0006\u000f"}, d2={"Lai/platon/scent/segment/BlockSummarizer$Companion;", "", "()V", "defaultFeatureVector", "", "", "getDefaultFeatureVector", "()[Ljava/lang/String;", "[Ljava/lang/String;", "invalidTags", "Ljava/util/ArrayList;", "kotlin.jvm.PlatformType", "reportColumnSeparator", "getReportColumnSeparator", "()Ljava/lang/String;", "scent-auto-mining"})
    /**
     * Holder for the class-level accessors of {@link BlockSummarizer}.
     */
    public static final class Companion {
        /** Synthetic constructor taking a marker argument; delegates to the private no-arg constructor. */
        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }

        private Companion() {
        }

        /**
         * @return the shared default feature name array (not a copy)
         */
        @NotNull
        public final String[] getDefaultFeatureVector() {
            return BlockSummarizer.defaultFeatureVector;
        }

        /**
         * @return the separator placed between report columns
         */
        @NotNull
        public final String getReportColumnSeparator() {
            return BlockSummarizer.reportColumnSeparator;
        }
    }
}

