package ai.platon.scent.dm;

import ai.platon.pulsar.boilerpipe.document.TextDocument;
import ai.platon.pulsar.boilerpipe.extractors.ChineseNewsExtractor;
import ai.platon.pulsar.boilerpipe.sax.SAXInput;
import ai.platon.pulsar.common.OpenMapTable;
import ai.platon.pulsar.common.options.LoadOptions;
import ai.platon.pulsar.common.urls.NormUrl;
import ai.platon.pulsar.common.urls.UrlUtils;
import ai.platon.pulsar.crawl.PageEvent;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.persist.model.FieldGroup;
import ai.platon.pulsar.persist.model.PageModel;
import ai.platon.pulsar.session.PulsarSession;
import ai.platon.scent.ScentContext;
import ai.platon.scent.ScentSession;
import ai.platon.scent.analysis.AutoMiner;
import ai.platon.scent.analysis.DocumentLoader;
import ai.platon.scent.analysis.HarvestTaskTracker;
import ai.platon.scent.common.Auth;
import ai.platon.scent.dom.HNormUrl;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.dom.nodes.AnchorGroup;
import ai.platon.scent.dom.nodes.FullFeaturedDocumentKt;
import ai.platon.scent.dom.nodes.NavigateAnchor;
import ai.platon.scent.dom.nodes.node.ext.NodeExtKt;
import ai.platon.scent.dom.utils.DOMUtils;
import ai.platon.scent.entities.AnchorGroupInfo;
import ai.platon.scent.entities.HarvestResult;
import ai.platon.scent.entities.HarvestTaskStatus;
import ai.platon.scent.entities.PageTableGroup;
import ai.platon.scent.entities.PageTableKt;
import ai.platon.scent.extract.WebExtractor;
import com.google.common.collect.Iterables;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.collections.CollectionsKt;
import kotlin.coroutines.Continuation;
import kotlin.coroutines.jvm.internal.Boxing;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Reflection;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* compiled from: HarvestRunner.kt */
@Metadata(mv = {1, 8, 0}, k = 1, xi = 48, d1 = {"��²\u0001\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u001c\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010 \n\u0002\b\n\n\u0002\u0010\u0002\n\u0002\b\u0004\n\u0002\u0010\b\n\u0002\b\u0002\u0018��2\u00020\u0001B\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003¢\u0006\u0002\u0010\u0004J\u0010\u0010\u001a\u001a\u00020\u001b2\u0006\u0010\u001c\u001a\u00020\u001dH\u0002J\u000e\u0010\u001e\u001a\u00020\u001d2\u0006\u0010\u001f\u001a\u00020 J9\u0010!\u001a\u00020\"2\u0006\u0010#\u001a\u00020 2\u0006\u0010$\u001a\u00020%2\u0006\u0010&\u001a\u00020'2\u0006\u0010(\u001a\u00020)2\u0006\u0010*\u001a\u00020\u001dH\u0086@ø\u0001��¢\u0006\u0002\u0010+J7\u0010!\u001a\u00020,2\u0006\u0010#\u001a\u00020 2\u0006\u0010-\u001a\u00020%2\f\u0010.\u001a\b\u0012\u0004\u0012\u00020'0/2\u0006\u0010*\u001a\u00020\u001dH\u0086@ø\u0001��¢\u0006\u0002\u00100J7\u0010!\u001a\u00020\"2\u0006\u0010#\u001a\u00020 2\u0006\u0010$\u001a\u00020%2\f\u00101\u001a\b\u0012\u0004\u0012\u000203022\u0006\u00104\u001a\u00020)H\u0086@ø\u0001��¢\u0006\u0002\u00105J$\u0010!\u001a\u00020\"2\u0006\u0010#\u001a\u00020 2\u0006\u0010$\u001a\u00020%2\f\u00106\u001a\b\u0012\u0004\u0012\u00020807J!\u0010!\u001a\u00020,2\u0006\u0010#\u001a\u00020 
2\u0006\u0010*\u001a\u00020\u001dH\u0086@ø\u0001��¢\u0006\u0002\u00109J\u0019\u0010!\u001a\u00020,2\u0006\u0010#\u001a\u000203H\u0086@ø\u0001��¢\u0006\u0002\u0010:J!\u0010!\u001a\u00020,2\u0006\u0010#\u001a\u0002032\u0006\u00104\u001a\u00020)H\u0086@ø\u0001��¢\u0006\u0002\u0010;J\u000e\u0010<\u001a\u00020=2\u0006\u0010-\u001a\u00020%J\u0016\u0010<\u001a\u00020=2\u0006\u0010>\u001a\u0002032\u0006\u00104\u001a\u00020)J\u001e\u0010<\u001a\u00020=2\u0006\u0010?\u001a\u0002032\u0006\u0010@\u001a\u0002032\u0006\u00104\u001a\u00020)J'\u0010A\u001a\b\u0012\u0004\u0012\u00020=0B2\u0006\u0010#\u001a\u0002032\u0006\u0010(\u001a\u00020)H\u0086@ø\u0001��¢\u0006\u0002\u0010;JE\u0010C\u001a\b\u0012\u0004\u0012\u00020=0B2\u0006\u0010#\u001a\u00020 2\u0006\u0010-\u001a\u00020%2\f\u0010.\u001a\b\u0012\u0004\u0012\u00020'0/2\u0006\u0010D\u001a\u00020)2\u0006\u0010*\u001a\u00020\u001dH\u0082@ø\u0001��¢\u0006\u0002\u0010EJ!\u0010F\u001a\u00020%2\u0006\u0010>\u001a\u00020 2\u0006\u0010G\u001a\u00020\u001dH\u0082@ø\u0001��¢\u0006\u0002\u00109J=\u0010H\u001a\b\u0012\u0004\u0012\u000208072\u0006\u0010#\u001a\u00020 2\f\u0010I\u001a\b\u0012\u0004\u0012\u000203022\u0006\u0010D\u001a\u00020)2\u0006\u0010G\u001a\u00020\u001dH\u0082@ø\u0001��¢\u0006\u0002\u0010JJ\u0018\u0010K\u001a\u0002082\u0006\u0010-\u001a\u00020%2\u0006\u00104\u001a\u00020)H\u0002J \u0010L\u001a\u00020M2\u0006\u0010-\u001a\u00020%2\u0006\u0010N\u001a\u00020\"2\u0006\u0010&\u001a\u00020'H\u0002J*\u0010O\u001a\u00020\"2\u0006\u0010P\u001a\u0002032\u0006\u00104\u001a\u00020)2\b\b\u0002\u0010Q\u001a\u00020R2\b\b\u0002\u0010S\u001a\u00020RR\u0011\u0010\u0005\u001a\u00020\u00068F¢\u0006\u0006\u001a\u0004\b\u0007\u0010\bR\u0011\u0010\t\u001a\u00020\n8F¢\u0006\u0006\u001a\u0004\b\u000b\u0010\fR\u0014\u0010\r\u001a\u00020\u000e8BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b\u000f\u0010\u0010R\u0014\u0010\u0011\u001a\u00020\u00128BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b\u0013\u0010\u0014R\u0016\u0010\u0015\u001a\n 
\u0017*\u0004\u0018\u00010\u00160\u0016X\u0082\u0004¢\u0006\u0002\n��R\u0011\u0010\u0002\u001a\u00020\u0003¢\u0006\b\n��\u001a\u0004\b\u0018\u0010\u0019\u0082\u0002\u0004\n\u0002\b\u0019¨\u0006T"}, d2 = {"Lai/platon/scent/dm/HarvestRunner;", "", "session", "Lai/platon/scent/ScentSession;", "(Lai/platon/scent/ScentSession;)V", "activeHarvestTracker", "Lai/platon/scent/analysis/HarvestTaskTracker;", "getActiveHarvestTracker", "()Lai/platon/scent/analysis/HarvestTaskTracker;", "autoMiner", "Lai/platon/scent/analysis/AutoMiner;", "getAutoMiner", "()Lai/platon/scent/analysis/AutoMiner;", "context", "Lai/platon/scent/ScentContext;", "getContext", "()Lai/platon/scent/ScentContext;", "extractor", "Lai/platon/scent/extract/WebExtractor;", "getExtractor", "()Lai/platon/scent/extract/WebExtractor;", "logger", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "getSession", "()Lai/platon/scent/ScentSession;", "createDocumentLoader", "Lai/platon/scent/analysis/DocumentLoader;", "activeTask", "Lai/platon/scent/entities/HarvestTaskStatus;", "createHarvestTaskStatus", "normUrl", "Lai/platon/scent/dom/HNormUrl;", "harvest", "Lai/platon/scent/entities/PageTableGroup;", "portalUrl", "portalPage", "Lai/platon/pulsar/persist/WebPage;", "anchorGroup", "Lai/platon/scent/dom/nodes/AnchorGroup;", "hOptions", "Lai/platon/scent/dom/HarvestOptions;", "taskStatus", "(Lai/platon/scent/dom/HNormUrl;Lai/platon/pulsar/persist/WebPage;Lai/platon/scent/dom/nodes/AnchorGroup;Lai/platon/scent/dom/HarvestOptions;Lai/platon/scent/entities/HarvestTaskStatus;Lkotlin/coroutines/Continuation;)Ljava/lang/Object;", "Lai/platon/scent/entities/HarvestResult;", "page", "anchorGroups", "Ljava/util/SortedSet;", "(Lai/platon/scent/dom/HNormUrl;Lai/platon/pulsar/persist/WebPage;Ljava/util/SortedSet;Lai/platon/scent/entities/HarvestTaskStatus;Lkotlin/coroutines/Continuation;)Ljava/lang/Object;", "urls", "", "", "options", 
"(Lai/platon/scent/dom/HNormUrl;Lai/platon/pulsar/persist/WebPage;Ljava/lang/Iterable;Lai/platon/scent/dom/HarvestOptions;Lkotlin/coroutines/Continuation;)Ljava/lang/Object;", "docs", "Lkotlin/sequences/Sequence;", "Lai/platon/pulsar/dom/FeaturedDocument;", "(Lai/platon/scent/dom/HNormUrl;Lai/platon/scent/entities/HarvestTaskStatus;Lkotlin/coroutines/Continuation;)Ljava/lang/Object;", "(Ljava/lang/String;Lkotlin/coroutines/Continuation;)Ljava/lang/Object;", "(Ljava/lang/String;Lai/platon/scent/dom/HarvestOptions;Lkotlin/coroutines/Continuation;)Ljava/lang/Object;", "harvestArticle", "Lai/platon/pulsar/boilerpipe/document/TextDocument;", "url", "baseUrl", "htmlContent", "harvestArticles", "", "harvestArticles0", "itemOptions", "(Lai/platon/scent/dom/HNormUrl;Lai/platon/pulsar/persist/WebPage;Ljava/util/SortedSet;Lai/platon/scent/dom/HarvestOptions;Lai/platon/scent/entities/HarvestTaskStatus;Lkotlin/coroutines/Continuation;)Ljava/lang/Object;", "loadDeferred", "status", "loadDocumentsDeferred", "itemUrls", "(Lai/platon/scent/dom/HNormUrl;Ljava/lang/Iterable;Lai/platon/scent/dom/HarvestOptions;Lai/platon/scent/entities/HarvestTaskStatus;Lkotlin/coroutines/Continuation;)Ljava/lang/Object;", "parse", "persistVividAnchors", "", "tableGroup", "scanHarvest", "urlBase", "start", "", "limit", "scent-engine"})
@SourceDebugExtension({"SMAP\nHarvestRunner.kt\nKotlin\n*S Kotlin\n*F\n+ 1 HarvestRunner.kt\nai/platon/scent/dm/HarvestRunner\n+ 2 fake.kt\nkotlin/jvm/internal/FakeKt\n+ 3 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 4 Transform.kt\nkotlinx/coroutines/flow/FlowKt__TransformKt\n+ 5 Emitters.kt\nkotlinx/coroutines/flow/FlowKt__EmittersKt\n+ 6 SafeCollector.common.kt\nkotlinx/coroutines/flow/internal/SafeCollector_commonKt\n*L\n1#1,394:1\n1#2:395\n1726#3,3:396\n1855#3:409\n1620#3,3:410\n1856#3:413\n766#3:414\n857#3,2:415\n1253#3,4:417\n47#4:399\n49#4:403\n47#4:404\n49#4:408\n50#5:400\n55#5:402\n50#5:405\n55#5:407\n106#6:401\n106#6:406\n*S KotlinDebug\n*F\n+ 1 HarvestRunner.kt\nai/platon/scent/dm/HarvestRunner\n*L\n149#1:396,3\n381#1:409\n382#1:410,3\n381#1:413\n386#1:414\n386#1:415,2\n391#1:417,4\n207#1:399\n207#1:403\n208#1:404\n208#1:408\n207#1:400\n207#1:402\n208#1:405\n208#1:407\n207#1:401\n208#1:406\n*E\n"})
/* loaded from: input_file:ai/platon/scent/dm/HarvestRunner.class */
public final class HarvestRunner {

    /** Session through which this runner loads, parses and normalizes. */
    @NotNull
    private final ScentSession session;
    /** SLF4J logger obtained for {@link HarvestRunner}. */
    private final Logger logger;

    /**
     * Creates a runner bound to the given session.
     *
     * @param scentSession the session to use; rejected by the Kotlin null check when null
     */
    public HarvestRunner(@NotNull ScentSession scentSession) {
        Intrinsics.checkNotNullParameter(scentSession, "session");
        this.logger = LoggerFactory.getLogger(HarvestRunner.class);
        this.session = scentSession;
    }

    /**
     * @return the session this runner was constructed with
     */
    @NotNull
    public final ScentSession getSession() {
        return session;
    }

    /**
     * @return the scent context exposed by the bound session
     */
    private final ScentContext getContext() {
        ScentSession owner = this.session;
        return owner.getScentContext();
    }

    /**
     * Looks up the {@link WebExtractor} bean in the session's context on every call.
     *
     * @return the bean registered for {@code WebExtractor}
     */
    private final WebExtractor getExtractor() {
        ScentContext scentContext = getContext();
        return (WebExtractor) scentContext.getBean(Reflection.getOrCreateKotlinClass(WebExtractor.class));
    }

    /**
     * @return the auto miner owned by the current {@link WebExtractor}
     */
    @NotNull
    public final AutoMiner getAutoMiner() {
        WebExtractor webExtractor = getExtractor();
        return webExtractor.getAutoMiner();
    }

    /**
     * @return the tracker of active harvest tasks held by the auto miner
     */
    @NotNull
    public final HarvestTaskTracker getActiveHarvestTracker() {
        AutoMiner miner = getAutoMiner();
        return miner.getActiveHarvestTracker();
    }

    /**
     * Harvests from a single string that is split by {@link UrlUtils#splitUrlArgs}.
     *
     * <p>The first component of the split is forwarded as the url; the second is turned
     * into options by the session. Internal urls short-circuit to the empty result.
     *
     * @param str the string to split into url and option arguments
     * @param continuation the coroutine continuation receiving the {@link HarvestResult}
     * @return the empty result for internal urls, otherwise whatever the
     *         {@code (String, HarvestOptions, Continuation)} overload returns
     */
    @Nullable
    public final Object harvest(@NotNull String str, @NotNull Continuation<? super HarvestResult> continuation) {
        boolean internal = UrlUtils.isInternal(str);
        if (internal) {
            return HarvestResult.Companion.getEMPTY();
        }

        Pair urlAndArgs = UrlUtils.splitUrlArgs(str);
        String url = (String) urlAndArgs.getFirst();
        String args = (String) urlAndArgs.getSecond();
        HarvestOptions options = (HarvestOptions) PulsarSession.DefaultImpls.options$default(this.session, args, (PageEvent) null, 2, (Object) null);
        return harvest(url, options, continuation);
    }

    /**
     * Harvests the given url with explicit options.
     *
     * <p>Returns the empty result when the url is internal or cannot be normalized by the
     * session. Otherwise the normalized url and the task status registered for it in the
     * active harvest tracker are forwarded to the
     * {@code (HNormUrl, HarvestTaskStatus, Continuation)} overload.
     *
     * @param str the url to harvest
     * @param harvestOptions options used while normalizing the url
     * @param continuation the coroutine continuation receiving the {@link HarvestResult}
     * @return the empty result, or the result of the delegated overload
     */
    @Nullable
    public final Object harvest(@NotNull String str, @NotNull HarvestOptions harvestOptions, @NotNull Continuation<? super HarvestResult> continuation) {
        if (UrlUtils.isInternal(str)) {
            return HarvestResult.Companion.getEMPTY();
        }

        NormUrl normalized = ScentSession.DefaultImpls.normalizeOrNull$default(this.session, str, harvestOptions, false, 4, null);
        if (normalized == null) {
            return HarvestResult.Companion.getEMPTY();
        }

        // Cast first, then touch the tracker, so a failing cast never registers a task.
        HNormUrl portalUrl = (HNormUrl) normalized;
        HarvestTaskStatus status = getAutoMiner().getActiveHarvestTracker().computeIfAbsent(normalized);
        return harvest(portalUrl, status, continuation);
    }

    /**
     * Runs the extractor's automatic extraction over already loaded documents.
     *
     * @param hNormUrl the portal url; item options are derived from its harvest options
     * @param webPage the portal page
     * @param sequence the documents to extract from
     * @return the table group produced by {@code WebExtractor.autoExtract}
     */
    @NotNull
    public final PageTableGroup harvest(@NotNull HNormUrl hNormUrl, @NotNull WebPage webPage, @NotNull Sequence<? extends FeaturedDocument> sequence) {
        Intrinsics.checkNotNullParameter(hNormUrl, "portalUrl");
        Intrinsics.checkNotNullParameter(webPage, "portalPage");
        Intrinsics.checkNotNullParameter(sequence, "docs");

        WebExtractor webExtractor = getExtractor();
        return webExtractor.autoExtract(
                hNormUrl,
                webPage,
                sequence,
                hNormUrl.getHOptions().createItemOptions());
    }

    /**
     * Loads the url through the session and extracts an article from the loaded page.
     *
     * @param str the url to load
     * @param harvestOptions options passed to the session as load options
     * @return the text document produced by {@link #harvestArticle(WebPage)}
     */
    @NotNull
    public final TextDocument harvestArticle(@NotNull String str, @NotNull HarvestOptions harvestOptions) {
        Intrinsics.checkNotNullParameter(str, "url");
        Intrinsics.checkNotNullParameter(harvestOptions, "options");

        WebPage loadedPage = this.session.load(str, (LoadOptions) harvestOptions);
        return harvestArticle(loadedPage);
    }

    /**
     * Extracts an article from raw html content.
     *
     * <p>The content is parsed with {@link SAXInput} and then processed in place by
     * {@link ChineseNewsExtractor}.
     *
     * @param str the base url handed to the parser
     * @param str2 the html content to parse
     * @param harvestOptions only null-checked here; not otherwise used by this overload
     * @return the processed text document
     */
    @NotNull
    public final TextDocument harvestArticle(@NotNull String str, @NotNull String str2, @NotNull HarvestOptions harvestOptions) {
        Intrinsics.checkNotNullParameter(str, "baseUrl");
        Intrinsics.checkNotNullParameter(str2, "htmlContent");
        Intrinsics.checkNotNullParameter(harvestOptions, "options");

        SAXInput input = new SAXInput();
        TextDocument article = input.parse(str, str2);
        ChineseNewsExtractor newsExtractor = new ChineseNewsExtractor();
        newsExtractor.process(article);
        Intrinsics.checkNotNullExpressionValue(article, "SAXInput().parse(baseUrl…Extractor().process(it) }");
        return article;
    }

    /**
     * Extracts an article from an already loaded page.
     *
     * <p>The page's base url and SAX input source are parsed with {@link SAXInput}; the
     * resulting document is then processed in place by {@link ChineseNewsExtractor}.
     *
     * @param webPage the page whose content is parsed
     * @return the processed text document
     */
    @NotNull
    public final TextDocument harvestArticle(@NotNull WebPage webPage) {
        Intrinsics.checkNotNullParameter(webPage, "page");

        SAXInput input = new SAXInput();
        TextDocument article = input.parse(webPage.getBaseUrl(), webPage.getContentAsSaxInputSource());
        ChineseNewsExtractor newsExtractor = new ChineseNewsExtractor();
        newsExtractor.process(article);
        Intrinsics.checkNotNullExpressionValue(article, "SAXInput().parse(page.ba…Extractor().process(it) }");
        return article;
    }

    /**
     * Loads a portal page and harvests articles from the links arranged on it.
     *
     * <p>An empty list is returned, with a warning where the code logs one, when:
     * <ul>
     *   <li>the url is internal;</li>
     *   <li>the session cannot normalize the url (no warning is logged);</li>
     *   <li>the loaded page is internal;</li>
     *   <li>the protocol status is not a success or the content is shorter than 1000.</li>
     * </ul>
     * Otherwise the page is parsed, its links are arranged by the session and the work is
     * handed to {@code harvestArticles0}.
     *
     * @param str the portal url
     * @param harvestOptions options used for normalization
     * @param continuation the coroutine continuation receiving the list of text documents
     * @return an empty list on any of the failures above, otherwise the result of
     *         {@code harvestArticles0}
     */
    @Nullable
    public final Object harvestArticles(@NotNull String str, @NotNull HarvestOptions harvestOptions, @NotNull Continuation<? super List<? extends TextDocument>> continuation) throws IllegalArgumentException {
        if (UrlUtils.isInternal(str)) {
            this.logger.warn("Unexpected internal portal url");
            return CollectionsKt.emptyList();
        }

        HNormUrl portalUrl = ScentSession.DefaultImpls.normalizeOrNull$default(this.session, str, harvestOptions, false, 4, null);
        if (portalUrl == null) {
            return CollectionsKt.emptyList();
        }

        HarvestOptions portalOptions = portalUrl.getHOptions();
        WebPage portalPage = this.session.load(portalUrl);
        if (portalPage.isInternal()) {
            this.logger.warn("Unexpected internal page | {}", portalPage.getUrl());
            return CollectionsKt.emptyList();
        }

        // The content length is only consulted when the protocol status is a success.
        boolean usable = portalPage.getProtocolStatus().isSuccess() && portalPage.getContentLength() >= 1000;
        if (!usable) {
            Object[] details = {Boxing.boxLong(portalPage.getContentLength()), portalPage.getProtocolStatus(), portalPage.getUrl()};
            this.logger.warn("Failed to load portal url, content size: {}, status: {} | {}", details);
            return CollectionsKt.emptyList();
        }

        FeaturedDocument portalDocument = ScentSession.DefaultImpls.parse$default(this.session, portalPage, portalOptions, false, 4, null);
        if (this.logger.isInfoEnabled()) {
            // Annotation and export of the portal document only happen at info level.
            FullFeaturedDocumentKt.annotateNodes(portalDocument, portalOptions);
            NodeExtKt.export$default(portalDocument, "portal", false, 2, (Object) null);
        }

        return harvestArticles0(
                portalUrl,
                portalPage,
                this.session.arrangeLinks(portalUrl, portalDocument),
                portalOptions,
                getAutoMiner().getActiveHarvestTracker().computeIfAbsent((NormUrl) portalUrl),
                continuation);
    }

    /**
     * Harvest overload taking a portal url, a page, a sorted set of anchor groups and a
     * task status; its continuation is typed for {@code HarvestResult}.
     *
     * <p>NOTE: the decompiler could not reconstruct this method. The body below is a stub
     * that always throws; the real logic exists only in the compiled class.
     * NOTE(review): the Kotlin metadata on the class suggests the source parameter names
     * are portalUrl, page, anchorGroups and taskStatus — confirm against the original
     * HarvestRunner.kt.
     *
     * @param r12 the portal url
     * @param r13 the page
     * @param r14 the anchor groups
     * @param r15 the harvest task status
     * @param r16 the coroutine continuation
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always, because the body was not decompiled
     */
    /* JADX WARN: Removed duplicated region for block: B:27:0x0172  */
    /* JADX WARN: Removed duplicated region for block: B:29:0x0179  */
    /* JADX WARN: Removed duplicated region for block: B:52:0x0147  */
    /* JADX WARN: Removed duplicated region for block: B:53:0x0218  */
    /* JADX WARN: Removed duplicated region for block: B:8:0x0060  */
    @org.jetbrains.annotations.Nullable
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public final java.lang.Object harvest(@org.jetbrains.annotations.NotNull ai.platon.scent.dom.HNormUrl r12, @org.jetbrains.annotations.NotNull ai.platon.pulsar.persist.WebPage r13, @org.jetbrains.annotations.NotNull java.util.SortedSet<ai.platon.scent.dom.nodes.AnchorGroup> r14, @org.jetbrains.annotations.NotNull ai.platon.scent.entities.HarvestTaskStatus r15, @org.jetbrains.annotations.NotNull kotlin.coroutines.Continuation<? super ai.platon.scent.entities.HarvestResult> r16) {
        /*
            Method dump skipped, instructions count: 547
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler in place of the real body.
        throw new UnsupportedOperationException("Method not decompiled: ai.platon.scent.dm.HarvestRunner.harvest(ai.platon.scent.dom.HNormUrl, ai.platon.pulsar.persist.WebPage, java.util.SortedSet, ai.platon.scent.entities.HarvestTaskStatus, kotlin.coroutines.Continuation):java.lang.Object");
    }

    /**
     * Harvest overload taking a portal url, a page, a single anchor group, harvest options
     * and a task status; its continuation is typed for {@code PageTableGroup}.
     *
     * <p>NOTE: the decompiler could not reconstruct this method. The body below is a stub
     * that always throws; the real logic exists only in the compiled class.
     * NOTE(review): the Kotlin metadata on the class suggests the source parameter names
     * are portalUrl, portalPage, anchorGroup, hOptions and taskStatus — confirm against
     * the original HarvestRunner.kt.
     *
     * @param r10 the portal url
     * @param r11 the page
     * @param r12 the anchor group
     * @param r13 the harvest options
     * @param r14 the harvest task status
     * @param r15 the coroutine continuation
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always, because the body was not decompiled
     */
    /* JADX WARN: Removed duplicated region for block: B:23:0x00eb  */
    /* JADX WARN: Removed duplicated region for block: B:24:0x011b  */
    /* JADX WARN: Removed duplicated region for block: B:8:0x0060  */
    @org.jetbrains.annotations.Nullable
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public final java.lang.Object harvest(@org.jetbrains.annotations.NotNull ai.platon.scent.dom.HNormUrl r10, @org.jetbrains.annotations.NotNull ai.platon.pulsar.persist.WebPage r11, @org.jetbrains.annotations.NotNull ai.platon.scent.dom.nodes.AnchorGroup r12, @org.jetbrains.annotations.NotNull ai.platon.scent.dom.HarvestOptions r13, @org.jetbrains.annotations.NotNull ai.platon.scent.entities.HarvestTaskStatus r14, @org.jetbrains.annotations.NotNull kotlin.coroutines.Continuation<? super ai.platon.scent.entities.PageTableGroup> r15) {
        /*
            Method dump skipped, instructions count: 294
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler in place of the real body.
        throw new UnsupportedOperationException("Method not decompiled: ai.platon.scent.dm.HarvestRunner.harvest(ai.platon.scent.dom.HNormUrl, ai.platon.pulsar.persist.WebPage, ai.platon.scent.dom.nodes.AnchorGroup, ai.platon.scent.dom.HarvestOptions, ai.platon.scent.entities.HarvestTaskStatus, kotlin.coroutines.Continuation):java.lang.Object");
    }

    /**
     * Harvest overload taking a portal url, a page, an iterable of strings and harvest
     * options; its continuation is typed for {@code PageTableGroup}.
     *
     * <p>NOTE: the decompiler could not reconstruct this method. The body below is a stub
     * that always throws; the real logic exists only in the compiled class.
     * NOTE(review): the Kotlin metadata on the class suggests the source parameter names
     * are portalUrl, portalPage, urls and options — confirm against the original
     * HarvestRunner.kt.
     *
     * @param r9 the portal url
     * @param r10 the page
     * @param r11 the strings to process (presumably item urls — confirm)
     * @param r12 the harvest options
     * @param r13 the coroutine continuation
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always, because the body was not decompiled
     */
    /* JADX WARN: Removed duplicated region for block: B:15:0x00eb  */
    /* JADX WARN: Removed duplicated region for block: B:16:0x011e  */
    /* JADX WARN: Removed duplicated region for block: B:8:0x0060  */
    @org.jetbrains.annotations.Nullable
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public final java.lang.Object harvest(@org.jetbrains.annotations.NotNull ai.platon.scent.dom.HNormUrl r9, @org.jetbrains.annotations.NotNull ai.platon.pulsar.persist.WebPage r10, @org.jetbrains.annotations.NotNull java.lang.Iterable<java.lang.String> r11, @org.jetbrains.annotations.NotNull ai.platon.scent.dom.HarvestOptions r12, @org.jetbrains.annotations.NotNull kotlin.coroutines.Continuation<? super ai.platon.scent.entities.PageTableGroup> r13) {
        /*
            Method dump skipped, instructions count: 297
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler in place of the real body.
        throw new UnsupportedOperationException("Method not decompiled: ai.platon.scent.dm.HarvestRunner.harvest(ai.platon.scent.dom.HNormUrl, ai.platon.pulsar.persist.WebPage, java.lang.Iterable, ai.platon.scent.dom.HarvestOptions, kotlin.coroutines.Continuation):java.lang.Object");
    }

    /**
     * Scans pages through the session and harvests the documents parsed from them.
     *
     * <p>The pages returned by {@code session.scan} are mapped to documents by parsing each
     * one with the given options. The documents are then harvested against the NIL
     * normalized url and an internal page created from {@code str}.
     *
     * @param str the url base handed to {@code session.scan}
     * @param harvestOptions options, initialized by the session for the scan and reused to
     *        parse every scanned page
     * @param i third argument of {@code session.scan}; defaults to 0 via
     *        {@code scanHarvest$default} (presumably the start offset — confirm)
     * @param i2 fourth argument of {@code session.scan}; defaults to 40 via
     *        {@code scanHarvest$default} (presumably the limit — confirm)
     * @return the table group produced by {@link #harvest(HNormUrl, WebPage, Sequence)}
     */
    @NotNull
    public final PageTableGroup scanHarvest(@NotNull String str, @NotNull final HarvestOptions harvestOptions, int i, int i2) {
        Intrinsics.checkNotNullParameter(str, "urlBase");
        Intrinsics.checkNotNullParameter(harvestOptions, "options");
        // The decompiler emitted an instance initializer calling super(1) here; that is
        // not legal Java for an anonymous implementation of an interface, so it is dropped.
        Sequence<? extends FeaturedDocument> map = SequencesKt.map(this.session.scan(str, this.session.initOptions(harvestOptions, true), i, i2), new Function1<WebPage, FeaturedDocument>() { // from class: ai.platon.scent.dm.HarvestRunner$scanHarvest$documents$1
            @Override
            @NotNull
            public final FeaturedDocument invoke(@NotNull WebPage webPage) {
                Intrinsics.checkNotNullParameter(webPage, "it");
                return ScentSession.DefaultImpls.parse$default(HarvestRunner.this.getSession(), webPage, harvestOptions, false, 4, null);
            }
        });
        HNormUrl nil = HNormUrl.Companion.getNIL();
        WebPage newInternalPage = WebPage.newInternalPage(str);
        Intrinsics.checkNotNullExpressionValue(newInternalPage, "newInternalPage(urlBase)");
        return harvest(nil, newInternalPage, map);
    }

    /**
     * Synthetic bridge supplying Kotlin default arguments for {@code scanHarvest}.
     *
     * @param harvestRunner the receiver
     * @param str forwarded unchanged
     * @param harvestOptions forwarded unchanged
     * @param i used unless bit 4 of {@code i3} is set, in which case 0 is used
     * @param i2 used unless bit 8 of {@code i3} is set, in which case 40 is used
     * @param i3 bit mask marking which arguments take their defaults
     * @param obj unused marker parameter
     * @return the result of {@code harvestRunner.scanHarvest}
     */
    public static /* synthetic */ PageTableGroup scanHarvest$default(HarvestRunner harvestRunner, String str, HarvestOptions harvestOptions, int i, int i2, int i3, Object obj) {
        int firstArg = (i3 & 4) != 0 ? 0 : i;
        int secondArg = (i3 & 8) != 0 ? 40 : i2;
        return harvestRunner.scanHarvest(str, harvestOptions, firstArg, secondArg);
    }

    /**
     * Coroutine worker behind {@code harvestArticles}.
     *
     * <p>NOTE: the decompiler could not turn this method back into Java. The body is a stub
     * that always throws; only the instruction dump in the comment below shows the logic.
     * According to that dump the compiled method:
     * <ol>
     *   <li>reuses the incoming continuation when it is already a
     *       {@code HarvestRunner$harvestArticles0$1} with the resume bit set in its label,
     *       otherwise wraps it in a new one;</li>
     *   <li>on first entry (label 0) returns an empty list if {@code r14} is empty;</li>
     *   <li>otherwise creates a document loader from {@code r16} and calls
     *       {@code getAutoMiner().create(r12, r13, r14, r15, loader, r16, continuation)},
     *       returning the suspension marker when that call suspends;</li>
     *   <li>on resume (label 1) casts the result to {@code AutoMiningTask}; a null task
     *       yields an empty list;</li>
     *   <li>otherwise maps the task's documents with
     *       {@code HarvestRunner$harvestArticles0$2} and returns them as a list
     *       (presumably converting each document to a text document — the mapper's body
     *       is not visible here);</li>
     *   <li>throws {@code IllegalStateException} for any other label.</li>
     * </ol>
     *
     * @param r12 the portal url
     * @param r13 the portal page
     * @param r14 the anchor groups; an empty set short-circuits to an empty list
     * @param r15 the harvest options
     * @param r16 the harvest task status
     * @param r17 the coroutine continuation
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always, because the body was not decompiled
     */
    /* JADX INFO: Access modifiers changed from: private */
    /* JADX WARN: Removed duplicated region for block: B:19:0x00b9  */
    /* JADX WARN: Removed duplicated region for block: B:21:0x00bd  */
    /* JADX WARN: Removed duplicated region for block: B:23:0x00a1  */
    /* JADX WARN: Removed duplicated region for block: B:24:0x00d6  */
    /* JADX WARN: Removed duplicated region for block: B:8:0x0060  */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public final java.lang.Object harvestArticles0(ai.platon.scent.dom.HNormUrl r12, ai.platon.pulsar.persist.WebPage r13, java.util.SortedSet<ai.platon.scent.dom.nodes.AnchorGroup> r14, ai.platon.scent.dom.HarvestOptions r15, ai.platon.scent.entities.HarvestTaskStatus r16, kotlin.coroutines.Continuation<? super java.util.List<? extends ai.platon.pulsar.boilerpipe.document.TextDocument>> r17) {
        /*
            r11 = this;
            r0 = r17
            boolean r0 = r0 instanceof ai.platon.scent.dm.HarvestRunner$harvestArticles0$1
            if (r0 == 0) goto L2b
            r0 = r17
            ai.platon.scent.dm.HarvestRunner$harvestArticles0$1 r0 = (ai.platon.scent.dm.HarvestRunner$harvestArticles0$1) r0
            r21 = r0
            r0 = r21
            int r0 = r0.label
            r1 = -2147483648(0xffffffff80000000, float:-0.0)
            r0 = r0 & r1
            if (r0 == 0) goto L2b
            r0 = r21
            r1 = r0
            int r1 = r1.label
            r2 = -2147483648(0xffffffff80000000, float:-0.0)
            int r1 = r1 - r2
            r0.label = r1
            goto L37
        L2b:
            ai.platon.scent.dm.HarvestRunner$harvestArticles0$1 r0 = new ai.platon.scent.dm.HarvestRunner$harvestArticles0$1
            r1 = r0
            r2 = r11
            r3 = r17
            r1.<init>(r2, r3)
            r21 = r0
        L37:
            r0 = r21
            java.lang.Object r0 = r0.result
            r20 = r0
            java.lang.Object r0 = kotlin.coroutines.intrinsics.IntrinsicsKt.getCOROUTINE_SUSPENDED()
            r22 = r0
            r0 = r21
            int r0 = r0.label
            switch(r0) {
                case 0: goto L60;
                case 1: goto La1;
                default: goto Ld6;
            }
        L60:
            r0 = r20
            kotlin.ResultKt.throwOnFailure(r0)
            r0 = r14
            boolean r0 = r0.isEmpty()
            if (r0 == 0) goto L72
            java.util.List r0 = kotlin.collections.CollectionsKt.emptyList()
            return r0
        L72:
            r0 = r11
            r1 = r16
            ai.platon.scent.analysis.DocumentLoader r0 = r0.createDocumentLoader(r1)
            r18 = r0
            r0 = r11
            ai.platon.scent.analysis.AutoMiner r0 = r0.getAutoMiner()
            r1 = r12
            r2 = r13
            r3 = r14
            r4 = r15
            r5 = r18
            r6 = r16
            r7 = r21
            r8 = r21
            r9 = r11
            r8.L$0 = r9
            r8 = r21
            r9 = 1
            r8.label = r9
            java.lang.Object r0 = r0.create(r1, r2, r3, r4, r5, r6, r7)
            r1 = r0
            r2 = r22
            if (r1 != r2) goto Lb1
            r1 = r22
            return r1
        La1:
            r0 = r21
            java.lang.Object r0 = r0.L$0
            ai.platon.scent.dm.HarvestRunner r0 = (ai.platon.scent.dm.HarvestRunner) r0
            r11 = r0
            r0 = r20
            kotlin.ResultKt.throwOnFailure(r0)
            r0 = r20
        Lb1:
            ai.platon.scent.analysis.AutoMiningTask r0 = (ai.platon.scent.analysis.AutoMiningTask) r0
            r1 = r0
            if (r1 != 0) goto Lbd
        Lb9:
            java.util.List r0 = kotlin.collections.CollectionsKt.emptyList()
            return r0
        Lbd:
            r19 = r0
            r0 = r19
            kotlin.sequences.Sequence r0 = r0.getDocuments()
            ai.platon.scent.dm.HarvestRunner$harvestArticles0$2 r1 = new ai.platon.scent.dm.HarvestRunner$harvestArticles0$2
            r2 = r1
            r3 = r11
            r2.<init>()
            kotlin.jvm.functions.Function1 r1 = (kotlin.jvm.functions.Function1) r1
            kotlin.sequences.Sequence r0 = kotlin.sequences.SequencesKt.map(r0, r1)
            java.util.List r0 = kotlin.sequences.SequencesKt.toList(r0)
            return r0
        Ld6:
            java.lang.IllegalStateException r0 = new java.lang.IllegalStateException
            r1 = r0
            java.lang.String r2 = "call to 'resume' before 'invoke' with coroutine"
            r1.<init>(r2)
            throw r0
        */
        // Placeholder emitted by the decompiler in place of the real body.
        throw new UnsupportedOperationException("Method not decompiled: ai.platon.scent.dm.HarvestRunner.harvestArticles0(ai.platon.scent.dom.HNormUrl, ai.platon.pulsar.persist.WebPage, java.util.SortedSet, ai.platon.scent.dom.HarvestOptions, ai.platon.scent.entities.HarvestTaskStatus, kotlin.coroutines.Continuation):java.lang.Object");
    }

    /**
     * Harvest overload taking a normalized url and a task status; its continuation is
     * typed for {@code HarvestResult}. The string-based {@code harvest} overload with
     * explicit options delegates here after normalizing its url.
     *
     * <p>NOTE: the decompiler could not reconstruct this method. The body below is a stub
     * that always throws; the real logic exists only in the compiled class.
     *
     * @param r10 the normalized portal url
     * @param r11 the harvest task status
     * @param r12 the coroutine continuation
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always, because the body was not decompiled
     */
    /* JADX WARN: Removed duplicated region for block: B:19:0x00e8  */
    /* JADX WARN: Removed duplicated region for block: B:21:0x0100  */
    /* JADX WARN: Removed duplicated region for block: B:39:0x00b9  */
    /* JADX WARN: Removed duplicated region for block: B:40:0x01dc  */
    /* JADX WARN: Removed duplicated region for block: B:42:0x01e4  */
    /* JADX WARN: Removed duplicated region for block: B:8:0x0060  */
    @org.jetbrains.annotations.Nullable
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public final java.lang.Object harvest(@org.jetbrains.annotations.NotNull ai.platon.scent.dom.HNormUrl r10, @org.jetbrains.annotations.NotNull ai.platon.scent.entities.HarvestTaskStatus r11, @org.jetbrains.annotations.NotNull kotlin.coroutines.Continuation<? super ai.platon.scent.entities.HarvestResult> r12) {
        /*
            Method dump skipped, instructions count: 495
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler in place of the real body.
        throw new UnsupportedOperationException("Method not decompiled: ai.platon.scent.dm.HarvestRunner.harvest(ai.platon.scent.dom.HNormUrl, ai.platon.scent.entities.HarvestTaskStatus, kotlin.coroutines.Continuation):java.lang.Object");
    }

    /**
     * Originally private coroutine taking a normalized url and a task status; its
     * continuation is typed for {@code WebPage}.
     *
     * <p>NOTE: the decompiler could not reconstruct this method. The body below is a stub
     * that always throws; the real logic exists only in the compiled class.
     * NOTE(review): the Kotlin metadata on the class suggests the source parameter names
     * are url and status — confirm against the original HarvestRunner.kt.
     *
     * @param r7 the normalized url
     * @param r8 the harvest task status
     * @param r9 the coroutine continuation
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always, because the body was not decompiled
     */
    /* JADX INFO: Access modifiers changed from: private */
    /* JADX WARN: Removed duplicated region for block: B:19:0x00bb  */
    /* JADX WARN: Removed duplicated region for block: B:22:0x00cd  */
    /* JADX WARN: Removed duplicated region for block: B:28:0x009a  */
    /* JADX WARN: Removed duplicated region for block: B:29:0x0107  */
    /* JADX WARN: Removed duplicated region for block: B:8:0x005c  */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public final java.lang.Object loadDeferred(ai.platon.scent.dom.HNormUrl r7, ai.platon.scent.entities.HarvestTaskStatus r8, kotlin.coroutines.Continuation<? super ai.platon.pulsar.persist.WebPage> r9) {
        /*
            Method dump skipped, instructions count: 274
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler in place of the real body.
        throw new UnsupportedOperationException("Method not decompiled: ai.platon.scent.dm.HarvestRunner.loadDeferred(ai.platon.scent.dom.HNormUrl, ai.platon.scent.entities.HarvestTaskStatus, kotlin.coroutines.Continuation):java.lang.Object");
    }

    /**
     * Originally private coroutine taking a portal url, an iterable of strings, harvest
     * options and a task status; its continuation is typed for a sequence of
     * {@code FeaturedDocument}. The loader built by {@code createDocumentLoader} forwards
     * its {@code loadAll} arguments here.
     *
     * <p>NOTE: the decompiler could not reconstruct this method. The body below is a stub
     * that always throws; the real logic exists only in the compiled class.
     * NOTE(review): the Kotlin metadata on the class suggests the source parameter names
     * are portalUrl, itemUrls, itemOptions and status — confirm against the original
     * HarvestRunner.kt.
     *
     * @param r12 the portal url
     * @param r13 the strings to load (presumably item urls — confirm)
     * @param r14 the harvest options
     * @param r15 the harvest task status
     * @param r16 the coroutine continuation
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always, because the body was not decompiled
     */
    /* JADX INFO: Access modifiers changed from: private */
    /* JADX WARN: Removed duplicated region for block: B:15:0x00cb  */
    /* JADX WARN: Removed duplicated region for block: B:16:0x0128  */
    /* JADX WARN: Removed duplicated region for block: B:8:0x0060  */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public final java.lang.Object loadDocumentsDeferred(ai.platon.scent.dom.HNormUrl r12, java.lang.Iterable<java.lang.String> r13, ai.platon.scent.dom.HarvestOptions r14, ai.platon.scent.entities.HarvestTaskStatus r15, kotlin.coroutines.Continuation<? super kotlin.sequences.Sequence<? extends ai.platon.pulsar.dom.FeaturedDocument>> r16) {
        /*
            Method dump skipped, instructions count: 307
            To view this dump add '--comments-level debug' option
        */
        // Placeholder emitted by the decompiler in place of the real body.
        throw new UnsupportedOperationException("Method not decompiled: ai.platon.scent.dm.HarvestRunner.loadDocumentsDeferred(ai.platon.scent.dom.HNormUrl, java.lang.Iterable, ai.platon.scent.dom.HarvestOptions, ai.platon.scent.entities.HarvestTaskStatus, kotlin.coroutines.Continuation):java.lang.Object");
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Parses a page into a document and decorates it.
     *
     * <p>A {@code <link rel="normalized">} element carrying the page url is always
     * appended to the document head. If the page model has a field group with id 8330129
     * whose field copy is non-null, those fields are logged and applied to the document
     * as node annotations.
     * NOTE(review): 8330129 is presumably the id of the ML-annotation field group, judging
     * by the log message — confirm where that id is defined.
     *
     * @param webPage the page to parse
     * @param harvestOptions options passed to the session parser
     * @return the parsed, possibly annotated document
     */
    public final FeaturedDocument parse(WebPage webPage, HarvestOptions harvestOptions) {
        String pageUrl = webPage.getUrl();
        Intrinsics.checkNotNullExpressionValue(pageUrl, "page.url");

        FeaturedDocument document = this.session.parse(webPage, harvestOptions, true);
        document.getHead().append("<link rel=\"normalized\" href=\"" + pageUrl + "\" />");

        PageModel model = webPage.getPageModel();
        if (model == null) {
            return document;
        }
        FieldGroup annotationGroup = model.findGroup(8330129);
        if (annotationGroup == null) {
            return document;
        }
        Map annotations = annotationGroup.getFieldsCopy();
        if (annotations == null) {
            return document;
        }

        Object[] logArgs = {Integer.valueOf(annotations.size()), annotations, webPage.getUrl()};
        this.logger.info("There are {} ml annotations | {} | {}", logArgs);
        DOMUtils.INSTANCE.annotateNodes(document, annotations);
        return document;
    }

    /**
     * Builds a document loader bound to the given task status.
     *
     * <p>The loader's {@code loadAll} records the number of requested items on the status
     * and then delegates to this runner's {@code loadDocumentsDeferred}.
     *
     * @param harvestTaskStatus the status updated with the page count and passed on
     * @return a new loader instance
     */
    private final DocumentLoader createDocumentLoader(final HarvestTaskStatus harvestTaskStatus) {
        return new DocumentLoader() { // from class: ai.platon.scent.dm.HarvestRunner$createDocumentLoader$1
            @Nullable
            public Object loadAll(@NotNull HNormUrl hNormUrl, @NotNull Iterable<String> iterable, @NotNull HarvestOptions harvestOptions, @NotNull Continuation<? super Sequence<? extends FeaturedDocument>> continuation) {
                harvestTaskStatus.setNTotalPages(Iterables.size(iterable));
                // Qualified this is required: a bare "this" inside the anonymous class is
                // the loader itself, not the enclosing runner that owns the method.
                return HarvestRunner.this.loadDocumentsDeferred(hNormUrl, iterable, harvestOptions, harvestTaskStatus, continuation);
            }
        };
    }

    /**
     * Creates a task status for the configured url of the given normalized url.
     *
     * <p>The new status is linked to the task the tracker reports as active for that url,
     * if any. When there is none, the new status is stored in the tracker under the
     * configured url.
     *
     * @param hNormUrl the normalized url the status is created for
     * @return the newly created status
     */
    @NotNull
    public final HarvestTaskStatus createHarvestTaskStatus(@NotNull HNormUrl hNormUrl) {
        Intrinsics.checkNotNullParameter(hNormUrl, "normUrl");

        String taskUrl = hNormUrl.getConfiguredUrl();
        // Synthetic Kotlin constructor: the mask 32764 selects defaults for every argument
        // except the first two.
        HarvestTaskStatus status = new HarvestTaskStatus(Auth.INSTANCE.getSYSTEM_AUTH(), taskUrl, 0, (AnchorGroupInfo) null, (List) null, 0, 0, 0, 0, 0, 0.0d, 0, 0, (String) null, (String) null, 32764, (DefaultConstructorMarker) null);
        status.setActualTask(getActiveHarvestTracker().getIfActive(hNormUrl));
        if (status.getActualTask() != null) {
            return status;
        }

        getActiveHarvestTracker().set(taskUrl, status);
        return status;
    }

    /**
     * Records on the page which anchors of a group led to extracted rows.
     *
     * <p>The locations of all rows in all tables of the group are collected. Every anchor
     * whose href is among them is written to the page's vivid links, keyed by href, with
     * a value of the form {@code gn:<group id> ts:<millis> txt:<href>}.
     * NOTE(review): the {@code txt:} part is filled with the href while the anchor text is
     * fetched and discarded; this looks unintended — confirm against the Kotlin source
     * before changing the persisted format.
     *
     * @param webPage the page whose vivid links are updated
     * @param pageTableGroup the extraction result supplying row locations
     * @param anchorGroup the anchor group being checked
     */
    private final void persistVividAnchors(WebPage webPage, PageTableGroup pageTableGroup, AnchorGroup anchorGroup) {
        // Phase 1: every location that produced a row.
        LinkedHashSet<Object> harvestedLocations = new LinkedHashSet<Object>();
        for (Object table : pageTableGroup.getTables()) {
            for (Object row : ((OpenMapTable) table).getRows()) {
                harvestedLocations.add(PageTableKt.getData((OpenMapTable.Row) row).getLocation());
            }
        }

        // Phase 2: anchors that point at one of those locations.
        List<NavigateAnchor> vividAnchors = new ArrayList<NavigateAnchor>();
        for (Object candidate : anchorGroup.getAnchors()) {
            NavigateAnchor anchor = (NavigateAnchor) candidate;
            if (harvestedLocations.contains(anchor.getHref())) {
                vividAnchors.add(anchor);
            }
        }

        // Phase 3: persist them with the group id and one shared timestamp.
        String groupId = anchorGroup.getId();
        long now = System.currentTimeMillis();
        Map vividLinks = webPage.getVividLinks();
        String valuePrefix = "gn:" + groupId + " ts:" + now + " txt:";
        for (NavigateAnchor anchor : vividAnchors) {
            String href = anchor.getHref();
            anchor.getText(); // result unused; call kept so the original call sequence is unchanged
            vividLinks.put(href, valuePrefix + href);
        }
    }
}
