package ai.platon.scent.amazon.category;

import ai.platon.pulsar.common.LangKt;
import ai.platon.pulsar.common.ResourceLoader;
import ai.platon.pulsar.common.collect.UrlPool;
import ai.platon.pulsar.common.options.LoadOptions;
import ai.platon.pulsar.common.urls.PlainUrl;
import ai.platon.pulsar.common.urls.UrlUtils;
import ai.platon.pulsar.crawl.CrawlLoop;
import ai.platon.pulsar.crawl.PulsarEventHandler;
import ai.platon.pulsar.crawl.common.GlobalCache;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.session.AbstractPulsarSession;
import ai.platon.pulsar.session.PulsarSession;
import ai.platon.scent.common.ClusterTools;
import ai.platon.scent.dom.web.SlimWebNode;
import ai.platon.scent.dom.web.WebTreeProperties;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ConcurrentSkipListSet;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.sequences.SequencesKt;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* compiled from: KnownCategoryLoader.kt */
@Metadata(mv = {1, 5, 1}, k = 1, xi = 48, d1 = {"��\\\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010\u0002\n��\n\u0002\u0010\u000e\n\u0002\b\u0002\u0018��2\u00020\u0001B\u0015\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005¢\u0006\u0002\u0010\u0006J\u0016\u0010$\u001a\u00020%2\u0006\u0010&\u001a\u00020'2\u0006\u0010(\u001a\u00020'R\u0014\u0010\u0007\u001a\u00020\b8BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b\t\u0010\nR\u0011\u0010\u0002\u001a\u00020\u0003¢\u0006\b\n��\u001a\u0004\b\u000b\u0010\fR\u0014\u0010\r\u001a\u00020\u000e8BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b\u000f\u0010\u0010R\u0014\u0010\u0011\u001a\u00020\u00128BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b\u0013\u0010\u0014R\u0011\u0010\u0015\u001a\u00020\u0016¢\u0006\b\n��\u001a\u0004\b\u0017\u0010\u0018R\u0016\u0010\u0019\u001a\n \u001b*\u0004\u0018\u00010\u001a0\u001aX\u0082\u0004¢\u0006\u0002\n��R\u0011\u0010\u0004\u001a\u00020\u0005¢\u0006\b\n��\u001a\u0004\b\u001c\u0010\u001dR\u000e\u0010\u001e\u001a\u00020\u001fX\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010 \u001a\u00020!8BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b\"\u0010#¨\u0006)"}, d2 = {"Lai/platon/scent/amazon/category/KnownCategoryLoader;", "", "config", "Lai/platon/scent/dom/web/WebTreeProperties;", "session", "Lai/platon/pulsar/session/PulsarSession;", "(Lai/platon/scent/dom/web/WebTreeProperties;Lai/platon/pulsar/session/PulsarSession;)V", "abstractSession", "Lai/platon/pulsar/session/AbstractPulsarSession;", "getAbstractSession", "()Lai/platon/pulsar/session/AbstractPulsarSession;", "getConfig", "()Lai/platon/scent/dom/web/WebTreeProperties;", "crawlLoop", "Lai/platon/pulsar/crawl/CrawlLoop;", 
"getCrawlLoop", "()Lai/platon/pulsar/crawl/CrawlLoop;", "globalCache", "Lai/platon/pulsar/crawl/common/GlobalCache;", "getGlobalCache", "()Lai/platon/pulsar/crawl/common/GlobalCache;", "loadOptions", "Lai/platon/pulsar/common/options/LoadOptions;", "getLoadOptions", "()Lai/platon/pulsar/common/options/LoadOptions;", "log", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "getSession", "()Lai/platon/pulsar/session/PulsarSession;", "urlNormalizer", "Lai/platon/scent/amazon/category/CategoryUrlNormalizer;", "urlPool", "Lai/platon/pulsar/common/collect/UrlPool;", "getUrlPool", "()Lai/platon/pulsar/common/collect/UrlPool;", "load", "", "resource", "", "urlIdent", "scent-site-amazon"})
/* loaded from: input_file:ai/platon/scent/amazon/category/KnownCategoryLoader.class */
/**
 * Seeds the crawler with known category pages and with the category links found on them.
 *
 * <p>{@link #load(String, String)} works in two rounds. First, category URLs are read from a
 * resource, queued on the session's URL pool, and the call blocks until the crawl loop's URL
 * feeder has nothing left. Second, every known category page is loaded, matching links are
 * extracted and normalized, and those links are queued and awaited in the same way.
 *
 * <p>NOTE(review): this source is decompiled Kotlin. The {@code ...$default} calls and the
 * {@code DefaultConstructorMarker} constructor are the Kotlin default-argument bridges; the
 * trailing integer is the bit mask of arguments left at their Kotlin defaults. They are kept
 * as-is because the real overloads are not visible from this file.
 */
public final class KnownCategoryLoader {

    /** Seconds to sleep between two checks of the crawl loop's URL feeder. */
    private static final long FEEDER_POLL_SECONDS = 10L;

    /** The crawl loop summary is logged on the first poll and then on every sixth one. */
    private static final int POLLS_PER_STATUS_LOG = 6;

    @NotNull
    private final WebTreeProperties config;

    @NotNull
    private final PulsarSession session;

    private final Logger log;

    @NotNull
    private final LoadOptions loadOptions;

    @NotNull
    private final CategoryUrlNormalizer urlNormalizer;

    /**
     * Creates a loader bound to the given configuration and session.
     *
     * @param webTreeProperties the configuration ("config"); its load arguments are used for
     *                          every URL this loader creates
     * @param pulsarSession     the session ("session") used to load pages and to reach the URL pool
     */
    public KnownCategoryLoader(@NotNull WebTreeProperties webTreeProperties, @NotNull PulsarSession pulsarSession) {
        Intrinsics.checkNotNullParameter(webTreeProperties, "config");
        Intrinsics.checkNotNullParameter(pulsarSession, "session");
        this.config = webTreeProperties;
        this.session = pulsarSession;
        // Name the logger after this class so its output is not attributed to CategoryTreeCrawler.
        this.log = LoggerFactory.getLogger(KnownCategoryLoader.class);
        this.loadOptions = PulsarSession.DefaultImpls.options$default(this.session, this.config.getLoadArguments(), (PulsarEventHandler) null, 2, (Object) null);
        this.urlNormalizer = new CategoryUrlNormalizer();
    }

    /** Returns the configuration this loader was created with. */
    @NotNull
    public final WebTreeProperties getConfig() {
        return this.config;
    }

    /** Returns the session this loader was created with. */
    @NotNull
    public final PulsarSession getSession() {
        return this.session;
    }

    /** Returns the load options built once from the configuration's load arguments. */
    @NotNull
    public final LoadOptions getLoadOptions() {
        return this.loadOptions;
    }

    /**
     * Views the session as an {@link AbstractPulsarSession}.
     *
     * @throws ClassCastException if the session is not an {@code AbstractPulsarSession}
     */
    private AbstractPulsarSession getAbstractSession() {
        // The declared field type is the PulsarSession interface, so an explicit downcast is required.
        return (AbstractPulsarSession) this.session;
    }

    private GlobalCache getGlobalCache() {
        return getAbstractSession().getContext().getGlobalCacheFactory().getGlobalCache();
    }

    private UrlPool getUrlPool() {
        return getGlobalCache().getUrlPool();
    }

    /** Returns the first crawl loop registered on the session's context. */
    private CrawlLoop getCrawlLoop() {
        return this.session.getContext().getCrawlLoops().first();
    }

    /**
     * Queues the known category pages listed in a resource, then the category links found on them.
     *
     * <p>The call blocks the calling thread: after each of the two submissions it polls the crawl
     * loop's URL feeder, sleeping between polls, until the feeder reports no more URLs.
     *
     * @param str  the resource ("resource") whose lines describe the known category nodes
     * @param str2 the URL identifier ("urlIdent"); only links whose {@code href} matches
     *             {@code /<urlIdent>/} inside a {@code ul} are collected from the known pages
     */
    public final void load(@NotNull String str, @NotNull String str2) {
        Intrinsics.checkNotNullParameter(str, "resource");
        Intrinsics.checkNotNullParameter(str2, "urlIdent");

        // Round 1: the category pages listed in the resource.
        List<PlainUrl> knownCategoryUrls = readKnownCategoryUrls(str);
        this.log.info("Loading known categories from resource with {} links: ", Integer.valueOf(knownCategoryUrls.size()));
        getUrlPool().getNormalCache().getNReentrantQueue().addAll(knownCategoryUrls);
        awaitFeederDrained();

        // Round 2: the category links found on those pages. The set is concurrent because it is
        // filled from a parallel stream.
        this.log.info("Collecting categories from known category pages");
        ConcurrentSkipListSet<PlainUrl> collectedCategoryUrls = new ConcurrentSkipListSet<>();
        knownCategoryUrls.parallelStream().forEach(
                (PlainUrl pageUrl) -> collectCategoryUrls(str2, collectedCategoryUrls, pageUrl));
        knownCategoryUrls.clear();

        this.log.info("Loading categories from known category pages with {} links: ", Integer.valueOf(collectedCategoryUrls.size()));
        getUrlPool().getNormalCache().getNReentrantQueue().addAll(collectedCategoryUrls);
        awaitFeederDrained();
    }

    /**
     * Reads the resource and turns its lines into a mutable list of URLs to fetch.
     *
     * <p>Each line is parsed as a {@link SlimWebNode}; nodes reporting zero children are dropped,
     * as are nodes whose URL cannot be normalized.
     */
    private List<PlainUrl> readKnownCategoryUrls(String resource) {
        // NOTE(review): ClusterTools.partition presumably keeps only this node's share of the
        // lines; its semantics are not visible from here - confirm.
        return SequencesKt.toMutableList(
                SequencesKt.map(
                        SequencesKt.mapNotNull(
                                SequencesKt.filter(
                                        SequencesKt.map(
                                                CollectionsKt.asSequence(ClusterTools.partition$default(ClusterTools.INSTANCE, ResourceLoader.INSTANCE.readAllLines(resource), 0, 0, 6, (Object) null)),
                                                (String line) -> SlimWebNode.Companion.parse(line)),
                                        (SlimWebNode node) -> node.getNumChildren() != 0),
                                (SlimWebNode node) -> UrlUtils.normalizeOrNull$default(node.getUrl(), false, 2, (Object) null)),
                        (String url) -> new PlainUrl(url, this.config.getLoadArguments(), (String) null, 4, (DefaultConstructorMarker) null)));
    }

    /**
     * Blocks until the crawl loop's URL feeder has no more URLs to hand out, logging the crawl
     * loop's summary on the first poll and on every {@link #POLLS_PER_STATUS_LOG}-th poll after it.
     */
    private void awaitFeederDrained() {
        int polls = 0;
        while (getCrawlLoop().getUrlFeeder().iterator().hasNext()) {
            if (polls++ % POLLS_PER_STATUS_LOG == 0) {
                this.log.info(getCrawlLoop().getAbstract());
            }
            LangKt.sleepSeconds(FEEDER_POLL_SECONDS);
        }
    }

    /**
     * Loads one known category page and adds every matching, normalized link to {@code sink}.
     *
     * @param urlIdent the identifier that a link's {@code href} must match as {@code /<urlIdent>/}
     * @param sink     the concurrent set receiving the collected URLs
     * @param pageUrl  the known category page to load
     */
    private void collectCategoryUrls(String urlIdent, ConcurrentSkipListSet<PlainUrl> sink, PlainUrl pageUrl) {
        FeaturedDocument document = this.session.loadDocument(pageUrl.getUrl(), this.loadOptions);
        Iterable<?> anchors = FeaturedDocument.select$default(document, "ul a[href~=/" + urlIdent + "/]", 0, 0, 6, (Object) null);
        for (Object anchor : anchors) {
            // "abs:href" asks jsoup for the link resolved against the document's base URI.
            String href = ((Element) anchor).attr("abs:href");
            sink.add(new PlainUrl(this.urlNormalizer.normalize(href), this.config.getLoadArguments(), (String) null, 4, (DefaultConstructorMarker) null));
        }
    }
}
