/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.scent.crawl.diffusing;

import ai.platon.pulsar.common.LogsKt;
import ai.platon.pulsar.common.urls.Hyperlink;
import ai.platon.pulsar.common.urls.URLUtils;
import ai.platon.pulsar.common.urls.UrlAware;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.persist.metadata.OpenPageCategory;
import ai.platon.pulsar.persist.metadata.PageCategory;
import ai.platon.pulsar.skeleton.common.message.PageLoadStatusFormatter;
import ai.platon.pulsar.skeleton.crawl.common.url.StatefulListenableHyperlink;
import ai.platon.scent.common.WebPages;
import ai.platon.scent.crawl.diffusing.PageProcessor;
import ai.platon.scent.crawl.diffusing.config.DiffusingCrawlerConfig;
import ai.platon.scent.skeleton.ScentSession;
import java.time.Instant;
import java.util.Collection;
import java.util.concurrent.ConcurrentSkipListSet;
import kotlin.Metadata;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Reflection;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.reflect.KClass;
import kotlin.text.Regex;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;

@Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000r\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\u000b\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\b\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\t\n\u0002\u0010\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u001f\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\b&\u0018\u0000 ;2\u00020\u0001:\u0001;B\u0015\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\u0002\u0010\u0006J\u001e\u0010)\u001a\u00020*2\u0006\u0010+\u001a\u00020,2\f\u0010-\u001a\b\u0012\u0004\u0012\u00020/0.H\u0016J0\u00100\u001a\u0004\u0018\u0001012\u0006\u00102\u001a\u00020\u00142\b\u00103\u001a\u0004\u0018\u00010\u00142\b\u00104\u001a\u0004\u0018\u00010\u00142\b\u00105\u001a\u0004\u0018\u000106H\u0016J\u0012\u00100\u001a\u0004\u0018\u0001012\u0006\u00107\u001a\u000208H\u0016J\u0012\u00109\u001a\u0004\u0018\u00010\u00142\u0006\u00102\u001a\u00020\u0014H\u0016J\u0012\u0010:\u001a\u0004\u0018\u00010\u00142\u0006\u00102\u001a\u00020\u0014H\u0016R\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0007\u0010\bR\u001a\u0010\t\u001a\u00020\nX\u0096\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u000b\u0010\f\"\u0004\b\r\u0010\u000eR\u0014\u0010\u000f\u001a\u00020\u0010X\u0084\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0011\u0010\u0012R\u0014\u0010\u0013\u001a\u00020\u00148VX\u0096\u0004\u00a2\u0006\u0006\u001a\u0004\b\u0015\u0010\u0016R\u000e\u0010\u0017\u001a\u00020\u0018X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u001a\u0010\u0019\u001a\u00020\u001aX\u0096\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u001b\u0010\u001c\"\u0004\b\u001d\u0010\u001eR\u0014\u0010\u001f\u001a\u00020 
X\u0096\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b!\u0010\"R\u0011\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\b\n\u0000\u001a\u0004\b#\u0010$R\u001a\u0010%\u001a\u00020\nX\u0096\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b&\u0010\f\"\u0004\b'\u0010\u000eR\u000e\u0010(\u001a\u00020\u0018X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006<"}, d2={"Lai/platon/scent/crawl/diffusing/AbstractPageProcessor;", "Lai/platon/scent/crawl/diffusing/PageProcessor;", "config", "Lai/platon/scent/crawl/diffusing/config/DiffusingCrawlerConfig;", "session", "Lai/platon/scent/skeleton/ScentSession;", "(Lai/platon/scent/crawl/diffusing/config/DiffusingCrawlerConfig;Lai/platon/scent/skeleton/ScentSession;)V", "getConfig", "()Lai/platon/scent/crawl/diffusing/config/DiffusingCrawlerConfig;", "dbCheck", "", "getDbCheck", "()Z", "setDbCheck", "(Z)V", "indexPageUrlRegex", "Lkotlin/text/Regex;", "getIndexPageUrlRegex", "()Lkotlin/text/Regex;", "label", "", "getLabel", "()Ljava/lang/String;", "logger", "Lorg/slf4j/Logger;", "minPageSize", "", "getMinPageSize", "()I", "setMinPageSize", "(I)V", "pageCategory", "Lai/platon/pulsar/persist/metadata/OpenPageCategory;", "getPageCategory", "()Lai/platon/pulsar/persist/metadata/OpenPageCategory;", "getSession", "()Lai/platon/scent/skeleton/ScentSession;", "storeContent", "getStoreContent", "setStoreContent", "taskLogger", "collectTo", "", "document", "Lai/platon/pulsar/dom/FeaturedDocument;", "sink", "", "Lai/platon/pulsar/common/urls/UrlAware;", "createHyperlink", "Lai/platon/pulsar/common/urls/Hyperlink;", "url", "href", "referrer", "deadTime", "Ljava/time/Instant;", "anchor", "Lorg/jsoup/nodes/Element;", "filter", "normalize", "Companion", "scent-engine"})
@SourceDebugExtension(value={"SMAP\nPageProcessor.kt\nKotlin\n*S Kotlin\n*F\n+ 1 PageProcessor.kt\nai/platon/scent/crawl/diffusing/AbstractPageProcessor\n+ 2 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,251:1\n1#2:252\n*E\n"})
public abstract class AbstractPageProcessor
implements PageProcessor {
    /** Singleton holder mirroring the Kotlin companion object of this class. */
    @NotNull
    public static final Companion Companion = new Companion(null);
    /** Crawler configuration supplied at construction time. */
    @NotNull
    private final DiffusingCrawlerConfig config;
    /** Session supplied at construction time; used to look up stored pages. */
    @NotNull
    private final ScentSession session;
    /** Logger obtained for this class. */
    @NotNull
    private final Logger logger;
    /** Logger obtained for this class with the ".Task" suffix. */
    @NotNull
    private final Logger taskLogger;
    /** Page category of this processor; initialised to {@code PageCategory.UNKNOWN}. */
    @NotNull
    private final OpenPageCategory pageCategory;
    /** When {@code true}, hyperlink creation consults the session for an existing page. Defaults to {@code false}. */
    private boolean dbCheck;
    /** Content size threshold compared against the stored page's size; set to 1000 by the constructor. */
    private int minPageSize;
    /** Value emitted after the {@code -storeContent} option of generated load arguments. Defaults to {@code false}. */
    private boolean storeContent;
    /** Regex compiled from the config's index page URL pattern. */
    @NotNull
    private final Regex indexPageUrlRegex;
    /** URLs for which a hyperlink has already been created; shared by every instance of this class. */
    @NotNull
    private static final ConcurrentSkipListSet<String> globalCreatedUrls = new ConcurrentSkipListSet<>();

    public AbstractPageProcessor(@NotNull DiffusingCrawlerConfig config, @NotNull ScentSession session2) {
        Intrinsics.checkNotNullParameter((Object)config, (String)"config");
        Intrinsics.checkNotNullParameter((Object)session2, (String)"session");
        this.config = config;
        this.session = session2;
        this.logger = LogsKt.getLogger((KClass)Reflection.getOrCreateKotlinClass(AbstractPageProcessor.class));
        this.taskLogger = LogsKt.getLogger((KClass)Reflection.getOrCreateKotlinClass(AbstractPageProcessor.class), (String)".Task");
        this.pageCategory = new OpenPageCategory(PageCategory.UNKNOWN);
        this.minPageSize = 1000;
        this.indexPageUrlRegex = new Regex(this.config.getIndexPageUrlPattern());
    }

    /**
     * @return the crawler configuration passed to the constructor
     */
    @NotNull
    public final DiffusingCrawlerConfig getConfig() {
        return config;
    }

    /**
     * @return the session passed to the constructor
     */
    @NotNull
    public final ScentSession getSession() {
        return session;
    }

    /**
     * @return the label taken from the crawler configuration
     */
    @Override
    @NotNull
    public String getLabel() {
        DiffusingCrawlerConfig crawlerConfig = this.config;
        return crawlerConfig.getLabel();
    }

    /**
     * @return the page category of this processor (created as {@code PageCategory.UNKNOWN})
     */
    @Override
    @NotNull
    public OpenPageCategory getPageCategory() {
        return pageCategory;
    }

    /**
     * @return whether hyperlink creation looks the URL up in the session first
     */
    @Override
    public boolean getDbCheck() {
        return dbCheck;
    }

    /**
     * Enables or disables the session lookup performed during hyperlink creation.
     *
     * @param dbCheck the new flag value
     */
    @Override
    public void setDbCheck(boolean dbCheck) {
        this.dbCheck = dbCheck;
    }

    /**
     * @return the size threshold a stored page's content is compared against
     */
    @Override
    public int getMinPageSize() {
        return minPageSize;
    }

    /**
     * Replaces the size threshold a stored page's content is compared against.
     *
     * @param minPageSize the new threshold
     */
    @Override
    public void setMinPageSize(int minPageSize) {
        this.minPageSize = minPageSize;
    }

    /**
     * @return the value written after {@code -storeContent} in generated load arguments
     */
    @Override
    public boolean getStoreContent() {
        return storeContent;
    }

    /**
     * Replaces the value written after {@code -storeContent} in generated load arguments.
     *
     * @param storeContent the new flag value
     */
    @Override
    public void setStoreContent(boolean storeContent) {
        this.storeContent = storeContent;
    }

    /**
     * @return the regex compiled from the config's index page URL pattern
     */
    @NotNull
    protected final Regex getIndexPageUrlRegex() {
        return indexPageUrlRegex;
    }

    /**
     * Accepts a URL only if it is a standard URL and matches the index page URL regex.
     *
     * @param url the URL to test; must not be {@code null}
     * @return {@code url} itself when accepted, otherwise {@code null}
     */
    @Override
    @Nullable
    public String filter(@NotNull String url) {
        Intrinsics.checkNotNullParameter((Object)url, (String)"url");
        if (!URLUtils.isStandard((String)url)) {
            return null;
        }
        if (!this.indexPageUrlRegex.matches((CharSequence)url)) {
            return null;
        }
        return url;
    }

    /**
     * Identity normalization: hands the given URL back unchanged.
     *
     * @param url the URL to normalize; must not be {@code null}
     * @return the same {@code url} instance
     */
    @Override
    @Nullable
    public String normalize(@NotNull String url) {
        Intrinsics.checkNotNullParameter((Object)url, (String)"url");
        String normalized = url;
        return normalized;
    }

    /**
     * Builds a hyperlink from an anchor element.
     *
     * <p>The anchor's absolute {@code href} is passed through {@link #filter(String)} and
     * then {@link #normalize(String)}; if either step yields {@code null} no hyperlink is
     * created. Otherwise creation is delegated to the four-argument overload with the
     * normalized URL, the filtered href and the anchor's base URI as referrer.
     *
     * @param anchor the element whose {@code href} attribute is read; must not be {@code null}
     * @return the created hyperlink, or {@code null} when the link is rejected
     */
    @Override
    @Nullable
    public Hyperlink createHyperlink(@NotNull Element anchor) {
        Intrinsics.checkNotNullParameter((Object)anchor, (String)"anchor");
        String absoluteHref = anchor.absUrl("href");
        Intrinsics.checkNotNullExpressionValue((Object)absoluteHref, (String)"absUrl(...)");

        String href = this.filter(absoluteHref);
        String url = href == null ? null : this.normalize(href);
        if (url == null) {
            return null;
        }
        // Mask 8 selects the default for the 4th argument (deadTime) of the Kotlin default-args bridge.
        return PageProcessor.DefaultImpls.createHyperlink$default(this, url, href, anchor.baseUri(), null, 8, null);
    }

    /**
     * Builds a hyperlink for {@code url}, or returns {@code null} when the URL is rejected.
     *
     * <p>A URL is rejected when {@link #filter(String)} returns {@code null} for it, or when
     * it is already present in the class-wide set of created URLs. A URL that passes is
     * recorded in that set, so each URL yields at most one hyperlink across all instances.
     *
     * <p>When {@link #getDbCheck()} is enabled, the session is queried for an existing page
     * and the page's content size is compared with {@link #getMinPageSize()}; a smaller size
     * switches the {@code -i} option of the generated arguments from {@code 3000d} to
     * {@code 0s}.
     *
     * @param url      the URL to create a hyperlink for; must not be {@code null}
     * @param href     the original href, passed on to the hyperlink and used in log output
     *                 when present
     * @param referrer the referrer passed on to the hyperlink
     * @param deadTime when non-null, appended to the arguments as {@code -deadTime}
     * @return the created hyperlink, or {@code null} if the URL is filtered out or was
     *         already created
     */
    @Override
    @Nullable
    public Hyperlink createHyperlink(@NotNull String url, @Nullable String href, @Nullable String referrer, @Nullable Instant deadTime) {
        Intrinsics.checkNotNullParameter((Object)url, (String)"url");
        if (this.filter(url) == null) {
            return null;
        }
        // add() inserts only if absent and reports whether it did, as one atomic step.
        // A separate contains()/add() pair would let two threads both pass the check
        // and create duplicate hyperlinks for the same URL.
        if (!globalCreatedUrls.add(url)) {
            return null;
        }
        boolean isExpired = false;
        if (this.getDbCheck()) {
            WebPage page = this.session.getOrNull(url);
            // NOTE(review): the size helper is invoked even when page is null; presumably it
            // tolerates null — confirm against WebPages.getActualContentBytes.
            int size = WebPages.getActualContentBytes$default(WebPages.INSTANCE, page, false, 2, null);
            isExpired = size < this.getMinPageSize();
            if (page == null) {
                Object[] logArgs = new Object[]{
                    this.getPageCategory().getSymbol(),
                    this.getLabel(),
                    href != null ? href : url
                };
                this.logger.debug("Fetching {} new page | {} | {}", logArgs);
            } else {
                String prefix = isExpired ? "Fetching" : "Loading";
                this.logger.debug("{}", (Object)new PageLoadStatusFormatter(page, prefix, false, false, false, false, 60, null));
            }
        }
        String expires = isExpired ? "0s" : "3000d";
        String args = "-i " + expires + " -parse -ignoreFailure -storeContent " + this.getStoreContent() + " -label " + this.getLabel();
        if (deadTime != null) {
            args = args + " -deadTime " + deadTime;
        }
        // Trailing mask/marker arguments belong to the Kotlin default-args constructor bridge.
        return (Hyperlink)new StatefulListenableHyperlink(url, "", 0, referrer, args, href, 0, null, null, null, 0, 0, null, 8132, null);
    }

    /**
     * Default implementation: validates both arguments and collects nothing.
     *
     * @param document the document links would be collected from; must not be {@code null}
     * @param sink     the collection links would be added to; must not be {@code null}
     */
    @Override
    public void collectTo(@NotNull FeaturedDocument document, @NotNull Collection<UrlAware> sink) {
        Intrinsics.checkNotNullParameter((Object)document, (String)"document");
        Intrinsics.checkNotNullParameter(sink, (String)"sink");
        // Intentionally empty: this base implementation adds nothing to the sink.
    }

    /**
     * Companion holder exposing state shared by all {@code AbstractPageProcessor} instances.
     */
    @Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000\u0018\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\b\u0003\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002R\u0017\u0010\u0003\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0006\u0010\u0007\u00a8\u0006\b"}, d2={"Lai/platon/scent/crawl/diffusing/AbstractPageProcessor$Companion;", "", "()V", "globalCreatedUrls", "Ljava/util/concurrent/ConcurrentSkipListSet;", "", "getGlobalCreatedUrls", "()Ljava/util/concurrent/ConcurrentSkipListSet;", "scent-engine"})
    public static final class Companion {
        /** Not instantiable from outside; the enclosing class creates the single instance. */
        private Companion() {
        }

        /** Synthetic bridge constructor; the marker argument is ignored. */
        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }

        /**
         * @return the live set of URLs for which a hyperlink has already been created
         */
        @NotNull
        public final ConcurrentSkipListSet<String> getGlobalCreatedUrls() {
            return AbstractPageProcessor.globalCreatedUrls;
        }
    }
}

