/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.scent.crawl.diffusing;

import ai.platon.pulsar.common.LogsKt;
import ai.platon.pulsar.common.message.PageLoadStatusFormatter;
import ai.platon.pulsar.common.urls.Hyperlink;
import ai.platon.pulsar.common.urls.UrlAware;
import ai.platon.pulsar.common.urls.UrlUtils;
import ai.platon.pulsar.crawl.common.url.StatefulListenableHyperlink;
import ai.platon.pulsar.crawl.event.impl.DefaultPageEvent;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.persist.metadata.OpenPageCategory;
import ai.platon.pulsar.persist.metadata.PageCategory;
import ai.platon.scent.ScentSession;
import ai.platon.scent.common.WebPages;
import ai.platon.scent.crawl.diffusing.PageProcessor;
import ai.platon.scent.crawl.diffusing.config.DiffusingCrawlerConfig;
import java.time.Instant;
import java.util.Collection;
import java.util.concurrent.ConcurrentSkipListSet;
import kotlin.Metadata;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Reflection;
import kotlin.reflect.KClass;
import kotlin.text.Regex;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;

@Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000|\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\u000b\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010\u000e\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\b\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\t\n\u0002\u0010\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u001f\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\b&\u0018\u0000 @2\u00020\u0001:\u0001@B\u0015\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\u0002\u0010\u0006J\u001e\u0010.\u001a\u00020/2\u0006\u00100\u001a\u0002012\f\u00102\u001a\b\u0012\u0004\u0012\u00020403H\u0016J0\u00105\u001a\u0004\u0018\u0001062\u0006\u00107\u001a\u00020\u00182\b\u00108\u001a\u0004\u0018\u00010\u00182\b\u00109\u001a\u0004\u0018\u00010\u00182\b\u0010:\u001a\u0004\u0018\u00010;H\u0016J\u0012\u00105\u001a\u0004\u0018\u0001062\u0006\u0010<\u001a\u00020=H\u0016J\u0012\u0010>\u001a\u0004\u0018\u00010\u00182\u0006\u00107\u001a\u00020\u0018H\u0016J\u0012\u0010?\u001a\u0004\u0018\u00010\u00182\u0006\u00107\u001a\u00020\u0018H\u0016R\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0007\u0010\bR\u001a\u0010\t\u001a\u00020\nX\u0096\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u000b\u0010\f\"\u0004\b\r\u0010\u000eR\u0014\u0010\u000f\u001a\u00020\u0010X\u0096\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0011\u0010\u0012R\u0014\u0010\u0013\u001a\u00020\u0014X\u0084\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0015\u0010\u0016R\u0014\u0010\u0017\u001a\u00020\u00188VX\u0096\u0004\u00a2\u0006\u0006\u001a\u0004\b\u0019\u0010\u001aR\u0016\u0010\u001b\u001a\n 
\u001d*\u0004\u0018\u00010\u001c0\u001cX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u001a\u0010\u001e\u001a\u00020\u001fX\u0096\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b \u0010!\"\u0004\b\"\u0010#R\u0014\u0010$\u001a\u00020%X\u0096\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b&\u0010'R\u0011\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\b\n\u0000\u001a\u0004\b(\u0010)R\u001a\u0010*\u001a\u00020\nX\u0096\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b+\u0010\f\"\u0004\b,\u0010\u000eR\u0016\u0010-\u001a\n \u001d*\u0004\u0018\u00010\u001c0\u001cX\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006A"}, d2={"Lai/platon/scent/crawl/diffusing/AbstractPageProcessor;", "Lai/platon/scent/crawl/diffusing/PageProcessor;", "config", "Lai/platon/scent/crawl/diffusing/config/DiffusingCrawlerConfig;", "session", "Lai/platon/scent/ScentSession;", "(Lai/platon/scent/crawl/diffusing/config/DiffusingCrawlerConfig;Lai/platon/scent/ScentSession;)V", "getConfig", "()Lai/platon/scent/crawl/diffusing/config/DiffusingCrawlerConfig;", "dbCheck", "", "getDbCheck", "()Z", "setDbCheck", "(Z)V", "event", "Lai/platon/pulsar/crawl/event/impl/DefaultPageEvent;", "getEvent", "()Lai/platon/pulsar/crawl/event/impl/DefaultPageEvent;", "indexPageUrlRegex", "Lkotlin/text/Regex;", "getIndexPageUrlRegex", "()Lkotlin/text/Regex;", "label", "", "getLabel", "()Ljava/lang/String;", "logger", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "minPageSize", "", "getMinPageSize", "()I", "setMinPageSize", "(I)V", "pageCategory", "Lai/platon/pulsar/persist/metadata/OpenPageCategory;", "getPageCategory", "()Lai/platon/pulsar/persist/metadata/OpenPageCategory;", "getSession", "()Lai/platon/scent/ScentSession;", "storeContent", "getStoreContent", "setStoreContent", "taskLogger", "collectTo", "", "document", "Lai/platon/pulsar/dom/FeaturedDocument;", "sink", "", "Lai/platon/pulsar/common/urls/UrlAware;", "createHyperlink", "Lai/platon/pulsar/common/urls/Hyperlink;", "url", "href", "referrer", "deadTime", "Ljava/time/Instant;", 
"anchor", "Lorg/jsoup/nodes/Element;", "filter", "normalize", "Companion", "scent-engine"})
public abstract class AbstractPageProcessor
implements PageProcessor {
    /** Companion instance; exposes the class-level state via {@link Companion#getGlobalCreatedUrls()}. */
    @NotNull
    public static final Companion Companion = new Companion(null);
    /** Crawler configuration passed to the constructor; supplies the label and the index-page URL pattern. */
    @NotNull
    private final DiffusingCrawlerConfig config;
    /** Session passed to the constructor; used to look up stored pages when {@code dbCheck} is on. */
    @NotNull
    private final ScentSession session;
    /** Logger obtained for this class in the constructor. */
    private final Logger logger;
    /** Logger obtained for this class with the {@code ".Task"} postfix; not referenced by the methods of this class. */
    private final Logger taskLogger;
    /** Page category reported by this processor; initialised from {@code PageCategory.UNKNOWN}. */
    @NotNull
    private final OpenPageCategory pageCategory;
    /** Page event object created with default arguments in the constructor. */
    @NotNull
    private final DefaultPageEvent event;
    /** When true, {@code createHyperlink} looks the URL up in the session before building load arguments. Defaults to false. */
    private boolean dbCheck;
    /** Size threshold below which a stored page is treated as expired; set to 1000 in the constructor. */
    private int minPageSize;
    /** Value appended after {@code -storeContent} in the generated load arguments. Defaults to false. */
    private boolean storeContent;
    /** Regex compiled from {@code config.getIndexPageUrlPattern()}; used by {@code filter}. */
    @NotNull
    private final Regex indexPageUrlRegex;
    /**
     * URLs for which a hyperlink has already been created. The set is static, so it is shared by
     * every instance of this class; the methods of this class only ever add to it.
     */
    @NotNull
    private static final ConcurrentSkipListSet<String> globalCreatedUrls = new ConcurrentSkipListSet<>();

    /**
     * Creates a processor bound to the given configuration and session.
     *
     * <p>Besides storing both arguments, this sets up the two loggers, a page category based on
     * {@code PageCategory.UNKNOWN}, a default page event, a minimum page size of 1000, and the
     * regex compiled from the configured index-page URL pattern.
     *
     * @param config crawler configuration; must not be null
     * @param session session used for page lookups; must not be null
     */
    public AbstractPageProcessor(@NotNull DiffusingCrawlerConfig config, @NotNull ScentSession session) {
        Intrinsics.checkNotNullParameter(config, "config");
        Intrinsics.checkNotNullParameter(session, "session");

        this.config = config;
        this.session = session;

        this.logger = LogsKt.getLogger((KClass)Reflection.getOrCreateKotlinClass(AbstractPageProcessor.class));
        this.taskLogger = LogsKt.getLogger((KClass)Reflection.getOrCreateKotlinClass(AbstractPageProcessor.class), (String)".Task");

        this.pageCategory = new OpenPageCategory(PageCategory.UNKNOWN);
        this.event = new DefaultPageEvent(null, null, null, 7, null);
        this.minPageSize = 1000;

        // Compile the pattern once so filter() does not rebuild it for every URL.
        this.indexPageUrlRegex = new Regex(this.config.getIndexPageUrlPattern());
    }

    /**
     * Returns the crawler configuration this processor was constructed with.
     *
     * @return the configuration, never null
     */
    @NotNull
    public final DiffusingCrawlerConfig getConfig() {
        return config;
    }

    /**
     * Returns the session this processor was constructed with.
     *
     * @return the session, never null
     */
    @NotNull
    public final ScentSession getSession() {
        return session;
    }

    /**
     * Returns the label of this processor, which is the label of its configuration.
     *
     * @return the configured label
     */
    @Override
    @NotNull
    public String getLabel() {
        return getConfig().getLabel();
    }

    /**
     * Returns the page category of this processor.
     *
     * @return the category object created in the constructor from {@code PageCategory.UNKNOWN}
     */
    @Override
    @NotNull
    public OpenPageCategory getPageCategory() {
        return pageCategory;
    }

    /**
     * Returns the page event object owned by this processor.
     *
     * @return the event created with default arguments in the constructor
     */
    @NotNull
    public DefaultPageEvent getEvent() {
        return event;
    }

    /**
     * Tells whether {@code createHyperlink} looks the URL up in the session before building
     * the load arguments.
     *
     * @return the current flag; false unless changed through {@link #setDbCheck(boolean)}
     */
    @Override
    public boolean getDbCheck() {
        return dbCheck;
    }

    /**
     * Enables or disables the stored-page lookup performed by {@code createHyperlink}.
     *
     * @param bl the new flag value
     */
    @Override
    public void setDbCheck(boolean bl) {
        dbCheck = bl;
    }

    /**
     * Returns the size threshold below which a stored page is treated as expired by
     * {@code createHyperlink}.
     *
     * @return the threshold; 1000 unless changed through {@link #setMinPageSize(int)}
     */
    @Override
    public int getMinPageSize() {
        return minPageSize;
    }

    /**
     * Replaces the size threshold used to decide whether a stored page is expired.
     * The value is stored as given; no range check is performed.
     *
     * @param n the new threshold
     */
    @Override
    public void setMinPageSize(int n) {
        minPageSize = n;
    }

    /**
     * Returns the value written after {@code -storeContent} in the load arguments generated by
     * {@code createHyperlink}.
     *
     * @return the current flag; false unless changed through {@link #setStoreContent(boolean)}
     */
    @Override
    public boolean getStoreContent() {
        return storeContent;
    }

    /**
     * Sets the value written after {@code -storeContent} in the generated load arguments.
     *
     * @param bl the new flag value
     */
    @Override
    public void setStoreContent(boolean bl) {
        storeContent = bl;
    }

    /**
     * Returns the regex compiled in the constructor from the configured index-page URL pattern.
     *
     * @return the regex used by {@link #filter(String)}, never null
     */
    @NotNull
    protected final Regex getIndexPageUrlRegex() {
        return indexPageUrlRegex;
    }

    /**
     * Decides whether a URL is accepted by this processor.
     *
     * @param url candidate URL; must not be null
     * @return {@code url} itself when {@code UrlUtils.isValidUrl} accepts it and the index-page
     *         URL regex matches it, otherwise {@code null}
     */
    @Override
    @Nullable
    public String filter(@NotNull String url) {
        Intrinsics.checkNotNullParameter(url, "url");
        // Short-circuit keeps the original order: validity first, regex only for valid URLs.
        boolean accepted = UrlUtils.isValidUrl(url) && this.getIndexPageUrlRegex().matches(url);
        return accepted ? url : null;
    }

    /**
     * Normalizes a URL. This base implementation is the identity: the argument is returned
     * unchanged.
     *
     * @param url URL to normalize; must not be null
     * @return the same {@code url} instance
     */
    @Override
    @Nullable
    public String normalize(@NotNull String url) {
        Intrinsics.checkNotNullParameter(url, "url");
        final String normalized = url;
        return normalized;
    }

    /**
     * Builds a hyperlink from an anchor element.
     *
     * <p>The anchor's absolute {@code href} is passed through {@link #filter(String)} and then
     * {@link #normalize(String)}; if either returns {@code null} no hyperlink is created.
     * Otherwise creation is delegated to the four-argument overload with the normalized URL,
     * the filtered href, the anchor's base URI as referrer, and no dead time.
     *
     * @param anchor element whose {@code href} attribute is read; must not be null
     * @return the created hyperlink, or {@code null} when the link is rejected
     */
    @Override
    @Nullable
    public Hyperlink createHyperlink(@NotNull Element anchor) {
        Intrinsics.checkNotNullParameter(anchor, "anchor");

        String absoluteHref = anchor.absUrl("href");
        Intrinsics.checkNotNullExpressionValue(absoluteHref, "anchor.absUrl(\"href\")");

        String href = this.filter(absoluteHref);
        if (href == null) {
            return null;
        }

        String url = this.normalize(href);
        if (url == null) {
            return null;
        }

        return PageProcessor.DefaultImpls.createHyperlink$default(this, url, href, anchor.baseUri(), null, 8, null);
    }

    /**
     * Builds a hyperlink for {@code url}, at most once per URL across all instances of this class.
     *
     * <p>Returns {@code null} when {@link #filter(String)} rejects the URL or when the URL is
     * already recorded in the static set of created URLs. When {@link #getDbCheck()} is true the
     * stored page is looked up through the session and is treated as expired if its content
     * size is below {@link #getMinPageSize()}; an expired page yields {@code -i 0s} in the load
     * arguments, anything else yields {@code -i 3000d}.
     *
     * @param url URL of the link; must not be null
     * @param href original href, logged in place of {@code url} when present and passed to the hyperlink
     * @param referrer referrer passed to the hyperlink; may be null
     * @param deadTime when non-null, appended to the load arguments after {@code -deadTime}
     * @return the created hyperlink, or {@code null} when the URL is rejected or already created
     */
    @Override
    @Nullable
    public Hyperlink createHyperlink(@NotNull String url, @Nullable String href, @Nullable String referrer, @Nullable Instant deadTime) {
        Intrinsics.checkNotNullParameter(url, "url");
        if (this.filter(url) == null) {
            return null;
        }
        // Single atomic test-and-insert: add() returns false when the URL is already present.
        // A separate contains() followed by add() lets two threads both pass the check and
        // create duplicate hyperlinks for the same URL.
        if (!globalCreatedUrls.add(url)) {
            return null;
        }

        boolean isExpired = false;
        if (this.getDbCheck()) {
            WebPage page = this.session.getOrNull(url);
            // NOTE(review): the size is computed before the null check, exactly as before;
            // presumably the helper tolerates a null page - confirm.
            int size = WebPages.getActualContentBytes$default(WebPages.INSTANCE, page, false, 2, null);
            isExpired = size < this.getMinPageSize();
            if (page == null) {
                this.logger.debug("Fetching {} new page | {} | {}",
                        this.getPageCategory().getSymbol(), this.getLabel(), href == null ? url : href);
            } else {
                String prefix = isExpired ? "Fetching" : "Loading";
                this.logger.debug("{}", (Object)new PageLoadStatusFormatter(page, prefix, false, false, false, false, 60, null));
            }
        }

        String expires = isExpired ? "0s" : "3000d";
        String args = "-i " + expires + " -parse -ignoreFailure -storeContent " + this.getStoreContent() + " -label " + this.getLabel();
        if (deadTime != null) {
            args = args + " -deadTime " + deadTime;
        }
        return (Hyperlink)new StatefulListenableHyperlink(url, null, 0, referrer, args, href, null, 70, null);
    }

    /**
     * Collects URLs from a document into a sink. This base implementation only checks that both
     * arguments are non-null and adds nothing to {@code sink}.
     *
     * @param document document to collect from; must not be null
     * @param sink collection that would receive collected URLs; must not be null
     */
    @Override
    public void collectTo(@NotNull FeaturedDocument document, @NotNull Collection<UrlAware> sink) {
        Intrinsics.checkNotNullParameter(document, "document");
        Intrinsics.checkNotNullParameter(sink, "sink");
        // Nothing is collected here.
    }

    @Metadata(mv={1, 5, 1}, k=1, xi=48, d1={"\u0000\u0018\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\b\u0003\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002R\u0017\u0010\u0003\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0006\u0010\u0007\u00a8\u0006\b"}, d2={"Lai/platon/scent/crawl/diffusing/AbstractPageProcessor$Companion;", "", "()V", "globalCreatedUrls", "Ljava/util/concurrent/ConcurrentSkipListSet;", "", "getGlobalCreatedUrls", "()Ljava/util/concurrent/ConcurrentSkipListSet;", "scent-engine"})
    public static final class Companion {
        /* Constructor used by the enclosing class to create its Companion instance; the marker argument is ignored. */
        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }

        private Companion() {
        }

        /**
         * Returns the static set of URLs for which a hyperlink has already been created.
         * The live set is returned, not a copy.
         *
         * @return the shared set, never null
         */
        @NotNull
        public final ConcurrentSkipListSet<String> getGlobalCreatedUrls() {
            return AbstractPageProcessor.globalCreatedUrls;
        }
    }
}

