package ai.platon.scent.examples.sites.amazon;

import ai.platon.pulsar.common.LinkExtractors;
import ai.platon.pulsar.common.ResourceLoader;
import ai.platon.pulsar.crawl.PulsarEventHandler;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.dom.nodes.node.ext.NodeExtKt;
import ai.platon.pulsar.persist.WebDb;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.persist.gora.generated.GWebPage;
import ai.platon.pulsar.protocol.browser.driver.WebDriverPoolManager;
import ai.platon.pulsar.session.PulsarSession;
import ai.platon.scent.common.message.ScentMiscMessageWriter;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.dom.nodes.HyperPath;
import ai.platon.scent.dom.nodes.NodePath;
import ai.platon.scent.dom.select.QueriesKt;
import ai.platon.scent.entities.HarvestResult;
import ai.platon.scent.examples.common.VerboseCrawler;
import ai.platon.scent.ql.h2.context.ScentSQLContexts;
import java.nio.ByteBuffer;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import kotlin.Metadata;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.coroutines.CoroutineContext;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Reflection;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlin.text.StringsKt;
import kotlinx.coroutines.BuildersKt;
import org.jetbrains.annotations.NotNull;
import org.jsoup.nodes.Node;

/* compiled from: FeatureLeaner.kt */
@Metadata(mv = {1, 5, 1}, k = 1, xi = 48, d1 = {"��~\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0010\b\n\u0002\b\u0002\n\u0002\u0010\u000e\n��\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n��\n\u0002\u0010\"\n\u0002\b\u0002\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n��\n\u0002\u0010\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010\u001e\n\u0002\b\u0005\n\u0002\u0018\u0002\n��\u0018��2\u00020\u0001B\u000f\u0012\b\b\u0002\u0010\u0002\u001a\u00020\u0003¢\u0006\u0002\u0010\u0004J\b\u0010\u001c\u001a\u00020\u001dH\u0002J\u001e\u0010\u001e\u001a\u00020\u001d2\u0006\u0010\u001f\u001a\u00020 2\f\b\u0002\u0010!\u001a\u00060\"j\u0002`#H\u0002J\u001e\u0010\u001e\u001a\u00020\u001d2\u0006\u0010$\u001a\u00020%2\f\b\u0002\u0010!\u001a\u00060\"j\u0002`#H\u0002J\u0006\u0010&\u001a\u00020\u001dJ\u000e\u0010'\u001a\u00020\u001d2\u0006\u0010(\u001a\u00020\u0006J\u001e\u0010)\u001a\u00020\u001d2\u0006\u0010(\u001a\u00020\u00062\f\u0010*\u001a\b\u0012\u0004\u0012\u00020\u000b0+H\u0002J\u0006\u0010,\u001a\u00020\u001dJ\u0006\u0010-\u001a\u00020\u001dJ\u0011\u0010.\u001a\u00020\u001dH\u0086@ø\u0001��¢\u0006\u0002\u0010/J\u000e\u00100\u001a\b\u0012\u0004\u0012\u00020%01H\u0002R\u000e\u0010\u0005\u001a\u00020\u0006X\u0082D¢\u0006\u0002\n��R\u000e\u0010\u0007\u001a\u00020\bX\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\t\u001a\b\u0012\u0004\u0012\u00020\u000b0\nX\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0002\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\f\u001a\u00020\r8BX\u0082\u0004¢\u0006\u0006\u001a\u0004\b\u000e\u0010\u000fR\u000e\u0010\u0010\u001a\u00020\u0011X\u0082\u0004¢\u0006\u0002\n��R\u0014\u0010\u0012\u001a\b\u0012\u0004\u0012\u00020\u00060\u0013X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0014\u001a\u00020\u0003X\u0082\u000e¢\u0006\u0002\n��R\u0014\u0010\u00
15\u001a\b\u0012\u0004\u0012\u00020\u00170\u0016X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0018\u001a\u00020\u0003X\u0082D¢\u0006\u0002\n��R\u000e\u0010\u0019\u001a\u00020\u0006X\u0082D¢\u0006\u0002\n��R\u000e\u0010\u001a\u001a\u00020\u001bX\u0082\u0004¢\u0006\u0002\n��\u0082\u0002\u0004\n\u0002\b\u0019¨\u00062"}, d2 = {"Lai/platon/scent/examples/sites/amazon/FeatureLeaner;", "Lai/platon/scent/examples/common/VerboseCrawler;", "maxRecords", "", "(I)V", "args", "", "driverManager", "Lai/platon/pulsar/protocol/browser/driver/WebDriverPoolManager;", "hyperPaths", "Ljava/util/concurrent/ConcurrentLinkedQueue;", "Lai/platon/scent/dom/nodes/HyperPath;", "messageWriter", "Lai/platon/scent/common/message/ScentMiscMessageWriter;", "getMessageWriter", "()Lai/platon/scent/common/message/ScentMiscMessageWriter;", "options", "Lai/platon/scent/dom/HarvestOptions;", "portalUrls", "", "round", "scanFields", "", "Lai/platon/pulsar/persist/gora/generated/GWebPage$Field;", "scanMinimumContentSize", "scanUrlPrefix", "webDb", "Lai/platon/pulsar/persist/WebDb;", "convertHyperPaths2Sql", "", "extractByHyperPath", "doc", "Lai/platon/pulsar/dom/FeaturedDocument;", "sb", "Ljava/lang/StringBuilder;", "Lkotlin/text/StringBuilder;", "page", "Lai/platon/pulsar/persist/WebPage;", "leanAndScan", "learn", "portalUrl", "loadAndExtractOutPages", "paths", "", "loadPaths", "run", "scanAndExtract", "(Lkotlin/coroutines/Continuation;)Ljava/lang/Object;", "scanSequence", "Lkotlin/sequences/Sequence;", "scent-examples"})
/* loaded from: input_file:ai/platon/scent/examples/sites/amazon/FeatureLeaner.class */
public final class FeatureLeaner extends VerboseCrawler {
    /** Upper bound passed to {@code take(...)} in {@link #scanSequence()}; set from the constructor argument. */
    private final int maxRecords;

    /** URL prefix handed to {@code webDb.scan(...)}; the constructor sets it to {@code https://www.amazon.com/}. */
    @NotNull
    private final String scanUrlPrefix;

    /** Fields requested from {@code webDb.scan(...)}; the constructor sets PROTOCOL_STATUS and CONTENT. */
    @NotNull
    private final List<GWebPage.Field> scanFields;
    /** A scanned page is kept only if its content array length is greater than this value (10000). */
    private final int scanMinimumContentSize;

    /** Load-argument string from which {@link #options} is created in the constructor. */
    @NotNull
    private final String args;

    /** Harvest options built from {@link #args}, with diagnose and trustSamples switched on in the constructor. */
    @NotNull
    private final HarvestOptions options;

    /** Portal URLs extracted from the resource {@code /new-releases.txt}. */
    @NotNull
    private final Set<String> portalUrls;

    /** Hyper paths used for extraction; filled by {@link #loadPaths()}. */
    @NotNull
    private final ConcurrentLinkedQueue<HyperPath> hyperPaths;

    /** WebDb bean obtained from the session context; scanned by {@link #scanSequence()}. */
    @NotNull
    private final WebDb webDb;

    /** Driver pool manager bean obtained from the session context; closed at the end of {@link #leanAndScan()}. */
    @NotNull
    private final WebDriverPoolManager driverManager;
    /** Counter incremented and printed at the start of every loadAndExtractOutPages call. */
    private int round;

    /**
     * Creates the crawler on a freshly created Scent SQL context and initialises
     * the scan settings, harvest options, portal URLs and context beans.
     *
     * @param i value stored in {@code maxRecords}, the limit applied by {@link #scanSequence()}
     */
    public FeatureLeaner(int i) {
        super(ScentSQLContexts.INSTANCE.create());
        this.maxRecords = i;

        // Settings used when scanning the web database.
        this.scanUrlPrefix = "https://www.amazon.com/";
        this.scanFields = CollectionsKt.listOf(new GWebPage.Field[]{GWebPage.Field.PROTOCOL_STATUS, GWebPage.Field.CONTENT});
        this.scanMinimumContentSize = 10000;

        // Harvest options are derived from the argument string, then tuned in place.
        this.args = "-ic -i 10d -ii 70d -tl 40 -ol \"h2 a[href~=/dp/]\"";
        HarvestOptions configured = PulsarSession.DefaultImpls.options$default(getSession(), this.args, (PulsarEventHandler) null, 2, (Object) null);
        configured.setDiagnose(true);
        configured.setTrustSamples(true);
        this.options = configured;

        this.portalUrls = LinkExtractors.fromResource("/new-releases.txt");
        this.hyperPaths = new ConcurrentLinkedQueue<>();

        // Collaborators are looked up once from the session context.
        this.webDb = (WebDb) getSession().getContext().getBean(Reflection.getOrCreateKotlinClass(WebDb.class));
        this.driverManager = (WebDriverPoolManager) getSession().getContext().getBean(Reflection.getOrCreateKotlinClass(WebDriverPoolManager.class));
    }

    /**
     * Compiler-generated constructor that applies the default argument.
     *
     * @param i  explicit maxRecords value, used only when bit 0 of {@code i2} is clear
     * @param i2 default-argument bit mask; when bit 0 is set, 50 is used instead of {@code i}
     * @param defaultConstructorMarker marker argument; not read by this constructor
     */
    public /* synthetic */ FeatureLeaner(int i, int i2, DefaultConstructorMarker defaultConstructorMarker) {
        this((i2 & 1) != 0 ? 50 : i);
    }

    /**
     * Looks up the {@link ScentMiscMessageWriter} bean in the session context.
     * The lookup is performed on every call; nothing is cached here.
     *
     * @return the bean returned by the context, cast to {@link ScentMiscMessageWriter}
     */
    private final ScentMiscMessageWriter getMessageWriter() {
        Object bean = getSession().getContext().getBean(Reflection.getOrCreateKotlinClass(ScentMiscMessageWriter.class));
        return (ScentMiscMessageWriter) bean;
    }

    /**
     * Runs the coroutine block {@code FeatureLeaner$run$elapsed$1$1} (declared outside
     * this file) to completion on the calling thread, then prints the elapsed
     * wall-clock time to standard output.
     */
    public final void run() {
        final long startedAtMillis = System.currentTimeMillis();
        BuildersKt.runBlocking$default((CoroutineContext) null, new FeatureLeaner$run$elapsed$1$1(this, null), 1, (Object) null);
        final Duration elapsed = Duration.ofMillis(System.currentTimeMillis() - startedAtMillis);
        System.out.println((Object) ("Elapsed: " + elapsed));
    }

    /**
     * Loads the hyper paths, loads the first portal URL with the harvest options and
     * calls {@link #learn(String)} for every loaded page through a parallel stream.
     *
     * <p>The driver manager is closed in a {@code finally} block, so it is released
     * even when loading or learning throws; the exception still propagates.
     */
    public final void leanAndScan() {
        loadPaths();
        try {
            getSession().loadAll(CollectionsKt.take(this.portalUrls, 1), this.options)
                    .parallelStream()
                    // The receiver of the lambda helper is this instance.
                    .forEach(page -> m22leanAndScan$lambda2(this, page));
        } finally {
            this.driverManager.close();
        }
    }

    /**
     * Reads every line of the resource {@code trusted-hyper-paths.txt}, parses each
     * line into a {@link HyperPath} and appends the successfully parsed paths to
     * {@code hyperPaths}. Lines for which the parser returns {@code null} are skipped.
     */
    public final void loadPaths() {
        List<String> lines = ResourceLoader.INSTANCE.readAllLines("trusted-hyper-paths.txt");

        // Parse everything first so that nothing is queued if parsing fails part-way.
        List<HyperPath> parsedPaths = new ArrayList<>();
        for (String line : lines) {
            HyperPath candidate = HyperPath.Companion.parse(line);
            if (candidate == null) {
                continue;
            }
            parsedPaths.add(candidate);
        }

        this.hyperPaths.addAll(parsedPaths);
    }

    /* JADX WARN: Removed duplicated region for block: B:15:0x00d4  */
    /* JADX WARN: Removed duplicated region for block: B:16:0x011c  */
    /* JADX WARN: Removed duplicated region for block: B:8:0x005c  */
    /**
     * Placeholder for the suspend function {@code scanAndExtract}.
     *
     * <p>The decompiler could not reconstruct the body, so in this file the method
     * unconditionally throws. The original logic is not visible here.
     *
     * @param r11 the continuation supplied by the coroutine machinery
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always
     */
    @org.jetbrains.annotations.Nullable
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public final java.lang.Object scanAndExtract(@org.jetbrains.annotations.NotNull kotlin.coroutines.Continuation<? super kotlin.Unit> r11) {
        /*
            Method dump skipped, instructions count: 295
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: ai.platon.scent.examples.sites.amazon.FeatureLeaner.scanAndExtract(kotlin.coroutines.Continuation):java.lang.Object");
    }

    /**
     * Runs the coroutine block {@code FeatureLeaner$learn$result$1} (declared outside
     * this file) for the given portal URL, blocking until it yields a
     * {@link HarvestResult}, then passes the result's table group and the harvest
     * options to the session's {@code buildAll}.
     *
     * @param str the portal URL; must not be {@code null}
     */
    public final void learn(@NotNull String str) {
        Intrinsics.checkNotNullParameter(str, "portalUrl");
        HarvestResult harvest = (HarvestResult) BuildersKt.runBlocking$default((CoroutineContext) null, new FeatureLeaner$learn$result$1(this, str, null), 1, (Object) null);
        getSession().buildAll(harvest.getTableGroup(), this.options);
    }

    /**
     * Builds the sequence of stored pages to extract from.
     *
     * <p>Pages are scanned from the web database under {@code scanUrlPrefix} with
     * {@code scanFields}, then narrowed in this order:
     * <ol>
     *   <li>the page key must contain {@code /dp/};</li>
     *   <li>the protocol status must be a success and the content's backing array
     *       must be longer than {@code scanMinimumContentSize} (missing content
     *       counts as length 0);</li>
     *   <li>the first 2000 remaining pages are dropped;</li>
     *   <li>at most {@code maxRecords} pages are taken.</li>
     * </ol>
     *
     * <p>JADX note: access modifiers changed from private.
     *
     * @return the filtered, offset and limited sequence of pages
     */
    public final Sequence<WebPage> scanSequence() {
        Sequence<WebPage> scanned = SequencesKt.asSequence(this.webDb.scan(this.scanUrlPrefix, this.scanFields));

        // Keep only pages whose key contains "/dp/".
        Sequence<WebPage> productPages = SequencesKt.filter(scanned, page -> {
            Intrinsics.checkNotNullParameter(page, "it");
            String key = page.getKey();
            Intrinsics.checkNotNullExpressionValue(key, "it.key");
            return StringsKt.contains$default(key, "/dp/", false, 2, (Object) null);
        });

        // Keep only successfully fetched pages with enough content.
        Sequence<WebPage> fetchedPages = SequencesKt.filter(productPages, page -> {
            Intrinsics.checkNotNullParameter(page, "it");
            if (!page.getProtocolStatus().isSuccess()) {
                return false;
            }
            ByteBuffer content = page.getContent();
            byte[] bytes = content == null ? null : content.array();
            int length = bytes == null ? 0 : bytes.length;
            return length > this.scanMinimumContentSize;
        });

        return SequencesKt.take(SequencesKt.drop(fetchedPages, 2000), this.maxRecords);
    }

    /**
     * Starts a new round: increments and prints the round counter, loads the out
     * pages of the given portal URL with the harvest options, prints how many pages
     * were loaded, and runs hyper-path extraction on each loaded page.
     *
     * @param str        the portal URL whose out pages are loaded
     * @param collection the expected paths; only its size is used, for the log line
     */
    private final void loadAndExtractOutPages(String str, Collection<HyperPath> collection) {
        this.round++;
        System.out.println((Object) ("\n\nRound " + this.round + ".====================="));

        List<WebPage> pages = getSession().loadOutPages(str, this.options);
        System.out.println((Object) ("Loaded " + pages.size() + " pages and total " + collection.size() + " fields are expected | " + str));

        // One buffer is shared by all pages; extractByHyperPath resets it on entry.
        StringBuilder sb = new StringBuilder();
        for (WebPage page : pages) {
            extractByHyperPath(page, sb);
        }
    }

    /**
     * Parses the page into a {@link FeaturedDocument} through the session and
     * delegates to the document-based overload.
     *
     * @param webPage the page to parse
     * @param sb      the buffer handed on to the document-based overload
     */
    private final void extractByHyperPath(WebPage webPage, StringBuilder sb) {
        FeaturedDocument document = PulsarSession.DefaultImpls.parse$default(getSession(), webPage, false, 2, (Object) null);
        extractByHyperPath(document, sb);
    }

    /**
     * Compiler-generated default-argument bridge for the page-based
     * {@code extractByHyperPath}: when bit 1 of {@code i} is set, a fresh
     * {@link StringBuilder} replaces {@code sb}.
     */
    static /* synthetic */ void extractByHyperPath$default(FeatureLeaner featureLeaner, WebPage webPage, StringBuilder sb, int i, Object obj) {
        boolean useDefaultBuffer = (i & 2) != 0;
        StringBuilder buffer = useDefaultBuffer ? new StringBuilder() : sb;
        featureLeaner.extractByHyperPath(webPage, buffer);
    }

    /**
     * Extracts one tab-separated row from the document using every path in
     * {@code hyperPaths}.
     *
     * <p>The buffer is cleared first. For each path, the text representation of the
     * first matching node is appended with tab characters removed, followed by a
     * tab; an empty cell is appended when nothing matches or the text is
     * {@code null}. If more than 10 cells are non-blank, the document location is
     * appended and the row is reported through the message writer; otherwise
     * nothing is reported.
     *
     * @param featuredDocument the document to query
     * @param sb               the row buffer; its previous content is discarded
     */
    private final void extractByHyperPath(FeaturedDocument featuredDocument, StringBuilder sb) {
        sb.setLength(0);

        int nonBlankCells = 0;
        // The queue is iterated directly: a Kotlin Sequence is not a java.lang.Iterable.
        for (NodePath path : this.hyperPaths) {
            Node node = QueriesKt.selectFirstOrNull(featuredDocument, path);
            String text = node == null ? null : NodeExtKt.getTextRepresentation(node);
            if (text == null) {
                text = "";
            }

            // Tabs are the cell separator, so they must not appear inside a cell.
            String cell = text.replace("\t", "");
            if (!StringsKt.isBlank(cell)) {
                nonBlankCells++;
            }
            sb.append(cell).append("\t");
        }

        // Rows with 10 or fewer non-blank cells are not reported.
        if (nonBlankCells > 10) {
            sb.append(featuredDocument.getLocation());
            String row = sb.toString();
            getMessageWriter().reportExtractCsvResult(row);
        }
    }

    /**
     * Compiler-generated default-argument bridge for the document-based
     * {@code extractByHyperPath}: when bit 1 of {@code i} is set, a fresh
     * {@link StringBuilder} replaces {@code sb}.
     *
     * <p>JADX note: access modifiers changed from package-private.
     */
    public static /* synthetic */ void extractByHyperPath$default(FeatureLeaner featureLeaner, FeaturedDocument featuredDocument, StringBuilder sb, int i, Object obj) {
        boolean useDefaultBuffer = (i & 2) != 0;
        StringBuilder buffer = useDefaultBuffer ? new StringBuilder() : sb;
        featureLeaner.extractByHyperPath(featuredDocument, buffer);
    }

    /**
     * Prints a SQL statement built from {@code hyperPaths} to standard output.
     *
     * <p>The statement is a {@code select} line, one {@code dom_first_text(dom, '<path>')}
     * column per hyper path aliased by the path's label and separated by
     * {@code ",\n"}, and a closing {@code from dom_select({{url}})} line.
     */
    private final void convertHyperPaths2Sql() {
        StringBuilder sql = new StringBuilder();
        sql.append("select").append('\n');

        boolean needsSeparator = false;
        for (HyperPath path : this.hyperPaths) {
            if (needsSeparator) {
                sql.append(",\n");
            }
            needsSeparator = true;
            sql.append("    dom_first_text(dom, '" + path + "') as `" + path.getLabel() + "`");
        }

        sql.append('\n');
        sql.append("from dom_select({{url}})").append('\n');
        System.out.println((Object) sql.toString());
    }

    /**
     * Body of the lambda used by {@code leanAndScan}: passes the page's URL to
     * {@link #learn(String)} on the given instance.
     *
     * <p>Renamed by the decompiler from {@code leanAndScan$lambda-2}, which is not a
     * valid Java name.
     *
     * @param featureLeaner the enclosing instance; must not be {@code null}
     * @param webPage       the loaded page whose URL is learned
     */
    private static final void m22leanAndScan$lambda2(FeatureLeaner featureLeaner, WebPage webPage) {
        Intrinsics.checkNotNullParameter(featureLeaner, "this$0");
        String pageUrl = webPage.getUrl();
        Intrinsics.checkNotNullExpressionValue(pageUrl, "it.url");
        featureLeaner.learn(pageUrl);
    }

    /**
     * Creates a learner with the default record limit: the mask value 1 makes the
     * default-argument constructor substitute 50 for the placeholder 0.
     */
    public FeatureLeaner() {
        this(0, 1, null);
    }
}
