/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.scent.tools;

import ai.platon.pulsar.common.AppPaths;
import ai.platon.pulsar.common.LogsKt;
import ai.platon.pulsar.common.config.VolatileConfig;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.persist.WebDb;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.persist.gora.generated.GWebPage;
import ai.platon.pulsar.skeleton.context.PulsarContext;
import ai.platon.pulsar.skeleton.crawl.common.URLUtil;
import ai.platon.pulsar.skeleton.crawl.filter.ChainedUrlNormalizer;
import ai.platon.pulsar.skeleton.crawl.filter.ScopedUrlNormalizer;
import ai.platon.scent.ScentSession;
import ai.platon.scent.common.MLPaths;
import ai.platon.scent.common.ScentWebPageExtKt;
import ai.platon.scent.common.sites.amazon.AmazonAsinUrlNormalizer;
import ai.platon.scent.common.sites.amazon.AmazonUrls;
import ai.platon.scent.context.support.AbstractScentContext;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.ml.EncodeOptions;
import ai.platon.scent.ml.MLProject;
import ai.platon.scent.ml.data.SimpleDataFrame;
import ai.platon.scent.tools.HarvestTaskExecutor;
import ai.platon.scent.tools.ScanningHarvestTaskExecutor;
import ai.platon.scent.tools.VerboseCrawler;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.collections.SetsKt;
import kotlin.enums.EnumEntries;
import kotlin.enums.EnumEntriesKt;
import kotlin.io.path.PathsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.Reflection;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.reflect.KClass;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.slf4j.Logger;

@Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000b\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010\b\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\n\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010\u0002\n\u0002\b\u0005\n\u0002\u0010\u000b\n\u0000\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\u0018\u00002\u00020\u0001B-\u0012\b\b\u0002\u0010\u0002\u001a\u00020\u0003\u0012\b\b\u0002\u0010\u0004\u001a\u00020\u0005\u0012\b\b\u0002\u0010\u0006\u001a\u00020\u0005\u0012\b\b\u0002\u0010\u0007\u001a\u00020\b\u00a2\u0006\u0002\u0010\tJ\u0006\u0010\u001f\u001a\u00020 J\u0006\u0010!\u001a\u00020 J8\u0010\"\u001a\u00020 2\u0006\u0010#\u001a\u00020\u00032\b\b\u0002\u0010$\u001a\u00020\u00032\b\b\u0002\u0010%\u001a\u00020&2\u0014\b\u0002\u0010'\u001a\u000e\u0012\u0004\u0012\u00020)\u0012\u0004\u0012\u00020&0(J\u0010\u0010*\u001a\u00020 2\b\b\u0002\u0010#\u001a\u00020\u0003J\u000e\u0010+\u001a\u00020 
2\u0006\u0010#\u001a\u00020\u0003J\u0016\u0010,\u001a\b\u0012\u0004\u0012\u00020.0-2\u0006\u0010/\u001a\u000200H\u0002R\u000e\u0010\n\u001a\u00020\u000bX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0011\u0010\u0007\u001a\u00020\b\u00a2\u0006\b\n\u0000\u001a\u0004\b\f\u0010\rR\u001a\u0010\u000e\u001a\u00020\u000fX\u0086\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u0010\u0010\u0011\"\u0004\b\u0012\u0010\u0013R\u0011\u0010\u0014\u001a\u00020\u00038F\u00a2\u0006\u0006\u001a\u0004\b\u0015\u0010\u0016R\u0011\u0010\u0006\u001a\u00020\u0005\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0017\u0010\u0018R\u000e\u0010\u0019\u001a\u00020\u001aX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0011\u0010\u001b\u001a\u00020\u00038F\u00a2\u0006\u0006\u001a\u0004\b\u001c\u0010\u0016R\u0011\u0010\u0004\u001a\u00020\u0005\u00a2\u0006\b\n\u0000\u001a\u0004\b\u001d\u0010\u0018R\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u001e\u0010\u0016\u00a8\u00061"}, d2={"Lai/platon/scent/tools/ScanningHarvestTaskExecutor;", "Lai/platon/scent/tools/HarvestTaskExecutor;", "urlBase", "", "start", "", "limit", "crawler", "Lai/platon/scent/tools/VerboseCrawler;", "(Ljava/lang/String;IILai/platon/scent/tools/VerboseCrawler;)V", "context", "Lai/platon/scent/context/support/AbstractScentContext;", "getCrawler", "()Lai/platon/scent/tools/VerboseCrawler;", "datasetPath", "Ljava/nio/file/Path;", "getDatasetPath", "()Ljava/nio/file/Path;", "setDatasetPath", "(Ljava/nio/file/Path;)V", "domain", "getDomain", "()Ljava/lang/String;", "getLimit", "()I", "logger", "Lorg/slf4j/Logger;", "origin", "getOrigin", "getStart", "getUrlBase", "check", "", "clearAnnotations", "encode", "args", "restrictCss", "annotated", "", "nodeFilter", "Lkotlin/Function1;", "Lorg/jsoup/nodes/Node;", "harvest", "kmeans", "loadDocuments", "Lkotlin/sequences/Sequence;", "Lai/platon/pulsar/dom/FeaturedDocument;", "options", "Lai/platon/scent/dom/HarvestOptions;", "scent-boot"})
@SourceDebugExtension(value={"SMAP\nScanningHarvestTaskExecutor.kt\nKotlin\n*S Kotlin\n*F\n+ 1 ScanningHarvestTaskExecutor.kt\nai/platon/scent/tools/ScanningHarvestTaskExecutor\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 ArraysJVM.kt\nkotlin/collections/ArraysKt__ArraysJVMKt\n+ 4 _Sequences.kt\nkotlin/sequences/SequencesKt___SequencesKt\n*L\n1#1,189:1\n1549#2:190\n1620#2,3:191\n1549#2:202\n1620#2,3:203\n1549#2:208\n1620#2,3:209\n37#3,2:194\n1324#4,3:196\n1324#4,3:199\n1313#4,2:206\n*S KotlinDebug\n*F\n+ 1 ScanningHarvestTaskExecutor.kt\nai/platon/scent/tools/ScanningHarvestTaskExecutor\n*L\n53#1:190\n53#1:191,3\n90#1:202\n90#1:203,3\n155#1:208\n155#1:209,3\n53#1:194,2\n58#1:196,3\n75#1:199,3\n130#1:206,2\n*E\n"})
public final class ScanningHarvestTaskExecutor
extends HarvestTaskExecutor {
    /** URL prefix passed to the web-database and session scans; also the source of the origin and domain. */
    @NotNull
    private final String urlBase;
    /** Number of leading scan results to skip (used with {@code drop} and passed to the session scan). */
    private final int start;
    /** Maximum number of scan results to process (used with {@code take} and passed to the session scan). */
    private final int limit;
    /** Crawler whose session backs this executor; also used for encoding and harvesting. */
    @NotNull
    private final VerboseCrawler crawler;
    /** Logger obtained for this class. */
    @NotNull
    private final Logger logger;
    /** The session's context, cast to {@link AbstractScentContext}; used to reach the web database. */
    @NotNull
    private final AbstractScentContext context;
    /** Dataset path that {@code encode} deletes up front and hands to {@code EncodeOptions}; replaceable via the setter. */
    @NotNull
    private Path datasetPath;

    /**
     * Creates an executor bound to the crawler's session.
     *
     * @param urlBase URL prefix to scan; must not be null
     * @param start   number of leading scan results to skip
     * @param limit   maximum number of scan results to process
     * @param crawler crawler providing the session; must not be null
     */
    public ScanningHarvestTaskExecutor(@NotNull String urlBase, int start, int limit, @NotNull VerboseCrawler crawler) {
        Intrinsics.checkNotNullParameter((Object)urlBase, (String)"urlBase");
        Intrinsics.checkNotNullParameter((Object)crawler, (String)"crawler");
        super((ScentSession)crawler.getSession());
        this.urlBase = urlBase;
        this.start = start;
        this.limit = limit;
        this.crawler = crawler;
        this.logger = LogsKt.getLogger((KClass)Reflection.getOrCreateKotlinClass(ScanningHarvestTaskExecutor.class));
        // The session context is expected to be an AbstractScentContext; the cast fails otherwise.
        PulsarContext pulsarContext = this.getSession().getContext();
        Intrinsics.checkNotNull((Object)pulsarContext, (String)"null cannot be cast to non-null type ai.platon.scent.context.support.AbstractScentContext");
        this.context = (AbstractScentContext)pulsarContext;
        // The dataset path is derived from start, limit and the part of the domain before its first ".".
        this.datasetPath = HarvestTaskExecutor.Companion.createDatasetPath(this.start, this.limit, StringsKt.substringBefore$default((String)this.getDomain(), (String)".", null, (int)2, null));
        // Side effect: every construction adds an AmazonAsinUrlNormalizer to the context's URL normalizer chain.
        ChainedUrlNormalizer.add$default((ChainedUrlNormalizer)this.getSession().getContext().getUrlNormalizer(), (ScopedUrlNormalizer)((ScopedUrlNormalizer)new AmazonAsinUrlNormalizer()), null, (int)2, null);
    }

    /**
     * Compiler-generated constructor that fills in default arguments.
     *
     * <p>Each set bit in {@code n3} replaces the corresponding argument with its default:
     * bit 1 - {@code urlBase = "https://www.amazon.com/dp/"}, bit 2 - {@code start = 0},
     * bit 4 - {@code limit = 6000}, bit 8 - a new default {@link VerboseCrawler}.
     * The marker argument is unused.
     */
    public /* synthetic */ ScanningHarvestTaskExecutor(String string, int n, int n2, VerboseCrawler verboseCrawler, int n3, DefaultConstructorMarker defaultConstructorMarker) {
        this(
                (n3 & 1) == 0 ? string : "https://www.amazon.com/dp/",
                (n3 & 2) == 0 ? n : 0,
                (n3 & 4) == 0 ? n2 : 6000,
                (n3 & 8) == 0 ? verboseCrawler : new VerboseCrawler(null, 1, null));
    }

    /** Returns the URL prefix this executor scans. */
    @NotNull
    public final String getUrlBase() {
        return urlBase;
    }

    /** Returns the number of leading scan results that are skipped. */
    public final int getStart() {
        return start;
    }

    /** Returns the maximum number of scan results that are processed. */
    public final int getLimit() {
        return limit;
    }

    /** Returns the crawler this executor delegates encoding and harvesting to. */
    @NotNull
    public final VerboseCrawler getCrawler() {
        return crawler;
    }

    /** Returns the origin that {@code URLUtil} derives from {@code urlBase}. */
    @NotNull
    public final String getOrigin() {
        String origin = URLUtil.INSTANCE.getOrigin(urlBase);
        return origin;
    }

    /**
     * Returns the domain name that {@code URLUtil} derives from {@code urlBase},
     * or the literal {@code "unknown"} when none can be derived.
     */
    @NotNull
    public final String getDomain() {
        String domainName = URLUtil.INSTANCE.getDomainName(urlBase);
        return domainName != null ? domainName : "unknown";
    }

    /** Returns the current dataset path. */
    @NotNull
    public final Path getDatasetPath() {
        return datasetPath;
    }

    /**
     * Replaces the dataset path.
     *
     * @param path the new dataset path; must not be null
     */
    public final void setDatasetPath(@NotNull Path path) {
        Intrinsics.checkNotNullParameter((Object)path, (String)"<set-?>");
        datasetPath = path;
    }

    /**
     * Scans stored pages under {@code urlBase}, parses each selected page and prints a summary of it.
     *
     * <p>All {@link GWebPage.Field} values except {@code PAGE_MODEL} are requested from the web database.
     * Scanned pages are kept when their URL contains the domain of {@code urlBase} and they pass a second,
     * compiler-generated predicate ({@code check.sequence.2}, body not visible here); then {@code start}
     * pages are skipped and at most {@code limit} pages are processed.
     *
     * <p>For a page whose URL does not start with {@code urlBase}, the URL is normalized with
     * {@code AmazonUrls.normalizeAsinUrl}. If that yields {@code null} the page is skipped without being
     * printed; otherwise a new page is created under the normalized URL, the original page's data is
     * cloned into it and it is stored in the web database.
     *
     * <p>This version binds the decompiler's unassigned {@code void} placeholder locals to the values
     * they stand for (the mapped field, the iterated page, the normalized URL and the cloned page).
     *
     * @throws IllegalArgumentException if no domain name can be derived from {@code urlBase}
     */
    public final void check() {
        // Field names to load: every GWebPage.Field except PAGE_MODEL.
        Iterable requestedFields = SetsKt.minus((Set)CollectionsKt.toSet((Iterable)((Iterable)EntriesMappings.entries$0)), (Object)GWebPage.Field.PAGE_MODEL);
        List<String> fieldNames = new ArrayList<String>(CollectionsKt.collectionSizeOrDefault((Iterable)requestedFields, (int)10));
        for (Object item : requestedFields) {
            GWebPage.Field field = (GWebPage.Field)item;
            fieldNames.add(field.toString());
        }
        String[] fields = fieldNames.toArray(new String[0]);
        // The scan is started before urlBase is validated, exactly as in the original statement order.
        Iterator pages2 = this.context.getWebDb().scan(this.urlBase, fields);
        String string = URLUtil.INSTANCE.getDomainName(this.urlBase);
        if (string == null) {
            throw new IllegalArgumentException("Invalid urlBase: " + this.urlBase);
        }
        String domain = string;
        Sequence selected = SequencesKt.take((Sequence)SequencesKt.drop((Sequence)SequencesKt.filter((Sequence)SequencesKt.filter((Sequence)SequencesKt.asSequence((Iterator)pages2), (Function1)((Function1)new Function1<WebPage, Boolean>(domain){
            final /* synthetic */ String $domain;
            {
                this.$domain = $domain;
                super(1);
            }

            @NotNull
            public final Boolean invoke(@NotNull WebPage it) {
                Intrinsics.checkNotNullParameter((Object)it, (String)"it");
                String string = it.getUrl();
                Intrinsics.checkNotNullExpressionValue((Object)string, (String)"getUrl(...)");
                return StringsKt.contains$default((CharSequence)string, (CharSequence)this.$domain, (boolean)false, (int)2, null);
            }
        })), (Function1)check.sequence.2.INSTANCE), (int)this.start), (int)this.limit);
        int nextIndex = 0;
        for (Object item : selected) {
            int i = nextIndex++;
            if (i < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            WebPage page = (WebPage)item;
            FeaturedDocument document = this.getSession().parse((WebPage)page);
            String pageUrl = page.getUrl();
            Intrinsics.checkNotNullExpressionValue((Object)pageUrl, (String)"getUrl(...)");
            if (!StringsKt.startsWith$default((String)pageUrl, (String)this.urlBase, (boolean)false, (int)2, null)) {
                String rawUrl = page.getUrl();
                Intrinsics.checkNotNullExpressionValue((Object)rawUrl, (String)"getUrl(...)");
                String url = AmazonUrls.INSTANCE.normalizeAsinUrl(rawUrl);
                // URLs that cannot be normalized to an ASIN URL are skipped without being printed.
                if (url == null) continue;
                WebPage page2 = WebPage.newWebPage((String)url, (VolatileConfig)page.getConf(), (String)page.getUrl());
                Intrinsics.checkNotNullExpressionValue((Object)page2, (String)"newWebPage(...)");
                // Store a copy of the page under its normalized URL.
                page2.unsafeCloneGPage((WebPage)page);
                WebDb.put$default((WebDb)this.context.getWebDb(), (WebPage)page2, (boolean)false, (int)2, null);
            }
            Map map = ScentWebPageExtKt.getMlLabels((WebPage)page);
            System.out.println((Object)(i + 1 + ". " + page.getContentLength() + " | " + page.getPersistedContentLength() + " | " + (map != null ? map.values() : null) + " | " + page.getUrl()));
            System.out.println((Object)document.getBaseURI());
        }
    }

    /*
     * WARNING - void declaration
     */
    public final void clearAnnotations() {
        Iterator pages2 = this.context.getWebDb().scan(this.urlBase);
        Sequence $this$forEachIndexed$iv = SequencesKt.take((Sequence)SequencesKt.drop((Sequence)SequencesKt.asSequence((Iterator)pages2), (int)this.start), (int)this.limit);
        boolean $i$f$forEachIndexed = false;
        int index$iv = 0;
        for (Object item$iv : $this$forEachIndexed$iv) {
            void page;
            int n;
            if ((n = index$iv++) < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            WebPage webPage = (WebPage)item$iv;
            int i = n;
            boolean bl = false;
            ScentWebPageExtKt.clearMLLabels((WebPage)page);
            this.getSession().persist((WebPage)page);
        }
    }

    /**
     * Encodes stored pages under {@code urlBase} into a dataset at the current dataset path.
     *
     * <p>Steps, in order: delete any existing dataset file; validate that a domain can be derived from
     * {@code urlBase}; scan pages through the session, requesting only the CONTENT, CONTENT_LENGTH,
     * PERSISTED_CONTENT_LENGTH and PROTOCOL_STATUS fields; keep pages whose URL starts with
     * {@code urlBase} and which pass two compiler-generated predicates ({@code encode.rootElements.2}
     * and {@code .3}, bodies not visible here); parse each page; select the first element matching
     * {@code restrictCss} (documents without a match are dropped); hand the elements to the crawler
     * for encoding. If the dataset file exists afterwards it is copied via
     * {@code MLPaths.copyToLearnUnsupervised}; otherwise a notice is printed.
     *
     * <p>This version binds the decompiler's unassigned {@code void} placeholder locals to the field
     * list they stand for and drops locals that were never read.
     *
     * @param args        harvest option string, parsed by the session
     * @param restrictCss CSS selector for the root element of each document
     * @param annotated   when {@code true}, only pages with a non-empty ML label map are encoded
     * @param nodeFilter2 node predicate passed through to the crawler's encoder
     * @throws IllegalArgumentException if no domain name can be derived from {@code urlBase}
     */
    public final void encode(@NotNull String args, @NotNull String restrictCss, boolean annotated, @NotNull Function1<? super Node, Boolean> nodeFilter2) {
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        Intrinsics.checkNotNullParameter((Object)restrictCss, (String)"restrictCss");
        Intrinsics.checkNotNullParameter(nodeFilter2, (String)"nodeFilter");
        Files.deleteIfExists(this.datasetPath);
        // The domain itself is not used below; the lookup only validates urlBase.
        if (URLUtil.INSTANCE.getDomainName(this.urlBase) == null) {
            throw new IllegalArgumentException("Invalid urlBase: " + this.urlBase);
        }
        HarvestOptions options = this.getSession().options(args);
        GWebPage.Field[] requested = new GWebPage.Field[]{GWebPage.Field.CONTENT, GWebPage.Field.CONTENT_LENGTH, GWebPage.Field.PERSISTED_CONTENT_LENGTH, GWebPage.Field.PROTOCOL_STATUS};
        List requestedFields = CollectionsKt.listOf((Object[])requested);
        List<String> fields = new ArrayList<String>(CollectionsKt.collectionSizeOrDefault((Iterable)requestedFields, (int)10));
        for (Object item : requestedFields) {
            GWebPage.Field field = (GWebPage.Field)item;
            fields.add(field.toString());
        }
        Sequence rootElements2 = SequencesKt.mapNotNull((Sequence)SequencesKt.map((Sequence)SequencesKt.filter((Sequence)SequencesKt.filter((Sequence)SequencesKt.filter((Sequence)SequencesKt.filter((Sequence)this.getSession().scan(this.urlBase, options, this.start, this.limit, this.start, this.limit, fields), (Function1)((Function1)new Function1<WebPage, Boolean>(this){
            final /* synthetic */ ScanningHarvestTaskExecutor this$0;
            {
                this.this$0 = $receiver;
                super(1);
            }

            @NotNull
            public final Boolean invoke(@NotNull WebPage it) {
                Intrinsics.checkNotNullParameter((Object)it, (String)"it");
                String string = it.getUrl();
                Intrinsics.checkNotNullExpressionValue((Object)string, (String)"getUrl(...)");
                return StringsKt.startsWith$default((String)string, (String)this.this$0.getUrlBase(), (boolean)false, (int)2, null);
            }
        })), (Function1)encode.rootElements.2.INSTANCE), (Function1)encode.rootElements.3.INSTANCE), (Function1)((Function1)new Function1<WebPage, Boolean>(annotated){
            final /* synthetic */ boolean $annotated;
            {
                this.$annotated = $annotated;
                super(1);
            }

            /*
             * Enabled aggressive block sorting
             */
            @NotNull
            public final Boolean invoke(@NotNull WebPage it) {
                boolean bl;
                Intrinsics.checkNotNullParameter((Object)it, (String)"it");
                if (this.$annotated) {
                    Map map = ScentWebPageExtKt.getMlLabels((WebPage)it);
                    if (!(map != null ? !map.isEmpty() : false)) {
                        bl = false;
                        return bl;
                    }
                }
                bl = true;
                return bl;
            }
        })), (Function1)((Function1)new Function1<WebPage, FeaturedDocument>(this, options){
            final /* synthetic */ ScanningHarvestTaskExecutor this$0;
            final /* synthetic */ HarvestOptions $options;
            {
                this.this$0 = $receiver;
                this.$options = $options;
                super(1);
            }

            @NotNull
            public final FeaturedDocument invoke(@NotNull WebPage it) {
                Intrinsics.checkNotNullParameter((Object)it, (String)"it");
                return ScentSession.DefaultImpls.parse$default((ScentSession)this.this$0.getSession(), (WebPage)it, (HarvestOptions)this.$options, (boolean)false, (int)4, null);
            }
        })), (Function1)((Function1)new Function1<FeaturedDocument, Element>(restrictCss){
            final /* synthetic */ String $restrictCss;
            {
                this.$restrictCss = $restrictCss;
                super(1);
            }

            @Nullable
            public final Element invoke(@NotNull FeaturedDocument it) {
                Intrinsics.checkNotNullParameter((Object)it, (String)"it");
                return it.selectFirstOrNull(this.$restrictCss);
            }
        }));
        EncodeOptions encodeOptions = new EncodeOptions(this.datasetPath, true, null, 0, 0, 28, null);
        SimpleDataFrame df = this.crawler.encodeForElements(SequencesKt.asIterable((Sequence)rootElements2), encodeOptions, nodeFilter2);
        if (Files.exists(this.datasetPath, new LinkOption[0])) {
            MLPaths.INSTANCE.copyToLearnUnsupervised(this.datasetPath);
        } else {
            System.out.println((Object)("Dataset is not saved to " + this.datasetPath));
        }
        this.logger.info("Dataset size: " + df.getRecordCount() + ", total documents: " + df.getSize() + ", dataset exported:\n" + this.datasetPath);
        this.logger.info("All done.");
    }

    /**
     * Compiler-generated bridge that fills in default arguments for {@code encode}.
     *
     * <p>Bits of {@code n} select defaults: bit 2 - {@code restrictCss = "body"},
     * bit 4 - {@code annotated = false}, bit 8 - the compiler-generated default node filter.
     * The trailing marker argument is unused.
     */
    public static /* synthetic */ void encode$default(ScanningHarvestTaskExecutor scanningHarvestTaskExecutor, String string, String string2, boolean bl, Function1 function1, int n, Object object) {
        String css = (n & 2) == 0 ? string2 : "body";
        boolean onlyAnnotated = (n & 4) == 0 && bl;
        Function1 filter = (n & 8) == 0 ? function1 : encode.1.INSTANCE;
        scanningHarvestTaskExecutor.encode(string, css, onlyAnnotated, (Function1<? super Node, Boolean>)filter);
    }

    /**
     * Logs a pointer to the external scent-spark project; no clustering is performed here.
     *
     * @param args unused apart from the null check
     */
    public final void kmeans(@NotNull String args) {
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        Logger log = this.logger;
        log.info("Use project scent-spark to cluster the dataset");
        log.info("https://github.com/galaxyeye/scent-spark/blob/main/src/main/java/ai/platon/scent/ml/clustering/DomKMeans.java");
    }

    /**
     * Harvests the loaded documents in chunks of 200.
     *
     * <p>A fresh project id and the flags {@code -diagnose -vj -trustSamples} are appended to
     * {@code args} before the options are parsed. The JVM input arguments are logged once before
     * harvesting starts.
     *
     * @param args harvest option string; must not be null
     */
    public final void harvest(@NotNull String args) {
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        String harvestArgs = args + " -projectId " + MLProject.Companion.newProjectId() + " -diagnose -vj -trustSamples";
        HarvestOptions harvestOptions = getSession().options(harvestArgs);
        Sequence<FeaturedDocument> loaded = loadDocuments(harvestOptions);
        logger.info("{}", ManagementFactory.getRuntimeMXBean().getInputArguments());
        Sequence batches = SequencesKt.chunked(loaded, (int)200);
        for (Object batch : batches) {
            crawler.harvest(CollectionsKt.asSequence((Iterable)((List)batch)), harvestOptions);
        }
    }

    /**
     * Compiler-generated bridge that fills in the default argument for {@code harvest}:
     * when bit 1 of {@code n} is set, {@code args} defaults to the empty string.
     * The trailing marker argument is unused.
     */
    public static /* synthetic */ void harvest$default(ScanningHarvestTaskExecutor scanningHarvestTaskExecutor, String string, int n, Object object) {
        scanningHarvestTaskExecutor.harvest((n & 1) == 0 ? string : "");
    }

    /*
     * WARNING - void declaration
     */
    private final Sequence<FeaturedDocument> loadDocuments(HarvestOptions options) {
        void $this$mapTo$iv$iv;
        void $this$map$iv;
        long count2;
        Path htmlBaseDir = AppPaths.INSTANCE.getDOC_EXPORT_DIR().resolve(this.getDomain());
        Intrinsics.checkNotNull((Object)htmlBaseDir);
        LinkOption[] linkOptionArray = new LinkOption[]{};
        long l = count2 = Files.notExists(htmlBaseDir, Arrays.copyOf(linkOptionArray, linkOptionArray.length)) ? 0L : Files.list(htmlBaseDir).filter(arg_0 -> ScanningHarvestTaskExecutor.loadDocuments$lambda$5(loadDocuments.count.1.INSTANCE, arg_0)).count();
        if (count2 > 40L) {
            Sequence documents2 = SequencesKt.onEach((Sequence)SequencesKt.map((Sequence)SequencesKt.take((Sequence)SequencesKt.drop((Sequence)CollectionsKt.asSequence((Iterable)PathsKt.listDirectoryEntries((Path)htmlBaseDir, (String)"*.htm")), (int)this.start), (int)this.limit), (Function1)loadDocuments.documents.1.INSTANCE), (Function1)loadDocuments.documents.2.INSTANCE);
            return documents2;
        }
        Iterable iterable = SetsKt.minus((Set)CollectionsKt.toSet((Iterable)((Iterable)EntriesMappings.entries$0)), (Object)GWebPage.Field.PAGE_MODEL);
        boolean $i$f$map = false;
        void var8_9 = $this$map$iv;
        Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
        boolean $i$f$mapTo = false;
        for (Object item$iv$iv : $this$mapTo$iv$iv) {
            void it;
            GWebPage.Field field = (GWebPage.Field)item$iv$iv;
            Collection collection = destination$iv$iv;
            boolean bl = false;
            collection.add(it.toString());
        }
        List fields = (List)destination$iv$iv;
        Sequence pages2 = SequencesKt.take((Sequence)SequencesKt.drop((Sequence)SequencesKt.filter((Sequence)SequencesKt.filter((Sequence)SequencesKt.filter((Sequence)this.getSession().scan(this.urlBase, options, this.start, this.limit, this.start, this.limit, fields), (Function1)((Function1)new Function1<WebPage, Boolean>(this){
            final /* synthetic */ ScanningHarvestTaskExecutor this$0;
            {
                this.this$0 = $receiver;
                super(1);
            }

            @NotNull
            public final Boolean invoke(@NotNull WebPage it) {
                Intrinsics.checkNotNullParameter((Object)it, (String)"it");
                String string = it.getUrl();
                Intrinsics.checkNotNullExpressionValue((Object)string, (String)"getUrl(...)");
                return StringsKt.contains$default((CharSequence)string, (CharSequence)this.this$0.getDomain(), (boolean)false, (int)2, null);
            }
        })), (Function1)loadDocuments.pages.2.INSTANCE), (Function1)loadDocuments.pages.3.INSTANCE), (int)this.start), (int)this.limit);
        Sequence paths2 = SequencesKt.map((Sequence)SequencesKt.map((Sequence)pages2, (Function1)((Function1)new Function1<WebPage, FeaturedDocument>(this){
            final /* synthetic */ ScanningHarvestTaskExecutor this$0;
            {
                this.this$0 = $receiver;
                super(1);
            }

            @NotNull
            public final FeaturedDocument invoke(@NotNull WebPage it) {
                Intrinsics.checkNotNullParameter((Object)it, (String)"it");
                return this.this$0.getSession().parse(it, true);
            }
        })), (Function1)((Function1)new Function1<FeaturedDocument, Path>(this){
            final /* synthetic */ ScanningHarvestTaskExecutor this$0;
            {
                this.this$0 = $receiver;
                super(1);
            }

            @NotNull
            public final Path invoke(@NotNull FeaturedDocument it) {
                Intrinsics.checkNotNullParameter((Object)it, (String)"it");
                return this.this$0.getSession().export(it, this.this$0.getDomain());
            }
        }));
        Sequence documents3 = SequencesKt.onEach((Sequence)SequencesKt.map((Sequence)paths2, (Function1)loadDocuments.documents.3.INSTANCE), (Function1)loadDocuments.documents.4.INSTANCE);
        return documents3;
    }

    /**
     * Adapter that lets a Kotlin {@code Function1} be used as a Java stream predicate:
     * invokes {@code $tmp0} on {@code p0} and unboxes the {@code Boolean} result.
     */
    private static final boolean loadDocuments$lambda$5(Function1 $tmp0, Object p0) {
        Intrinsics.checkNotNullParameter((Object)$tmp0, (String)"$tmp0");
        Object verdict = $tmp0.invoke(p0);
        return (Boolean)verdict;
    }

    /**
     * Creates an executor with every argument defaulted.
     *
     * <p>Mask {@code 15} sets all four default bits of the synthetic constructor, so the placeholder
     * values passed here are replaced by {@code "https://www.amazon.com/dp/"}, {@code 0},
     * {@code 6000} and a new default {@code VerboseCrawler}.
     */
    public ScanningHarvestTaskExecutor() {
        this(null, 0, 0, null, 15, null);
    }

    /**
     * Compiler-generated holder for the {@code EnumEntries} view of {@link GWebPage.Field},
     * built once from {@code GWebPage.Field.values()} when this class is initialized.
     */
    @Metadata(mv={1, 9, 0}, k=3, xi=48)
    public final class EntriesMappings {
        public static final /* synthetic */ EnumEntries<GWebPage.Field> entries$0 = EnumEntriesKt.enumEntries((Enum[])((Enum[])GWebPage.Field.values()));
    }
}

