/*
 * Decompiled with CFR 0.152.
 */
package ai.platon.scent.tools;

import ai.platon.pulsar.common.LogsKt;
import ai.platon.pulsar.common.serialize.json.JacksonKt;
import ai.platon.pulsar.dom.FeaturedDocument;
import ai.platon.pulsar.persist.WebPage;
import ai.platon.pulsar.skeleton.common.options.LoadOptions;
import ai.platon.pulsar.skeleton.context.PulsarContext;
import ai.platon.pulsar.skeleton.context.PulsarContexts;
import ai.platon.pulsar.skeleton.crawl.filter.ScopedUrlNormalizer;
import ai.platon.scent.BasicScentSession;
import ai.platon.scent.core.common.RegexUrlNormalizer;
import ai.platon.scent.dom.HarvestOptions;
import ai.platon.scent.ml.MLProject;
import ai.platon.scent.ml.encoding.EncodeProject;
import ai.platon.scent.ml.harvest.HarvestProject;
import ai.platon.scent.tools.BasicWebHarvester;
import ai.platon.scent.tools.HarvestProjectConfig;
import ai.platon.scent.tools.ItemURLNormalizer;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.nio.file.CopyOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.Result;
import kotlin.ResultKt;
import kotlin.TuplesKt;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.sequences.SequencesKt;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000V\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010\u0002\n\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\b\b\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\b\n\u0002\b\u0004\b\u0016\u0018\u0000 (2\u00020\u0001:\u0001(B\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0002\u0010\u0004J\u001c\u0010\u0014\u001a\u00020\u00152\u0006\u0010\u0016\u001a\u00020\u00172\f\u0010\u0018\u001a\b\u0012\u0004\u0012\u00020\u001a0\u0019J\u0016\u0010\u001b\u001a\u00020\u00152\u0006\u0010\u001c\u001a\u00020\u00172\u0006\u0010\u001d\u001a\u00020\u0017J\u0016\u0010\u001e\u001a\u00020\u00152\u0006\u0010\u001c\u001a\u00020\u00172\u0006\u0010\u001d\u001a\u00020\u0017J\u001c\u0010\u001f\u001a\u00020\u00152\u0006\u0010\u0016\u001a\u00020\u00172\f\u0010 \u001a\b\u0012\u0004\u0012\u00020\u00170\u0019J\u000e\u0010!\u001a\u00020\u00152\u0006\u0010\"\u001a\u00020#J\u001a\u0010!\u001a\u00020\u00152\b\b\u0002\u0010$\u001a\u00020%2\b\b\u0002\u0010&\u001a\u00020%J\u0006\u0010'\u001a\u00020\u0015R\u0014\u0010\u0005\u001a\u00020\u00068BX\u0082\u0004\u00a2\u0006\u0006\u001a\u0004\b\u0007\u0010\bR\u000e\u0010\t\u001a\u00020\nX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0016\u0010\u000b\u001a\n \r*\u0004\u0018\u00010\f0\fX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\u000e\u0010\u000fR\u0014\u0010\u0010\u001a\u00020\u00118BX\u0082\u0004\u00a2\u0006\u0006\u001a\u0004\b\u0012\u0010\u0013\u00a8\u0006)"}, d2={"Lai/platon/scent/tools/HarvesterProjectRunner;", "", "project", "Lai/platon/scent/ml/harvest/HarvestProject;", "(Lai/platon/scent/ml/harvest/HarvestProject;)V", "context", "Lai/platon/pulsar/skeleton/context/PulsarContext;", "getContext", "()Lai/platon/pulsar/skeleton/context/PulsarContext;", "harvester", "Lai/platon/scent/tools/BasicWebHarvester;", "logger", "Lorg/slf4j/Logger;", "kotlin.jvm.PlatformType", "getProject", "()Lai/platon/scent/ml/harvest/HarvestProject;", "session", "Lai/platon/scent/BasicScentSession;", "getSession", "()Lai/platon/scent/BasicScentSession;", "copy", "", "projectId", "", "htmlFiles", "", "Ljava/nio/file/Path;", "copyOutPagesToLearn", "portalUrl", "args", "copyOutPagesToLearnIfNotExists", "export", "urls", "harvest", "conf", "Lai/platon/scent/tools/HarvestProjectConfig;", "start", "", "limit", "openViewDir", "Companion", "scent-boot"})
@SourceDebugExtension(value={"SMAP\nHarvestProjectRunner.kt\nKotlin\n*S Kotlin\n*F\n+ 1 HarvestProjectRunner.kt\nai/platon/scent/tools/HarvesterProjectRunner\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,143:1\n1855#2,2:144\n1855#2,2:146\n1864#2,3:149\n1#3:148\n*S KotlinDebug\n*F\n+ 1 HarvestProjectRunner.kt\nai/platon/scent/tools/HarvesterProjectRunner\n*L\n97#1:144,2\n109#1:146,2\n138#1:149,3\n*E\n"})
public class HarvesterProjectRunner {
    @NotNull
    public static final Companion Companion = new Companion(null);
    @NotNull
    private final HarvestProject project;
    private final Logger logger;
    @NotNull
    private final BasicWebHarvester harvester;

    public HarvesterProjectRunner(@NotNull HarvestProject project) {
        Intrinsics.checkNotNullParameter((Object)project, (String)"project");
        this.project = project;
        this.logger = LoggerFactory.getLogger(BasicWebHarvester.class);
        this.harvester = new BasicWebHarvester(null, null, 3, null);
    }

    @NotNull
    public final HarvestProject getProject() {
        return this.project;
    }

    private final PulsarContext getContext() {
        return this.harvester.getContext();
    }

    private final BasicScentSession getSession() {
        return this.harvester.getSession();
    }

    public final void copyOutPagesToLearnIfNotExists(@NotNull String portalUrl, @NotNull String args) {
        Intrinsics.checkNotNullParameter((Object)portalUrl, (String)"portalUrl");
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        if (!this.project.getHasHTMLFile()) {
            this.copyOutPagesToLearn(portalUrl, args);
        }
    }

    public final void copyOutPagesToLearn(@NotNull String portalUrl, @NotNull String args) {
        Intrinsics.checkNotNullParameter((Object)portalUrl, (String)"portalUrl");
        Intrinsics.checkNotNullParameter((Object)args, (String)"args");
        HarvestOptions options = this.getSession().options(args);
        options.setProjectId(this.project.getId());
        if (!(!StringsKt.isBlank((CharSequence)options.getOutLinkSelector()))) {
            String string = "Failed requirement.";
            throw new IllegalArgumentException(string.toString());
        }
        options.getItemEventHandlers().getLoadEventHandlers().getOnLoaded().addLast((Function1)new Function1<WebPage, Unit>(options, this){
            final /* synthetic */ HarvestOptions $options;
            final /* synthetic */ HarvesterProjectRunner this$0;
            {
                this.$options = $options;
                this.this$0 = $receiver;
                super(1);
            }

            public final void invoke(@NotNull WebPage page) {
                Intrinsics.checkNotNullParameter((Object)page, (String)"page");
                if (page.getProtocolStatus().isSuccess() && page.getPersistedContentLength() > (long)this.$options.getRequireSize()) {
                    Path path = this.this$0.getProject().getEncodeProject().exportPath(page);
                    FeaturedDocument document = HarvesterProjectRunner.access$getSession(this.this$0).parse(page);
                    if (document.getNormalizedURI() == null) {
                        String string = "Required value was null.";
                        throw new IllegalArgumentException(string.toString());
                    }
                    HarvesterProjectRunner.access$getSession(this.this$0).exportTo(document, path);
                }
            }
        });
        this.getSession().submitForOutPages(portalUrl, (LoadOptions)options);
        this.project.createExportInfoFile(MapsKt.mapOf((Pair)TuplesKt.to((Object)"args", (Object)args)));
        PulsarContexts.await();
    }

    public final void copy(@NotNull String projectId, @NotNull List<? extends Path> htmlFiles) {
        Intrinsics.checkNotNullParameter((Object)projectId, (String)"projectId");
        Intrinsics.checkNotNullParameter(htmlFiles, (String)"htmlFiles");
        this.logger.info("Copying {} files to harvest, use .harvest({}) to learn and output web data", (Object)htmlFiles.size(), (Object)projectId);
        EncodeProject project = new EncodeProject(projectId, EncodeProject.Type.TRAINING);
        Iterable $this$forEach$iv = htmlFiles;
        boolean $i$f$forEach = false;
        for (Object element$iv : $this$forEach$iv) {
            Path path = (Path)element$iv;
            boolean bl = false;
            Files.copy(path, project.getHtmlBaseDir().resolve(path.getFileName()), new CopyOption[0]);
        }
        this.logger.info("Copied {} files, use .harvest({}) to learn and output web data", (Object)htmlFiles.size(), (Object)projectId);
    }

    public final void export(@NotNull String projectId, @NotNull List<String> urls) {
        Intrinsics.checkNotNullParameter((Object)projectId, (String)"projectId");
        Intrinsics.checkNotNullParameter(urls, (String)"urls");
        this.logger.info("Exporting {} pages to harvest, use VerboseHarvester.harvest({}) to learn and output data", (Object)urls.size(), (Object)projectId);
        EncodeProject project = new EncodeProject(projectId, EncodeProject.Type.TRAINING);
        Iterable $this$forEach$iv = urls;
        boolean $i$f$forEach = false;
        for (Object element$iv : $this$forEach$iv) {
            String url = (String)element$iv;
            boolean bl = false;
            WebPage page = this.getSession().load(url);
            Path path = project.exportPath(page);
            this.getSession().exportTo(page, path);
        }
        this.logger.info("Exported {} pages, use .harvest({}) to learn and output web data", (Object)urls.size(), (Object)projectId);
    }

    public final void openViewDir() {
        this.logger.info("Opening " + this.project.getResultBaseDir());
        Path path = this.project.getResultBaseDir();
        Intrinsics.checkNotNullExpressionValue((Object)path, (String)"<get-resultBaseDir>(...)");
        this.harvester.openExplorer(path);
    }

    public final void harvest(@NotNull HarvestProjectConfig conf) {
        Intrinsics.checkNotNullParameter((Object)conf, (String)"conf");
        ItemURLNormalizer itemURLNormalizer = conf.getItemURLNormalizer();
        if (itemURLNormalizer != null) {
            ItemURLNormalizer it = itemURLNormalizer;
            boolean bl = false;
            it = new RegexUrlNormalizer(it.getUrlCapturer(), it.getNormalizedURLTemplate(), it.getUrlFilter());
            boolean bl2 = false;
            this.harvester.addUrlNormalizer((ScopedUrlNormalizer)it);
        }
        this.copyOutPagesToLearnIfNotExists(conf.getPortalUrl(), conf.getArgs());
        HarvesterProjectRunner.harvest$default(this, 0, 0, 3, null);
    }

    /*
     * WARNING - void declaration
     */
    public final void harvest(int start, int limit) {
        String args2 = "-projectId " + this.project.getId() + " -diagnose -vj -trustSamples";
        this.logger.info("Loading HTML files and training | {}", (Object)this.project.getEncodeProject().getHtmlBaseDir());
        Path path = this.project.getEncodeProject().getHtmlBaseDir();
        Intrinsics.checkNotNullExpressionValue((Object)path, (String)"<get-htmlBaseDir>(...)");
        List documents2 = SequencesKt.toList(this.harvester.loadDocuments(path, start, limit));
        Iterable $this$forEachIndexed$iv = CollectionsKt.chunked((Iterable)documents2, (int)200);
        boolean $i$f$forEachIndexed = false;
        int index$iv = 0;
        for (Object item$iv : $this$forEachIndexed$iv) {
            Object object;
            int n;
            if ((n = index$iv++) < 0) {
                CollectionsKt.throwIndexOverflow();
            }
            List list = (List)item$iv;
            int i = n;
            boolean bl = false;
            HarvestOptions options = this.getSession().options(args2);
            Object object2 = this;
            try {
                void batch;
                HarvesterProjectRunner $this$harvest_u24lambda_u246_u24lambda_u244 = object2;
                boolean bl2 = false;
                object = Result.constructor-impl((Object)$this$harvest_u24lambda_u246_u24lambda_u244.harvester.harvest((List<? extends FeaturedDocument>)batch, options));
            }
            catch (Throwable bl2) {
                object = Result.constructor-impl((Object)ResultKt.createFailure((Throwable)bl2));
            }
            object2 = object;
            Throwable throwable = Result.exceptionOrNull-impl((Object)object2);
            if (throwable == null) continue;
            Object it = object = throwable;
            boolean bl3 = false;
            LogsKt.warnUnexpected((Object)this, (Throwable)it);
        }
    }

    public static /* synthetic */ void harvest$default(HarvesterProjectRunner harvesterProjectRunner, int n, int n2, int n3, Object object) {
        if (object != null) {
            throw new UnsupportedOperationException("Super calls with default arguments not supported in this target, function: harvest");
        }
        if ((n3 & 1) != 0) {
            n = 0;
        }
        if ((n3 & 2) != 0) {
            n2 = Integer.MAX_VALUE;
        }
        harvesterProjectRunner.harvest(n, n2);
    }

    public static final /* synthetic */ BasicScentSession access$getSession(HarvesterProjectRunner $this) {
        return $this.getSession();
    }

    @Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000\"\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0000\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002J\u000e\u0010\u0003\u001a\u00020\u00042\u0006\u0010\u0005\u001a\u00020\u0006J\u000e\u0010\u0003\u001a\u00020\u00042\u0006\u0010\u0007\u001a\u00020\bJ\u000e\u0010\u0003\u001a\u00020\u00042\u0006\u0010\u0007\u001a\u00020\t\u00a8\u0006\n"}, d2={"Lai/platon/scent/tools/HarvesterProjectRunner$Companion;", "", "()V", "harvest", "Lai/platon/scent/tools/HarvesterProjectRunner;", "conf", "Lai/platon/scent/tools/HarvestProjectConfig;", "json", "Ljava/nio/file/Path;", "", "scent-boot"})
    @SourceDebugExtension(value={"SMAP\nHarvestProjectRunner.kt\nKotlin\n*S Kotlin\n*F\n+ 1 HarvestProjectRunner.kt\nai/platon/scent/tools/HarvesterProjectRunner$Companion\n+ 2 Extensions.kt\ncom/fasterxml/jackson/module/kotlin/ExtensionsKt\n+ 3 fake.kt\nkotlin/jvm/internal/FakeKt\n*L\n1#1,143:1\n56#2:144\n51#2:145\n58#2:146\n51#2:147\n1#3:148\n*S KotlinDebug\n*F\n+ 1 HarvestProjectRunner.kt\nai/platon/scent/tools/HarvesterProjectRunner$Companion\n*L\n36#1:144\n36#1:145\n40#1:146\n40#1:147\n*E\n"})
    public static final class Companion {
        private Companion() {
        }

        /*
         * WARNING - void declaration
         */
        @NotNull
        public final HarvesterProjectRunner harvest(@NotNull Path json) {
            void $this$readValue$iv;
            Intrinsics.checkNotNullParameter((Object)json, (String)"json");
            File jsonFile = json.toFile();
            ObjectMapper objectMapper = JacksonKt.pulsarObjectMapper();
            Intrinsics.checkNotNull((Object)jsonFile);
            File src$iv = jsonFile;
            boolean $i$f$readValue = false;
            boolean $i$f$jacksonTypeRef = false;
            return this.harvest((HarvestProjectConfig)$this$readValue$iv.readValue(src$iv, (TypeReference)new TypeReference<HarvestProjectConfig>(){}));
        }

        @NotNull
        public final HarvesterProjectRunner harvest(@NotNull String json) {
            Intrinsics.checkNotNullParameter((Object)json, (String)"json");
            ObjectMapper $this$readValue$iv = JacksonKt.pulsarObjectMapper();
            boolean $i$f$readValue = false;
            boolean $i$f$jacksonTypeRef = false;
            return this.harvest((HarvestProjectConfig)$this$readValue$iv.readValue(json, (TypeReference)new TypeReference<HarvestProjectConfig>(){}));
        }

        @NotNull
        public final HarvesterProjectRunner harvest(@NotNull HarvestProjectConfig conf) {
            Intrinsics.checkNotNullParameter((Object)conf, (String)"conf");
            String projectId = MLProject.Companion.perHourProjectId();
            HarvesterProjectRunner runner = new HarvesterProjectRunner(new HarvestProject(projectId));
            ItemURLNormalizer itemURLNormalizer = conf.getItemURLNormalizer();
            if (itemURLNormalizer != null) {
                ItemURLNormalizer it = itemURLNormalizer;
                boolean bl = false;
                it = new RegexUrlNormalizer(it.getUrlCapturer(), it.getNormalizedURLTemplate(), it.getUrlFilter());
                boolean bl2 = false;
                runner.harvester.addUrlNormalizer((ScopedUrlNormalizer)it);
            }
            runner.copyOutPagesToLearnIfNotExists(conf.getPortalUrl(), conf.getArgs());
            HarvesterProjectRunner.harvest$default(runner, 0, 0, 3, null);
            return runner;
        }

        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

