package ai.platon.scent.ml;

import java.nio.file.Path;
import java.util.List;
import java.util.ListIterator;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;
import smile.data.DataFrame;
import smile.data.type.StructType;
import smile.read;

/* compiled from: DatasetProcessor.kt */
@Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��\u001a\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0003\u0018��2\u00020\u0001:\u0002\t\nB\r\u0012\u0006\u0010\u0002\u001a\u00020\u0003¢\u0006\u0002\u0010\u0004J\u0006\u0010\u0007\u001a\u00020\bR\u0011\u0010\u0002\u001a\u00020\u0003¢\u0006\b\n��\u001a\u0004\b\u0005\u0010\u0006¨\u0006\u000b"}, d2 = {"Lai/platon/scent/ml/DatasetProcessor;", "", "datasetPath", "Ljava/nio/file/Path;", "(Ljava/nio/file/Path;)V", "getDatasetPath", "()Ljava/nio/file/Path;", "loadDataset", "Lsmile/data/DataFrame;", "ENCODED", "ORIGINAL", "scent-auto-mining"})
/* loaded from: input_file:ai/platon/scent/ml/DatasetProcessor.class */
public final class DatasetProcessor {

    @NotNull
    private final Path datasetPath;

    /* compiled from: DatasetProcessor.kt */
    @Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��\u0018\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0010\u0011\n\u0002\u0010\u000e\n\u0002\b\u000f\bÆ\u0002\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002R\u0019\u0010\u0003\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004¢\u0006\n\n\u0002\u0010\b\u001a\u0004\b\u0006\u0010\u0007R\u0014\u0010\t\u001a\u00020\u0005X\u0086D¢\u0006\b\n��\u001a\u0004\b\n\u0010\u000bR\u0019\u0010\f\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004¢\u0006\n\n\u0002\u0010\b\u001a\u0004\b\r\u0010\u0007R\u0014\u0010\u000e\u001a\u00020\u0005X\u0086D¢\u0006\b\n��\u001a\u0004\b\u000f\u0010\u000bR\u0019\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004¢\u0006\n\n\u0002\u0010\b\u001a\u0004\b\u0011\u0010\u0007R\u0014\u0010\u0012\u001a\u00020\u0005X\u0086D¢\u0006\b\n��\u001a\u0004\b\u0013\u0010\u000b¨\u0006\u0014"}, d2 = {"Lai/platon/scent/ml/DatasetProcessor$ENCODED;", "", "()V", "NOMINAL_FEATURE_NAMES", "", "", "getNOMINAL_FEATURE_NAMES", "()[Ljava/lang/String;", "[Ljava/lang/String;", "NOMINAL_FEATURE_NAMES_STRING", "getNOMINAL_FEATURE_NAMES_STRING", "()Ljava/lang/String;", "NUMERIC_FEATURE_NAMES", "getNUMERIC_FEATURE_NAMES", "NUMERIC_FEATURE_NAMES_STRING", "getNUMERIC_FEATURE_NAMES_STRING", "TEXT_FEATURE_NAMES", "getTEXT_FEATURE_NAMES", "TEXT_FEATURE_NAMES_STRING", "getTEXT_FEATURE_NAMES_STRING", "scent-auto-mining"})
    @SourceDebugExtension({"SMAP\nDatasetProcessor.kt\nKotlin\n*S Kotlin\n*F\n+ 1 DatasetProcessor.kt\nai/platon/scent/ml/DatasetProcessor$ENCODED\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 ArraysJVM.kt\nkotlin/collections/ArraysKt__ArraysJVMKt\n*L\n1#1,77:1\n731#2,9:78\n731#2,9:89\n731#2,9:100\n37#3,2:87\n37#3,2:98\n37#3,2:109\n*S KotlinDebug\n*F\n+ 1 DatasetProcessor.kt\nai/platon/scent/ml/DatasetProcessor$ENCODED\n*L\n59#1:78,9\n64#1:89,9\n69#1:100,9\n60#1:87,2\n65#1:98,2\n69#1:109,2\n*E\n"})
    /* loaded from: input_file:ai/platon/scent/ml/DatasetProcessor$ENCODED.class */
    public static final class ENCODED {

        @NotNull
        public static final ENCODED INSTANCE = new ENCODED();

        @NotNull
        private static final String NUMERIC_FEATURE_NAMES_STRING = "top-g0,left-g0,width-g0,height-g0,char-g0,txt_nd-g0,img-g0,a-g0,sibling-g0,child-g0,dep-g0,seq-g0,txt_dns-g0,pid-g0,tag-g0,nd_id-g0,nd_cs-g0,ft_sz-g0,color-g0,b_bolor-g0,rtop-g0,rleft-g0,rrow-g0,rcol-g0,dist-g0,simg-g0,mimg-g0,limg-g0,aimg-g0,saimg-g0,maimg-g0,laimg-g0,char_max-g0,char_ave-g0,own_char-g0,own_txt_nd-g0,grant_child-g0,descend-g0,sep-g0,rseq-g0,txt_nd_c-g0,vcc-g0,vcv-g0,avcc-g0,avcv-g0,hcc-g0,hcv-g0,ahcc-g0,ahcv-g0,txt_df-g0,cap_df-g0,tn_max_w-g0,tn_ave_w-g0,tn_max_h-g0,tn_ave_h-g0,a_max_w-g0,a_ave_w-g0,a_max_h-g0,a_ave_h-g0,img_max_w-g0,img_ave_w-g0,img_max_h-g0,img_ave_h-g0,tn_total_w-g0,tn_total_h-g0,a_total_w-g0,a_total_h-g0,img_total_w-g0,img_total_h-g0,top-g1,left-g1,width-g1,height-g1,char-g1,txt_nd-g1,img-g1,a-g1,sibling-g1,child-g1,dep-g1,seq-g1,txt_dns-g1,pid-g1,tag-g1,nd_id-g1,nd_cs-g1,ft_sz-g1,color-g1,b_bolor-g1,rtop-g1,rleft-g1,rrow-g1,rcol-g1,dist-g1,simg-g1,mimg-g1,limg-g1,aimg-g1,saimg-g1,maimg-g1,laimg-g1,char_max-g1,char_ave-g1,own_char-g1,own_txt_nd-g1,grant_child-g1,descend-g1,sep-g1,rseq-g1,txt_nd_c-g1,vcc-g1,vcv-g1,avcc-g1,avcv-g1,hcc-g1,hcv-g1,ahcc-g1,ahcv-g1,txt_df-g1,cap_df-g1,tn_max_w-g1,tn_ave_w-g1,tn_max_h-g1,tn_ave_h-g1,a_max_w-g1,a_ave_w-g1,a_max_h-g1,a_ave_h-g1,img_max_w-g1,img_ave_w-g1,img_max_h-g1,img_ave_h-g1,tn_total_w-g1,tn_total_h-g1,a_total_w-g1,a_total_h-g1,img_total_w-g1,img_total_h-g1,top-g2,left-g2,width-g2,height-g2,char-g2,txt_nd-g2,img-g2,a-g2,sibling-g2,child-g2,dep-g2,seq-g2,txt_dns-g2,pid-g2,tag-g2,nd_id-g2,nd_cs-g2,ft_sz-g2,color-g2,b_bolor-g2,rtop-g2,rleft-g2,rrow-g2,rcol-g2,dist-g2,simg-g2,mimg-g2,limg-g2,aimg-g2,saimg-g2,maimg-g2,laimg-g2,char_max-g2,char_ave-g2,own_char-g2,own_txt_nd-g2,grant_child-g2,descend-g2,sep-g2,rseq-g2,txt_nd_c-g2,vcc-g2,vcv-g2,avcc-g2,avcv-g2,hcc-g2,hcv-g2,ahcc-g2,ahcv-g2,txt_df-g2,cap_df-g2,tn_max_w-g2,tn_ave_w-g2,tn_max_h-g2,tn_ave_h-g2,a_max_w-g2,a_ave_w-g2,a_max_h-g2,a_ave_h-g2,img_max_w-g2,img_ave_w-g2
,img_max_h-g2,img_ave_h-g2,tn_total_w-g2,tn_total_h-g2,a_total_w-g2,a_total_h-g2,img_total_w-g2,img_total_h-g2,top-g3,left-g3,width-g3,height-g3,char-g3,txt_nd-g3,img-g3,a-g3,sibling-g3,child-g3,dep-g3,seq-g3,txt_dns-g3,pid-g3,tag-g3,nd_id-g3,nd_cs-g3,ft_sz-g3,color-g3,b_bolor-g3,rtop-g3,rleft-g3,rrow-g3,rcol-g3,dist-g3,simg-g3,mimg-g3,limg-g3,aimg-g3,saimg-g3,maimg-g3,laimg-g3,char_max-g3,char_ave-g3,own_char-g3,own_txt_nd-g3,grant_child-g3,descend-g3,sep-g3,rseq-g3,txt_nd_c-g3,vcc-g3,vcv-g3,avcc-g3,avcv-g3,hcc-g3,hcv-g3,ahcc-g3,ahcv-g3,txt_df-g3,cap_df-g3,tn_max_w-g3,tn_ave_w-g3,tn_max_h-g3,tn_ave_h-g3,a_max_w-g3,a_ave_w-g3,a_max_h-g3,a_ave_h-g3,img_max_w-g3,img_ave_w-g3,img_max_h-g3,img_ave_h-g3,tn_total_w-g3,tn_total_h-g3,a_total_w-g3,a_total_h-g3,img_total_w-g3,img_total_h-g3";

        @NotNull
        private static final String[] NUMERIC_FEATURE_NAMES;

        @NotNull
        private static final String TEXT_FEATURE_NAMES_STRING;

        @NotNull
        private static final String[] TEXT_FEATURE_NAMES;

        @NotNull
        private static final String NOMINAL_FEATURE_NAMES_STRING;

        @NotNull
        private static final String[] NOMINAL_FEATURE_NAMES;

        /** Private so that the only instance is the one assigned to {@code INSTANCE}. */
        private ENCODED() {
        }

        /**
         * Returns the numeric feature names as one comma-separated string.
         *
         * @return the {@code NUMERIC_FEATURE_NAMES_STRING} constant
         */
        @NotNull
        public final String getNUMERIC_FEATURE_NAMES_STRING() {
            return ENCODED.NUMERIC_FEATURE_NAMES_STRING;
        }

        /**
         * Returns the numeric feature names as an array.
         *
         * @return the array held in {@code NUMERIC_FEATURE_NAMES}; the field itself is
         *         returned (no copy), so a write to the array is seen by every caller
         */
        @NotNull
        public final String[] getNUMERIC_FEATURE_NAMES() {
            return ENCODED.NUMERIC_FEATURE_NAMES;
        }

        /**
         * Returns the text feature names as one comma-separated string.
         *
         * @return the {@code TEXT_FEATURE_NAMES_STRING} value set by the static initializer
         */
        @NotNull
        public final String getTEXT_FEATURE_NAMES_STRING() {
            return ENCODED.TEXT_FEATURE_NAMES_STRING;
        }

        /**
         * Returns the text feature names as an array.
         *
         * @return the array held in {@code TEXT_FEATURE_NAMES}; the field itself is
         *         returned (no copy), so a write to the array is seen by every caller
         */
        @NotNull
        public final String[] getTEXT_FEATURE_NAMES() {
            return ENCODED.TEXT_FEATURE_NAMES;
        }

        /**
         * Returns the nominal feature names as one comma-separated string.
         *
         * @return the {@code NOMINAL_FEATURE_NAMES_STRING} value set by the static initializer
         */
        @NotNull
        public final String getNOMINAL_FEATURE_NAMES_STRING() {
            return ENCODED.NOMINAL_FEATURE_NAMES_STRING;
        }

        /**
         * Returns the nominal feature names as an array.
         *
         * @return the array held in {@code NOMINAL_FEATURE_NAMES}; the field itself is
         *         returned (no copy), so a write to the array is seen by every caller
         */
        @NotNull
        public final String[] getNOMINAL_FEATURE_NAMES() {
            return ENCODED.NOMINAL_FEATURE_NAMES;
        }

        /*
         * Initializes the derived feature-name constants.
         *
         * Each *_FEATURE_NAMES array is its *_FEATURE_NAMES_STRING split on ","
         * with trailing empty entries removed.
         *
         * The previous form of this block assigned the trimmed list inside a
         * loop, used `break` to leave the loop, and then fell through to an
         * unconditional `emptyList()` assignment, so every array ended up empty.
         * The trimming now lives in one helper that returns its result directly.
         */
        static {
            NUMERIC_FEATURE_NAMES = splitFeatureNames(NUMERIC_FEATURE_NAMES_STRING);

            TEXT_FEATURE_NAMES_STRING = "tag,id,class,color,bg-color,font,text,url";
            TEXT_FEATURE_NAMES = splitFeatureNames(TEXT_FEATURE_NAMES_STRING);

            NOMINAL_FEATURE_NAMES_STRING = "tag,id,class";
            NOMINAL_FEATURE_NAMES = splitFeatureNames(NOMINAL_FEATURE_NAMES_STRING);
        }

        /**
         * Splits a comma-separated list of names and drops trailing empty entries.
         *
         * @param joinedNames names separated by {@code ","}
         * @return the names in their original order, without the empty strings at the
         *         end of the split result; an empty array if every entry is empty
         */
        private static String[] splitFeatureNames(String joinedNames) {
            List<String> parts = new Regex(",").split(joinedNames, 0);
            // Walk back from the end past every empty entry; leading and interior
            // empties are kept on purpose, only the tail is trimmed.
            int end = parts.size();
            while (end > 0 && parts.get(end - 1).isEmpty()) {
                end--;
            }
            return parts.subList(0, end).toArray(new String[0]);
        }
    }

    /* compiled from: DatasetProcessor.kt */
    @Metadata(mv = {1, 9, 0}, k = 1, xi = 48, d1 = {"��\u0018\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0010\u0011\n\u0002\u0010\u000e\n\u0002\b\u0007\bÆ\u0002\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002R\u0019\u0010\u0003\u001a\b\u0012\u0004\u0012\u00020\u00050\u0004¢\u0006\n\n\u0002\u0010\b\u001a\u0004\b\u0006\u0010\u0007R\u0011\u0010\t\u001a\u00020\u0005¢\u0006\b\n��\u001a\u0004\b\n\u0010\u000b¨\u0006\f"}, d2 = {"Lai/platon/scent/ml/DatasetProcessor$ORIGINAL;", "", "()V", "FEATURE_NAMES", "", "", "getFEATURE_NAMES", "()[Ljava/lang/String;", "[Ljava/lang/String;", "FEATURE_NAMES_STRING", "getFEATURE_NAMES_STRING", "()Ljava/lang/String;", "scent-auto-mining"})
    @SourceDebugExtension({"SMAP\nDatasetProcessor.kt\nKotlin\n*S Kotlin\n*F\n+ 1 DatasetProcessor.kt\nai/platon/scent/ml/DatasetProcessor$ORIGINAL\n+ 2 ArraysJVM.kt\nkotlin/collections/ArraysKt__ArraysJVMKt\n*L\n1#1,77:1\n37#2,2:78\n*S KotlinDebug\n*F\n+ 1 DatasetProcessor.kt\nai/platon/scent/ml/DatasetProcessor$ORIGINAL\n*L\n22#1:78,2\n*E\n"})
    /* loaded from: input_file:ai/platon/scent/ml/DatasetProcessor$ORIGINAL.class */
    public static final class ORIGINAL {

        @NotNull
        public static final ORIGINAL INSTANCE = new ORIGINAL();

        @NotNull
        private static final String FEATURE_NAMES_STRING = new Regex("\\s+").replace("top, left, width, height, char, txt_nd, img, a, sibling, child, dep, seq, txt_dns, pid, tag, nd_id, nd_cs, ft_sz,\ncolor, b_bolor, rtop, rleft, rrow, rcol, dist, simg, mimg, limg, aimg, saimg, maimg, laimg,\nchar_max, char_ave, own_char, own_txt_nd, grant_child, descend, sep, rseq, txt_nd_c, vcc, vcv, avcc, avcv, hcc,\nhcv, ahcc, ahcv, txt_df, cap_df, tn_max_w, tn_ave_w, tn_max_h, tn_ave_h, a_max_w, a_ave_w, a_max_h, a_ave_h,\nimg_max_w, img_ave_w, img_max_h, img_ave_h, tn_total_w, tn_total_h, a_total_w, a_total_h, img_total_w, img_total_h", " ");

        @NotNull
        private static final String[] FEATURE_NAMES;

        /** Private so that the only instance is the one assigned to {@code INSTANCE}. */
        private ORIGINAL() {
        }

        /**
         * Returns the feature names as one comma-separated string.
         *
         * @return the {@code FEATURE_NAMES_STRING} value, in which every run of
         *         whitespace has been replaced by a single space
         */
        @NotNull
        public final String getFEATURE_NAMES_STRING() {
            return ORIGINAL.FEATURE_NAMES_STRING;
        }

        /**
         * Returns the feature names as an array.
         *
         * @return the array held in {@code FEATURE_NAMES}; the field itself is
         *         returned (no copy), so a write to the array is seen by every caller
         */
        @NotNull
        public final String[] getFEATURE_NAMES() {
            return ORIGINAL.FEATURE_NAMES;
        }

        static {
            ORIGINAL original = INSTANCE;
            FEATURE_NAMES = (String[]) StringsKt.split$default(FEATURE_NAMES_STRING, new String[]{","}, false, 0, 6, (Object) null).toArray(new String[0]);
        }
    }

    /**
     * Creates a processor for the dataset stored at the given path.
     *
     * @param path location of the dataset; kept as-is and later handed to the CSV
     *             reader by {@code loadDataset()}; must not be {@code null}
     */
    public DatasetProcessor(@NotNull Path path) {
        // Kotlin-generated null check; it reports the argument under its Kotlin name "datasetPath".
        Intrinsics.checkNotNullParameter(path, "datasetPath");
        this.datasetPath = path;
    }

    /**
     * Returns the dataset location this processor was constructed with.
     *
     * @return the path passed to the constructor
     */
    @NotNull
    public final Path getDatasetPath() {
        return datasetPath;
    }

    /**
     * Reads the file at {@code datasetPath} as CSV into a data frame.
     *
     * <p>The file is read with {@code ','} as the delimiter and the header flag set
     * to {@code true}.
     *
     * @return the data frame produced by the reader
     */
    @NotNull
    public final DataFrame loadDataset() {
        final char delimiter = ',';
        final boolean header = true;
        // 56 == 0b111000 in Kotlin's synthetic $default convention. Presumably it marks
        // the two (char) 0 arguments and the null StructType as placeholders for the
        // reader's declared defaults. TODO confirm against smile's Kotlin read.csv signature.
        final int defaultsMask = 56;
        return read.csv$default(
                read.INSTANCE,
                this.datasetPath,
                delimiter,
                header,
                (char) 0,
                (char) 0,
                (StructType) null,
                defaultsMask,
                (Object) null);
    }
}
