/*
 * Decompiled with CFR 0.152.
 */
package org.pentaho.di.engine.spark.impl.ops;

import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.engine.api.ExecutionContext;
import org.pentaho.di.engine.api.model.Operation;
import org.pentaho.di.engine.api.model.Row;
import org.pentaho.di.engine.api.model.Transformation;
import org.pentaho.di.engine.spark.api.SparkOperation;
import org.pentaho.di.engine.spark.impl.accumulators.MetricsAccumulator;
import org.pentaho.di.engine.spark.impl.events.OperationErrorEvent;
import org.pentaho.di.engine.spark.impl.functions.FileInputContentFunction;
import org.pentaho.di.engine.spark.impl.functions.SparkToKettleRowFunction;
import org.pentaho.di.engine.spark.impl.ops.FileInputResolver;
import org.pentaho.di.engine.spark.spi.SparkOperationFactory;
import org.pentaho.di.engine.spark.util.Util;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.steps.file.BaseFileInputMeta;
import org.pentaho.di.trans.steps.fileinput.text.TextFileInputMeta;

/**
 * Spark operation that reads the files configured on a text-file-input step and
 * publishes the resulting RDD of engine {@link Row}s to a subscriber.
 *
 * <p>Two read paths exist: when the step's file type is {@code "CSV"} the files are
 * read through Spark's CSV reader; for any other file type they are read as plain
 * text lines and handed to {@link FileInputContentFunction}.
 *
 * <p>The list of input files is resolved lazily (on first use) and memoized.
 */
public class FileInputSparkOperation
implements SparkOperation {
    private static final long serialVersionUID = 6935963394687169493L;
    private static final String FAIL_FAST_MODE = "FAILFAST";
    private static final String DROP_MALFORMED_MODE = "DROPMALFORMED";
    private final TransMeta transMeta;
    private final JavaSparkContext sparkContext;
    private final Operation operation;
    private final TextFileInputMeta fileMeta;
    // Assigned in apply(...) before the asynchronous read is scheduled.
    private MetricsAccumulator metricsAccumulator;
    // Memoized so that the file list is resolved at most once, and only when a read actually happens.
    private final Supplier<String[]> fileListSupplier = Suppliers.memoize(this::getFileInputList);

    /**
     * Constructor with the signature used by {@link #factory()}.
     *
     * @param operation        logical operation this Spark operation represents
     * @param transformation   not used by this implementation
     * @param stepMeta         step metadata; must describe a text file input step
     * @param sparkContext     Spark context used to read the files
     * @param executionContext not used by this implementation
     */
    public FileInputSparkOperation(Operation operation, Transformation transformation, StepMeta stepMeta, JavaSparkContext sparkContext, ExecutionContext executionContext) {
        this(operation, stepMeta, sparkContext);
    }

    /**
     * Package-private constructor taking only the collaborators that are actually used.
     *
     * @param operation    logical operation this Spark operation represents
     * @param stepMeta     step metadata; its parent transformation and typed step meta are captured
     * @param sparkContext Spark context used to read the files
     */
    FileInputSparkOperation(Operation operation, StepMeta stepMeta, JavaSparkContext sparkContext) {
        this.transMeta = stepMeta.getParentTransMeta();
        this.fileMeta = (TextFileInputMeta)Util.getTypedStepMeta((StepMeta)stepMeta, TextFileInputMeta.class);
        this.operation = operation;
        this.sparkContext = sparkContext;
    }

    /**
     * Resolves the input files for this step via {@link FileInputResolver}.
     *
     * @return the resolved files as an array
     */
    private String[] getFileInputList() {
        List<String> files = FileInputResolver.getFiles((VariableSpace)this.transMeta, (BaseFileInputMeta)this.fileMeta, this.sparkContext);
        return files.toArray(new String[0]);
    }

    /**
     * Creates the factory for this operation, registering the row functions it uses.
     *
     * @return a new {@link SparkOperationFactory}
     */
    public static SparkOperationFactory factory() {
        return new SparkOperationFactory(FileInputSparkOperation::new, new java.util.function.Supplier[]{FileInputContentFunction::new, SparkToKettleRowFunction::new});
    }

    /**
     * @return the logical operation backing this Spark operation (always present)
     */
    public Optional<Operation> getLogicalOperation() {
        return Optional.of(this.operation);
    }

    /**
     * @return the id of the underlying logical operation
     */
    public String getId() {
        return this.operation.getId();
    }

    /**
     * Captures the subscriber's metrics accumulator and schedules the file read
     * asynchronously, using the subscriber's {@code registerDriverAction} as the executor.
     * The resulting future is set as the subscriber's output.
     *
     * @param subscriber receiver of the asynchronous read result
     */
    public void apply(SparkOperation.Subscriber subscriber) {
        this.metricsAccumulator = subscriber.getMetricsAccumulator();
        java.util.function.Supplier<JavaRDD> readAction = this::loadDefaultAction;
        subscriber.setOutput(CompletableFuture.supplyAsync(readAction, task -> subscriber.registerDriverAction(task)));
    }

    /**
     * Returns the configured separator with variables substituted.
     *
     * <p>The check is performed on the substituted value as well as the raw one, so a
     * separator that is configured as a variable resolving to nothing is rejected here
     * instead of being passed on to the CSV reader.
     *
     * @return the non-empty, variable-substituted separator
     * @throws IllegalStateException if no separator is configured or it resolves to an empty value
     */
    private String resolveSeparator() {
        String configured = this.fileMeta.content.separator;
        String resolved = configured == null || configured.isEmpty() ? null : this.environmentSubstitute(configured);
        if (resolved == null || resolved.isEmpty()) {
            throw new IllegalStateException("No separator defined for file input step");
        }
        return resolved;
    }

    /**
     * Reads the input files through Spark's CSV reader and converts the result to engine rows.
     *
     * @return RDD of rows produced by {@link SparkToKettleRowFunction}
     * @throws IllegalStateException if the step has no usable separator
     */
    private JavaRDD<Row> loadDefaultActionCsv() {
        String delimiter = this.resolveSeparator();
        String quote = this.environmentSubstitute(this.fileMeta.content.enclosure);
        String escape = this.environmentSubstitute(this.fileMeta.content.escapeCharacter);
        // Fall back to the JVM default charset when the step does not specify an encoding.
        String charset = this.fileMeta.content.encoding == null ? Charset.defaultCharset().name() : this.fileMeta.content.encoding;
        // When errors are ignored malformed records are dropped; otherwise the read fails on the first one.
        String mode = this.fileMeta.errorHandling.errorIgnored ? DROP_MALFORMED_MODE : FAIL_FAST_MODE;

        SparkSession sparkSession = Util.getSparkSession();
        // NOTE(review): the trailing csv(...) call selects the CSV source on its own in Spark's
        // DataFrameReader, so this format(...) call looks redundant - confirm before removing.
        Dataset dataset = sparkSession.read()
            .format("org.apache.spark.csv")
            .option("delimiter", delimiter)
            .option("header", this.fileMeta.content.header)
            .option("quote", quote)
            .option("escape", escape)
            .option("charset", charset)
            .option("mode", mode)
            .schema(Util.getReadStructType((RowMetaInterface)Util.getRowMeta((TransMeta)this.transMeta, (String)this.getId())))
            .csv(this.fileListSupplier.get());
        this.metricsAccumulator.setCountRowIn(false);
        return dataset.toJavaRDD().mapPartitions(new SparkToKettleRowFunction(this.transMeta.findStep(this.getId()), this.metricsAccumulator).asRegisteredFunction(this.fileMeta.getParentStepMeta().getStepID()).toFlatMap());
    }

    /**
     * Reads the input files as text lines and delegates parsing to {@link FileInputContentFunction}.
     * Used for every file type other than {@code "CSV"}.
     *
     * @return RDD of rows produced by {@link FileInputContentFunction}
     */
    private JavaRDD<Row> loadDefaultActionFixedWidth() {
        // All files are passed to textFile as a single comma-joined path string.
        String joinedPaths = String.join(",", this.fileListSupplier.get());
        JavaRDD fileRdd = this.sparkContext.textFile(joinedPaths);
        return fileRdd.mapPartitions(new FileInputContentFunction(this.transMeta.findStep(this.getId()), this.metricsAccumulator).asRegisteredFunction(this.fileMeta.getParentStepMeta().getStepID()).toFlatMap());
    }

    /**
     * Dispatches to the CSV or text-line read path based on the step's file type.
     *
     * <p>Any exception is reported through {@link OperationErrorEvent} rather than
     * propagated; in that case this method returns {@code null}.
     *
     * @return the RDD of rows, or {@code null} if reading failed
     */
    private JavaRDD<Row> loadDefaultAction() {
        try {
            if ("CSV".equals(this.fileMeta.content.fileType)) {
                return this.loadDefaultActionCsv();
            }
            return this.loadDefaultActionFixedWidth();
        }
        catch (Exception e) {
            OperationErrorEvent.sendEvent((JavaSparkContext)this.sparkContext, (String)this.operation.getId(), (Throwable)e);
            return null;
        }
    }

    /**
     * Substitutes variables in {@code s} using the parent transformation's variable space.
     *
     * @param s text that may contain variable references
     * @return the text as returned by {@code TransMeta.environmentSubstitute}
     */
    private String environmentSubstitute(String s) {
        return this.transMeta.environmentSubstitute(s);
    }
}

