/*
 * Decompiled with CFR 0.152.
 */
package org.pentaho.di.engine.spark.impl.ops;

import com.google.common.annotations.VisibleForTesting;
import java.util.Optional;
import java.util.Set;
import java.util.function.Supplier;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructType;
import org.pentaho.big.data.kettle.plugins.formats.parquet.output.ParquetOutputMetaBase;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.engine.api.ExecutionContext;
import org.pentaho.di.engine.api.model.Operation;
import org.pentaho.di.engine.api.model.Transformation;
import org.pentaho.di.engine.spark.api.SparkOperation;
import org.pentaho.di.engine.spark.impl.accumulators.MetricsAccumulator;
import org.pentaho.di.engine.spark.impl.events.FinalOperationEvent;
import org.pentaho.di.engine.spark.impl.events.OperationErrorEvent;
import org.pentaho.di.engine.spark.impl.functions.KettleToSparkParquetRowFunction;
import org.pentaho.di.engine.spark.impl.ops.ParquetSchemaUtils;
import org.pentaho.di.engine.spark.impl.parquet.ParquetSetup;
import org.pentaho.di.engine.spark.spi.SparkOperationFactory;
import org.pentaho.di.engine.spark.util.Util;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;

/**
 * Spark operation that saves its input rows in Parquet format, configured from a
 * {@link ParquetOutputMetaBase} step.
 *
 * <p>{@link #apply} forwards the input RDD to every expected output of the subscriber and
 * registers the actual write ({@link #saveDefaultAction}) as a driver action on that subscriber.
 */
public class ParquetOutputSparkOperation implements SparkOperation {
    private final Operation operation;
    private final JavaSparkContext sparkContext;
    private final TransMeta transMeta;
    /** Step meta obtained by casting the step's meta interface; supplies the output field list. */
    private final ParquetOutputMetaBase outputMeta;
    /** Step meta obtained through {@code Util.getTypedStepMeta}; supplies file and writer settings. */
    private final ParquetOutputMetaBase fileMeta;
    /** True when the logical operation has no outgoing hops (null or empty). */
    private final boolean isFinalOperation;
    /** Replaced in {@link #apply} by the accumulator supplied by the subscriber. */
    private MetricsAccumulator metricsAccumulator = MetricsAccumulator.empty();

    /**
     * Builds the operation from the step definition.
     *
     * @param operation        logical operation this Spark operation represents
     * @param transformation   not used by this constructor
     * @param stepMeta         step definition; its meta interface must be a {@link ParquetOutputMetaBase}
     * @param sparkContext     Spark context used for Hadoop configuration, empty RDDs and events
     * @param executionContext not used by this constructor
     */
    public ParquetOutputSparkOperation(Operation operation, Transformation transformation, StepMeta stepMeta, JavaSparkContext sparkContext, ExecutionContext executionContext) {
        this.transMeta = stepMeta.getParentTransMeta();
        this.outputMeta = (ParquetOutputMetaBase)stepMeta.getStepMetaInterface();
        this.fileMeta = (ParquetOutputMetaBase)Util.getTypedStepMeta(stepMeta, ParquetOutputMetaBase.class);
        this.operation = operation;
        this.sparkContext = sparkContext;
        this.isFinalOperation = ParquetOutputSparkOperation.hasNoHopsOut(operation);
    }

    /** Returns true when the operation's outgoing hop collection is null or empty. */
    private static boolean hasNoHopsOut(Operation operation) {
        if (operation.getHopsOut() == null) {
            return true;
        }
        return operation.getHopsOut().isEmpty();
    }

    /**
     * Creates the factory for this operation, built from this class's constructor and a
     * supplier of {@link KettleToSparkParquetRowFunction}.
     *
     * @return a new {@link SparkOperationFactory}
     */
    public static SparkOperationFactory factory() {
        return new SparkOperationFactory(ParquetOutputSparkOperation::new, new Supplier[]{KettleToSparkParquetRowFunction::new});
    }

    /**
     * @return the logical operation passed to the constructor, wrapped in an {@link Optional}
     */
    public Optional<Operation> getLogicalOperation() {
        return Optional.of(this.operation);
    }

    /**
     * @return the id of the underlying logical operation
     */
    public String getId() {
        return this.operation.getId();
    }

    /**
     * Connects this operation to the given subscriber.
     *
     * <p>The subscriber's input RDD is used as-is when no outputs are expected and is cached
     * otherwise; when the subscriber has no input an empty RDD is used instead. That RDD is added
     * as every expected output, and the Parquet write is registered as a driver action.
     *
     * @param subscriber source of the input RDD, expected outputs and metrics accumulator
     */
    public void apply(SparkOperation.Subscriber subscriber) {
        Set expectedOutputs = subscriber.getExpectedOutputs();
        this.metricsAccumulator = subscriber.getMetricsAccumulator();
        JavaRDD rdd = subscriber.getInput()
            .map(input -> {
                // Cache only when the same RDD is also handed on to the expected outputs.
                if (expectedOutputs.isEmpty()) {
                    return input;
                }
                return input.cache();
            })
            .orElseGet(() -> this.sparkContext.emptyRDD());
        for (Object output : expectedOutputs) {
            subscriber.addOutput(output, rdd);
        }
        // The accumulator field is read when the action runs, not when it is registered.
        subscriber.registerDriverAction(() -> this.saveDefaultAction((JavaRDD<org.pentaho.di.engine.api.model.Row>)rdd, this.metricsAccumulator));
    }

    /**
     * Converts the rows to a Spark dataset and saves it as Parquet.
     *
     * <p>The save mode is {@code Overwrite} when the step's override flag is set and
     * {@code ErrorIfExists} otherwise. The step's filename is replaced by its
     * variable-substituted value before the output path is constructed. When this is the final
     * operation a {@link FinalOperationEvent} is sent after a successful save. Any exception is
     * reported through an {@link OperationErrorEvent} instead of being rethrown.
     *
     * @param rdd                rows to write
     * @param metricsAccumulator accumulator passed on to the row conversion function
     */
    @VisibleForTesting
    protected void saveDefaultAction(JavaRDD<org.pentaho.di.engine.api.model.Row> rdd, MetricsAccumulator metricsAccumulator) {
        try {
            StructType parquetSchema = ParquetSchemaUtils.createSchemaFromMeta(this.outputMeta.outputFields, true);
            this.setupParquetParams();

            RowMetaInterface incomingRowMeta = Util.getRowMeta(this.transMeta, this.getId());
            String stepId = this.fileMeta.getParentStepMeta().getStepID();
            Dataset<Row> parquetRows = ParquetOutputSparkOperation.convert(parquetSchema, rdd, incomingRowMeta, stepId, metricsAccumulator);

            DataFrameWriter<Row> parquetWriter = parquetRows.write().format("parquet");
            if (this.fileMeta.compressionType != null) {
                parquetWriter.option("compression", this.fileMeta.getCompressionType());
            }
            if (this.fileMeta.overrideOutput) {
                parquetWriter.mode(SaveMode.Overwrite);
            } else {
                parquetWriter.mode(SaveMode.ErrorIfExists);
            }

            // The substituted filename is written back to the meta before the output path is built.
            String resolvedFilename = this.transMeta.environmentSubstitute(this.fileMeta.getFilename());
            this.fileMeta.setFilename(resolvedFilename);
            parquetWriter.save(this.fileMeta.constructOutputFilename());

            if (this.isFinalOperation) {
                FinalOperationEvent.sendEvent(this.sparkContext, this.getId());
            }
        }
        catch (Exception e) {
            // Failures are surfaced as an error event for this operation rather than propagated.
            OperationErrorEvent.sendEvent(this.sparkContext, this.operation.getId(), (Throwable)e);
        }
    }

    /**
     * Pushes the step's Parquet settings (dictionary flag, data page size, dictionary page size,
     * row group size, Parquet version) to {@link ParquetSetup} together with the Spark context's
     * Hadoop configuration. Size and version values are resolved against the transformation's
     * parent variable space.
     */
    protected void setupParquetParams() {
        ParquetSetup.enableDictionary(this.sparkContext.hadoopConfiguration(), this.fileMeta.enableDictionary);
        ParquetSetup.setPageSize(this.sparkContext.hadoopConfiguration(), this.fileMeta.getDataPageSize(this.transMeta.getParentVariableSpace()));
        // NOTE(review): named get... but called like the surrounding setters with its result
        // ignored; confirm in ParquetSetup that this actually applies the dictionary page size.
        ParquetSetup.getDictionaryPageSize(this.sparkContext.hadoopConfiguration(), this.fileMeta.getDictPageSize(this.transMeta.getParentVariableSpace()));
        ParquetSetup.setBlockSize(this.sparkContext.hadoopConfiguration(), this.fileMeta.getRowGroupSize(this.transMeta.getParentVariableSpace()));
        ParquetSetup.setParquetVersion(this.sparkContext.hadoopConfiguration(), this.fileMeta.getParquetVersion(this.transMeta.getParentVariableSpace()));
    }

    /**
     * Maps engine rows to Spark rows with a {@link KettleToSparkParquetRowFunction} and wraps the
     * result in a data frame with the given schema.
     *
     * @param schema             schema of the resulting dataset
     * @param input              engine rows to convert
     * @param rowMeta            row metadata handed to the conversion function
     * @param stepID             id under which the conversion function is registered
     * @param metricsAccumulator accumulator handed to the conversion function
     * @return dataset created from the mapped RDD using the session from {@code Util.getSparkSession()}
     */
    public static Dataset<Row> convert(StructType schema, JavaRDD<org.pentaho.di.engine.api.model.Row> input, RowMetaInterface rowMeta, String stepID, MetricsAccumulator metricsAccumulator) {
        SparkSession session = Util.getSparkSession();
        KettleToSparkParquetRowFunction rowFunction = new KettleToSparkParquetRowFunction(rowMeta, metricsAccumulator);
        JavaRDD mappedRows = input.map((Function)rowFunction.asRegisteredFunction(stepID));
        return session.createDataFrame(mappedRows, schema);
    }
}

