/*
 * Decompiled with CFR 0.152.
 */
package org.pentaho.di.engine.spark.impl.ops;

import com.google.common.annotations.VisibleForTesting;
import java.util.Optional;
import java.util.Set;
import java.util.function.Supplier;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.types.StructType;
import org.pentaho.big.data.kettle.plugins.formats.parquet.output.ParquetOutputMetaBase;
import org.pentaho.di.engine.api.ExecutionContext;
import org.pentaho.di.engine.api.model.Operation;
import org.pentaho.di.engine.api.model.Transformation;
import org.pentaho.di.engine.spark.api.BaseSparkOperation;
import org.pentaho.di.engine.spark.api.SparkOperation;
import org.pentaho.di.engine.spark.api.TypeMapper;
import org.pentaho.di.engine.spark.impl.accumulators.MetricsAccumulator;
import org.pentaho.di.engine.spark.impl.events.FinalOperationEvent;
import org.pentaho.di.engine.spark.impl.events.OperationErrorEvent;
import org.pentaho.di.engine.spark.impl.functions.KettleToSparkRowFunction;
import org.pentaho.di.engine.spark.impl.parquet.ParquetSetup;
import org.pentaho.di.engine.spark.impl.typehandling.ParquetSpec;
import org.pentaho.di.engine.spark.impl.typehandling.ParquetTypeMapper;
import org.pentaho.di.engine.spark.spi.SparkOperationFactory;
import org.pentaho.di.engine.spark.util.MetaHelper;
import org.pentaho.di.engine.spark.util.Util;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.StepMeta;

/**
 * Spark operation backing a Parquet output step.
 *
 * <p>{@link #apply} forwards the incoming RDD to every expected output and registers a driver
 * action; that driver action ({@link #saveDefaultAction}) converts the rows to a Spark
 * {@link Dataset} and writes them in the {@code "parquet"} format.
 */
public class ParquetOutputSparkOperation extends BaseSparkOperation {
    private static final String PARQUET_FORMAT = "parquet";
    private static final String COMPRESSION_OPT = "compression";

    private final TransMeta transMeta;
    /** Step metadata obtained through {@code MetaHelper.getTypedStepMeta}. */
    private final ParquetOutputMetaBase fileMeta;
    private final JavaSparkContext sparkContext;
    /** Step metadata obtained by casting {@code stepMeta.getStepMetaInterface()} directly. */
    private final ParquetOutputMetaBase outputMeta;
    /** True when the logical operation has no outgoing hops (null or empty). */
    private final boolean isFinalOperation;
    /** Replaced in {@link #apply} by the accumulator the subscriber provides. */
    private MetricsAccumulator metricsAccumulator = MetricsAccumulator.empty();

    /**
     * Creates the operation for the given step.
     *
     * @param operation        logical operation; also used to decide whether this is a final operation
     * @param transformation   not used by this constructor
     * @param stepMeta         step definition supplying the parent transformation and the Parquet metadata
     * @param sparkContext     Spark context used for events, the empty RDD and the Hadoop configuration
     * @param executionContext not used by this constructor
     */
    public ParquetOutputSparkOperation(Operation operation, Transformation transformation, StepMeta stepMeta, JavaSparkContext sparkContext, ExecutionContext executionContext) {
        super(operation);
        this.transMeta = stepMeta.getParentTransMeta();
        this.outputMeta = (ParquetOutputMetaBase)stepMeta.getStepMetaInterface();
        this.fileMeta = (ParquetOutputMetaBase)MetaHelper.getTypedStepMeta(stepMeta, ParquetOutputMetaBase.class);
        this.sparkContext = sparkContext;
        this.isFinalOperation = hasNoOutgoingHops(operation);
    }

    /** Returns true when {@code operation.getHopsOut()} is null or empty. */
    private static boolean hasNoOutgoingHops(Operation operation) {
        return operation.getHopsOut() == null || operation.getHopsOut().isEmpty();
    }

    /**
     * Builds a {@link SparkOperationFactory} from this class's constructor and a supplier of
     * {@link KettleToSparkRowFunction}.
     *
     * @return the factory
     */
    public static SparkOperationFactory factory() {
        return new SparkOperationFactory(ParquetOutputSparkOperation::new, new Supplier[]{KettleToSparkRowFunction::new});
    }

    /**
     * @return the logical operation this Spark operation was built for, wrapped in an Optional
     */
    public Optional<Operation> getLogicalOperation() {
        return Optional.of(this.operation);
    }

    /**
     * Wires this operation into the subscriber: adopts the subscriber's metrics accumulator,
     * publishes the input RDD to each expected output, and registers the save as a driver action.
     *
     * @param subscriber source of the input RDD, expected outputs and metrics accumulator
     */
    public void apply(SparkOperation.Subscriber subscriber) {
        Set expectedOutputs = subscriber.getExpectedOutputs();
        this.metricsAccumulator = subscriber.getMetricsAccumulator();

        // The input is cached only when there are expected outputs; with no input at all an
        // empty RDD is used instead.
        // NOTE(review): caching is presumably because the RDD is then consumed both downstream
        // and by the save action - confirm.
        JavaRDD inputRdd = subscriber.getInput()
            .map(input -> {
                if (expectedOutputs.isEmpty()) {
                    return input;
                }
                return input.cache();
            })
            .orElseGet(() -> sparkContext.emptyRDD());

        expectedOutputs.forEach(output -> subscriber.addOutput(output, inputRdd));

        // The accumulator field is read when the action runs, not when it is registered.
        subscriber.registerDriverAction(
            () -> saveDefaultAction((JavaRDD<org.pentaho.di.engine.api.model.Row>)inputRdd, this.metricsAccumulator));
    }

    /**
     * Converts the rows to a Spark dataset and saves them as Parquet at the step's output filename.
     *
     * <p>Any {@link Exception} raised along the way is reported through
     * {@link OperationErrorEvent} instead of being rethrown. On success, a
     * {@link FinalOperationEvent} is sent when this is a final operation.
     *
     * @param rdd                rows to write
     * @param metricsAccumulator accumulator handed to the row conversion
     */
    @VisibleForTesting
    protected void saveDefaultAction(JavaRDD<org.pentaho.di.engine.api.model.Row> rdd, MetricsAccumulator metricsAccumulator) {
        try {
            ParquetTypeMapper typeMapper = new ParquetTypeMapper(
                MetaHelper.getRowMeta((StepMeta)fileMeta.getParentStepMeta()),
                outputMeta.getOutputFields(),
                true,
                ParquetSpec.DataType.class);
            setupParquetParams();

            String stepId = fileMeta.getParentStepMeta().getStepID();
            Dataset<Row> sparkRows = ParquetOutputSparkOperation.convert(typeMapper, rdd, metricsAccumulator, stepId);

            DataFrameWriter<Row> parquetWriter = sparkRows.write().format(PARQUET_FORMAT);
            if (fileMeta.compressionType != null) {
                parquetWriter.option(COMPRESSION_OPT, fileMeta.getCompressionType());
            }
            parquetWriter.mode(selectSaveMode());

            // The variable-substituted filename is written back into the step metadata before
            // the final output filename is constructed from it.
            String substitutedName = transMeta.environmentSubstitute(fileMeta.getFilename());
            fileMeta.setFilename(substitutedName);
            parquetWriter.save(fileMeta.constructOutputFilename());

            if (isFinalOperation) {
                FinalOperationEvent.sendEvent(sparkContext, (String)getUUID());
            }
        }
        catch (Exception e) {
            OperationErrorEvent.sendEvent(sparkContext, (String)getUUID(), (Throwable)e);
        }
    }

    /** Overwrite when the step's {@code overrideOutput} flag is set, otherwise fail if the target exists. */
    private SaveMode selectSaveMode() {
        if (fileMeta.overrideOutput) {
            return SaveMode.Overwrite;
        }
        return SaveMode.ErrorIfExists;
    }

    /**
     * Passes the step's Parquet settings (dictionary flag, page size, dictionary page size,
     * row-group size, Parquet version) to {@link ParquetSetup} together with the Spark
     * context's Hadoop configuration.
     */
    protected void setupParquetParams() {
        ParquetSetup.enableDictionary(sparkContext.hadoopConfiguration(), fileMeta.enableDictionary);
        ParquetSetup.setPageSize(
            sparkContext.hadoopConfiguration(),
            fileMeta.getDataPageSize(transMeta.getParentVariableSpace()));
        // NOTE(review): the sibling calls are all set*/enable*, but this one is named
        // getDictionaryPageSize - confirm against ParquetSetup that it actually stores the value.
        ParquetSetup.getDictionaryPageSize(
            sparkContext.hadoopConfiguration(),
            fileMeta.getDictPageSize(transMeta.getParentVariableSpace()));
        ParquetSetup.setBlockSize(
            sparkContext.hadoopConfiguration(),
            fileMeta.getRowGroupSize(transMeta.getParentVariableSpace()));
        ParquetSetup.setParquetVersion(
            sparkContext.hadoopConfiguration(),
            fileMeta.getParquetVersion(transMeta.getParentVariableSpace()));
    }

    /**
     * Maps each engine row through a {@link KettleToSparkRowFunction} registered for the given
     * step and builds a data frame from the result using the mapper's schema.
     *
     * @param mapper             supplies the schema and is handed to the row function
     * @param input              engine rows to convert
     * @param metricsAccumulator accumulator handed to the row function
     * @param stepId             id under which the row function is registered
     * @return data frame created by the session returned from {@code Util.getSparkSession()}
     */
    public static Dataset<Row> convert(TypeMapper<StructType> mapper, JavaRDD<org.pentaho.di.engine.api.model.Row> input, MetricsAccumulator metricsAccumulator, String stepId) {
        KettleToSparkRowFunction rowFunction = new KettleToSparkRowFunction(metricsAccumulator, mapper);
        JavaRDD convertedRows = input.map((Function)rowFunction.asRegisteredFunction(stepId));
        return Util.getSparkSession().createDataFrame(convertedRows, (StructType)mapper.schema());
    }
}

