Skip to content

Bulk Data Pivot, Query Vlookup

Problem:

  1. there were many big files around 200-300 mb with around 7 Lakh line items which needed to be pivotted and v looked up
  2. Excel crashed
  3. Power query took 30 minutes to load and the moment i put another filter, start again- takes another 30 minutes

Solution:

  1. Data Bases are faster and more optimised for large data
  2. Duck DB is really good for loading big files in a faster manner

Code

import sys
import pandas as pd
import duckdb
from pathlib import Path
from datetime import datetime
from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QTabWidget, QWidget, QVBoxLayout,
    QHBoxLayout, QPushButton, QFileDialog, QTableWidget, QTableWidgetItem,
    QComboBox, QLabel, QListWidget, QAbstractItemView, QMessageBox,
    QGroupBox, QScrollArea, QLineEdit, QDialog, QDialogButtonBox, QInputDialog,
    QCheckBox, QSpinBox, QTextEdit
)
from PyQt5.QtCore import Qt

# --- Step Logger Class ---
class StepLogger:
    """Records data transformation steps in a human-readable format."""
    def __init__(self):
        self.steps = []

    def log(self, message: str):
        """Adds a timestamped step to the log."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        log_entry = f"[{timestamp}] {message}"
        self.steps.append(log_entry)

    def get_steps_report(self) -> str:
        """Returns the full log as a single string."""
        if not self.steps:
            return "No transformation steps were recorded."
        return "\n".join(self.steps)

    def clear(self):
        """Clears all recorded steps."""
        self.steps = []

# --- Helper function to display DataFrame in QTableWidget ---
def display_df_in_table(df: pd.DataFrame, table_widget: QTableWidget, limit_rows: bool, max_rows: int):
    """Populates a QTableWidget with data from a pandas DataFrame."""
    table_widget.clear()
    if df is None or df.empty:
        table_widget.setRowCount(0)
        table_widget.setColumnCount(0)
        return

    # Limit rows for UI performance if enabled
    display_df = df.head(max_rows) if limit_rows else df
    
    table_widget.setRowCount(display_df.shape[0])
    table_widget.setColumnCount(display_df.shape[1])
    table_widget.setHorizontalHeaderLabels(display_df.columns)

    for row_idx, row in enumerate(display_df.itertuples(index=False)):
        for col_idx, cell_data in enumerate(row):
            item = QTableWidgetItem(str(cell_data))
            item.setFlags(item.flags() & ~Qt.ItemIsEditable) # Read-only
            table_widget.setItem(row_idx, col_idx, item)
    
    table_widget.resizeColumnsToContents()

class ExcelSheetDialog(QDialog):
    """A dialog to select a sheet from an Excel file."""
    def __init__(self, sheet_names, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Select Excel Sheet")
        layout = QVBoxLayout(self)
        
        self.sheet_combo = QComboBox()
        self.sheet_combo.addItems(sheet_names)
        layout.addWidget(QLabel("Please select the sheet to load:"))
        layout.addWidget(self.sheet_combo)
        
        buttons = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel)
        buttons.accepted.connect(self.accept)
        buttons.rejected.connect(self.reject)
        layout.addWidget(buttons)
        
    def selected_sheet(self):
        return self.sheet_combo.currentText()

class ExportDialog(QDialog):
    """A custom dialog for exporting data with an option to include steps."""
    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWindowTitle("Export Data")
        self.layout = QVBoxLayout(self)
        
        self.path_edit = QLineEdit()
        self.browse_btn = QPushButton("Browse...")
        self.browse_btn.clicked.connect(self.browse)
        
        path_layout = QHBoxLayout()
        path_layout.addWidget(QLabel("Save to:"))
        path_layout.addWidget(self.path_edit)
        path_layout.addWidget(self.browse_btn)
        self.layout.addLayout(path_layout)
        
        self.include_steps_checkbox = QCheckBox("Include transformation steps in a separate .txt file")
        self.include_steps_checkbox.setChecked(True)
        self.layout.addWidget(self.include_steps_checkbox)
        
        buttons = QDialogButtonBox(QDialogButtonBox.Save | QDialogButtonBox.Cancel)
        buttons.accepted.connect(self.accept)
        buttons.rejected.connect(self.reject)
        self.layout.addWidget(buttons)
        
    def browse(self):
        filepath, _ = QFileDialog.getSaveFileName(self, "Save File", "", "CSV Files (*.csv);;Excel Files (*.xlsx)")
        if filepath:
            self.path_edit.setText(filepath)
            
    def get_export_options(self):
        return {
            "filepath": self.path_edit.text(),
            "include_steps": self.include_steps_checkbox.isChecked()
        }

class DataMasterApp(QMainWindow):
    def __init__(self):
        super().__init__()
        self.setWindowTitle("High-Performance Data Analyzer (DuckDB Edition)")
        self.setGeometry(100, 100, 1400, 900)

        self.logger = StepLogger()
        self.con = duckdb.connect(database=':memory:', read_only=False)
        self.loaded_tables = {}
        self.filtered_df, self.pivot_df, self.merge_df, self.append_df = None, None, None, None

        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        main_layout = QVBoxLayout(central_widget)

        self.tabs = QTabWidget()
        main_layout.addWidget(self.tabs)

        self.load_tab, self.filter_tab, self.pivot_tab, self.merge_tab, self.append_tab, self.history_tab = QWidget(), QWidget(), QWidget(), QWidget(), QWidget(), QWidget()

        self.tabs.addTab(self.load_tab, "1. Load & Manage Data")
        self.tabs.addTab(self.filter_tab, "2. Filter Data")
        self.tabs.addTab(self.pivot_tab, "3. Pivot Data")
        self.tabs.addTab(self.merge_tab, "4. Merge Queries (Join)")
        self.tabs.addTab(self.append_tab, "5. Append Queries (Union)")
        self.tabs.addTab(self.history_tab, "Transformation History")

        self.init_load_tab()
        self.init_filter_tab()
        self.init_pivot_tab()
        self.init_merge_tab()
        self.init_append_tab()
        self.init_history_tab()
        
        self.init_display_settings(main_layout)
        self.log_step("Application started.")

    def init_display_settings(self, main_layout):
        display_group = QGroupBox("Display Settings")
        display_layout = QHBoxLayout(display_group)
        self.limit_rows_checkbox = QCheckBox("Limit displayed rows")
        self.limit_rows_checkbox.setChecked(True)
        self.limit_rows_spinbox = QSpinBox()
        self.limit_rows_spinbox.setRange(1, 100000)
        self.limit_rows_spinbox.setValue(1000)
        display_layout.addWidget(self.limit_rows_checkbox)
        display_layout.addWidget(self.limit_rows_spinbox)
        display_layout.addStretch()
        main_layout.addWidget(display_group)

    def init_history_tab(self):
        layout = QVBoxLayout(self.history_tab)
        layout.addWidget(QLabel("Live log of all data transformation steps:"))
        self.history_text_edit = QTextEdit()
        self.history_text_edit.setReadOnly(True)
        self.history_text_edit.setFontFamily("Courier")
        layout.addWidget(self.history_text_edit)

    def init_load_tab(self):
        layout = QHBoxLayout(self.load_tab)
        left_panel, right_panel = QWidget(), QWidget()
        left_layout, right_layout = QVBoxLayout(left_panel), QVBoxLayout(right_panel)
        left_layout.setAlignment(Qt.AlignTop)
        
        self.load_file_btn = QPushButton("Load CSV / Excel File")
        self.load_file_btn.clicked.connect(self.load_file)
        left_layout.addWidget(self.load_file_btn)
        
        left_layout.addWidget(QLabel("Loaded Tables (Double-click to rename):"))
        self.loaded_tables_list = QListWidget()
        self.loaded_tables_list.itemSelectionChanged.connect(self.on_table_select)
        self.loaded_tables_list.itemDoubleClicked.connect(self.rename_table_dialog)
        left_layout.addWidget(self.loaded_tables_list)

        self.delete_table_btn = QPushButton("Delete Selected Table")
        self.delete_table_btn.clicked.connect(self.delete_table)
        left_layout.addWidget(self.delete_table_btn)

        type_group = QGroupBox("Change Column Data Types (Select rows to apply)")
        type_layout = QVBoxLayout(type_group)
        self.type_table = QTableWidget()
        self.type_table.setColumnCount(3)
        self.type_table.setHorizontalHeaderLabels(["Column", "Current Type", "New Type"])
        self.type_table.setSelectionBehavior(QAbstractItemView.SelectRows)
        type_layout.addWidget(self.type_table)
        self.apply_types_btn = QPushButton("Apply Type Changes to Selected")
        self.apply_types_btn.clicked.connect(self.apply_type_changes)
        type_layout.addWidget(self.apply_types_btn)
        left_layout.addWidget(type_group)

        right_layout.addWidget(QLabel("Data Preview:"))
        self.preview_table = QTableWidget()
        right_layout.addWidget(self.preview_table)
        
        layout.addWidget(left_panel, 1)
        layout.addWidget(right_panel, 3)

    def init_filter_tab(self):
        layout = QVBoxLayout(self.filter_tab)
        top_controls = QHBoxLayout()
        top_controls.addWidget(QLabel("Select Table to Filter:"))
        self.filter_table_combo = QComboBox()
        top_controls.addWidget(self.filter_table_combo, 1)
        self.add_filter_btn = QPushButton("Add Filter Condition")
        self.add_filter_btn.clicked.connect(self.add_filter_condition)
        top_controls.addWidget(self.add_filter_btn)
        self.apply_filters_btn = QPushButton("Apply All Filters")
        self.apply_filters_btn.clicked.connect(self.apply_filters)
        top_controls.addWidget(self.apply_filters_btn)
        self.export_filtered_btn = QPushButton("Export Result")
        self.export_filtered_btn.clicked.connect(lambda: self.export_data(self.filtered_df))
        top_controls.addWidget(self.export_filtered_btn)
        layout.addLayout(top_controls)
        
        self.filter_scroll_area = QScrollArea()
        self.filter_scroll_area.setWidgetResizable(True)
        self.filter_conditions_widget = QWidget()
        self.filter_conditions_layout = QVBoxLayout(self.filter_conditions_widget)
        self.filter_conditions_layout.setAlignment(Qt.AlignTop)
        self.filter_scroll_area.setWidget(self.filter_conditions_widget)
        layout.addWidget(self.filter_scroll_area, 1)

        layout.addWidget(QLabel("Filtered Result:"), 0)
        self.filter_result_table = QTableWidget()
        layout.addWidget(self.filter_result_table, 3)
        self.filter_conditions = []

    def init_pivot_tab(self):
        layout = QHBoxLayout(self.pivot_tab)
        left_panel, right_panel = QVBoxLayout(), QVBoxLayout()
        left_panel.setAlignment(Qt.AlignTop)
        left_panel.addWidget(QLabel("Select Table to Pivot:"))
        self.pivot_table_combo = QComboBox()
        self.pivot_table_combo.currentTextChanged.connect(self.populate_pivot_columns)
        left_panel.addWidget(self.pivot_table_combo)
        
        left_panel.addWidget(QLabel("Group By (Rows - Ctrl+Click for multiple):"))
        self.pivot_index_list = QListWidget()
        self.pivot_index_list.setSelectionMode(QAbstractItemView.MultiSelection)
        left_panel.addWidget(self.pivot_index_list)
        
        left_panel.addWidget(QLabel("Values (for Aggregation):"))
        self.pivot_values_list = QListWidget()
        self.pivot_values_list.setSelectionMode(QAbstractItemView.MultiSelection)
        left_panel.addWidget(self.pivot_values_list)
        
        left_panel.addWidget(QLabel("Aggregation Function:"))
        self.pivot_agg_combo = QComboBox()
        self.pivot_agg_combo.addItems(['SUM', 'AVG', 'COUNT', 'MIN', 'MAX'])
        left_panel.addWidget(self.pivot_agg_combo)
        
        self.generate_pivot_btn = QPushButton("Generate Pivot Table")
        self.generate_pivot_btn.clicked.connect(self.generate_pivot)
        left_panel.addWidget(self.generate_pivot_btn)
        
        self.export_pivot_btn = QPushButton("Export Pivot Result")
        self.export_pivot_btn.clicked.connect(lambda: self.export_data(self.pivot_df))
        left_panel.addWidget(self.export_pivot_btn)

        right_panel.addWidget(QLabel("Pivot Result:"))
        self.pivot_result_table = QTableWidget()
        right_panel.addWidget(self.pivot_result_table)
        
        layout.addLayout(left_panel, 1)
        layout.addLayout(right_panel, 3)

    def init_merge_tab(self):
        layout = QVBoxLayout(self.merge_tab)
        top_controls = QHBoxLayout()
        join_group = QGroupBox("Join Configuration")
        join_group_layout = QHBoxLayout(join_group)
        join_group_layout.addWidget(QLabel("Join Type:"))
        self.merge_type_combo = QComboBox()
        self.merge_type_combo.addItems(['INNER', 'LEFT', 'RIGHT', 'FULL', 'LEFT ANTI', 'RIGHT ANTI'])
        join_group_layout.addWidget(self.merge_type_combo)
        self.perform_merge_btn = QPushButton("Perform Merge")
        self.perform_merge_btn.clicked.connect(self.perform_merge)
        join_group_layout.addWidget(self.perform_merge_btn)
        self.export_merge_btn = QPushButton("Export Merge Result")
        self.export_merge_btn.clicked.connect(lambda: self.export_data(self.merge_df))
        join_group_layout.addWidget(self.export_merge_btn)
        top_controls.addWidget(join_group)
        layout.addLayout(top_controls)
        
        key_selection_layout = QHBoxLayout()
        left_group, right_group = QGroupBox("Left Table"), QGroupBox("Right Table")
        left_group_layout, right_group_layout = QVBoxLayout(left_group), QVBoxLayout(right_group)
        
        left_group_layout.addWidget(QLabel("Select Left Table:"))
        self.merge_left_table_combo = QComboBox()
        self.merge_left_table_combo.currentTextChanged.connect(lambda: self.populate_merge_keys('left'))
        left_group_layout.addWidget(self.merge_left_table_combo)
        left_group_layout.addWidget(QLabel("Left Join Keys (Ctrl+Click for multiple):"))
        self.merge_left_key_list = QListWidget()
        self.merge_left_key_list.setSelectionMode(QAbstractItemView.MultiSelection)
        left_group_layout.addWidget(self.merge_left_key_list)
        
        right_group_layout.addWidget(QLabel("Select Right Table:"))
        self.merge_right_table_combo = QComboBox()
        self.merge_right_table_combo.currentTextChanged.connect(lambda: self.populate_merge_keys('right'))
        right_group_layout.addWidget(self.merge_right_table_combo)
        right_group_layout.addWidget(QLabel("Right Join Keys (Ctrl+Click for multiple):"))
        self.merge_right_key_list = QListWidget()
        self.merge_right_key_list.setSelectionMode(QAbstractItemView.MultiSelection)
        right_group_layout.addWidget(self.merge_right_key_list)
        
        key_selection_layout.addWidget(left_group)
        key_selection_layout.addWidget(right_group)
        layout.addLayout(key_selection_layout)
        
        layout.addWidget(QLabel("Merge Result:"))
        self.merge_result_table = QTableWidget()
        layout.addWidget(self.merge_result_table)

    def init_append_tab(self):
        layout = QHBoxLayout(self.append_tab)
        left_panel, right_panel = QVBoxLayout(), QVBoxLayout()
        left_panel.setAlignment(Qt.AlignTop)
        left_panel.addWidget(QLabel("Select tables to append (must have same columns):"))
        self.append_tables_list = QListWidget()
        self.append_tables_list.setSelectionMode(QAbstractItemView.MultiSelection)
        left_panel.addWidget(self.append_tables_list)
        self.perform_append_btn = QPushButton("Append Selected Tables")
        self.perform_append_btn.clicked.connect(self.perform_append)
        left_panel.addWidget(self.perform_append_btn)
        self.export_append_btn = QPushButton("Export Append Result")
        self.export_append_btn.clicked.connect(lambda: self.export_data(self.append_df))
        left_panel.addWidget(self.export_append_btn)

        right_panel.addWidget(QLabel("Append Result:"))
        self.append_result_table = QTableWidget()
        right_panel.addWidget(self.append_result_table)
        
        layout.addLayout(left_panel, 1)
        layout.addLayout(right_panel, 3)

    def log_step(self, message):
        self.logger.log(message)
        self.history_text_edit.append(self.logger.steps[-1])

    def load_file(self):
        filepath, _ = QFileDialog.getOpenFileName(self, "Open Data File", "", "Data Files (*.csv *.xlsx)")
        if not filepath:
            return

        try:
            base_name = "".join(filter(str.isalnum, Path(filepath).stem))
            table_name = base_name
            count = 1
            while table_name in self.loaded_tables:
                table_name = f"{base_name}_{count}"
                count += 1

            if filepath.endswith('.csv'):
                self.con.execute(f"CREATE TABLE {table_name} AS SELECT * FROM read_csv_auto('{filepath}', header=true)")
            elif filepath.endswith('.xlsx'):
                xls = pd.ExcelFile(filepath)
                sheet_name = xls.sheet_names[0]
                if len(xls.sheet_names) > 1:
                    dialog = ExcelSheetDialog(xls.sheet_names, self)
                    if dialog.exec_() == QDialog.Accepted:
                        sheet_name = dialog.selected_sheet()
                    else:
                        return
                df = pd.read_excel(filepath, sheet_name=sheet_name)
                self.con.register(table_name, df)
            
            self.loaded_tables[table_name] = self.con.execute(f"SELECT * FROM {table_name} LIMIT 10").fetchdf()
            self.update_all_table_widgets()
            self.loaded_tables_list.setCurrentRow(self.loaded_tables_list.count() - 1)
            self.log_step(f"Loaded file '{Path(filepath).name}' into table '{table_name}'.")
            QMessageBox.information(self, "Success", f"Loaded '{filepath}' as table '{table_name}'.")

        except Exception as e:
            QMessageBox.critical(self, "Error", f"Failed to load file: {e}")

    def on_table_select(self):
        selected_items = self.loaded_tables_list.selectedItems()
        if not selected_items:
            self.preview_table.clear()
            self.type_table.setRowCount(0)
            return
        
        table_name = selected_items[0].text()
        df_schema = self.con.execute(f"DESCRIBE {table_name}").fetchdf()

        preview_df = self.con.execute(f"SELECT * FROM {table_name}").fetchdf()
        display_df_in_table(preview_df, self.preview_table, self.limit_rows_checkbox.isChecked(), self.limit_rows_spinbox.value())
        
        self.type_table.setRowCount(0)
        self.type_table.setRowCount(len(df_schema))
        
        valid_types = ['VARCHAR', 'INTEGER', 'BIGINT', 'FLOAT', 'DOUBLE', 'DATE', 'TIMESTAMP', 'BOOLEAN']
        for row, record in enumerate(df_schema.itertuples()):
            self.type_table.setItem(row, 0, QTableWidgetItem(record.column_name))
            self.type_table.setItem(row, 1, QTableWidgetItem(record.column_type))
            
            combo = QComboBox()
            combo.addItems(valid_types)
            self.type_table.setCellWidget(row, 2, combo)
        self.type_table.resizeColumnsToContents()

    def apply_type_changes(self):
        selected_table_items = self.loaded_tables_list.selectedItems()
        if not selected_table_items:
            QMessageBox.warning(self, "Warning", "Please select a table first.")
            return
        table_name = selected_table_items[0].text()
        
        selected_rows = self.type_table.selectionModel().selectedRows()
        if not selected_rows:
            QMessageBox.warning(self, "Warning", "Please select one or more columns in the type table to apply changes.")
            return
            
        try:
            log_messages = []
            for index in selected_rows:
                row = index.row()
                col_name = self.type_table.item(row, 0).text()
                new_type = self.type_table.cellWidget(row, 2).currentText()
                self.con.execute(f'ALTER TABLE "{table_name}" ALTER "{col_name}" TYPE {new_type}')
                log_messages.append(f"Changed type of column '{col_name}' to {new_type}")
            
            self.log_step(f"Applied type changes to table '{table_name}': {'; '.join(log_messages)}.")
            QMessageBox.information(self, "Success", "Data types for selected columns updated successfully.")
            self.on_table_select()
        except Exception as e:
            QMessageBox.critical(self, "Error", f"Could not apply type changes: {e}")

    def rename_table_dialog(self, item):
        old_name = item.text()
        new_name, ok = QInputDialog.getText(self, "Rename Table", "Enter new table name:", text=old_name)
        
        if ok and new_name and new_name != old_name:
            if new_name in self.loaded_tables:
                QMessageBox.critical(self, "Error", f"Table name '{new_name}' already exists.")
                return
            try:
                self.con.execute(f'ALTER TABLE "{old_name}" RENAME TO "{new_name}"')
                self.loaded_tables[new_name] = self.loaded_tables.pop(old_name)
                self.update_all_table_widgets()
                self.log_step(f"Renamed table '{old_name}' to '{new_name}'.")
                QMessageBox.information(self, "Success", f"Table '{old_name}' renamed to '{new_name}'.")
            except Exception as e:
                QMessageBox.critical(self, "Error", f"Could not rename table: {e}")

    def delete_table(self):
        selected_items = self.loaded_tables_list.selectedItems()
        if not selected_items:
            QMessageBox.warning(self, "Warning", "Please select a table to delete.")
            return
        
        table_name = selected_items[0].text()
        reply = QMessageBox.question(self, 'Confirm Delete', f"Are you sure you want to permanently delete the table '{table_name}'?",
                                     QMessageBox.Yes | QMessageBox.No, QMessageBox.No)
        
        if reply == QMessageBox.Yes:
            try:
                self.con.execute(f'DROP TABLE "{table_name}"')
                del self.loaded_tables[table_name]
                self.update_all_table_widgets()
                self.log_step(f"Deleted table '{table_name}'.")
                QMessageBox.information(self, "Success", f"Table '{table_name}' has been deleted.")
            except Exception as e:
                QMessageBox.critical(self, "Error", f"Could not delete table: {e}")

    def add_filter_condition(self):
        table_name = self.filter_table_combo.currentText()
        if not table_name:
            QMessageBox.warning(self, "Warning", "Please select a table to filter first.")
            return
        
        condition_widget = QWidget()
        layout = QHBoxLayout(condition_widget)
        
        columns = self.con.execute(f"DESCRIBE {table_name}").fetchdf()['column_name'].tolist()
        col_combo = QComboBox()
        col_combo.addItems(columns)
        
        op_combo = QComboBox()
        op_combo.addItems(['=', '!=', '>', '<', '>=', '<=', 'LIKE', 'NOT LIKE', 'IS NULL', 'IS NOT NULL'])
        
        val_edit = QLineEdit()
        
        remove_btn = QPushButton("X")
        remove_btn.setFixedWidth(30)
        
        layout.addWidget(col_combo)
        layout.addWidget(op_combo)
        layout.addWidget(val_edit)
        layout.addWidget(remove_btn)
        
        self.filter_conditions_layout.addWidget(condition_widget)
        condition_ref = {'widget': condition_widget, 'col': col_combo, 'op': op_combo, 'val': val_edit}
        self.filter_conditions.append(condition_ref)
        
        remove_btn.clicked.connect(lambda: self.remove_filter_condition(condition_ref))

    def remove_filter_condition(self, condition_ref):
        condition_ref['widget'].deleteLater()
        self.filter_conditions.remove(condition_ref)

    def apply_filters(self):
        table_name = self.filter_table_combo.currentText()
        if not table_name: return
        
        where_clauses = []
        log_clauses = []
        for cond in self.filter_conditions:
            col = f'"{cond["col"].currentText()}"'
            op = cond["op"].currentText()
            val = cond["val"].text()
            
            if op in ['IS NULL', 'IS NOT NULL']:
                where_clauses.append(f"{col} {op}")
                log_clauses.append(f"{col} {op}")
            else:
                # Properly quote string values for query
                val_quoted = val.replace("'", "''")
                where_clauses.append(f"{col} {op} '{val_quoted}'")
                log_clauses.append(f"{col} {op} '{val}'")
        
        if not where_clauses:
            QMessageBox.information(self, "Info", "No filter conditions to apply.")
            return

        query = f'SELECT * FROM "{table_name}" WHERE {" AND ".join(where_clauses)}'
            
        try:
            self.filtered_df = self.con.execute(query).fetchdf()
            display_df_in_table(self.filtered_df, self.filter_result_table, self.limit_rows_checkbox.isChecked(), self.limit_rows_spinbox.value())
            self.log_step(f"Filtered table '{table_name}' with conditions: {' AND '.join(log_clauses)}.")
        except Exception as e:
            self.filtered_df = None
            QMessageBox.critical(self, "Query Error", f"Failed to apply filters: {e}")

    def generate_pivot(self):
        table_name = self.pivot_table_combo.currentText()
        group_by_cols = [item.text() for item in self.pivot_index_list.selectedItems()]
        agg_func = self.pivot_agg_combo.currentText()
        value_cols = [item.text() for item in self.pivot_values_list.selectedItems()]
        
        if not all([table_name, group_by_cols, value_cols]):
            QMessageBox.warning(self, "Input Error", "Please select a table, at least one grouping column, and at least one value column.")
            return
            
        group_by_string = ", ".join([f'"{col}"' for col in group_by_cols])
        selections = [f'"{col}"' for col in group_by_cols]
        for col in value_cols:
            sanitized_alias = f"{agg_func}_{col}".replace('"', '""')
            selections.append(f'{agg_func}("{col}") AS "{sanitized_alias}"')
        
        query = f'SELECT {", ".join(selections)} FROM "{table_name}" GROUP BY {group_by_string}'
        
        try:
            self.pivot_df = self.con.execute(query).fetchdf()
            display_df_in_table(self.pivot_df, self.pivot_result_table, self.limit_rows_checkbox.isChecked(), self.limit_rows_spinbox.value())
            log_msg = f"Created Pivot on table '{table_name}'. Grouped by: {', '.join(group_by_cols)}. Aggregated: {', '.join(value_cols)} using {agg_func}."
            self.log_step(log_msg)
        except Exception as e:
            self.pivot_df = None
            QMessageBox.critical(self, "Pivot Error", f"Failed to generate pivot table: {e}\nQuery: {query}")

    def perform_merge(self):
        left_table = self.merge_left_table_combo.currentText()
        right_table = self.merge_right_table_combo.currentText()
        left_keys = [item.text() for item in self.merge_left_key_list.selectedItems()]
        right_keys = [item.text() for item in self.merge_right_key_list.selectedItems()]
        join_type = self.merge_type_combo.currentText()
        
        if not (left_table and right_table and left_keys and right_keys):
            QMessageBox.warning(self, "Input Error", "Please select left/right tables and at least one join key for each.")
            return
        if len(left_keys) != len(right_keys):
            QMessageBox.warning(self, "Input Error", "The number of join keys for the left and right tables must be equal.")
            return
            
        on_clauses = [f'"{left_table}"."{lk}" = "{right_table}"."{rk}"' for lk, rk in zip(left_keys, right_keys)]
        on_string = " AND ".join(on_clauses)
        query = f'SELECT * FROM "{left_table}" {join_type} JOIN "{right_table}" ON {on_string}'
        
        try:
            self.merge_df = self.con.execute(query).fetchdf()
            display_df_in_table(self.merge_df, self.merge_result_table, self.limit_rows_checkbox.isChecked(), self.limit_rows_spinbox.value())
            log_msg = f"Merged table '{left_table}' and '{right_table}' using a {join_type} JOIN on keys: ({', '.join(left_keys)}) = ({', '.join(right_keys)})."
            self.log_step(log_msg)
        except Exception as e:
            self.merge_df = None
            QMessageBox.critical(self, "Merge Error", f"Failed to perform merge: {e}")

    def perform_append(self):
        tables_to_append = [item.text() for item in self.append_tables_list.selectedItems()]
        if len(tables_to_append) < 2:
            QMessageBox.warning(self, "Input Error", "Please select at least two tables to append.")
            return
            
        try:
            base_schema = self.con.execute(f"DESCRIBE {tables_to_append[0]}").fetchdf()['column_name'].tolist()
            for table in tables_to_append[1:]:
                current_schema = self.con.execute(f"DESCRIBE {table}").fetchdf()['column_name'].tolist()
                if base_schema != current_schema:
                    QMessageBox.critical(self, "Schema Mismatch", f"Table '{table}' does not have the same columns as '{tables_to_append[0]}'.")
                    return
        except Exception as e:
             QMessageBox.critical(self, "Error", f"Could not verify schemas: {e}")
             return

        union_query = " UNION ALL ".join([f'SELECT * FROM "{table}"' for table in tables_to_append])
        
        try:
            self.append_df = self.con.execute(union_query).fetchdf()
            display_df_in_table(self.append_df, self.append_result_table, self.limit_rows_checkbox.isChecked(), self.limit_rows_spinbox.value())
            self.log_step(f"Appended tables: {', '.join(tables_to_append)}.")
        except Exception as e:
            self.append_df = None
            QMessageBox.critical(self, "Append Error", f"Failed to append tables: {e}")

    def export_data(self, df):
        if df is None or df.empty:
            QMessageBox.warning(self, "No Data", "There is no result data to export.")
            return

        dialog = ExportDialog(self)
        if dialog.exec_() == QDialog.Accepted:
            options = dialog.get_export_options()
            filepath = options["filepath"]
            include_steps = options["include_steps"]

            if not filepath:
                QMessageBox.warning(self, "Input Error", "File path cannot be empty.")
                return
            
            try:
                if filepath.endswith('.csv'):
                    df.to_csv(filepath, index=False)
                elif filepath.endswith('.xlsx'):
                    df.to_excel(filepath, index=False)
                else:
                    QMessageBox.critical(self, "Export Error", "Unsupported file type. Please use .csv or .xlsx.")
                    return
                
                QMessageBox.information(self, "Success", f"Data successfully exported to {filepath}")
                self.log_step(f"Exported result to '{Path(filepath).name}'.")

                if include_steps:
                    steps_filepath = Path(filepath).with_suffix('.steps.txt')
                    with open(steps_filepath, 'w', encoding='utf-8') as f:
                        f.write(self.logger.get_steps_report())
                    QMessageBox.information(self, "Success", f"Transformation steps saved to {steps_filepath}")
                    self.log_step(f"Exported steps to '{steps_filepath.name}'.")

            except Exception as e:
                QMessageBox.critical(self, "Export Error", f"Failed to export file: {e}")

    def update_all_table_widgets(self):
        table_names = list(self.loaded_tables.keys())
        current_filter_selection = self.filter_table_combo.currentText()
        
        for combo in [self.filter_table_combo, self.pivot_table_combo, self.merge_left_table_combo, self.merge_right_table_combo]:
            current_selection = combo.currentText()
            combo.blockSignals(True)
            combo.clear()
            combo.addItems(table_names)
            if current_selection in table_names:
                combo.setCurrentText(current_selection)
            combo.blockSignals(False)
        
        self.loaded_tables_list.clear()
        self.loaded_tables_list.addItems(table_names)
        self.append_tables_list.clear()
        self.append_tables_list.addItems(table_names)

        if self.filter_table_combo.currentText() != current_filter_selection:
             self.filter_conditions.clear()
             while self.filter_conditions_layout.count():
                 child = self.filter_conditions_layout.takeAt(0)
                 if child.widget():
                     child.widget().deleteLater()

        self.populate_pivot_columns()
        self.populate_merge_keys('left')
        self.populate_merge_keys('right')
        
    def populate_pivot_columns(self):
        table_name = self.pivot_table_combo.currentText()
        self.pivot_index_list.clear()
        self.pivot_values_list.clear()
        if not table_name: return
        
        schema = self.con.execute(f"DESCRIBE {table_name}").fetchdf()
        all_cols = schema['column_name'].tolist()
        numeric_cols = schema[schema['column_type'].isin(['INTEGER', 'BIGINT', 'FLOAT', 'DOUBLE', 'DECIMAL'])]['column_name'].tolist()
        
        self.pivot_index_list.addItems(all_cols)
        self.pivot_values_list.addItems(numeric_cols)

    def populate_merge_keys(self, side):
        if side == 'left':
            list_widget = self.merge_left_key_list
            table_name = self.merge_left_table_combo.currentText()
        else:
            list_widget = self.merge_right_key_list
            table_name = self.merge_right_table_combo.currentText()
            
        list_widget.clear()
        if not table_name: return
        
        cols = self.con.execute(f"DESCRIBE {table_name}").fetchdf()['column_name'].tolist()
        list_widget.addItems(cols)

if __name__ == '__main__':
    app = QApplication(sys.argv)
    main_win = DataMasterApp()
    main_win.show()
    sys.exit(app.exec_())

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.