Skip to content

Columnar Report Generator- V2

What is new in this new columnar report generator?

  1. Lower RAM utilisation
  2. Limit on loading the rows in GUI so that app runs faster and smoother
  3. Option to export multiple accounts as different file

New Requirements List:

pip install pandas duckdb pyqt5
#you guessed it right, it just uses Duck DB extra

Code

import sys
import os
import re
import pandas as pd
import duckdb
from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QPushButton,
    QFileDialog, QLabel, QLineEdit, QComboBox, QListWidget, QAbstractItemView,
    QProgressBar, QGroupBox, QFormLayout, QMessageBox, QStatusBar, QTabWidget,
    QCheckBox, QRadioButton
)
from PyQt5.QtCore import Qt, QThread, pyqtSignal

# --- Worker Thread for Processing ---
class ProcessingThread(QThread):
    """
    Runs the heavy DuckDB processing in a separate thread to keep the UI responsive.
    It's self-contained to ensure thread safety.
    """
    progress = pyqtSignal(int)
    finished = pyqtSignal(object, str)
    error = pyqtSignal(str)

    def __init__(self, params):
        super().__init__()
        self.params = params

    def run(self):
        try:
            p = self.params
            self.progress.emit(10)
            
            # Each thread creates its own connection for thread safety.
            con = duckdb.connect(database=':memory:', read_only=False)
            
            # Create the source_data view within this thread's connection
            file_path = p['file_path']
            if file_path.lower().endswith('.csv'):
                con.execute(f"CREATE OR REPLACE VIEW source_data AS SELECT * FROM read_csv_auto('{file_path}', all_varchar=True);")
            else:
                temp_df = pd.read_excel(file_path, dtype=str, keep_default_na=False)
                con.register('source_data', temp_df)

            self.progress.emit(20)

            # Build the Core SQL Query with safe casting
            doc_col, acc_col = f'"{p["doc_col"]}"', f'"{p["acc_col"]}"'
            if p['mode'] == 'debit_credit':
                net_value_calc = f'COALESCE(TRY_CAST("{p["debit_col"]}" AS DOUBLE), 0) - COALESCE(TRY_CAST("{p["credit_col"]}" AS DOUBLE), 0)'
            else:
                net_value_calc = f'COALESCE(TRY_CAST("{p["net_col"]}" AS DOUBLE), 0)'

            key_accounts_tuple = tuple(p['key_accounts'])
            target_docs_query = f'SELECT DISTINCT {doc_col} FROM source_data WHERE {acc_col} IN {key_accounts_tuple}'
            
            self.progress.emit(40)

            # Construct and execute the main PIVOT query
            index_cols_str = ", ".join([f'"{c}"' for c in p["index_cols"]])
            pivot_sql = f"""
            PIVOT (
                SELECT {index_cols_str}, {acc_col}, ({net_value_calc}) AS _net_value
                FROM source_data
                WHERE {doc_col} IN ({target_docs_query})
            )
            ON {acc_col}
            USING SUM(_net_value)
            GROUP BY {index_cols_str}
            """
            
            self.progress.emit(60)
            result_df = con.execute(pivot_sql).fetchdf()
            con.close() # Clean up the connection

            if result_df.empty:
                self.error.emit("No data produced. The selected key accounts might not exist in any documents.")
                return

            # Reorder columns as requested
            account_cols = [c for c in result_df.columns if c not in p['index_cols']]
            other_account_cols = [c for c in account_cols if c not in p['key_accounts']]
            final_col_order = p['index_cols'] + p['key_accounts'] + other_account_cols
            result_df = result_df[[c for c in final_col_order if c in result_df.columns]]

            self.progress.emit(80)

            # Handle Exporting
            if p['export_separate_files']:
                base_path = p['output_path']
                for account in p['key_accounts']:
                    if account not in result_df.columns: continue
                    account_df = result_df[result_df[account] != 0]
                    safe_filename = re.sub(r'[\\/*?:"<>|]', "", account)
                    filepath = os.path.join(base_path, f"{safe_filename}.{p['export_format']}")
                    if p['export_format'] == 'xlsx':
                        account_df.to_excel(filepath, index=False)
                    else:
                        account_df.to_csv(filepath, index=False)
                self.finished.emit(base_path, f"Successfully exported files to folder.")
            else:
                output_path = p['output_path']
                if p['export_format'] == 'xlsx':
                    result_df.to_excel(output_path, index=False)
                else:
                    result_df.to_csv(output_path, index=False)
                self.finished.emit(output_path, "Successfully exported to a single file.")

        except Exception as e:
            self.error.emit(f"An error occurred during processing: {e}\n\nPlease check your column mappings and file data integrity.")


# --- Main Application Window ---
class JournalTransformerDuckDB(QMainWindow):
    def __init__(self):
        super().__init__()
        self.con = None # Central DuckDB connection object
        self.input_file_path = ""
        self.init_ui()

    def closeEvent(self, event):
        """Ensure the database connection is closed when the app exits."""
        if self.con:
            self.con.close()
        event.accept()

    def init_ui(self):
        self.setWindowTitle("Journal Transformer (DuckDB Production Edition)")
        self.setGeometry(100, 100, 900, 850)
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        main_layout = QVBoxLayout(central_widget)

        # Create UI sections
        self.create_file_selection_ui(main_layout)
        self.create_column_config_ui(main_layout)
        self.create_export_columns_ui(main_layout)
        self.create_account_selection_ui(main_layout)
        self.create_export_options_ui(main_layout)
        self.create_action_ui(main_layout)

    def create_file_selection_ui(self, parent_layout):
        group = QGroupBox("1. Select Input File (CSV or Excel)")
        layout = QHBoxLayout(group)
        self.select_file_btn = QPushButton("Browse...")
        self.select_file_btn.clicked.connect(self.load_file)
        self.file_path_label = QLineEdit("No file selected")
        self.file_path_label.setReadOnly(True)
        layout.addWidget(self.select_file_btn)
        layout.addWidget(self.file_path_label)
        parent_layout.addWidget(group)

    def create_column_config_ui(self, parent_layout):
        group = QGroupBox("2. Configure Data Columns")
        layout = QVBoxLayout(group)
        form_layout = QFormLayout()
        self.doc_col_combo = QComboBox()
        self.account_col_combo = QComboBox()
        form_layout.addRow("Document Identifier Column:", self.doc_col_combo)
        form_layout.addRow("Account Description Column:", self.account_col_combo)
        layout.addLayout(form_layout)
        self.mapping_tabs = QTabWidget()
        dc_tab, nv_tab = QWidget(), QWidget()
        self.mapping_tabs.addTab(dc_tab, "Debit/Credit Mode")
        self.mapping_tabs.addTab(nv_tab, "Net Value Mode")
        dc_layout, self.debit_col_combo, self.credit_col_combo = QFormLayout(dc_tab), QComboBox(), QComboBox()
        dc_layout.addRow("Debit Column:", self.debit_col_combo)
        dc_layout.addRow("Credit Column:", self.credit_col_combo)
        nv_layout, self.net_col_combo = QFormLayout(nv_tab), QComboBox()
        nv_layout.addRow("Net Debit/Credit Column:", self.net_col_combo)
        layout.addWidget(self.mapping_tabs)
        parent_layout.addWidget(group)

    def create_export_columns_ui(self, parent_layout):
        group = QGroupBox("3. Select Columns to Include in Export")
        self.index_cols_list_widget = QListWidget()
        self.index_cols_list_widget.setSelectionMode(QAbstractItemView.ExtendedSelection)
        layout = QVBoxLayout(group)
        layout.addWidget(self.index_cols_list_widget)
        parent_layout.addWidget(group)

    def create_account_selection_ui(self, parent_layout):
        group = QGroupBox("4. Select Key Accounts to Find Documents")
        self.search_box = QLineEdit()
        self.search_box.setPlaceholderText("Search for accounts...")
        self.search_box.textChanged.connect(self.filter_accounts)
        self.account_list_widget = QListWidget()
        self.account_list_widget.setSelectionMode(QAbstractItemView.ExtendedSelection)
        layout = QVBoxLayout(group)
        layout.addWidget(self.search_box)
        layout.addWidget(self.account_list_widget)
        parent_layout.addWidget(group)

    def create_export_options_ui(self, parent_layout):
        group = QGroupBox("5. Export Options")
        layout = QHBoxLayout(group)
        self.export_separate_files_check = QCheckBox("Export each account to a separate file")
        self.radio_excel, self.radio_csv = QRadioButton("Excel (.xlsx)"), QRadioButton("CSV (.csv)")
        self.radio_excel.setChecked(True)
        layout.addWidget(self.export_separate_files_check)
        layout.addStretch()
        layout.addWidget(QLabel("Format:"))
        layout.addWidget(self.radio_excel)
        layout.addWidget(self.radio_csv)
        parent_layout.addWidget(group)

    def create_action_ui(self, parent_layout):
        self.process_btn = QPushButton("Process and Export")
        self.process_btn.setStyleSheet("font-size: 16px; padding: 10px;")
        self.process_btn.setEnabled(False)
        self.process_btn.clicked.connect(self.run_processing)
        parent_layout.addWidget(self.process_btn)
        self.progress_bar = QProgressBar()
        self.progress_bar.setVisible(False)
        parent_layout.addWidget(self.progress_bar)
        self.setStatusBar(QStatusBar())
        self.statusBar().showMessage("Ready. Please select a file to begin.")

    def load_file(self):
        """Handles loading a new file and setting up the central DB connection."""
        path, _ = QFileDialog.getOpenFileName(self, "Select Data File", "", "Data Files (*.xlsx *.xls *.csv)")
        if not path: return
        
        self.process_btn.setEnabled(False)
        self.statusBar().showMessage("Establishing database connection and reading file...")
        QApplication.processEvents()

        try:
            # Close existing connection if any
            if self.con:
                self.con.close()
            
            # Establish the new central connection for this session
            self.con = duckdb.connect(database=':memory:', read_only=False)
            self.input_file_path = path
            self.file_path_label.setText(path)

            # Register the file as a view in our central connection
            if path.lower().endswith('.csv'):
                self.con.execute(f"CREATE OR REPLACE VIEW source_data AS SELECT * FROM read_csv_auto('{path}', all_varchar=True);")
                # Get columns directly from the view schema
                self.df_columns = [col[0] for col in self.con.execute("DESCRIBE source_data;").fetchall()]
            else:
                # Use pandas as a reader for Excel, then register
                df = pd.read_excel(path, dtype=str, keep_default_na=False)
                self.con.register('source_data_df', df)
                self.con.execute("CREATE OR REPLACE VIEW source_data AS SELECT * FROM source_data_df;")
                self.df_columns = df.columns.tolist()

            self.update_all_selectors()
            self.process_btn.setEnabled(True)
            self.statusBar().showMessage("File loaded and ready. Please configure options.", 5000)

        except Exception as e:
            QMessageBox.critical(self, "Error Loading File", f"Could not load the file into the database.\nError: {e}")
            if self.con: self.con.close()
            self.con = None

    def update_all_selectors(self):
        """Populates all UI selectors with columns and data from the source_data view."""
        for combo in [self.doc_col_combo, self.account_col_combo, self.debit_col_combo, self.credit_col_combo, self.net_col_combo]:
            combo.clear()
            combo.addItems(self.df_columns)
        self.index_cols_list_widget.clear()
        self.index_cols_list_widget.addItems(self.df_columns)
        self.account_list_widget.clear()

        # Auto-select likely columns
        non_index_cols = set()
        for i, col in enumerate(self.df_columns):
            lc_col = col.lower().replace("_", "").replace(" ", "")
            if "doc" in lc_col or "voucherno" in lc_col: self.doc_col_combo.setCurrentIndex(i)
            if "ledgerdesc" in lc_col or "accountname" in lc_col:
                self.account_col_combo.setCurrentIndex(i)
                non_index_cols.add(col)
            if "debit" in lc_col:
                self.debit_col_combo.setCurrentIndex(i)
                non_index_cols.add(col)
            if "credit" in lc_col:
                self.credit_col_combo.setCurrentIndex(i)
                non_index_cols.add(col)
            if "netvalue" in lc_col:
                self.net_col_combo.setCurrentIndex(i)
                non_index_cols.add(col)

        for i in range(self.index_cols_list_widget.count()):
            item = self.index_cols_list_widget.item(i)
            if item.text() not in non_index_cols:
                item.setSelected(True)
        
        # Now that selectors are set, populate the account list
        self.populate_account_list()
        self.account_col_combo.currentIndexChanged.connect(self.populate_account_list)

    def populate_account_list(self):
        """Fetches unique accounts using the central DB connection."""
        if not self.con or not self.account_col_combo.currentText(): return
        
        self.statusBar().showMessage("Fetching unique accounts...")
        QApplication.processEvents()
        
        try:
            acc_col = self.account_col_combo.currentText()
            query = f'SELECT DISTINCT "{acc_col}" FROM source_data WHERE "{acc_col}" IS NOT NULL ORDER BY 1'
            unique_accounts = self.con.execute(query).fetchdf()[acc_col].tolist()
            
            self.account_list_widget.clear()
            self.account_list_widget.addItems([str(a) for a in unique_accounts])
            self.statusBar().showMessage("Account list populated.", 3000)
        except Exception as e:
            QMessageBox.warning(self, "Account Fetch Error", f"Could not fetch accounts from the database.\nError: {e}")
            self.statusBar().showMessage("Error fetching accounts.", 5000)

    def filter_accounts(self, text):
        for i in range(self.account_list_widget.count()):
            item = self.account_list_widget.item(i)
            item.setHidden(text.lower() not in item.text().lower())

    def run_processing(self):
        """Validates inputs and starts the processing thread."""
        if not self.con:
            QMessageBox.warning(self, "No Data", "Please load a file first."); return
        key_accounts = [item.text() for item in self.account_list_widget.selectedItems()]
        if not key_accounts:
            QMessageBox.warning(self, "Input Missing", "Please select at least one key account."); return
        index_cols = [item.text() for item in self.index_cols_list_widget.selectedItems()]
        if not index_cols:
            QMessageBox.warning(self, "Input Missing", "Please select at least one column for export."); return

        params = {
            'file_path': self.input_file_path, 'key_accounts': key_accounts, 'index_cols': index_cols,
            'doc_col': self.doc_col_combo.currentText(), 'acc_col': self.account_col_combo.currentText(),
            'mode': 'debit_credit' if self.mapping_tabs.currentIndex() == 0 else 'net_value',
            'debit_col': self.debit_col_combo.currentText(), 'credit_col': self.credit_col_combo.currentText(),
            'net_col': self.net_col_combo.currentText(),
            'export_separate_files': self.export_separate_files_check.isChecked(),
            'export_format': 'xlsx' if self.radio_excel.isChecked() else 'csv'
        }
        
        ext = params['export_format']
        if params['export_separate_files']:
            output_path = QFileDialog.getExistingDirectory(self, "Select Folder to Save Files")
        else:
            file_filter = f"Excel Files (*.{ext})" if ext == 'xlsx' else f"CSV Files (*.{ext})"
            default_path = os.path.join(os.path.dirname(self.input_file_path), f"Transformed_Report.{ext}")
            output_path, _ = QFileDialog.getSaveFileName(self, "Save Output File", default_path, file_filter)
        
        if not output_path:
            self.statusBar().showMessage("Export cancelled.", 3000); return
        params['output_path'] = output_path

        self.process_btn.setEnabled(False)
        self.progress_bar.setVisible(True)
        self.statusBar().showMessage("Processing... This may take a while for large files.")
        
        self.thread = ProcessingThread(params)
        self.thread.progress.connect(self.progress_bar.setValue)
        self.thread.finished.connect(self.on_processing_finished)
        self.thread.error.connect(self.on_processing_error)
        self.thread.start()

    def on_processing_finished(self, path, message):
        self.progress_bar.setValue(100)
        self.statusBar().showMessage("Processing complete.", 5000)
        QMessageBox.information(self, "Success", f"{message}\n\nLocation: {path}")
        self.process_btn.setEnabled(True)
        self.progress_bar.setVisible(False)

    def on_processing_error(self, error_message):
        self.statusBar().showMessage("An error occurred during processing.", 5000)
        QMessageBox.critical(self, "Processing Error", error_message)
        self.process_btn.setEnabled(True)
        self.progress_bar.setVisible(False)


if __name__ == '__main__':
    app = QApplication(sys.argv)
    main_win = JournalTransformerDuckDB()
    main_win.show()
    sys.exit(app.exec_())

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.