What is new in this new columnar report generator?
- Lower RAM utilisation
- Limit on loading the rows in GUI so that app runs faster and smoother
- Option to export multiple accounts as different file
New Requirements List:
pip install pandas duckdb pyqt5
#you guessed it right, it just uses Duck DB extra
Code
import sys
import os
import re
import pandas as pd
import duckdb
from PyQt5.QtWidgets import (
QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, QPushButton,
QFileDialog, QLabel, QLineEdit, QComboBox, QListWidget, QAbstractItemView,
QProgressBar, QGroupBox, QFormLayout, QMessageBox, QStatusBar, QTabWidget,
QCheckBox, QRadioButton
)
from PyQt5.QtCore import Qt, QThread, pyqtSignal
# --- Worker Thread for Processing ---
class ProcessingThread(QThread):
"""
Runs the heavy DuckDB processing in a separate thread to keep the UI responsive.
It's self-contained to ensure thread safety.
"""
progress = pyqtSignal(int)
finished = pyqtSignal(object, str)
error = pyqtSignal(str)
def __init__(self, params):
super().__init__()
self.params = params
def run(self):
try:
p = self.params
self.progress.emit(10)
# Each thread creates its own connection for thread safety.
con = duckdb.connect(database=':memory:', read_only=False)
# Create the source_data view within this thread's connection
file_path = p['file_path']
if file_path.lower().endswith('.csv'):
con.execute(f"CREATE OR REPLACE VIEW source_data AS SELECT * FROM read_csv_auto('{file_path}', all_varchar=True);")
else:
temp_df = pd.read_excel(file_path, dtype=str, keep_default_na=False)
con.register('source_data', temp_df)
self.progress.emit(20)
# Build the Core SQL Query with safe casting
doc_col, acc_col = f'"{p["doc_col"]}"', f'"{p["acc_col"]}"'
if p['mode'] == 'debit_credit':
net_value_calc = f'COALESCE(TRY_CAST("{p["debit_col"]}" AS DOUBLE), 0) - COALESCE(TRY_CAST("{p["credit_col"]}" AS DOUBLE), 0)'
else:
net_value_calc = f'COALESCE(TRY_CAST("{p["net_col"]}" AS DOUBLE), 0)'
key_accounts_tuple = tuple(p['key_accounts'])
target_docs_query = f'SELECT DISTINCT {doc_col} FROM source_data WHERE {acc_col} IN {key_accounts_tuple}'
self.progress.emit(40)
# Construct and execute the main PIVOT query
index_cols_str = ", ".join([f'"{c}"' for c in p["index_cols"]])
pivot_sql = f"""
PIVOT (
SELECT {index_cols_str}, {acc_col}, ({net_value_calc}) AS _net_value
FROM source_data
WHERE {doc_col} IN ({target_docs_query})
)
ON {acc_col}
USING SUM(_net_value)
GROUP BY {index_cols_str}
"""
self.progress.emit(60)
result_df = con.execute(pivot_sql).fetchdf()
con.close() # Clean up the connection
if result_df.empty:
self.error.emit("No data produced. The selected key accounts might not exist in any documents.")
return
# Reorder columns as requested
account_cols = [c for c in result_df.columns if c not in p['index_cols']]
other_account_cols = [c for c in account_cols if c not in p['key_accounts']]
final_col_order = p['index_cols'] + p['key_accounts'] + other_account_cols
result_df = result_df[[c for c in final_col_order if c in result_df.columns]]
self.progress.emit(80)
# Handle Exporting
if p['export_separate_files']:
base_path = p['output_path']
for account in p['key_accounts']:
if account not in result_df.columns: continue
account_df = result_df[result_df[account] != 0]
safe_filename = re.sub(r'[\\/*?:"<>|]', "", account)
filepath = os.path.join(base_path, f"{safe_filename}.{p['export_format']}")
if p['export_format'] == 'xlsx':
account_df.to_excel(filepath, index=False)
else:
account_df.to_csv(filepath, index=False)
self.finished.emit(base_path, f"Successfully exported files to folder.")
else:
output_path = p['output_path']
if p['export_format'] == 'xlsx':
result_df.to_excel(output_path, index=False)
else:
result_df.to_csv(output_path, index=False)
self.finished.emit(output_path, "Successfully exported to a single file.")
except Exception as e:
self.error.emit(f"An error occurred during processing: {e}\n\nPlease check your column mappings and file data integrity.")
# --- Main Application Window ---
class JournalTransformerDuckDB(QMainWindow):
def __init__(self):
super().__init__()
self.con = None # Central DuckDB connection object
self.input_file_path = ""
self.init_ui()
def closeEvent(self, event):
"""Ensure the database connection is closed when the app exits."""
if self.con:
self.con.close()
event.accept()
def init_ui(self):
self.setWindowTitle("Journal Transformer (DuckDB Production Edition)")
self.setGeometry(100, 100, 900, 850)
central_widget = QWidget()
self.setCentralWidget(central_widget)
main_layout = QVBoxLayout(central_widget)
# Create UI sections
self.create_file_selection_ui(main_layout)
self.create_column_config_ui(main_layout)
self.create_export_columns_ui(main_layout)
self.create_account_selection_ui(main_layout)
self.create_export_options_ui(main_layout)
self.create_action_ui(main_layout)
def create_file_selection_ui(self, parent_layout):
group = QGroupBox("1. Select Input File (CSV or Excel)")
layout = QHBoxLayout(group)
self.select_file_btn = QPushButton("Browse...")
self.select_file_btn.clicked.connect(self.load_file)
self.file_path_label = QLineEdit("No file selected")
self.file_path_label.setReadOnly(True)
layout.addWidget(self.select_file_btn)
layout.addWidget(self.file_path_label)
parent_layout.addWidget(group)
def create_column_config_ui(self, parent_layout):
group = QGroupBox("2. Configure Data Columns")
layout = QVBoxLayout(group)
form_layout = QFormLayout()
self.doc_col_combo = QComboBox()
self.account_col_combo = QComboBox()
form_layout.addRow("Document Identifier Column:", self.doc_col_combo)
form_layout.addRow("Account Description Column:", self.account_col_combo)
layout.addLayout(form_layout)
self.mapping_tabs = QTabWidget()
dc_tab, nv_tab = QWidget(), QWidget()
self.mapping_tabs.addTab(dc_tab, "Debit/Credit Mode")
self.mapping_tabs.addTab(nv_tab, "Net Value Mode")
dc_layout, self.debit_col_combo, self.credit_col_combo = QFormLayout(dc_tab), QComboBox(), QComboBox()
dc_layout.addRow("Debit Column:", self.debit_col_combo)
dc_layout.addRow("Credit Column:", self.credit_col_combo)
nv_layout, self.net_col_combo = QFormLayout(nv_tab), QComboBox()
nv_layout.addRow("Net Debit/Credit Column:", self.net_col_combo)
layout.addWidget(self.mapping_tabs)
parent_layout.addWidget(group)
def create_export_columns_ui(self, parent_layout):
group = QGroupBox("3. Select Columns to Include in Export")
self.index_cols_list_widget = QListWidget()
self.index_cols_list_widget.setSelectionMode(QAbstractItemView.ExtendedSelection)
layout = QVBoxLayout(group)
layout.addWidget(self.index_cols_list_widget)
parent_layout.addWidget(group)
def create_account_selection_ui(self, parent_layout):
group = QGroupBox("4. Select Key Accounts to Find Documents")
self.search_box = QLineEdit()
self.search_box.setPlaceholderText("Search for accounts...")
self.search_box.textChanged.connect(self.filter_accounts)
self.account_list_widget = QListWidget()
self.account_list_widget.setSelectionMode(QAbstractItemView.ExtendedSelection)
layout = QVBoxLayout(group)
layout.addWidget(self.search_box)
layout.addWidget(self.account_list_widget)
parent_layout.addWidget(group)
def create_export_options_ui(self, parent_layout):
group = QGroupBox("5. Export Options")
layout = QHBoxLayout(group)
self.export_separate_files_check = QCheckBox("Export each account to a separate file")
self.radio_excel, self.radio_csv = QRadioButton("Excel (.xlsx)"), QRadioButton("CSV (.csv)")
self.radio_excel.setChecked(True)
layout.addWidget(self.export_separate_files_check)
layout.addStretch()
layout.addWidget(QLabel("Format:"))
layout.addWidget(self.radio_excel)
layout.addWidget(self.radio_csv)
parent_layout.addWidget(group)
def create_action_ui(self, parent_layout):
self.process_btn = QPushButton("Process and Export")
self.process_btn.setStyleSheet("font-size: 16px; padding: 10px;")
self.process_btn.setEnabled(False)
self.process_btn.clicked.connect(self.run_processing)
parent_layout.addWidget(self.process_btn)
self.progress_bar = QProgressBar()
self.progress_bar.setVisible(False)
parent_layout.addWidget(self.progress_bar)
self.setStatusBar(QStatusBar())
self.statusBar().showMessage("Ready. Please select a file to begin.")
def load_file(self):
"""Handles loading a new file and setting up the central DB connection."""
path, _ = QFileDialog.getOpenFileName(self, "Select Data File", "", "Data Files (*.xlsx *.xls *.csv)")
if not path: return
self.process_btn.setEnabled(False)
self.statusBar().showMessage("Establishing database connection and reading file...")
QApplication.processEvents()
try:
# Close existing connection if any
if self.con:
self.con.close()
# Establish the new central connection for this session
self.con = duckdb.connect(database=':memory:', read_only=False)
self.input_file_path = path
self.file_path_label.setText(path)
# Register the file as a view in our central connection
if path.lower().endswith('.csv'):
self.con.execute(f"CREATE OR REPLACE VIEW source_data AS SELECT * FROM read_csv_auto('{path}', all_varchar=True);")
# Get columns directly from the view schema
self.df_columns = [col[0] for col in self.con.execute("DESCRIBE source_data;").fetchall()]
else:
# Use pandas as a reader for Excel, then register
df = pd.read_excel(path, dtype=str, keep_default_na=False)
self.con.register('source_data_df', df)
self.con.execute("CREATE OR REPLACE VIEW source_data AS SELECT * FROM source_data_df;")
self.df_columns = df.columns.tolist()
self.update_all_selectors()
self.process_btn.setEnabled(True)
self.statusBar().showMessage("File loaded and ready. Please configure options.", 5000)
except Exception as e:
QMessageBox.critical(self, "Error Loading File", f"Could not load the file into the database.\nError: {e}")
if self.con: self.con.close()
self.con = None
def update_all_selectors(self):
"""Populates all UI selectors with columns and data from the source_data view."""
for combo in [self.doc_col_combo, self.account_col_combo, self.debit_col_combo, self.credit_col_combo, self.net_col_combo]:
combo.clear()
combo.addItems(self.df_columns)
self.index_cols_list_widget.clear()
self.index_cols_list_widget.addItems(self.df_columns)
self.account_list_widget.clear()
# Auto-select likely columns
non_index_cols = set()
for i, col in enumerate(self.df_columns):
lc_col = col.lower().replace("_", "").replace(" ", "")
if "doc" in lc_col or "voucherno" in lc_col: self.doc_col_combo.setCurrentIndex(i)
if "ledgerdesc" in lc_col or "accountname" in lc_col:
self.account_col_combo.setCurrentIndex(i)
non_index_cols.add(col)
if "debit" in lc_col:
self.debit_col_combo.setCurrentIndex(i)
non_index_cols.add(col)
if "credit" in lc_col:
self.credit_col_combo.setCurrentIndex(i)
non_index_cols.add(col)
if "netvalue" in lc_col:
self.net_col_combo.setCurrentIndex(i)
non_index_cols.add(col)
for i in range(self.index_cols_list_widget.count()):
item = self.index_cols_list_widget.item(i)
if item.text() not in non_index_cols:
item.setSelected(True)
# Now that selectors are set, populate the account list
self.populate_account_list()
self.account_col_combo.currentIndexChanged.connect(self.populate_account_list)
def populate_account_list(self):
"""Fetches unique accounts using the central DB connection."""
if not self.con or not self.account_col_combo.currentText(): return
self.statusBar().showMessage("Fetching unique accounts...")
QApplication.processEvents()
try:
acc_col = self.account_col_combo.currentText()
query = f'SELECT DISTINCT "{acc_col}" FROM source_data WHERE "{acc_col}" IS NOT NULL ORDER BY 1'
unique_accounts = self.con.execute(query).fetchdf()[acc_col].tolist()
self.account_list_widget.clear()
self.account_list_widget.addItems([str(a) for a in unique_accounts])
self.statusBar().showMessage("Account list populated.", 3000)
except Exception as e:
QMessageBox.warning(self, "Account Fetch Error", f"Could not fetch accounts from the database.\nError: {e}")
self.statusBar().showMessage("Error fetching accounts.", 5000)
def filter_accounts(self, text):
for i in range(self.account_list_widget.count()):
item = self.account_list_widget.item(i)
item.setHidden(text.lower() not in item.text().lower())
def run_processing(self):
"""Validates inputs and starts the processing thread."""
if not self.con:
QMessageBox.warning(self, "No Data", "Please load a file first."); return
key_accounts = [item.text() for item in self.account_list_widget.selectedItems()]
if not key_accounts:
QMessageBox.warning(self, "Input Missing", "Please select at least one key account."); return
index_cols = [item.text() for item in self.index_cols_list_widget.selectedItems()]
if not index_cols:
QMessageBox.warning(self, "Input Missing", "Please select at least one column for export."); return
params = {
'file_path': self.input_file_path, 'key_accounts': key_accounts, 'index_cols': index_cols,
'doc_col': self.doc_col_combo.currentText(), 'acc_col': self.account_col_combo.currentText(),
'mode': 'debit_credit' if self.mapping_tabs.currentIndex() == 0 else 'net_value',
'debit_col': self.debit_col_combo.currentText(), 'credit_col': self.credit_col_combo.currentText(),
'net_col': self.net_col_combo.currentText(),
'export_separate_files': self.export_separate_files_check.isChecked(),
'export_format': 'xlsx' if self.radio_excel.isChecked() else 'csv'
}
ext = params['export_format']
if params['export_separate_files']:
output_path = QFileDialog.getExistingDirectory(self, "Select Folder to Save Files")
else:
file_filter = f"Excel Files (*.{ext})" if ext == 'xlsx' else f"CSV Files (*.{ext})"
default_path = os.path.join(os.path.dirname(self.input_file_path), f"Transformed_Report.{ext}")
output_path, _ = QFileDialog.getSaveFileName(self, "Save Output File", default_path, file_filter)
if not output_path:
self.statusBar().showMessage("Export cancelled.", 3000); return
params['output_path'] = output_path
self.process_btn.setEnabled(False)
self.progress_bar.setVisible(True)
self.statusBar().showMessage("Processing... This may take a while for large files.")
self.thread = ProcessingThread(params)
self.thread.progress.connect(self.progress_bar.setValue)
self.thread.finished.connect(self.on_processing_finished)
self.thread.error.connect(self.on_processing_error)
self.thread.start()
def on_processing_finished(self, path, message):
self.progress_bar.setValue(100)
self.statusBar().showMessage("Processing complete.", 5000)
QMessageBox.information(self, "Success", f"{message}\n\nLocation: {path}")
self.process_btn.setEnabled(True)
self.progress_bar.setVisible(False)
def on_processing_error(self, error_message):
self.statusBar().showMessage("An error occurred during processing.", 5000)
QMessageBox.critical(self, "Processing Error", error_message)
self.process_btn.setEnabled(True)
self.progress_bar.setVisible(False)
if __name__ == '__main__':
app = QApplication(sys.argv)
main_win = JournalTransformerDuckDB()
main_win.show()
sys.exit(app.exec_())