Tools: 💰 Build a Salary Prediction System in Python with a GUI
In this tutorial, we’ll build SalaryPredictor v2.0, a desktop app that predicts employee salaries using machine learning. It supports batch predictions, real-time previews, filtering, and live search—all in a clean GUI built with tkinter and ttkbootstrap. We’ll break it down step by step so even beginners can follow along. Step 1: Install Required Packages. Before we start, make sure you have Python installed. Then install the dependencies: `pip install pandas joblib tkinterdnd2 ttkbootstrap`. What each package is for: pandas handles CSV files; joblib loads ML models; tkinter + ttkbootstrap provide the GUI (tkinter is part of Python's standard library, so it does not appear in the pip command); tkinterdnd2 is optional and adds drag-and-drop support. Step 2: Set Up the Imports. Create a Python file called salary_predictor.py and start with the imports: Optional drag-and-drop support: ✅ Explanation:
We import packages for the GUI, CSV handling, threading, and machine learning. tkinterdnd2 enables drag-and-drop of files for convenience. Step 3: Helper Function for Resource Paths. If you package your app using PyInstaller, you need a helper to locate files: This helps the app locate icons or other files when bundled. Step 4: Create the Salary Prediction Worker. This class handles predictions in a background thread, so the GUI doesn’t freeze: The main prediction loop: uses only the model’s features (Age, Experience, EducationLevel); predicts a salary for each employee; sends results back to the GUI via callback functions. Step 5: Build the GUI App. This sets up the main variables and the GUI window. Step 6: File Selection and Drag-and-Drop. Allow users to select files or folders: Drag-and-drop (if tkinterdnd2 is installed): Step 7: Display Employee Predictions in a Table. Use ttk.Treeview to show real-time results: ✅ Explanation:
Each row shows: a checkbox for selecting rows, the source filename, the employee name, and the predicted salary. Step 8: Start Prediction. Start the worker in a background thread: Step 9: Filter and Search. Filter by min/max salary and search by employee name: ✅ Explanation:
Users can filter the table dynamically as data is processed. Step 10: Export Results. Export selected rows to a CSV: Step 11: Run the App. Finally, start the GUI loop: Step 12: Clone the Full Project. You can find the full source code and download it here:
👉 https://github.com/rogers-cyber/python-tiny-tools/tree/main/Salary-prediction-system Features: drag & drop CSV files or folders; real-time per-employee salary prediction; filter by min/max salary; search employees live; batch processing with progress bar; export results to CSV. This tutorial is designed to be beginner-friendly and easy to follow. Templates let you quickly answer FAQs or store snippets for re-use. Are you sure you want to ? It will become hidden in your post, but will still be visible via the comment's permalink. as well , this person and/or CODE_BLOCK:
import os
import sys
import threading
import pandas as pd
import joblib
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import ttkbootstrap as tb
from ttkbootstrap.constants import * CODE_BLOCK:
import os
import sys
import threading
import pandas as pd
import joblib
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import ttkbootstrap as tb
from ttkbootstrap.constants import * CODE_BLOCK:
import os
import sys
import threading
import pandas as pd
import joblib
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
import ttkbootstrap as tb
from ttkbootstrap.constants import * CODE_BLOCK:
# Drag-and-drop is optional: DND_ENABLED records whether tkinterdnd2 could be
# imported, and a hint is printed when it could not.
try:
    from tkinterdnd2 import TkinterDnD, DND_FILES
except ImportError:
    DND_ENABLED = False
    print("Drag & Drop requires tkinterdnd2: pip install tkinterdnd2")
else:
    DND_ENABLED = True
def resource_path(file_name):
    """Return *file_name* joined onto the application's base directory.

    The base directory is ``sys._MEIPASS`` when that attribute exists (the
    accompanying tutorial text ties this to PyInstaller-bundled apps);
    otherwise it is the directory containing this source file.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    bundle_dir = getattr(sys, "_MEIPASS", script_dir)
    return os.path.join(bundle_dir, file_name)
class SalaryPredictWorker:
    """Hold the inputs and the stop flag for a salary-prediction job."""

    def __init__(self, files, model_path, callbacks):
        """Store the job inputs and load the model from *model_path*.

        Args:
            files: Paths of the CSV files to process.
            model_path: Path handed to ``joblib.load`` to obtain the model.
            callbacks: Mapping of event name to callable.
        """
        self._running = True
        self.callbacks = callbacks
        self.files = files
        # NOTE(review): joblib files are pickle-based, so loading one can
        # execute arbitrary code -- only open model files you trust.
        self.model = joblib.load(model_path)

    def stop(self):
        """Clear the running flag so the job can end early."""
        self._running = False
def run(self):
    """Predict a salary for every row of every queued CSV file.

    Each path in ``self.files`` is read with pandas. The ``Age``,
    ``Experience`` and ``EducationLevel`` columns are passed to
    ``self.model.predict`` and the result is stored in a new
    ``PredictedSalary`` column. Every row is then handed to the ``"found"``
    callback (when one is registered) as ``callback(path, row_dict)``;
    ``row_dict`` also carries the row's DataFrame index under ``"_row_id"``.

    ``self._running`` is checked before each file and before each row, so
    clearing it ends the work early. Only the ``"found"`` callback is
    invoked here.
    """
    model_inputs = ["Age", "Experience", "EducationLevel"]
    for csv_path in self.files:
        if not self._running:
            return
        frame = pd.read_csv(csv_path)
        frame['PredictedSalary'] = self.model.predict(frame[model_inputs])
        for row_index, record in frame.iterrows():
            if not self._running:
                break
            payload = record.to_dict()
            payload['_row_id'] = row_index
            if "found" in self.callbacks:
                self.callbacks["found"](csv_path, payload)
class SalaryPredictApp:
    """Main window and shared state of the salary-prediction GUI."""

    APP_NAME = "SalaryPredictor"
    APP_VERSION = "2.0"

    def __init__(self):
        """Create the themed root window and the variables the GUI binds to."""
        # Use ttkbootstrap for a nice theme
        self.root = tb.Window(themename="darkly")
        self.root.title(f"{self.APP_NAME} v{self.APP_VERSION}")
        self.root.minsize(1300, 750)

        self.file_set = set()
        self.model_path = tk.StringVar()
        # Salary filter bounds and the live-search text.
        self.filter_min = tk.DoubleVar(value=0)
        self.filter_max = tk.DoubleVar(value=1e9)
        self.search_var = tk.StringVar()

        # Created up front so that code reading these attributes (filtering
        # reads all_rows; starting a run assigns worker_obj) never hits an
        # AttributeError before the first prediction has produced data.
        self.all_rows = {}
        self.worker_obj = None
def browse(self):
    """Ask for a folder and queue it for scanning; do nothing if cancelled."""
    chosen_dir = filedialog.askdirectory(title="Select Folder with CSV files")
    if not chosen_dir:
        # An empty result means nothing was chosen.
        return
    self._scan_and_queue_files_thread([chosen_dir])
def on_drop(self, event):
    """Queue the paths carried by a drop event for scanning.

    ``event.data`` is split into individual paths with Tcl's ``splitlist``
    before being passed on.
    """
    split_paths = self.root.tk.splitlist
    self._scan_and_queue_files_thread(split_paths(event.data))
# Results table: column id -> heading text, in display order.
column_titles = {
    "selected": "✅",
    "filename": "Filename",
    "employee_name": "Employee",
    "predicted_salary": "Predicted Salary",
}
self.tree = ttk.Treeview(main, columns=tuple(column_titles), show="headings")
for column_id, title in column_titles.items():
    self.tree.heading(column_id, text=title)
self.tree.pack(fill="both", expand=True)
def start(self):
    """Start a background prediction run over the files listed in the table.

    The file paths are read from the second value (the filename column) of
    every table row. Several rows can carry the same filename, so the paths
    are de-duplicated, keeping first-seen order, before being handed to the
    worker; otherwise the same file would be processed once per row.

    The worker is stored on ``self.worker_obj`` and its ``run`` method is
    executed in a daemon thread.
    """
    listed_paths = [
        self.tree.item(item_id)['values'][1]
        for item_id in self.tree.get_children()
    ]
    # dict.fromkeys drops repeats while preserving insertion order.
    selected_files = list(dict.fromkeys(listed_paths))

    self.worker_obj = SalaryPredictWorker(
        selected_files,
        self.model_path.get(),
        callbacks={
            "found": self.add_result,
            "progress": self.set_target,
            "finished": self.finish,
        },
    )
    threading.Thread(target=self.worker_obj.run, daemon=True).start()
def apply_filters(self):
    """Rebuild the table from ``self.all_rows`` using the current filters.

    A row is shown when its ``predicted_salary`` lies within the min/max
    bounds (inclusive) and the search text occurs, case-insensitively, in
    its ``employee_name``.

    A bound whose variable cannot be read as a number (for example while
    the entry is empty or half-typed) is treated as "no limit" instead of
    raising, so live filtering keeps working during editing.
    """
    def read_bound(variable, fallback):
        # DoubleVar.get() raises TclError when the text is not a float.
        try:
            return variable.get()
        except tk.TclError:
            return fallback

    min_salary = read_bound(self.filter_min, float("-inf"))
    max_salary = read_bound(self.filter_max, float("inf"))
    search_text = self.search_var.get().lower()

    self.tree.delete(*self.tree.get_children())
    for key, row in self.all_rows.items():
        salary = row['predicted_salary']
        if not min_salary <= salary <= max_salary:
            continue
        # str() guards against non-string names (e.g. missing CSV values).
        if search_text not in str(row['employee_name']).lower():
            continue
        self.tree.insert(
            "",
            "end",
            iid=key,
            values=(
                row['selected'],
                row['filename'],
                row['employee_name'],
                f"${salary:,.2f}",
            ),
        )
def export_results(self):
    """Write the rows currently in the table to a user-chosen CSV file.

    Asks for a destination path and returns without writing when none is
    given. The output has the header ``Filename,Employee,PredictedSalary``
    followed by one line per table row, taken from the row's second, third
    and fourth values.

    The ``csv`` module is used instead of manual string joining so that
    values containing commas or quotes are quoted correctly. This matters
    for the salary column, whose displayed text contains thousands
    separators (e.g. ``$50,000.00``).
    """
    import csv  # local import keeps this method self-contained

    path = filedialog.asksaveasfilename(defaultextension=".csv")
    if not path:
        return

    # newline="" hands line-ending control to the csv writer, as the csv
    # documentation requires; "\n" keeps the original line terminator.
    with open(path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["Filename", "Employee", "PredictedSalary"])
        for item_id in self.tree.get_children():
            values = self.tree.item(item_id)['values']
            writer.writerow([values[1], values[2], values[3]])
# Launch the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    app = SalaryPredictApp()
    app.run()