mirror of
https://github.com/youronlydimwit/Data_ScienceUse_Cases.git
synced 2025-12-14 02:40:02 +01:00
Add files via upload
This commit is contained in:
committed by
GitHub
parent
a933346b5e
commit
a81d6b635d
639
Scripts/Synthetic_Data_Generator/SDG_2.py
Normal file
639
Scripts/Synthetic_Data_Generator/SDG_2.py
Normal file
@@ -0,0 +1,639 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import tkinter as tk
|
||||||
|
from tkinter import ttk, messagebox, filedialog
|
||||||
|
import uuid
|
||||||
|
import random
|
||||||
|
import csv
|
||||||
|
import math
|
||||||
|
|
||||||
|
# ---------- Helper functions ----------
|
||||||
|
def new_col_id():
    """Return a fresh random column identifier (a UUID4 rendered as a string)."""
    fresh_uuid = uuid.uuid4()
    return str(fresh_uuid)
|
||||||
|
|
||||||
|
def clamp(v, a, b):
    """Limit ``v`` to the interval [a, b] and return the result.

    If ``a`` is greater than ``b`` the lower bound ``a`` is returned.
    """
    capped = v if v < b else b
    return capped if capped > a else a
|
||||||
|
|
||||||
|
def format_decimals(dec):
    """Return a zero pattern with ``dec`` decimal places.

    ``format_decimals(2)`` gives ``"0.00"``; a ``dec`` that is not positive
    gives ``"0"``.
    """
    if not dec > 0:
        return "0"
    return "0." + "0" * dec
|
||||||
|
|
||||||
|
# ---------- Column model ----------
|
||||||
|
class Column:
    """Settings describing one generated column.

    Attributes are plain, publicly writable fields that the GUI edits in
    place; ``range_min`` / ``range_max`` report the value span implied by
    the current ``type``.
    """

    def __init__(self, name="col", col_id=None):
        # Identity: a caller-supplied id wins, otherwise a new one is made
        self.id = col_id if col_id else new_col_id()
        self.name = name
        # Value source: "Random" or "Fixed"
        self.type = "Random"
        # Bounds used by "Random", constant used by "Fixed"
        self.min, self.max = 0.0, 10.0
        self.fixed = 0.0
        # Decimal places kept when values are rounded
        self.decimals = 0
        # Optional coupling to another column (referenced by its id)
        self.linearity = dict(enabled=False, target_id=None, weight=0.0)

    def range_min(self):
        """Return the lowest value as a float: ``min`` for "Random", else ``fixed``."""
        bound = self.min if self.type == "Random" else self.fixed
        return float(bound)

    def range_max(self):
        """Return the highest value as a float: ``max`` for "Random", else ``fixed``."""
        bound = self.max if self.type == "Random" else self.fixed
        return float(bound)
|
||||||
|
|
||||||
|
# ---------- Main App ----------
|
||||||
|
class SyntheticDataGUI(tk.Tk):
|
||||||
|
def __init__(self):
    """Create the main window, seed two sample columns and build the widgets."""
    super().__init__()
    self.title("Synthetic Data Generator v1.0")
    # Tall default size so the preview table fits below the column editor
    self.geometry("920x800")
    self.minsize(800, 600)

    # Column definitions (list of Column); the app starts with "A" and "B"
    self.columns = []
    for sample_name in ("A", "B"):
        self.add_column(sample_name)

    self._build_ui()
|
||||||
|
|
||||||
|
# ---------- UI building ----------
|
||||||
|
def _build_ui(self):
    """Create and lay out every widget of the main window.

    The window is a vertical ``ttk.PanedWindow`` with two panes:

    * top: the toolbar, a fixed header line and a scrollable canvas that
      holds one editor row per column;
    * bottom: a ``ttk.Treeview`` preview table and a status label.

    Attributes set here: ``rows_var``, ``canvas``, ``inner_frame``,
    ``row_frames``, ``selected_col_id``, ``preview_tree`` and
    ``preview_status``.
    """
    # Vertical split: column editor on top, preview underneath
    main_pane = ttk.PanedWindow(self, orient=tk.VERTICAL)
    main_pane.pack(fill=tk.BOTH, expand=True, padx=8, pady=6)

    top_frame = ttk.Frame(main_pane)
    main_pane.add(top_frame, weight=1)

    # ---- toolbar (pack order defines the left-to-right order) ----
    top = ttk.Frame(top_frame)
    top.pack(side="top", fill="x", padx=8, pady=6)

    ttk.Button(top, text="Add column", command=self.ui_add_column).pack(
        side="left", padx=(0, 6))
    ttk.Button(top, text="Remove selected", command=self.ui_remove_selected).pack(
        side="left", padx=(0, 6))
    ttk.Button(top, text="Help / Guide", command=self.show_help).pack(side="right")

    # Number of rows to generate / export
    self.rows_var = tk.IntVar(value=100)
    ttk.Spinbox(top, from_=1, to=1000000, textvariable=self.rows_var, width=8).pack(
        side="left", padx=(0, 6))

    ttk.Button(top, text="Generate & Preview", command=self.generate_and_preview).pack(
        side="left", padx=(12, 6))
    ttk.Button(top, text="Export CSV", command=self.export_csv_dialog).pack(
        side="left", padx=(0, 6))

    ttk.Label(top, text=" ").pack(side="left", expand=True)  # spacer

    # ---- fixed header line (outside the scrolled area) ----
    hdr = ttk.Frame(top_frame)
    hdr.pack(fill="x", padx=8, pady=(0, 6))
    header_font = ('Arial', 10, 'bold italic')
    header_specs = (
        ("Name", 20, (10, 0)),
        ("Type / Params", 46, (20, 0)),
        ("Rounding", 10, (0, 0)),
        ("Advanced", 16, (10, 0)),
    )
    for position, (caption, width, padx) in enumerate(header_specs):
        ttk.Label(hdr, text=caption, width=width, font=header_font).grid(
            row=0, column=position, sticky="w", padx=padx)

    # ---- scrollable list of column rows ----
    scroll_container = ttk.Frame(top_frame)
    scroll_container.pack(fill="both", expand=True, padx=8, pady=6)

    self.canvas = tk.Canvas(scroll_container)
    self.canvas.pack(side="left", fill="both", expand=True)
    scrollbar = ttk.Scrollbar(scroll_container, orient="vertical", command=self.canvas.yview)
    scrollbar.pack(side="right", fill="y")
    self.canvas.configure(yscrollcommand=scrollbar.set)

    def sync_scrollregion(_event=None):
        self.canvas.configure(scrollregion=self.canvas.bbox("all"))

    self.canvas.bind('<Configure>', sync_scrollregion)

    self.inner_frame = ttk.Frame(self.canvas)
    self.canvas.create_window((0, 0), window=self.inner_frame, anchor='nw')
    # The content changes size when rows are added or removed or when an
    # "Advanced" panel is opened, without the canvas itself being resized.
    # Track the content's own size so the scrollbar always covers it.
    self.inner_frame.bind('<Configure>', sync_scrollregion)

    # Row bookkeeping must exist before the first refresh creates rows
    self.row_frames = {}
    self.selected_col_id = None

    self.refresh_column_list()

    # ---- preview pane ----
    bottom_frame = ttk.Frame(main_pane)
    main_pane.add(bottom_frame, weight=1)

    preview_label = ttk.Label(bottom_frame, text="Preview (First 10 rows):",
                              font=('Arial', 10, 'bold italic'))
    preview_label.pack(anchor='w', padx=8, pady=(8, 4))

    table_container = ttk.Frame(bottom_frame)
    table_container.pack(fill='both', expand=True, padx=8, pady=(0, 8))

    self.preview_tree = ttk.Treeview(table_container, show='headings', height=10)
    vsb = ttk.Scrollbar(table_container, orient="vertical", command=self.preview_tree.yview)
    hsb = ttk.Scrollbar(table_container, orient="horizontal", command=self.preview_tree.xview)
    self.preview_tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set)

    self.preview_tree.grid(row=0, column=0, sticky='nsew')
    vsb.grid(row=0, column=1, sticky='ns')
    hsb.grid(row=1, column=0, sticky='ew')

    # Only the table cell stretches when the pane is resized
    table_container.grid_rowconfigure(0, weight=1)
    table_container.grid_columnconfigure(0, weight=1)

    self.preview_status = ttk.Label(
        bottom_frame,
        text="No data generated yet. Click 'Generate & Preview' to see sample data.")
    self.preview_status.pack(anchor='w', padx=8, pady=(0, 8))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- Column management ----------
|
||||||
|
def add_column(self, name="col"):
    """Create a Column called ``name``, append it to ``self.columns`` and return it."""
    self.columns.append(Column(name=name))
    return self.columns[-1]
|
||||||
|
|
||||||
|
def remove_column_by_id(self, cid):
    """Drop every column whose id equals ``cid``.

    ``self.columns`` is rebound to a new list; the old list is not mutated.
    """
    survivors = []
    for column in self.columns:
        if column.id != cid:
            survivors.append(column)
    self.columns = survivors
|
||||||
|
|
||||||
|
def find_column(self, cid):
    """Return the first column whose id equals ``cid``, or ``None`` if there is none."""
    matches = (column for column in self.columns if column.id == cid)
    return next(matches, None)
|
||||||
|
|
||||||
|
# ---------- UI callbacks ----------
|
||||||
|
def ui_add_column(self):
    """Toolbar callback: add a column named with the first unused ``col<N>``.

    The row list is rebuilt and the new column becomes the selected one.
    """
    taken = {column.name for column in self.columns}
    suffix = 1
    candidate = f"col{suffix}"
    while candidate in taken:
        suffix += 1
        candidate = f"col{suffix}"
    created = self.add_column(candidate)
    self.refresh_column_list()
    # the freshly added column is now the selection
    self.selected_col_id = created.id
|
||||||
|
|
||||||
|
def ui_remove_selected(self):
    """Toolbar callback: remove the selected column and rebuild the row list.

    Without a selection an information dialog asks the user to pick a row
    first and nothing is removed.
    """
    if self.selected_col_id:
        self.remove_column_by_id(self.selected_col_id)
        self.selected_col_id = None
        self.refresh_column_list()
        return
    messagebox.showinfo("Remove column", "Select a column row by clicking its name first.")
|
||||||
|
|
||||||
|
def show_help(self):
    """Open a separate window with usage notes, credits and a usage warning."""
    window = tk.Toplevel(self)
    window.title("Help / Guide")
    window.geometry("500x400")

    # Usage instructions, shown left-aligned at the top
    guide_parts = [
        "Welcome to the Synthetic Data Generator!\n\n",
        "➤ Use 'Add column' to create new data columns.\n",
        "➤ Choose 'Random' or 'Fixed' values for each column.\n",
        "➤ Adjust ranges, rounding, and advanced settings as needed.\n",
        "➤ 'Generate & Preview' shows sample data (first 10 rows).\n",
        "➤ 'Export CSV' saves the generated dataset to a CSV file.\n\n",
        "Advanced:\n",
        " - Use the 'Advanced' button per column to enable linearity, basically\n",
        " making one column follow another with a weighted influence.\n\n",
    ]
    guide_label = tk.Label(
        window,
        text="".join(guide_parts),
        justify="left",
        anchor="nw",
        font=("Arial", 10),
        wraplength=460,
    )
    guide_label.pack(fill="both", expand=True, padx=12, pady=12)

    # Author credits, small and centred
    credit_parts = [
        "This will be one of my scripts/executables.\n",
        "Feel free to share!\n\n",
        "You can leave feedback on my LinkedIn:\n",
        "Sang Putu Sandhyana Yogi\n\n",
        "Thank you so much!",
    ]
    credit_label = tk.Label(
        window,
        text="".join(credit_parts),
        font=("Arial", 7, "italic"),
        justify="center",
    )
    credit_label.pack(fill="both", expand=True, padx=5, pady=5)

    # Notice about acceptable use, in red at the bottom
    notice_label = tk.Label(
        window,
        text="For Research / Training Purposes Only.\nDon't fake your Thesis Data!",
        font=("Arial", 10, "bold italic"),
        fg="red",
        justify="center",
    )
    notice_label.pack(pady=(0, 12))
|
||||||
|
|
||||||
|
def refresh_column_list(self):
    """Rebuild the column editor rows from ``self.columns``.

    Every widget packed into ``self.inner_frame`` is destroyed and one new
    row per column is created through ``_create_column_row``.  The header
    line lives outside ``inner_frame``, so nothing packed here has to
    survive; keeping the first packed child would leave a stale copy of the
    first column's row above the rebuilt list on every refresh.
    """
    for child in list(self.inner_frame.pack_slaves()):
        child.destroy()
    # The frames recorded for selection highlighting were just destroyed
    self.row_frames.clear()

    for c in self.columns:
        self._create_column_row(c)

    # Let Tk compute the new layout before measuring the scrollable area
    self.inner_frame.update_idletasks()
    self.canvas.configure(scrollregion=self.canvas.bbox("all"))
|
||||||
|
|
||||||
|
def _create_column_row(self, col: "Column"):
    """Build the editor row for ``col`` and pack it into ``self.inner_frame``.

    The row holds a name entry, a type selector with min/max and fixed-value
    entries, a rounding selector and an "Advanced" toggle that reveals the
    linearity settings (enable flag, target column, weight).  The row frame
    is stored in ``self.row_frames`` under ``col.id``.

    All edits are written back to ``col`` by one handler,
    ``apply_all_changes``.  Each widget event is bound exactly once, because
    binding the same event again replaces the earlier handler.  Widgets that
    do not reliably produce a FocusOut (check box, scale, drop-downs) commit
    through their own command / selection event instead.

    Args:
        col: the Column this row edits; it is mutated in place.
    """
    row = ttk.Frame(self.inner_frame, relief="ridge", padding=6)
    row.pack(fill="x", pady=4)
    self.row_frames[col.id] = row
    # Give the style used for the selected row a visible appearance
    ttk.Style(self).configure("Selected.TFrame", background="#cfe3ff")

    # ---- name ----
    name_var = tk.StringVar(value=col.name)
    name_entry = ttk.Entry(row, textvariable=name_var, width=24)
    name_entry.grid(row=0, column=0, sticky="w")

    # ---- type + range / fixed value ----
    params = ttk.Frame(row)
    params.grid(row=0, column=1, sticky="w", padx=(8, 8))

    type_var = tk.StringVar(value=col.type)
    type_combo = ttk.Combobox(params, values=["Random", "Fixed"], width=8,
                              state="readonly", textvariable=type_var)
    type_combo.grid(row=0, column=0, padx=(0, 6))

    min_var = tk.StringVar(value=str(col.min))
    max_var = tk.StringVar(value=str(col.max))
    fixed_var = tk.StringVar(value=str(col.fixed))

    min_entry = ttk.Entry(params, textvariable=min_var, width=10)
    min_entry.grid(row=0, column=1, padx=(0, 4))
    ttk.Label(params, text="to").grid(row=0, column=2)
    max_entry = ttk.Entry(params, textvariable=max_var, width=10)
    max_entry.grid(row=0, column=3, padx=(4, 8))
    fixed_entry = ttk.Entry(params, textvariable=fixed_var, width=12)
    fixed_entry.grid(row=0, column=4, padx=(4, 8))

    # ---- rounding ----
    round_var = tk.IntVar(value=col.decimals)
    round_combo = ttk.Combobox(row, values=[0, 1, 2, 3, 4, 5, 6], width=4,
                               state="readonly", textvariable=round_var)
    round_combo.grid(row=0, column=2)

    # ---- advanced toggle + hidden panel ----
    adv_btn = ttk.Button(row, text="Advanced ▾", width=12)
    adv_btn.grid(row=0, column=3, padx=(8, 0))

    adv_frame = ttk.Frame(row)
    adv_frame.grid(row=1, column=0, columnspan=4, pady=(8, 0), sticky="w")
    adv_frame.grid_remove()  # hidden until the button is pressed

    lin_enabled_var = tk.BooleanVar(value=col.linearity["enabled"])
    lin_check = ttk.Checkbutton(adv_frame, text="Enable Linearity (follow another column)",
                                variable=lin_enabled_var)
    lin_check.grid(row=0, column=0, sticky="w")

    ttk.Label(adv_frame, text="Target:").grid(row=0, column=1, sticky="e", padx=(12, 2))
    target_combo = ttk.Combobox(adv_frame, width=18)
    target_combo.grid(row=0, column=2, padx=(4, 8))
    # ids of the columns listed in target_combo, in the same order, so the
    # selection is resolved by position instead of by (renamable) name
    target_ids = []

    def reload_targets():
        """Refill the target list from the current columns and reselect."""
        others = [c for c in self.columns if c.id != col.id]
        target_ids[:] = [c.id for c in others]
        if not others:
            target_combo.configure(values=["(no other columns)"], state="disabled")
            target_combo.set("")
            return
        target_combo.configure(values=[c.name for c in others], state="readonly")
        current_target = col.linearity["target_id"]
        if current_target in target_ids:
            target_combo.current(target_ids.index(current_target))
        else:
            target_combo.set("")

    # Refresh right before the drop-down opens so renamed columns show up
    target_combo.configure(postcommand=reload_targets)
    reload_targets()

    ttk.Label(adv_frame, text="Weight:").grid(row=0, column=3, padx=(8, 2))
    weight_var = tk.DoubleVar(value=col.linearity["weight"])
    weight_spin = ttk.Spinbox(adv_frame, from_=0.0, to=1.0, increment=0.01,
                              textvariable=weight_var, width=6)
    weight_spin.grid(row=0, column=4)
    weight_scale = ttk.Scale(adv_frame, from_=0.0, to=1.0, orient="horizontal",
                             variable=weight_var, length=120)
    weight_scale.grid(row=0, column=5, padx=(8, 0))

    def refresh_scrollregion():
        """Re-measure the scrollable area after the row changed size."""
        self.inner_frame.update_idletasks()
        self.canvas.configure(scrollregion=self.canvas.bbox("all"))

    refresh_scrollregion()

    def toggle_adv():
        """Show or hide the advanced panel and flip the button arrow."""
        if adv_frame.winfo_ismapped():
            adv_frame.grid_remove()
            adv_btn.config(text="Advanced ▾")
        else:
            reload_targets()
            adv_frame.grid()
            adv_btn.config(text="Advanced ▴")
        # the row's height changed, so the scrollable area did too
        refresh_scrollregion()

    def update_type_widgets():
        """Enable only the entries that apply to the selected type."""
        is_random = type_var.get() == "Random"
        min_entry.configure(state="normal" if is_random else "disabled")
        max_entry.configure(state="normal" if is_random else "disabled")
        fixed_entry.configure(state="disabled" if is_random else "normal")

    update_type_widgets()

    def read_number(var, current):
        """Parse ``var`` as a finite float; on failure restore ``current``."""
        try:
            value = float(var.get())
        except ValueError:
            value = None
        if value is None or not math.isfinite(value):
            # show the value that is actually in effect instead of bad text
            var.set(str(current))
            return current
        return value

    def apply_all_changes(_event=None):
        """Copy the state of every widget in this row into ``col``."""
        col.name = name_var.get().strip() or col.name
        col.type = type_var.get()

        col.min = read_number(min_var, col.min)
        col.max = read_number(max_var, col.max)
        col.fixed = read_number(fixed_var, col.fixed)
        if col.min > col.max:
            # keep the range ordered and show the swap to the user
            col.min, col.max = col.max, col.min
            min_var.set(str(col.min))
            max_var.set(str(col.max))

        try:
            col.decimals = int(round_var.get())
        except (tk.TclError, ValueError):
            pass  # keep the previous rounding

        col.linearity["enabled"] = bool(lin_enabled_var.get())
        try:
            col.linearity["weight"] = float(weight_var.get())
        except (tk.TclError, ValueError):
            pass  # unparsable spinbox text: keep the previous weight

        chosen = target_combo.current()
        if 0 <= chosen < len(target_ids):
            col.linearity["target_id"] = target_ids[chosen]
        else:
            # nothing selected, or no other column exists
            col.linearity["target_id"] = None

    def on_type_selected(_event=None):
        apply_all_changes()
        update_type_widgets()

    def select_row(_event=None):
        """Mark this row's column as selected and highlight its frame."""
        self._on_row_select(col.id)
        for cid, frame in list(self.row_frames.items()):
            if not frame.winfo_exists():
                # frame of a row destroyed by an earlier rebuild
                del self.row_frames[cid]
                continue
            frame.configure(style="Selected.TFrame" if cid == col.id else "TFrame")

    # ---- wiring: one handler per widget event ----
    for widget in (name_entry, min_entry, max_entry, fixed_entry, weight_spin):
        widget.bind("<FocusOut>", apply_all_changes)
    type_combo.bind("<<ComboboxSelected>>", on_type_selected)
    round_combo.bind("<<ComboboxSelected>>", apply_all_changes)
    target_combo.bind("<<ComboboxSelected>>", apply_all_changes)
    lin_check.configure(command=apply_all_changes)
    weight_spin.configure(command=apply_all_changes)
    weight_scale.configure(command=lambda _value: apply_all_changes())
    adv_btn.configure(command=toggle_adv)

    name_entry.bind("<Button-1>", select_row)
    row.bind("<Button-1>", select_row)
|
||||||
|
|
||||||
|
# ---------- simple callbacks to update model ----------
|
||||||
|
def _on_name_change(self, cid, new_name):
|
||||||
|
c = self.find_column(cid)
|
||||||
|
if c:
|
||||||
|
c.name = new_name.strip() or c.name
|
||||||
|
# refresh to update target name lists
|
||||||
|
self.refresh_column_list()
|
||||||
|
|
||||||
|
def _on_type_change(self, cid, new_type):
|
||||||
|
c = self.find_column(cid)
|
||||||
|
if c:
|
||||||
|
c.type = new_type
|
||||||
|
self.refresh_column_list()
|
||||||
|
|
||||||
|
def _on_round_change(self, cid, dec):
|
||||||
|
c = self.find_column(cid)
|
||||||
|
if c:
|
||||||
|
c.decimals = dec
|
||||||
|
|
||||||
|
def _on_row_select(self, cid):
|
||||||
|
self.selected_col_id = cid
|
||||||
|
# visual selection handled in row click
|
||||||
|
|
||||||
|
# ---------- Generation logic ----------
|
||||||
|
def generate_rows(self, nrows):
    """Generate ``nrows`` rows of synthetic data from the current columns.

    Steps:

    1. Make column names unique.  A repeated name gets a ``_<n>`` suffix
       (starting at 2) that clashes with no other column name; the new name
       is stored on the column.
    2. Draw a base series per column: uniform between ``min`` and ``max``
       for "Random" columns, the constant ``fixed`` otherwise.
    3. For each column with linearity enabled and an existing target, blend
       its base value with the target's base value rescaled into this
       column's range: ``(1 - w) * base + w * mapped`` with ``w`` clamped to
       [0, 1].  Targets always contribute their *base* values, so chains of
       linked columns do not accumulate.
    4. Round each value to the column's ``decimals``; 0 decimals yields an
       ``int``.

    Args:
        nrows: number of rows to produce.

    Returns:
        A list of ``nrows`` dicts mapping column name to value.

    Raises:
        RuntimeError: if no columns are defined.
    """
    if not self.columns:
        raise RuntimeError("No columns defined")

    # --- 1. unique names ---
    # Names already present anywhere must not be handed out as a suffix
    # name, otherwise two columns would share one key in each row dict.
    reserved = {c.name for c in self.columns}
    used = set()
    for c in self.columns:
        if c.name in used:
            base_name = c.name
            n = 2
            candidate = f"{base_name}_{n}"
            while candidate in reserved or candidate in used:
                n += 1
                candidate = f"{base_name}_{n}"
            c.name = candidate
        used.add(c.name)

    # --- 2. base values ---
    base_values = {}
    for c in self.columns:
        if c.type == "Random":
            lo = float(c.min)
            hi = float(c.max)
            if lo == hi:
                series = [lo for _ in range(nrows)]
            else:
                series = [random.random() * (hi - lo) + lo for _ in range(nrows)]
        else:
            constant = float(c.fixed)
            series = [constant for _ in range(nrows)]
        base_values[c.id] = series

    # --- 3. linearity ---
    final_values = {c.id: list(base_values[c.id]) for c in self.columns}
    for c in self.columns:
        lin = c.linearity
        if not (lin["enabled"] and lin["target_id"]):
            continue
        target = self.find_column(lin["target_id"])
        if not target:
            continue  # target was removed; leave the column untouched
        w = clamp(float(lin["weight"]), 0.0, 1.0)
        smin = c.range_min()
        s_range = c.range_max() - smin
        tmin = target.range_min()
        t_range = target.range_max() - tmin
        own_series = base_values[c.id]
        target_series = base_values[target.id]
        blended = final_values[c.id]
        for i in range(nrows):
            if t_range == 0:
                # constant target: map it to the midpoint of this column's range
                mapped = smin + s_range * 0.5
            else:
                frac = (target_series[i] - tmin) / t_range
                mapped = smin + frac * s_range
            blended[i] = (1.0 - w) * own_series[i] + w * mapped

    # --- 4. rounding and row assembly ---
    rows = []
    for i in range(nrows):
        row = {}
        for c in self.columns:
            dec = int(c.decimals)
            val = final_values[c.id][i]
            row[c.name] = int(round(val)) if dec == 0 else round(val, dec)
        rows.append(row)
    return rows
|
||||||
|
|
||||||
|
def generate_and_preview(self):
    """Generate up to 10 sample rows and show them in the preview table.

    The requested row count is read from ``self.rows_var``.  A count that is
    not a positive integer (including unparsable spinbox text) is reported
    in the status label and nothing is generated.  Errors raised while
    generating or displaying the sample are shown in an error dialog.
    """
    max_preview = 10
    try:
        nrows = self.rows_var.get()
    except (tk.TclError, ValueError):
        # the spinbox holds text that is not an integer
        nrows = 0
    if nrows <= 0:
        self.preview_status.config(text="Please specify a number of rows > 0.")
        return

    n_preview = min(nrows, max_preview)
    try:
        data = self.generate_rows(n_preview)
        self.update_preview_table(data)
        self.preview_status.config(
            text=f"Preview showing first {n_preview} rows. Total rows to generate: {nrows}")
    except Exception as e:
        # UI boundary: surface any failure to the user instead of the console
        messagebox.showerror("Error generating", str(e))
        self.preview_status.config(text="Error generating preview data")
|
||||||
|
|
||||||
|
def update_preview_table(self, data):
    """Replace the contents of the preview table with ``data``.

    ``data`` is a sequence of dicts keyed by column name.  Old rows are
    removed, previously configured columns are blanked and collapsed, and
    the table is rebuilt with one heading per entry of ``self.columns``.
    With no columns defined the table is only cleared.
    """
    tree = self.preview_tree

    # Remove the rows of the previous preview
    for item_id in tree.get_children():
        tree.delete(item_id)

    # Blank out and collapse the columns configured last time
    for stale in tree["columns"]:
        tree.heading(stale, text="")
        tree.column(stale, width=0)

    if not self.columns:
        return

    headers = [c.name for c in self.columns]
    tree["columns"] = headers
    for header in headers:
        tree.heading(header, text=header)
        tree.column(header, width=100, minwidth=80, anchor='center')

    # Insert the rows, alternating a tag for striped backgrounds
    for index, record in enumerate(data):
        stripe = 'oddrow' if index % 2 else 'evenrow'
        cells = [record[header] for header in headers]
        tree.insert("", "end", values=cells, tags=(stripe,))

    tree.tag_configure('evenrow', background='#f0f0f0')
    tree.tag_configure('oddrow', background='white')
|
||||||
|
|
||||||
|
# ---------- Export ----------
|
||||||
|
def export_csv_dialog(self):
    """Ask for a file name, generate the full data set and write it as CSV.

    The row count comes from ``self.rows_var``; a count that is not a
    positive integer (including unparsable spinbox text) is rejected with an
    information dialog.  Cancelling the file dialog aborts silently.  The
    file is written as UTF-8 with a header line; failures while generating
    or writing are shown in an error dialog.
    """
    try:
        nrows = self.rows_var.get()
    except (tk.TclError, ValueError):
        # the spinbox holds text that is not an integer
        nrows = 0
    if nrows <= 0:
        messagebox.showinfo("Rows required", "Please specify a number of rows > 0.")
        return

    fname = filedialog.asksaveasfilename(
        title="Save CSV", defaultextension=".csv",
        filetypes=[("CSV files", "*.csv"), ("All files", "*.*")])
    if not fname:
        return

    try:
        data = self.generate_rows(nrows)
        # Field names are read after generation because generate_rows may
        # rename columns to make their names unique.
        with open(fname, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=[c.name for c in self.columns])
            writer.writeheader()
            writer.writerows(data)
    except Exception as e:
        # UI boundary: report any generation or I/O failure to the user
        messagebox.showerror("Error", str(e))
    else:
        messagebox.showinfo("Exported", f"Wrote {nrows} rows to:\n{fname}")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- Run the app ----------
|
||||||
|
if __name__ == "__main__":
    # Script entry point: build the window, then hand control to Tk's event
    # loop until the window is closed.
    app = SyntheticDataGUI()
    app.mainloop()
|
||||||
691
Scripts/Synthetic_Data_Generator/SDG_3.py
Normal file
691
Scripts/Synthetic_Data_Generator/SDG_3.py
Normal file
@@ -0,0 +1,691 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import tkinter as tk
|
||||||
|
from tkinter import ttk, messagebox, filedialog
|
||||||
|
import uuid
|
||||||
|
import random
|
||||||
|
import csv
|
||||||
|
import math
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# ---------- Helper functions ----------
|
||||||
|
def new_col_id():
    """Return a fresh random column identifier (a UUID4 rendered as a string)."""
    fresh_uuid = uuid.uuid4()
    return str(fresh_uuid)
|
||||||
|
|
||||||
|
def clamp(v, a, b):
    """Limit ``v`` to the interval [a, b] and return the result.

    If ``a`` is greater than ``b`` the lower bound ``a`` is returned.
    """
    capped = v if v < b else b
    return capped if capped > a else a
|
||||||
|
|
||||||
|
def format_decimals(dec):
    """Return a zero pattern with ``dec`` decimal places.

    ``format_decimals(2)`` gives ``"0.00"``; a ``dec`` that is not positive
    gives ``"0"``.
    """
    if not dec > 0:
        return "0"
    return "0." + "0" * dec
|
||||||
|
|
||||||
|
# ---------- Column model ----------
|
||||||
|
class Column:
    """Settings describing one generated column.

    Attributes are plain, publicly writable fields that the GUI edits in
    place; ``range_min`` / ``range_max`` report the value span implied by
    the current ``type``.
    """

    def __init__(self, name="col", col_id=None):
        # Identity: a caller-supplied id wins, otherwise a new one is made
        self.id = col_id if col_id else new_col_id()
        self.name = name
        # Value source: "Random" or "Fixed"
        self.type = "Random"
        # Bounds used by "Random", constant used by "Fixed"
        self.min, self.max = 0.0, 10.0
        self.fixed = 0.0
        # Decimal places kept when values are rounded
        self.decimals = 0
        # Optional coupling to another column (referenced by its id)
        self.linearity = dict(enabled=False, target_id=None, weight=0.0)

    def range_min(self):
        """Return the lowest value as a float: ``min`` for "Random", else ``fixed``."""
        bound = self.min if self.type == "Random" else self.fixed
        return float(bound)

    def range_max(self):
        """Return the highest value as a float: ``max`` for "Random", else ``fixed``."""
        bound = self.max if self.type == "Random" else self.fixed
        return float(bound)
|
||||||
|
|
||||||
|
# ---------- Main App ----------
|
||||||
|
class SyntheticDataGUI(tk.Tk):
|
||||||
|
def __init__(self):
    """Create the main window, seed two sample columns and build the widgets."""
    super().__init__()
    self.title("Synthetic Data Generator v1.0")
    self.geometry("920x800")
    self.minsize(800, 600)

    # Column definitions; the app starts with "A" and "B"
    self.columns = []
    # Holds the most recently generated data (nothing generated yet)
    self.generated_data = None
    for sample_name in ("A", "B"):
        self.add_column(sample_name)

    self._build_ui()
|
||||||
|
|
||||||
|
# ---------- UI building ----------
|
||||||
|
def _build_ui(self):
    """Create and lay out every widget of the main window.

    The window is a vertical ``ttk.PanedWindow`` with two panes:

    * top: the toolbar, a fixed header line and a scrollable canvas that
      holds one editor row per column;
    * bottom: a ``ttk.Treeview`` preview table and a status label.

    Attributes set here: ``rows_var``, ``canvas``, ``inner_frame``,
    ``row_frames``, ``selected_col_id``, ``preview_tree`` and
    ``preview_status``.
    """
    # Vertical split: column editor on top, preview underneath
    main_pane = ttk.PanedWindow(self, orient=tk.VERTICAL)
    main_pane.pack(fill=tk.BOTH, expand=True, padx=8, pady=6)

    top_frame = ttk.Frame(main_pane)
    main_pane.add(top_frame, weight=1)

    # ---- toolbar (pack order defines the left-to-right order) ----
    top = ttk.Frame(top_frame)
    top.pack(side="top", fill="x", padx=8, pady=6)

    ttk.Button(top, text="Add column", command=self.ui_add_column).pack(
        side="left", padx=(0, 6))
    ttk.Button(top, text="Remove selected", command=self.ui_remove_selected).pack(
        side="left", padx=(0, 6))
    ttk.Button(top, text="Help / Guide", command=self.show_help).pack(side="right")

    # Number of rows to generate / export
    self.rows_var = tk.IntVar(value=100)
    ttk.Spinbox(top, from_=1, to=1000000, textvariable=self.rows_var, width=8).pack(
        side="left", padx=(0, 6))

    ttk.Button(top, text="Generate & Preview", command=self.generate_and_preview).pack(
        side="left", padx=(12, 6))
    ttk.Button(top, text="Export CSV", command=self.export_csv_dialog).pack(
        side="left", padx=(0, 6))
    ttk.Button(top, text="Visualize Data", command=self.visualize_data).pack(
        side="left", padx=(12, 6))

    ttk.Label(top, text=" ").pack(side="left", expand=True)  # spacer

    # ---- fixed header line (outside the scrolled area) ----
    hdr = ttk.Frame(top_frame)
    hdr.pack(fill="x", padx=8, pady=(0, 6))
    header_font = ('Arial', 10, 'bold italic')
    header_specs = (
        ("Name", 20, (10, 0)),
        ("Type / Params", 46, (20, 0)),
        ("Rounding", 10, (0, 0)),
        ("Advanced", 16, (10, 0)),
    )
    for position, (caption, width, padx) in enumerate(header_specs):
        ttk.Label(hdr, text=caption, width=width, font=header_font).grid(
            row=0, column=position, sticky="w", padx=padx)

    # ---- scrollable list of column rows ----
    scroll_container = ttk.Frame(top_frame)
    scroll_container.pack(fill="both", expand=True, padx=8, pady=6)

    self.canvas = tk.Canvas(scroll_container)
    self.canvas.pack(side="left", fill="both", expand=True)
    scrollbar = ttk.Scrollbar(scroll_container, orient="vertical", command=self.canvas.yview)
    scrollbar.pack(side="right", fill="y")
    self.canvas.configure(yscrollcommand=scrollbar.set)

    def sync_scrollregion(_event=None):
        self.canvas.configure(scrollregion=self.canvas.bbox("all"))

    self.canvas.bind('<Configure>', sync_scrollregion)

    self.inner_frame = ttk.Frame(self.canvas)
    self.canvas.create_window((0, 0), window=self.inner_frame, anchor='nw')
    # The content changes size when rows are added or removed or when an
    # "Advanced" panel is opened, without the canvas itself being resized.
    # Track the content's own size so the scrollbar always covers it.
    self.inner_frame.bind('<Configure>', sync_scrollregion)

    # Row bookkeeping must exist before the first refresh creates rows
    self.row_frames = {}
    self.selected_col_id = None

    self.refresh_column_list()

    # ---- preview pane ----
    bottom_frame = ttk.Frame(main_pane)
    main_pane.add(bottom_frame, weight=1)

    preview_label = ttk.Label(bottom_frame, text="Preview (First 10 rows):",
                              font=('Arial', 10, 'bold italic'))
    preview_label.pack(anchor='w', padx=8, pady=(8, 4))

    table_container = ttk.Frame(bottom_frame)
    table_container.pack(fill='both', expand=True, padx=8, pady=(0, 8))

    self.preview_tree = ttk.Treeview(table_container, show='headings', height=10)
    vsb = ttk.Scrollbar(table_container, orient="vertical", command=self.preview_tree.yview)
    hsb = ttk.Scrollbar(table_container, orient="horizontal", command=self.preview_tree.xview)
    self.preview_tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set)

    self.preview_tree.grid(row=0, column=0, sticky='nsew')
    vsb.grid(row=0, column=1, sticky='ns')
    hsb.grid(row=1, column=0, sticky='ew')

    # Only the table cell stretches when the pane is resized
    table_container.grid_rowconfigure(0, weight=1)
    table_container.grid_columnconfigure(0, weight=1)

    self.preview_status = ttk.Label(
        bottom_frame,
        text="No data generated yet. Click 'Generate & Preview' to see sample data.")
    self.preview_status.pack(anchor='w', padx=8, pady=(0, 8))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- Column management ----------
|
||||||
|
def add_column(self, name="col"):
    """Create a ``Column`` called *name*, append it to ``self.columns`` and return it."""
    new_column = Column(name=name)
    self.columns.append(new_column)
    return new_column
|
||||||
|
|
||||||
|
def remove_column_by_id(self, cid):
    """Drop every column whose ``id`` equals *cid*.

    ``self.columns`` is rebound to a new list; the old list object is not
    mutated.
    """
    self.columns = list(filter(lambda column: column.id != cid, self.columns))
|
||||||
|
|
||||||
|
def find_column(self, cid):
    """Return the first column whose ``id`` equals *cid*, or ``None`` if there is none."""
    return next((column for column in self.columns if column.id == cid), None)
|
||||||
|
|
||||||
|
# ---------- UI callbacks ----------
|
||||||
|
def ui_add_column(self):
    """Add a column named with the first unused ``colN``, rebuild the list and select it."""
    prefix = "col"
    taken = {column.name for column in self.columns}

    # Count upwards from 1 until the generated name is free.
    suffix = 1
    candidate = f"{prefix}{suffix}"
    while candidate in taken:
        suffix += 1
        candidate = f"{prefix}{suffix}"

    created = self.add_column(candidate)
    self.refresh_column_list()
    # The new column becomes the current selection.
    self.selected_col_id = created.id
|
||||||
|
|
||||||
|
def ui_remove_selected(self):
    """Delete the selected column, or show a hint when nothing is selected."""
    chosen = self.selected_col_id
    if chosen:
        self.remove_column_by_id(chosen)
        self.selected_col_id = None
        self.refresh_column_list()
    else:
        messagebox.showinfo("Remove column", "Select a column row by clicking its name first.")
|
||||||
|
|
||||||
|
def show_help(self):
    """Open a separate window with usage instructions, credits and a disclaimer."""
    window = tk.Toplevel(self)
    window.title("Help / Guide")
    window.geometry("500x400")

    # Usage instructions, assembled from one fragment per guide line.
    guide = "".join((
        "Welcome to the Synthetic Data Generator!\n\n",
        "➤ Use 'Add column' to create new data columns.\n",
        "➤ Choose 'Random' or 'Fixed' values for each column.\n",
        "➤ Adjust ranges, rounding, and advanced settings as needed.\n",
        "➤ 'Generate & Preview' shows sample data (first 10 rows).\n",
        "➤ 'Export CSV' saves the generated dataset to a CSV file.\n\n",
        "Advanced:\n",
        " - Use the 'Advanced' button per column to enable linearity, basically\n",
        " making one column follow another with a weighted influence.\n\n",
    ))
    tk.Label(
        window,
        text=guide,
        justify="left",
        anchor="nw",
        font=("Arial", 10),
        wraplength=460,
    ).pack(fill="both", expand=True, padx=12, pady=12)

    # Author credits.
    credits_text = "".join((
        "This will be one of my scripts/executables.\n",
        "Feel free to share!\n\n",
        "You can leave feedback on my LinkedIn:\n",
        "Sang Putu Sandhyana Yogi\n\n",
        "Thank you so much!",
    ))
    tk.Label(
        window,
        text=credits_text,
        font=("Arial", 7, "italic"),
        justify="center",
    ).pack(fill="both", expand=True, padx=5, pady=5)

    # Usage disclaimer, shown in red at the bottom of the window.
    tk.Label(
        window,
        text="For Research / Training Purposes Only.\nDon't fake your Thesis Data!",
        font=("Arial", 10, "bold italic"),
        fg="red",
        justify="center",
    ).pack(pady=(0, 12))
|
||||||
|
|
||||||
|
def refresh_column_list(self):
    """Rebuild every column row in the scrollable list from ``self.columns``.

    All widgets packed into ``self.inner_frame`` are destroyed, the
    ``self.row_frames`` registry is emptied, and one fresh row is created
    per column via ``_create_column_row``.  Finally the canvas scroll
    region is recomputed.
    """
    # Destroy the rows from the previous build.
    for child in self.inner_frame.pack_slaves():
        child.destroy()

    # The frames registered here were just destroyed.  Without clearing,
    # entries of removed columns would linger and the selection-highlight
    # loop would call .config() on dead widgets.
    self.row_frames.clear()

    # Rebuild one row per column; each row re-registers itself.
    for column in self.columns:
        self._create_column_row(column)

    # Let geometry settle so bbox("all") reflects the new rows.
    self.inner_frame.update_idletasks()
    self.canvas.configure(scrollregion=self.canvas.bbox("all"))
|
||||||
|
|
||||||
|
def _create_column_row(self, col: Column):
    """Build the editor row for *col* inside ``self.inner_frame``.

    The row contains the name entry, the Random/Fixed selector with its
    min/max/fixed entries, the rounding selector and a collapsible
    "Advanced" area holding the linearity settings (enable flag, target
    column and weight).  Widget values are copied back into *col* by the
    local ``apply_all_changes`` callback.  The row frame is registered in
    ``self.row_frames`` under ``col.id``.
    """
    row = ttk.Frame(self.inner_frame, relief="ridge", padding=6)
    row.pack(fill="x", pady=4)

    # ----- name -----
    name_var = tk.StringVar(value=col.name)
    name_entry = ttk.Entry(row, textvariable=name_var, width=24)
    name_entry.grid(row=0, column=0, sticky="w")
    # <Return> commits the name through _on_name_change, which also
    # rebuilds the whole list.  Leaving the entry only stores the name
    # (see the <FocusOut> wiring at the end of this method).
    name_entry.bind("<Return>", lambda e, cid=col.id, var=name_var: self._on_name_change(cid, var.get()))
    # Clicking the name marks this column as the selected one.
    name_entry.bind("<Button-1>", lambda e, cid=col.id: self._on_row_select(cid))

    # ----- type + value parameters -----
    params = ttk.Frame(row)
    params.grid(row=0, column=1, sticky="w", padx=(8, 8))

    type_var = tk.StringVar(value=col.type)
    type_combo = ttk.Combobox(params, values=["Random", "Fixed"], width=8, state="readonly", textvariable=type_var)
    type_combo.grid(row=0, column=0, padx=(0, 6))

    # Random uses the min/max entries; Fixed uses the fixed entry.
    min_var = tk.StringVar(value=str(col.min))
    max_var = tk.StringVar(value=str(col.max))
    fixed_var = tk.StringVar(value=str(col.fixed))

    min_entry = ttk.Entry(params, textvariable=min_var, width=10)
    min_entry.grid(row=0, column=1, padx=(0, 4))
    ttk.Label(params, text="to").grid(row=0, column=2)
    max_entry = ttk.Entry(params, textvariable=max_var, width=10)
    max_entry.grid(row=0, column=3, padx=(4, 8))

    # The fixed entry is always placed; it is enabled/disabled by type.
    fixed_entry = ttk.Entry(params, textvariable=fixed_var, width=12)
    fixed_entry.grid(row=0, column=4, padx=(4, 8))

    # ----- rounding -----
    round_var = tk.IntVar(value=col.decimals)
    round_combo = ttk.Combobox(row, values=[0, 1, 2, 3, 4, 5, 6], width=4, state="readonly", textvariable=round_var)
    round_combo.grid(row=0, column=2)
    round_combo.bind("<<ComboboxSelected>>", lambda e, cid=col.id, var=round_var: self._on_round_change(cid, int(var.get())))

    # ----- advanced toggle button and (initially hidden) area -----
    adv_btn = ttk.Button(row, text="Advanced ▾", width=12)
    adv_btn.grid(row=0, column=3, padx=(8, 0))

    adv_frame = ttk.Frame(row)
    adv_frame.grid(row=1, column=0, columnspan=4, pady=(8, 0), sticky="w")
    adv_frame.grid_remove()

    # Linearity: enable flag.
    lin_enabled_var = tk.BooleanVar(value=col.linearity["enabled"])
    lin_check = ttk.Checkbutton(adv_frame, text="Enable Linearity (follow another column)", variable=lin_enabled_var)
    lin_check.grid(row=0, column=0, sticky="w")

    # Linearity: target selector.  (name, id) pairs of every other column;
    # the dropdown shows names and the list index maps back to the id.
    ttk.Label(adv_frame, text="Target:").grid(row=0, column=1, sticky="e", padx=(12, 2))
    target_values = [(c.name, c.id) for c in self.columns if c.id != col.id]
    if not target_values:
        # Nothing to follow: show a disabled placeholder.
        target_combo = ttk.Combobox(adv_frame, values=["(no other columns)"], state="disabled", width=18)
    else:
        target_combo = ttk.Combobox(adv_frame, values=[tv[0] for tv in target_values], state="readonly", width=18)
        # Pre-select the stored target if that column still exists.
        if col.linearity["target_id"]:
            found = next((i for i, tv in enumerate(target_values) if tv[1] == col.linearity["target_id"]), None)
            if found is not None:
                target_combo.current(found)
    target_combo.grid(row=0, column=2, padx=(4, 8))

    # Linearity: weight, editable through a spinbox and a slider that
    # share one variable.
    ttk.Label(adv_frame, text="Weight:").grid(row=0, column=3, padx=(8, 2))
    weight_var = tk.DoubleVar(value=col.linearity["weight"])
    weight_spin = ttk.Spinbox(adv_frame, from_=0.0, to=1.0, increment=0.01, textvariable=weight_var, width=6)
    weight_spin.grid(row=0, column=4)
    weight_scale = ttk.Scale(adv_frame, from_=0.0, to=1.0, orient="horizontal", variable=weight_var, length=120)
    weight_scale.grid(row=0, column=5, padx=(8, 0))

    def update_scrollregion():
        """Recompute the canvas scroll region from the current layout."""
        self.inner_frame.update_idletasks()
        self.canvas.configure(scrollregion=self.canvas.bbox("all"))

    # After creating the row, update the scroll region so the scrollbar
    # covers it.
    update_scrollregion()

    def toggle_adv():
        """Show or hide the advanced area and flip the button arrow."""
        if adv_frame.winfo_ismapped():
            adv_frame.grid_remove()
            adv_btn.config(text="Advanced ▾")
        else:
            adv_frame.grid()
            adv_btn.config(text="Advanced ▴")
        # The row height changed, so the scrollable extent must follow.
        update_scrollregion()

    adv_btn.config(command=toggle_adv)

    def update_type_widgets():
        """Enable the entries that belong to the selected type, disable the rest."""
        if type_var.get() == "Random":
            min_entry.configure(state="normal")
            max_entry.configure(state="normal")
            fixed_entry.configure(state="disabled")
        else:
            min_entry.configure(state="disabled")
            max_entry.configure(state="disabled")
            fixed_entry.configure(state="normal")

    update_type_widgets()

    def on_type_selected(event=None):
        """Store the chosen type on the column and refresh entry states."""
        col.type = type_var.get()
        update_type_widgets()

    # Single binding for the type selector.  bind() replaces any earlier
    # handler for the same event, so only one handler is registered.
    type_combo.bind("<<ComboboxSelected>>", on_type_selected)

    def on_click_row(event=None):
        """Select this column and restyle all registered row frames."""
        self.selected_col_id = col.id
        for rf_cid, rf in self.row_frames.items():
            # Skip frames that have been destroyed in the meantime.
            if not rf.winfo_exists():
                continue
            rf.config(style="Selected.TFrame" if rf_cid == col.id else "TFrame")

    row.bind("<Button-1>", on_click_row)
    # Keep a reference to the frame for later restyling.
    self.row_frames[col.id] = row

    def apply_all_changes(_trigger=None):
        """Copy the current widget values of this row into *col*.

        Accepts one optional argument so it can serve both as an event
        handler and as a widget ``command`` callback.  Values that do not
        parse leave the previous model value untouched.
        """
        col.name = name_var.get().strip() or col.name
        col.type = type_var.get()

        # min/max are stored as typed; they are not reordered here so a
        # field the user is still editing is never rewritten under them.
        for attr, var in (("min", min_var), ("max", max_var), ("fixed", fixed_var)):
            try:
                setattr(col, attr, float(var.get()))
            except ValueError:
                pass  # non-numeric text: keep the previous value

        try:
            col.decimals = int(round_var.get())
        except (tk.TclError, ValueError):
            pass

        col.linearity["enabled"] = bool(lin_enabled_var.get())
        try:
            col.linearity["weight"] = float(weight_var.get())
        except (tk.TclError, ValueError):
            pass  # e.g. the spinbox text is empty or not a number

        # Resolve the target id from the dropdown index, not its text.
        try:
            picked = target_combo.current() if target_values else -1
        except tk.TclError:
            # NOTE(review): presumably only reachable if the combobox no
            # longer exists; leave the stored target untouched then.
            return
        col.linearity["target_id"] = target_values[picked][1] if picked >= 0 else None

    # Commit when focus leaves any editable widget of the row ...
    for widget in (name_entry, min_entry, max_entry, fixed_entry, round_combo, lin_check, weight_spin, target_combo):
        widget.bind("<FocusOut>", apply_all_changes)
    # ... and immediately when an advanced control changes, so the model
    # does not depend on a later focus change.
    lin_check.configure(command=apply_all_changes)
    weight_spin.configure(command=apply_all_changes)
    weight_scale.configure(command=apply_all_changes)
    target_combo.bind("<<ComboboxSelected>>", apply_all_changes)

    # When the column list changes (someone added/renamed) the target
    # lists need refreshing; refresh_column_list rebuilds every row.
|
||||||
|
|
||||||
|
# ---------- simple callbacks to update model ----------
|
||||||
|
def _on_name_change(self, cid, new_name):
|
||||||
|
c = self.find_column(cid)
|
||||||
|
if c:
|
||||||
|
c.name = new_name.strip() or c.name
|
||||||
|
# refresh to update target name lists
|
||||||
|
self.refresh_column_list()
|
||||||
|
|
||||||
|
def _on_type_change(self, cid, new_type):
|
||||||
|
c = self.find_column(cid)
|
||||||
|
if c:
|
||||||
|
c.type = new_type
|
||||||
|
self.refresh_column_list()
|
||||||
|
|
||||||
|
def _on_round_change(self, cid, dec):
|
||||||
|
c = self.find_column(cid)
|
||||||
|
if c:
|
||||||
|
c.decimals = dec
|
||||||
|
|
||||||
|
def _on_row_select(self, cid):
|
||||||
|
self.selected_col_id = cid
|
||||||
|
# visual selection handled in row click
|
||||||
|
|
||||||
|
# ---------- Generation logic ----------
|
||||||
|
def generate_rows(self, nrows):
    """Generate *nrows* synthetic rows from ``self.columns``.

    Steps:
      1. Make column names unique (clashing columns are renamed in place
         with a ``_N`` suffix that is not used by any other column).
      2. Draw a base value per column and row: uniform between min and
         max for "Random" columns, the fixed value otherwise.
      3. For columns with linearity enabled, blend the base value with
         the target column's base value mapped into this column's range:
         ``(1 - w) * base + w * mapped`` with ``w`` limited to [0, 1].
      4. Round each value to the column's ``decimals`` (0 yields ints).

    Returns a list of dicts mapping column name to value.
    Raises ``RuntimeError`` when no columns are defined.
    """
    if not self.columns:
        raise RuntimeError("No columns defined")

    # --- 1. unique names -------------------------------------------------
    # First occurrences keep their name.  Later clashes get the lowest
    # "_N" suffix (N >= 2) that collides with no existing or newly
    # assigned name, so renaming can never introduce a new duplicate.
    reserved = set()
    clashing = []
    for c in self.columns:
        if c.name in reserved:
            clashing.append(c)
        else:
            reserved.add(c.name)
    for c in clashing:
        stem = c.name
        n = 2
        while f"{stem}_{n}" in reserved:
            n += 1
        c.name = f"{stem}_{n}"
        reserved.add(c.name)

    # --- 2. base values --------------------------------------------------
    base_values = {}
    for c in self.columns:
        if c.type == "Random":
            a = float(c.min)
            b = float(c.max)
            base_values[c.id] = [
                a if a == b else random.random() * (b - a) + a
                for _ in range(nrows)
            ]
        else:
            base_values[c.id] = [float(c.fixed) for _ in range(nrows)]

    # --- 3. linearity ----------------------------------------------------
    # Targets are always read from the *base* values, so the result does
    # not depend on the order in which columns are processed.
    final_values = {cid: list(values) for cid, values in base_values.items()}
    columns_by_id = {c.id: c for c in self.columns}
    for c in self.columns:
        lin = c.linearity
        if not (lin["enabled"] and lin["target_id"]):
            continue
        target = columns_by_id.get(lin["target_id"])
        if target is None:
            continue  # target column no longer exists

        w = max(0.0, min(1.0, float(lin["weight"])))
        # Order the bounds so a reversed min/max pair still maps low
        # target values to low source values.
        smin, smax = sorted((c.range_min(), c.range_max()))
        tmin, tmax = sorted((target.range_min(), target.range_max()))
        t_range = tmax - tmin
        s_range = smax - smin

        blended = []
        for base_val, tval in zip(base_values[c.id], base_values[target.id]):
            if t_range == 0:
                # Constant target: map to the midpoint of the source range.
                mapped = smin + s_range * 0.5
            else:
                frac = (tval - tmin) / t_range
                mapped = smin + frac * s_range
            blended.append((1.0 - w) * base_val + w * mapped)
        final_values[c.id] = blended

    # --- 4. rounding and row assembly ------------------------------------
    specs = [(c.name, final_values[c.id], int(c.decimals)) for c in self.columns]
    rows = []
    for i in range(nrows):
        row = {}
        for name, values, dec in specs:
            val = values[i]
            row[name] = int(round(val)) if dec == 0 else round(val, dec)
        rows.append(row)
    return rows
|
||||||
|
|
||||||
|
def generate_and_preview(self):
    """Generate the full dataset once, store it and preview its first rows.

    The dataset is stored in ``self.generated_data`` and its first (up to
    10) rows are shown in the preview table, so the preview really is the
    beginning of the stored data.  Any error, including an unreadable row
    count, is reported in a message box and in the status label.
    """
    preview_limit = 10
    try:
        nrows = self.rows_var.get()
        full_data = self.generate_rows(nrows)
        # Store only after generation succeeded, so a failure keeps the
        # previous dataset.
        self.generated_data = full_data
        preview = full_data[:preview_limit]
        self.update_preview_table(preview)
        self.preview_status.config(
            text=f"Preview showing first {len(preview)} rows. Total rows to generate: {nrows}"
        )
    except Exception as e:
        messagebox.showerror("Error generating", str(e))
        self.preview_status.config(text="Error generating preview data")
|
||||||
|
|
||||||
|
def update_preview_table(self, data):
    """Replace the contents of the preview tree with *data* (a list of row dicts)."""
    tree = self.preview_tree

    # Remove the rows of the previous preview.
    for item_id in tree.get_children():
        tree.delete(item_id)

    # Blank out the previous column set.
    for old_column in tree["columns"]:
        tree.heading(old_column, text="")
        tree.column(old_column, width=0)

    # Nothing to show without columns.
    if not self.columns:
        return

    headers = [column.name for column in self.columns]
    tree["columns"] = headers

    # One heading per column.
    for header in headers:
        tree.heading(header, text=header)
        tree.column(header, width=100, minwidth=80, anchor='center')

    # Insert the rows, alternating the stripe tag by row index.
    stripe_tags = ('evenrow', 'oddrow')
    for index, record in enumerate(data):
        cells = [record[header] for header in headers]
        tree.insert("", "end", values=cells, tags=(stripe_tags[index % 2],))

    # Row colours for readability.
    tree.tag_configure('evenrow', background='#f0f0f0')
    tree.tag_configure('oddrow', background='white')
|
||||||
|
|
||||||
|
# ---------- Visualization ----------
|
||||||
|
def visualize_data(self):
    """Plot one chart per column of ``self.generated_data``.

    Numeric columns are drawn as 20-bin histograms, other columns as bar
    charts of value counts.  Plots are laid out in a grid with at most
    five per row; unused grid cells are hidden.  Shows an info box when
    there is no generated data and an error box when pandas or matplotlib
    cannot be imported.
    """
    if not getattr(self, "generated_data", None):
        messagebox.showinfo("Visualize Data", "Generate data first")
        return

    # Imported here because the module-level imports visible at the top
    # of the file do not provide `pd` / `plt`; this also keeps the rest
    # of the app usable when the plotting packages are not installed.
    try:
        import pandas as pd
        import matplotlib.pyplot as plt
    except ImportError as exc:
        messagebox.showerror("Visualize Data", f"Visualization requires pandas and matplotlib:\n{exc}")
        return

    df = pd.DataFrame(self.generated_data)
    nrows_total = len(df)
    ncols = len(df.columns)
    if ncols == 0:
        messagebox.showinfo("Visualize Data", "No columns available")
        return

    # Layout: up to 5 plots per row.
    max_cols = 5
    nrows = (ncols + max_cols - 1) // max_cols
    # squeeze=False always yields a 2-D array of axes, so a single
    # flatten() covers the 1x1, 1xN and MxN cases alike.
    fig, axes = plt.subplots(
        nrows, min(ncols, max_cols), figsize=(4 * max_cols, 3 * nrows), squeeze=False
    )
    axes = axes.flatten()

    for ax, col in zip(axes, df.columns):
        if pd.api.types.is_numeric_dtype(df[col]):
            df[col].plot(kind="hist", bins=20, ax=ax, title=col)
        else:
            df[col].value_counts().plot(kind="bar", ax=ax, title=col)

    # Hide the grid cells that received no plot.
    for ax in axes[ncols:]:
        ax.set_visible(False)

    # Top label showing the dataset size.
    fig.suptitle(f"Visualizations based on {nrows_total} rows", fontsize=12, fontweight="bold")

    plt.tight_layout(rect=[0, 0, 1, 0.96])  # leave space for the title
    plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- Export ----------
|
||||||
|
def export_csv_dialog(self):
    """Ask for a file name, generate the requested rows and write them as CSV.

    Nothing is written when the row count is not a positive number or the
    save dialog is cancelled.  Errors during generation or writing are
    shown in a message box.
    """
    try:
        nrows = self.rows_var.get()
    except (tk.TclError, ValueError):
        # NOTE(review): assumes rows_var is a Tk variable whose get()
        # raises when the field does not hold a number; treat that like
        # a missing row count instead of failing inside the callback.
        messagebox.showinfo("Rows required", "Please specify a number of rows > 0.")
        return
    if nrows <= 0:
        messagebox.showinfo("Rows required", "Please specify a number of rows > 0.")
        return

    fname = filedialog.asksaveasfilename(
        title="Save CSV",
        defaultextension=".csv",
        filetypes=[("CSV files", "*.csv"), ("All files", "*.*")],
    )
    if not fname:
        return  # dialog cancelled

    try:
        # Generate first: generate_rows may rename clashing columns, and
        # the header below must use the final names.
        data = self.generate_rows(nrows)
        with open(fname, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=[c.name for c in self.columns])
            writer.writeheader()
            writer.writerows(data)
        messagebox.showinfo("Exported", f"Wrote {len(data)} rows to:\n{fname}")
    except Exception as e:
        messagebox.showerror("Error", str(e))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- Run the app ----------
|
||||||
|
if __name__ == "__main__":
    # Script entry point: build the main window and run the Tk event loop
    # until the window is closed.
    app = SyntheticDataGUI()
    app.mainloop()
|
||||||
Reference in New Issue
Block a user