diff --git a/Scripts/Synthetic_Data_Generator/SDG_2.py b/Scripts/Synthetic_Data_Generator/SDG_2.py new file mode 100644 index 0000000..4d652c7 --- /dev/null +++ b/Scripts/Synthetic_Data_Generator/SDG_2.py @@ -0,0 +1,639 @@ +#!/usr/bin/env python3 +import tkinter as tk +from tkinter import ttk, messagebox, filedialog +import uuid +import random +import csv +import math + +# ---------- Helper functions ---------- +def new_col_id(): + return str(uuid.uuid4()) + +def clamp(v, a, b): + return max(a, min(b, v)) + +def format_decimals(dec): + return f"0.{''.join(['0']*dec)}" if dec>0 else "0" + +# ---------- Column model ---------- +class Column: + def __init__(self, name="col", col_id=None): + self.id = col_id or new_col_id() + self.name = name + self.type = "Random" # or "Fixed" + self.min = 0.0 + self.max = 10.0 + self.fixed = 0.0 + self.decimals = 0 + self.linearity = { + "enabled": False, + "target_id": None, + "weight": 0.0 + } + + def range_min(self): + if self.type == "Random": + return float(self.min) + else: + return float(self.fixed) + + def range_max(self): + if self.type == "Random": + return float(self.max) + else: + return float(self.fixed) + +# ---------- Main App ---------- +class SyntheticDataGUI(tk.Tk): + def __init__(self): + super().__init__() + self.title("Synthetic Data Generator v1.0") + self.geometry("920x800") # Increased height to accommodate preview table + self.minsize(800, 600) + + self.columns = [] # list[Column] + # start with 2 sample columns + self.add_column("A") + self.add_column("B") + + self._build_ui() + + # ---------- UI building ---------- + def _build_ui(self): + # Create main paned window for resizable sections + main_pane = ttk.PanedWindow(self, orient=tk.VERTICAL) + main_pane.pack(fill=tk.BOTH, expand=True, padx=8, pady=6) + + # Top frame: controls and column definitions + top_frame = ttk.Frame(main_pane) + main_pane.add(top_frame, weight=1) + + # top frame: controls + top = ttk.Frame(top_frame) + top.pack(side="top", fill="x", 
padx=8, pady=6) + + add_btn = ttk.Button(top, text="Add column", command=self.ui_add_column) + add_btn.pack(side="left", padx=(0,6)) + + remove_btn = ttk.Button(top, text="Remove selected", command=self.ui_remove_selected) + remove_btn.pack(side="left", padx=(0,6)) + + help_btn = ttk.Button(top, text="Help / Guide", command=self.show_help) + help_btn.pack(side="right") + + self.rows_var = tk.IntVar(value=100) + rows_spin = ttk.Spinbox(top, from_=1, to=1000000, textvariable=self.rows_var, width=8) + rows_spin.pack(side="left", padx=(0,6)) + + gen_btn = ttk.Button(top, text="Generate & Preview", command=self.generate_and_preview) + gen_btn.pack(side="left", padx=(12,6)) + + export_btn = ttk.Button(top, text="Export CSV", command=self.export_csv_dialog) + export_btn.pack(side="left", padx=(0,6)) + + ttk.Label(top, text=" ").pack(side="left", expand=True) # spacer + + # main area: headers + scrollable frame with list of columns + # 1) Fixed headers directly under the buttons + hdr = ttk.Frame(top_frame) + hdr.pack(fill="x", padx=8, pady=(0,6)) + header_font = ('Arial', 10, 'bold italic') + ttk.Label(hdr, text="Name", width=20, font=header_font).grid( + row=0, column=0, sticky="w", padx=(10, 0) + ) + ttk.Label(hdr, text="Type / Params", width=46, font=header_font).grid( + row=0, column=1, sticky="w", padx=(20, 0) + ) + ttk.Label(hdr, text="Rounding", width=10, font=header_font).grid( + row=0, column=2, sticky="w", padx=(0, 0) + ) + ttk.Label(hdr, text="Advanced", width=16, font=header_font).grid( + row=0, column=3, sticky="w", padx=(10, 0) + ) + + # 2) Scrollable container for rows (separate from headers) + scroll_container = ttk.Frame(top_frame) + scroll_container.pack(fill="both", expand=True, padx=8, pady=6) + + # canvas + scrollbar + self.canvas = tk.Canvas(scroll_container) + self.canvas.pack(side="left", fill="both", expand=True) + scrollbar = ttk.Scrollbar(scroll_container, orient="vertical", command=self.canvas.yview) + scrollbar.pack(side="right", fill="y") + 
self.canvas.configure(yscrollcommand=scrollbar.set) + + # adjust scrollregion when canvas resized + self.canvas.bind('', lambda e: self.canvas.configure(scrollregion=self.canvas.bbox("all"))) + + self.inner_frame = ttk.Frame(self.canvas) + self.canvas.create_window((0,0), window=self.inner_frame, anchor='nw') + + # list area + self.row_frames = {} + self.selected_col_id = None + + self.refresh_column_list() + + # Bottom frame: Preview table + bottom_frame = ttk.Frame(main_pane) + main_pane.add(bottom_frame, weight=1) + + # Preview section + preview_label = ttk.Label(bottom_frame, text="Preview (First 10 rows):", font=('Arial', 10, 'bold italic')) + preview_label.pack(anchor='w', padx=8, pady=(8, 4)) + + # Create frame for table and scrollbar + table_container = ttk.Frame(bottom_frame) + table_container.pack(fill='both', expand=True, padx=8, pady=(0, 8)) + + # Create treeview for table display + self.preview_tree = ttk.Treeview(table_container, show='headings', height=10) + vsb = ttk.Scrollbar(table_container, orient="vertical", command=self.preview_tree.yview) + hsb = ttk.Scrollbar(table_container, orient="horizontal", command=self.preview_tree.xview) + self.preview_tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set) + + # Pack the treeview and scrollbars + self.preview_tree.grid(row=0, column=0, sticky='nsew') + vsb.grid(row=0, column=1, sticky='ns') + hsb.grid(row=1, column=0, sticky='ew') + + table_container.grid_rowconfigure(0, weight=1) + table_container.grid_columnconfigure(0, weight=1) + + # Bottom Labels + self.preview_status = ttk.Label(bottom_frame, text="No data generated yet. 
Click 'Generate & Preview' to see sample data.") + self.preview_status.pack(anchor='w', padx=8, pady=(0, 8)) + + + # ---------- Column management ---------- + def add_column(self, name="col"): + c = Column(name=name) + self.columns.append(c) + return c + + def remove_column_by_id(self, cid): + self.columns = [c for c in self.columns if c.id != cid] + + def find_column(self, cid): + for c in self.columns: + if c.id == cid: + return c + return None + + # ---------- UI callbacks ---------- + def ui_add_column(self): + n = 1 + base = "col" + existing = {c.name for c in self.columns} + while f"{base}{n}" in existing: + n += 1 + c = self.add_column(f"{base}{n}") + self.refresh_column_list() + # select new + self.selected_col_id = c.id + + def ui_remove_selected(self): + if not self.selected_col_id: + messagebox.showinfo("Remove column", "Select a column row by clicking its name first.") + return + self.remove_column_by_id(self.selected_col_id) + self.selected_col_id = None + self.refresh_column_list() + + def show_help(self): + """Show Help/Guide window""" + help_win = tk.Toplevel(self) + help_win.title("Help / Guide") + help_win.geometry("500x400") + + # Instructions text + text = ( + "Welcome to the Synthetic Data Generator!\n\n" + "➤ Use 'Add column' to create new data columns.\n" + "➤ Choose 'Random' or 'Fixed' values for each column.\n" + "➤ Adjust ranges, rounding, and advanced settings as needed.\n" + "➤ 'Generate & Preview' shows sample data (first 10 rows).\n" + "➤ 'Export CSV' saves the generated dataset to a CSV file.\n\n" + "Advanced:\n" + " - Use the 'Advanced' button per column to enable linearity, basically\n" + " making one column follow another with a weighted influence.\n\n" + ) + label = tk.Label( + help_win, + text=text, + justify="left", + anchor="nw", + font=("Arial", 10), + wraplength=460 + ) + label.pack(fill="both", expand=True, padx=12, pady=12) + + cred = tk.Label( + help_win, + text=( + "This will be one of my scripts/executables.\n" + "Feel 
free to share!\n\n" + "You can leave feedback on my LinkedIn:\n" + "Sang Putu Sandhyana Yogi\n\n" + "Thank you so much!" + ), + font=("Arial", 7, "italic"), + justify="center" + ) + cred.pack(fill="both", expand=True, padx=5, pady=5) + + # Important notice + warning = tk.Label( + help_win, + text="For Research / Training Purposes Only.\nDon't fake your Thesis Data!", + font=("Arial", 10, "bold italic"), + fg="red", + justify="center" + ) + warning.pack(pady=(0, 12)) + + def refresh_column_list(self): + # clear previous column rows (but keep the header which is now outside) + for child in list(self.inner_frame.pack_slaves()): + if child != self.inner_frame.pack_slaves()[0]: + child.destroy() + + # rebuild rows + for c in self.columns: + self._create_column_row(c) + + # ensure scrollregion updates after refresh + self.inner_frame.update_idletasks() + self.canvas.configure(scrollregion=self.canvas.bbox("all")) + + def _create_column_row(self, col: Column): + row = ttk.Frame(self.inner_frame, relief="ridge", padding=6) + row.pack(fill="x", pady=4) + + # clicking name selects + name_var = tk.StringVar(value=col.name) + name_entry = ttk.Entry(row, textvariable=name_var, width=24) + name_entry.grid(row=0, column=0, sticky="w") + name_entry.bind("", lambda e, cid=col.id, var=name_var: self._on_name_change(cid, var.get())) + name_entry.bind("", lambda e, cid=col.id: self._on_row_select(cid)) + + # Type + params frame + params = ttk.Frame(row) + params.grid(row=0, column=1, sticky="w", padx=(8,8)) + + type_var = tk.StringVar(value=col.type) + type_combo = ttk.Combobox(params, values=["Random", "Fixed"], width=8, state="readonly", textvariable=type_var) + type_combo.grid(row=0, column=0, padx=(0,6)) + type_combo.bind("<>", lambda e, cid=col.id, var=type_var: self._on_type_change(cid, var.get())) + # Random: min/max entries; Fixed: fixed + min_var = tk.StringVar(value=str(col.min)) + max_var = tk.StringVar(value=str(col.max)) + fixed_var = tk.StringVar(value=str(col.fixed)) + 
+ min_entry = ttk.Entry(params, textvariable=min_var, width=10) + min_entry.grid(row=0, column=1, padx=(0,4)) + ttk.Label(params, text="to").grid(row=0, column=2) + max_entry = ttk.Entry(params, textvariable=max_var, width=10) + max_entry.grid(row=0, column=3, padx=(4,8)) + + fixed_entry = ttk.Entry(params, textvariable=fixed_var, width=12) + # place fixed entry but hide or show based on type + fixed_entry.grid(row=0, column=4, padx=(4,8)) + + # rounding + round_var = tk.IntVar(value=col.decimals) + round_combo = ttk.Combobox(row, values=[0,1,2,3,4,5,6], width=4, state="readonly", textvariable=round_var) + round_combo.grid(row=0, column=2) + round_combo.bind("<>", lambda e, cid=col.id, var=round_var: self._on_round_change(cid, int(var.get()))) + + # advanced button + adv_btn = ttk.Button(row, text="Advanced ▾", width=12) + adv_btn.grid(row=0, column=3, padx=(8,0)) + + # advanced area (hidden by default) + adv_frame = ttk.Frame(row) + adv_frame.grid(row=1, column=0, columnspan=4, pady=(8,0), sticky="w") + adv_frame.grid_remove() + + # contents of advanced: linearity + lin_enabled_var = tk.BooleanVar(value=col.linearity["enabled"]) + lin_check = ttk.Checkbutton(adv_frame, text="Enable Linearity (follow another column)", variable=lin_enabled_var) + lin_check.grid(row=0, column=0, sticky="w") + # target selector and weight + ttk.Label(adv_frame, text="Target:").grid(row=0, column=1, sticky="e", padx=(12,2)) + target_values = [ (c.name, c.id) for c in self.columns if c.id != col.id ] + # if target list empty, provide placeholder + if not target_values: + target_combo = ttk.Combobox(adv_frame, values=["(no other columns)"], state="disabled", width=18) + else: + target_combo = ttk.Combobox(adv_frame, values=[tv[0] for tv in target_values], state="readonly", width=18) + # set current if exists + if col.linearity["target_id"]: + found = next((i for i,tv in enumerate(target_values) if tv[1]==col.linearity["target_id"]), None) + if found is not None: + 
target_combo.current(found) + target_combo.grid(row=0, column=2, padx=(4,8)) + + ttk.Label(adv_frame, text="Weight:").grid(row=0, column=3, padx=(8,2)) + weight_var = tk.DoubleVar(value=col.linearity["weight"]) + weight_spin = ttk.Spinbox(adv_frame, from_=0.0, to=1.0, increment=0.01, textvariable=weight_var, width=6) + weight_spin.grid(row=0, column=4) + weight_scale = ttk.Scale(adv_frame, from_=0.0, to=1.0, orient="horizontal", variable=weight_var, length=120) + weight_scale.grid(row=0, column=5, padx=(8,0)) + + # after creating the row, update scrollregion so scrollbar appears + self.inner_frame.update_idletasks() + self.canvas.configure(scrollregion=self.canvas.bbox("all")) + + # wire adv toggle button + def toggle_adv(): + if adv_frame.winfo_ismapped(): + adv_frame.grid_remove() + adv_btn.config(text="Advanced ▾") + else: + adv_frame.grid() + adv_btn.config(text="Advanced ▴") + adv_btn.config(command=toggle_adv) + + # update widgets visibility according to type + def update_type_widgets(): + t = type_var.get() + if t == "Random": + min_entry.configure(state="normal") + max_entry.configure(state="normal") + fixed_entry.configure(state="disabled") + else: + min_entry.configure(state="disabled") + max_entry.configure(state="disabled") + fixed_entry.configure(state="normal") + + update_type_widgets() + + # bind entry updates + def on_minmax_focus_out(e=None): + try: + col.min = float(min_var.get()) + col.max = float(max_var.get()) + if col.min > col.max: + col.min, col.max = col.max, col.min + min_var.set(str(col.min)) + max_var.set(str(col.max)) + except Exception: + pass + + def on_fixed_focus_out(e=None): + try: + col.fixed = float(fixed_var.get()) + except Exception: + pass + + min_entry.bind("", on_minmax_focus_out) + max_entry.bind("", on_minmax_focus_out) + fixed_entry.bind("", on_fixed_focus_out) + + # when combobox type changed + def on_type_selected(e=None): + col.type = type_var.get() + update_type_widgets() + + type_combo.bind("<>", lambda e=None: 
on_type_selected()) + + # name change handler + # already bound above + + # rounding handler bound above + + # set initial values in widgets (in case) + min_var.set(str(col.min)) + max_var.set(str(col.max)) + fixed_var.set(str(col.fixed)) + + # clicking a row to 'select' it + def on_click_row(event=None): + self.selected_col_id = col.id + # highlight selection visually + for rf_cid, rf in self.row_frames.items(): + if rf_cid == col.id: + rf.config(style="Selected.TFrame") + else: + rf.config(style="TFrame") + + row.bind("", lambda e: on_click_row()) + # also store refs for later updates + self.row_frames[col.id] = row + + # store final update callbacks when user changes advanced widgets + def apply_all_changes(): + col.name = name_var.get().strip() or col.name + col.type = type_var.get() + try: + col.min = float(min_var.get()) + except Exception: + pass + try: + col.max = float(max_var.get()) + except Exception: + pass + try: + col.fixed = float(fixed_var.get()) + except Exception: + pass + col.decimals = int(round_var.get()) + col.linearity["enabled"] = bool(lin_enabled_var.get()) + col.linearity["weight"] = float(weight_var.get()) + # determine target id by name in current dropdown + if isinstance(target_combo, ttk.Combobox) and target_combo['state'] != 'disabled': + sel_name = target_combo.get() + # find id by name + for other in self.columns: + if other.id != col.id and other.name == sel_name: + col.linearity["target_id"] = other.id + break + else: + # if no selection or not matched + col.linearity["target_id"] = None + + # call apply_all_changes when adv toggled or when leaving row + for w in [name_entry, min_entry, max_entry, fixed_entry, round_combo, lin_check, weight_spin, target_combo]: + w.bind("", lambda e, f=apply_all_changes: f()) + + # when columns list changes (someone added/renamed) we need to refresh target lists. + # We'll rely on refresh_column_list to rebuild everything - keep simple. 
# ---------- simple callbacks to update model ----------
# NOTE: every function in this section is a method of SyntheticDataGUI and
# receives the application instance as ``self``.

def _on_name_change(self, cid, new_name):
    """Rename the column with id *cid* and rebuild the column rows.

    A blank or whitespace-only *new_name* keeps the current name.  The row
    list is rebuilt so the target drop-downs of the other rows show the new
    name.  An unknown *cid* is ignored.
    """
    column = self.find_column(cid)
    if column is None:
        return
    column.name = new_name.strip() or column.name
    # refresh to update target name lists
    self.refresh_column_list()


def _on_type_change(self, cid, new_type):
    """Store *new_type* on the column with id *cid* and rebuild the rows.

    An unknown *cid* is ignored.
    """
    column = self.find_column(cid)
    if column is None:
        return
    column.type = new_type
    self.refresh_column_list()


def _on_round_change(self, cid, dec):
    """Store *dec* as the number of decimals of the column with id *cid*."""
    column = self.find_column(cid)
    if column is not None:
        column.decimals = dec


def _on_row_select(self, cid):
    """Remember *cid* as the currently selected column id."""
    # visual selection is handled by the row's own click handler
    self.selected_col_id = cid


# ---------- Generation logic ----------
def generate_rows(self, nrows):
    """Generate *nrows* synthetic rows from ``self.columns``.

    Steps:
      1. Make column names unique.  A repeated name gets a ``_<n>`` suffix
         (``A, A, A`` -> ``A, A_2, A_3``); the suffix skips any name that is
         already used by another column, so the result is always unique.
         The new names are written back to the column objects.
      2. Draw one base value per column per row: uniform in ``[min, max]``
         for "Random" columns, the fixed value otherwise.
      3. For every column with linearity enabled, map the target column's
         *base* value into this column's range and blend it with the
         column's own base value using the weight (clamped to ``[0, 1]``).
      4. Round each value to the column's ``decimals`` (0 decimals yields
         an ``int``).

    Args:
        nrows: number of rows to produce; values below zero yield no rows.

    Returns:
        A list of dicts mapping column name to value, one dict per row.

    Raises:
        RuntimeError: if no columns are defined.
    """
    if not self.columns:
        raise RuntimeError("No columns defined")
    nrows = max(0, int(nrows))

    # 1) Enforce unique names.  Candidates are checked against both the names
    #    already assigned and every name present at the start, so a suffix
    #    never collides with a later column that legitimately owns that name.
    original_names = {c.name for c in self.columns}
    taken = set()
    for c in self.columns:
        if c.name in taken:
            suffix = 2
            candidate = f"{c.name}_{suffix}"
            while candidate in taken or candidate in original_names:
                suffix += 1
                candidate = f"{c.name}_{suffix}"
            c.name = candidate
        taken.add(c.name)

    # 2) Base values.  Columns are processed in order and rows within a
    #    column in order, so the sequence of random draws is stable for a
    #    given seed.
    base_values = {}
    for c in self.columns:
        if c.type == "Random":
            low = float(c.min)
            high = float(c.max)
            if low == high:
                base_values[c.id] = [low] * nrows
            else:
                span = high - low
                base_values[c.id] = [
                    random.random() * span + low for _ in range(nrows)
                ]
        else:
            base_values[c.id] = [float(c.fixed)] * nrows

    # 3) Linearity: single pass, always reading the target's *base* values
    #    (not values already adjusted by the target's own linearity).
    final_values = {cid: list(vals) for cid, vals in base_values.items()}
    for c in self.columns:
        lin = c.linearity
        if not (lin["enabled"] and lin["target_id"]):
            continue
        target = self.find_column(lin["target_id"])
        if target is None:
            continue
        weight = max(0.0, min(1.0, float(lin["weight"])))
        smin = c.range_min()
        s_range = c.range_max() - smin
        tmin = target.range_min()
        t_range = target.range_max() - tmin
        if t_range == 0:
            # target is constant: pull towards the midpoint of our own range
            midpoint = smin + s_range * 0.5 if s_range != 0 else smin
            mapped = [midpoint] * nrows
        else:
            mapped = [
                smin + ((tval - tmin) / t_range) * s_range
                for tval in base_values[target.id]
            ]
        final_values[c.id] = [
            (1.0 - weight) * own + weight * m
            for own, m in zip(base_values[c.id], mapped)
        ]

    # 4) Rounding and row assembly.
    specs = [(c.name, final_values[c.id], int(c.decimals)) for c in self.columns]
    rows = []
    for i in range(nrows):
        rows.append({
            name: (int(round(vals[i])) if dec == 0 else round(vals[i], dec))
            for name, vals, dec in specs
        })
    return rows


def generate_and_preview(self):
    """Generate up to 10 rows and show them in the preview table.

    The requested row count is read from ``self.rows_var``; an unreadable
    value is reported in an error dialog instead of escaping as an unhandled
    exception.  Any error raised while generating or displaying the data is
    reported the same way, and the status label is updated in both cases.
    """
    try:
        nrows = int(self.rows_var.get())
    except (tk.TclError, ValueError):
        messagebox.showerror("Error generating", "Number of rows must be a whole number.")
        self.preview_status.config(text="Error generating preview data")
        return

    # never preview more than 10 rows, and never a negative amount
    n_preview = max(0, min(10, nrows))

    try:
        data = self.generate_rows(n_preview)
        self.update_preview_table(data)
        self.preview_status.config(
            text=f"Preview showing first {n_preview} rows. Total rows to generate: {nrows}"
        )
    except Exception as e:
        # UI boundary: surface whatever went wrong to the user
        messagebox.showerror("Error generating", str(e))
        self.preview_status.config(text="Error generating preview data")


def update_preview_table(self, data):
    """Update the preview table with generated data.

    Args:
        data: list of row dicts keyed by column name, as returned by
            ``generate_rows``.
    """
    tree = self.preview_tree

    # Clear existing data
    tree.delete(*tree.get_children())

    # Blank out the previous columns before they are replaced
    for old in tree["columns"]:
        tree.heading(old, text="")
        tree.column(old, width=0)

    if not self.columns:
        return

    columns = [c.name for c in self.columns]
    tree["columns"] = columns

    # Configure column headers
    for col_name in columns:
        tree.heading(col_name, text=col_name)
        tree.column(col_name, width=100, minwidth=80, anchor='center')

    # Alternate row colors for better readability
    tree.tag_configure('evenrow', background='#f0f0f0')
    tree.tag_configure('oddrow', background='white')

    for i, row in enumerate(data):
        tag = 'evenrow' if i % 2 == 0 else 'oddrow'
        tree.insert("", "end", values=[row[col_name] for col_name in columns], tags=(tag,))


# ---------- Export ----------
def export_csv_dialog(self):
    """Ask for a file name, generate the requested rows and write them as CSV.

    A row count that cannot be read, or that is not positive, produces the
    "Rows required" notice.  Cancelling the file dialog does nothing.  Errors
    during generation or writing are shown in an error dialog.
    """
    try:
        nrows = int(self.rows_var.get())
    except (tk.TclError, ValueError):
        nrows = 0  # treated like any other invalid count below
    if nrows <= 0:
        messagebox.showinfo("Rows required", "Please specify a number of rows > 0.")
        return

    fname = filedialog.asksaveasfilename(
        title="Save CSV",
        defaultextension=".csv",
        filetypes=[("CSV files", "*.csv"), ("All files", "*.*")],
    )
    if not fname:
        return

    try:
        data = self.generate_rows(nrows)
        with open(fname, "w", newline="", encoding="utf-8") as f:
            # field names are read after generation because generate_rows
            # may rename duplicate columns
            writer = csv.DictWriter(f, fieldnames=[c.name for c in self.columns])
            writer.writeheader()
            writer.writerows(data)
        messagebox.showinfo("Exported", f"Wrote {nrows} rows to:\n{fname}")
    except Exception as e:
        # UI boundary: surface whatever went wrong to the user
        messagebox.showerror("Error", str(e))
---------- Run the app ---------- +if __name__ == "__main__": + app = SyntheticDataGUI() + app.mainloop() \ No newline at end of file diff --git a/Scripts/Synthetic_Data_Generator/SDG_3.py b/Scripts/Synthetic_Data_Generator/SDG_3.py new file mode 100644 index 0000000..9226e8e --- /dev/null +++ b/Scripts/Synthetic_Data_Generator/SDG_3.py @@ -0,0 +1,691 @@ +#!/usr/bin/env python3 +import tkinter as tk +from tkinter import ttk, messagebox, filedialog +import uuid +import random +import csv +import math +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np + +# ---------- Helper functions ---------- +def new_col_id(): + return str(uuid.uuid4()) + +def clamp(v, a, b): + return max(a, min(b, v)) + +def format_decimals(dec): + return f"0.{''.join(['0']*dec)}" if dec>0 else "0" + +# ---------- Column model ---------- +class Column: + def __init__(self, name="col", col_id=None): + self.id = col_id or new_col_id() + self.name = name + self.type = "Random" # or "Fixed" + self.min = 0.0 + self.max = 10.0 + self.fixed = 0.0 + self.decimals = 0 + self.linearity = { + "enabled": False, + "target_id": None, + "weight": 0.0 + } + + def range_min(self): + if self.type == "Random": + return float(self.min) + else: + return float(self.fixed) + + def range_max(self): + if self.type == "Random": + return float(self.max) + else: + return float(self.fixed) + +# ---------- Main App ---------- +class SyntheticDataGUI(tk.Tk): + def __init__(self): + super().__init__() + self.title("Synthetic Data Generator v1.0") + self.geometry("920x800") + self.minsize(800, 600) + + self.columns = [] + self.generated_data = None # <--- store last generated data + self.add_column("A") + self.add_column("B") + + self._build_ui() + + # ---------- UI building ---------- + def _build_ui(self): + # Create main paned window for resizable sections + main_pane = ttk.PanedWindow(self, orient=tk.VERTICAL) + main_pane.pack(fill=tk.BOTH, expand=True, padx=8, pady=6) + + # Top frame: controls and 
column definitions + top_frame = ttk.Frame(main_pane) + main_pane.add(top_frame, weight=1) + + # top frame: controls + top = ttk.Frame(top_frame) + top.pack(side="top", fill="x", padx=8, pady=6) + + add_btn = ttk.Button(top, text="Add column", command=self.ui_add_column) + add_btn.pack(side="left", padx=(0,6)) + + remove_btn = ttk.Button(top, text="Remove selected", command=self.ui_remove_selected) + remove_btn.pack(side="left", padx=(0,6)) + + help_btn = ttk.Button(top, text="Help / Guide", command=self.show_help) + help_btn.pack(side="right") + + self.rows_var = tk.IntVar(value=100) + rows_spin = ttk.Spinbox(top, from_=1, to=1000000, textvariable=self.rows_var, width=8) + rows_spin.pack(side="left", padx=(0,6)) + + gen_btn = ttk.Button(top, text="Generate & Preview", command=self.generate_and_preview) + gen_btn.pack(side="left", padx=(12,6)) + + export_btn = ttk.Button(top, text="Export CSV", command=self.export_csv_dialog) + export_btn.pack(side="left", padx=(0,6)) + + vis_btn = ttk.Button(top, text="Visualize Data", command=self.visualize_data) + vis_btn.pack(side="left", padx=(12,6)) + + ttk.Label(top, text=" ").pack(side="left", expand=True) # spacer + + # main area: headers + scrollable frame with list of columns + # 1) Fixed headers directly under the buttons + hdr = ttk.Frame(top_frame) + hdr.pack(fill="x", padx=8, pady=(0,6)) + header_font = ('Arial', 10, 'bold italic') + ttk.Label(hdr, text="Name", width=20, font=header_font).grid( + row=0, column=0, sticky="w", padx=(10, 0) + ) + ttk.Label(hdr, text="Type / Params", width=46, font=header_font).grid( + row=0, column=1, sticky="w", padx=(20, 0) + ) + ttk.Label(hdr, text="Rounding", width=10, font=header_font).grid( + row=0, column=2, sticky="w", padx=(0, 0) + ) + ttk.Label(hdr, text="Advanced", width=16, font=header_font).grid( + row=0, column=3, sticky="w", padx=(10, 0) + ) + + # 2) Scrollable container for rows (separate from headers) + scroll_container = ttk.Frame(top_frame) + 
scroll_container.pack(fill="both", expand=True, padx=8, pady=6) + + # canvas + scrollbar + self.canvas = tk.Canvas(scroll_container) + self.canvas.pack(side="left", fill="both", expand=True) + scrollbar = ttk.Scrollbar(scroll_container, orient="vertical", command=self.canvas.yview) + scrollbar.pack(side="right", fill="y") + self.canvas.configure(yscrollcommand=scrollbar.set) + + # adjust scrollregion when canvas resized + self.canvas.bind('', lambda e: self.canvas.configure(scrollregion=self.canvas.bbox("all"))) + + self.inner_frame = ttk.Frame(self.canvas) + self.canvas.create_window((0,0), window=self.inner_frame, anchor='nw') + + # list area + self.row_frames = {} + self.selected_col_id = None + + self.refresh_column_list() + + # Bottom frame: Preview table + bottom_frame = ttk.Frame(main_pane) + main_pane.add(bottom_frame, weight=1) + + # Preview section + preview_label = ttk.Label(bottom_frame, text="Preview (First 10 rows):", font=('Arial', 10, 'bold italic')) + preview_label.pack(anchor='w', padx=8, pady=(8, 4)) + + # Create frame for table and scrollbar + table_container = ttk.Frame(bottom_frame) + table_container.pack(fill='both', expand=True, padx=8, pady=(0, 8)) + + # Create treeview for table display + self.preview_tree = ttk.Treeview(table_container, show='headings', height=10) + vsb = ttk.Scrollbar(table_container, orient="vertical", command=self.preview_tree.yview) + hsb = ttk.Scrollbar(table_container, orient="horizontal", command=self.preview_tree.xview) + self.preview_tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set) + + # Pack the treeview and scrollbars + self.preview_tree.grid(row=0, column=0, sticky='nsew') + vsb.grid(row=0, column=1, sticky='ns') + hsb.grid(row=1, column=0, sticky='ew') + + table_container.grid_rowconfigure(0, weight=1) + table_container.grid_columnconfigure(0, weight=1) + + # Bottom Labels + self.preview_status = ttk.Label(bottom_frame, text="No data generated yet. 
Click 'Generate & Preview' to see sample data.") + self.preview_status.pack(anchor='w', padx=8, pady=(0, 8)) + + + # ---------- Column management ---------- + def add_column(self, name="col"): + c = Column(name=name) + self.columns.append(c) + return c + + def remove_column_by_id(self, cid): + self.columns = [c for c in self.columns if c.id != cid] + + def find_column(self, cid): + for c in self.columns: + if c.id == cid: + return c + return None + + # ---------- UI callbacks ---------- + def ui_add_column(self): + n = 1 + base = "col" + existing = {c.name for c in self.columns} + while f"{base}{n}" in existing: + n += 1 + c = self.add_column(f"{base}{n}") + self.refresh_column_list() + # select new + self.selected_col_id = c.id + + def ui_remove_selected(self): + if not self.selected_col_id: + messagebox.showinfo("Remove column", "Select a column row by clicking its name first.") + return + self.remove_column_by_id(self.selected_col_id) + self.selected_col_id = None + self.refresh_column_list() + + def show_help(self): + """Show Help/Guide window""" + help_win = tk.Toplevel(self) + help_win.title("Help / Guide") + help_win.geometry("500x400") + + # Instructions text + text = ( + "Welcome to the Synthetic Data Generator!\n\n" + "➤ Use 'Add column' to create new data columns.\n" + "➤ Choose 'Random' or 'Fixed' values for each column.\n" + "➤ Adjust ranges, rounding, and advanced settings as needed.\n" + "➤ 'Generate & Preview' shows sample data (first 10 rows).\n" + "➤ 'Export CSV' saves the generated dataset to a CSV file.\n\n" + "Advanced:\n" + " - Use the 'Advanced' button per column to enable linearity, basically\n" + " making one column follow another with a weighted influence.\n\n" + ) + label = tk.Label( + help_win, + text=text, + justify="left", + anchor="nw", + font=("Arial", 10), + wraplength=460 + ) + label.pack(fill="both", expand=True, padx=12, pady=12) + + cred = tk.Label( + help_win, + text=( + "This will be one of my scripts/executables.\n" + "Feel 
def _create_column_row(self, col: "Column"):
    """Build the editor row for one column and wire its widgets to the model.

    The row is packed into ``self.inner_frame`` and registered in
    ``self.row_frames`` under ``col.id``.  It contains the name entry, the
    Random/Fixed type selector with its min/max/fixed entries, the decimals
    selector and a collapsible "Advanced" area holding the linearity
    controls (enable flag, target column, weight).

    Args:
        col: The ``Column`` model object edited by this row.  The callbacks
            created here write straight into ``col``.

    NOTE(review): in the reviewed source every Tk event pattern was empty
    (``""`` / ``"<>"``), which Tk rejects when binding.  The patterns below
    (``<FocusOut>``, ``<Return>``, ``<Button-1>``,
    ``<<ComboboxSelected>>``) are reconstructed from the handler names and
    the surrounding comments -- confirm against the original file.
    """
    row = ttk.Frame(self.inner_frame, relief="ridge", padding=6)
    row.pack(fill="x", pady=4)

    # ---- main line: name | type + range | decimals | advanced toggle ----
    name_var = tk.StringVar(value=col.name)
    name_entry = ttk.Entry(row, textvariable=name_var, width=24)
    name_entry.grid(row=0, column=0, sticky="w")

    params = ttk.Frame(row)
    params.grid(row=0, column=1, sticky="w", padx=(8, 8))

    type_var = tk.StringVar(value=col.type)
    type_combo = ttk.Combobox(params, values=["Random", "Fixed"], width=8,
                              state="readonly", textvariable=type_var)
    type_combo.grid(row=0, column=0, padx=(0, 6))

    min_var = tk.StringVar(value=str(col.min))
    max_var = tk.StringVar(value=str(col.max))
    fixed_var = tk.StringVar(value=str(col.fixed))

    min_entry = ttk.Entry(params, textvariable=min_var, width=10)
    min_entry.grid(row=0, column=1, padx=(0, 4))
    ttk.Label(params, text="to").grid(row=0, column=2)
    max_entry = ttk.Entry(params, textvariable=max_var, width=10)
    max_entry.grid(row=0, column=3, padx=(4, 8))
    # The fixed-value entry is always gridded; it is enabled/disabled by type.
    fixed_entry = ttk.Entry(params, textvariable=fixed_var, width=12)
    fixed_entry.grid(row=0, column=4, padx=(4, 8))

    round_var = tk.IntVar(value=col.decimals)
    round_combo = ttk.Combobox(row, values=[0, 1, 2, 3, 4, 5, 6], width=4,
                               state="readonly", textvariable=round_var)
    round_combo.grid(row=0, column=2)

    adv_btn = ttk.Button(row, text="Advanced ▾", width=12)
    adv_btn.grid(row=0, column=3, padx=(8, 0))

    # ---- advanced area (hidden until toggled) ----
    adv_frame = ttk.Frame(row)
    adv_frame.grid(row=1, column=0, columnspan=4, pady=(8, 0), sticky="w")
    adv_frame.grid_remove()

    lin_enabled_var = tk.BooleanVar(value=col.linearity["enabled"])
    lin_check = ttk.Checkbutton(adv_frame,
                                text="Enable Linearity (follow another column)",
                                variable=lin_enabled_var)
    lin_check.grid(row=0, column=0, sticky="w")

    ttk.Label(adv_frame, text="Target:").grid(row=0, column=1, sticky="e", padx=(12, 2))
    # (name, id) of every other column; the combobox index maps back to the id.
    target_values = [(c.name, c.id) for c in self.columns if c.id != col.id]
    if target_values:
        target_combo = ttk.Combobox(adv_frame, values=[name for name, _ in target_values],
                                    state="readonly", width=18)
        if col.linearity["target_id"]:
            found = next((i for i, (_, cid) in enumerate(target_values)
                          if cid == col.linearity["target_id"]), None)
            if found is not None:
                target_combo.current(found)
    else:
        target_combo = ttk.Combobox(adv_frame, values=["(no other columns)"],
                                    state="disabled", width=18)
    target_combo.grid(row=0, column=2, padx=(4, 8))

    ttk.Label(adv_frame, text="Weight:").grid(row=0, column=3, padx=(8, 2))
    weight_var = tk.DoubleVar(value=col.linearity["weight"])
    weight_spin = ttk.Spinbox(adv_frame, from_=0.0, to=1.0, increment=0.01,
                              textvariable=weight_var, width=6)
    weight_spin.grid(row=0, column=4)
    weight_scale = ttk.Scale(adv_frame, from_=0.0, to=1.0, orient="horizontal",
                             variable=weight_var, length=120)
    weight_scale.grid(row=0, column=5, padx=(8, 0))

    # Recompute the scroll region now that the row exists.
    self.inner_frame.update_idletasks()
    self.canvas.configure(scrollregion=self.canvas.bbox("all"))

    # ---- handlers ----
    def toggle_adv():
        """Show or hide the advanced area and flip the button arrow."""
        if adv_frame.winfo_ismapped():
            adv_frame.grid_remove()
            adv_btn.config(text="Advanced ▾")
        else:
            adv_frame.grid()
            adv_btn.config(text="Advanced ▴")

    def update_type_widgets():
        """Enable only the entries that are relevant for the selected type."""
        is_random = type_var.get() == "Random"
        min_entry.configure(state="normal" if is_random else "disabled")
        max_entry.configure(state="normal" if is_random else "disabled")
        fixed_entry.configure(state="disabled" if is_random else "normal")

    def on_type_selected(_event=None):
        col.type = type_var.get()
        update_type_widgets()

    def on_minmax_focus_out(_event=None):
        """Store min/max and swap them (model and entries) if reversed."""
        try:
            col.min = float(min_var.get())
            col.max = float(max_var.get())
        except ValueError:
            return  # non-numeric text: keep the values parsed so far
        if col.min > col.max:
            col.min, col.max = col.max, col.min
            min_var.set(str(col.min))
            max_var.set(str(col.max))

    def apply_linearity(*_ignored):
        """Copy the advanced-panel state into ``col.linearity``."""
        col.linearity["enabled"] = bool(lin_enabled_var.get())
        try:
            col.linearity["weight"] = float(weight_var.get())
        except (tk.TclError, ValueError):
            pass  # spinbox text is not a number (e.g. mid-edit): keep old weight
        # Resolve the target by combobox index so renamed/duplicate names
        # cannot select the wrong column.
        index = target_combo.current() if target_values else -1
        col.linearity["target_id"] = target_values[index][1] if index >= 0 else None

    def apply_all_changes(_event=None):
        """Push every widget value of this row into the model."""
        col.name = name_var.get().strip() or col.name
        col.type = type_var.get()
        for attr, var in (("min", min_var), ("max", max_var), ("fixed", fixed_var)):
            try:
                setattr(col, attr, float(var.get()))
            except ValueError:
                pass  # leave the previous numeric value in place
        try:
            col.decimals = int(round_var.get())
        except (tk.TclError, ValueError):
            pass
        apply_linearity()

    def on_click_row(_event=None):
        """Mark this column as selected and highlight its frame."""
        self.selected_col_id = col.id
        for cid, frame in self.row_frames.items():
            if not frame.winfo_exists():
                continue  # entry for a frame that has since been destroyed
            frame.config(style="Selected.TFrame" if cid == col.id else "TFrame")

    # ---- wiring ----
    update_type_widgets()
    adv_btn.config(command=toggle_adv)

    # Return rebuilds the whole list (refreshing the other rows' target
    # names).  Focus-out only updates the model via apply_all_changes, so the
    # widget that is receiving focus is not destroyed under the user.
    name_entry.bind("<Return>",
                    lambda _e, cid=col.id, var=name_var: self._on_name_change(cid, var.get()))
    name_entry.bind("<Button-1>", lambda _e, cid=col.id: self._on_row_select(cid), add="+")

    # Bound once: the type switch is handled in place, without a rebuild.
    type_combo.bind("<<ComboboxSelected>>", on_type_selected)
    round_combo.bind("<<ComboboxSelected>>",
                     lambda _e, cid=col.id, var=round_var: self._on_round_change(cid, int(var.get())))

    min_entry.bind("<FocusOut>", on_minmax_focus_out)
    max_entry.bind("<FocusOut>", on_minmax_focus_out)

    # Advanced controls update the model as soon as they change, not only
    # when they lose keyboard focus.
    lin_check.configure(command=apply_linearity)
    weight_spin.configure(command=apply_linearity)
    weight_scale.configure(command=apply_linearity)
    target_combo.bind("<<ComboboxSelected>>", apply_linearity)

    row.bind("<Button-1>", on_click_row)
    self.row_frames[col.id] = row

    # Catch-all: leaving any editable widget syncs the whole row.  add="+"
    # keeps the handlers bound above instead of replacing them.
    for widget in (name_entry, min_entry, max_entry, fixed_entry,
                   round_combo, lin_check, weight_spin, target_combo):
        widget.bind("<FocusOut>", apply_all_changes, add="+")
def generate_rows(self, nrows):
    """Generate ``nrows`` synthetic rows from the current column definitions.

    Steps:
      1. Column names are made unique, because they become the keys of
         every row dict.  A renamed column gets a ``_<n>`` suffix that
         collides neither with an already accepted name nor with the
         current name of any other column; the new name is written back to
         the column object.
      2. A base value is produced per column and row: ``random.random()``
         scaled into the min..max span for ``"Random"`` columns, the fixed
         value otherwise.
      3. For columns with linearity enabled and a resolvable target, the
         target's *base* value is rescaled into this column's range and
         blended as ``(1 - w) * base + w * mapped`` with ``w`` clamped to
         ``[0, 1]``.  Adjusted values of the target are never used, so
         chained linearity does not propagate.
      4. Values are rounded to each column's ``decimals``; ``0`` decimals
         yields ``int`` values.

    Args:
        nrows: Number of rows to produce.

    Returns:
        A list with one dict per row, mapping column name to value.

    Raises:
        RuntimeError: If no columns are defined.
    """
    columns = self.columns
    if not columns:
        raise RuntimeError("No columns defined")

    # --- 1. unique names -------------------------------------------------
    reserved = {c.name for c in columns}  # names currently in use by any column
    taken = set()                         # names already accepted in this pass
    for c in columns:
        if c.name not in taken:
            taken.add(c.name)
            continue
        stem = c.name
        suffix = 2
        while f"{stem}_{suffix}" in taken or f"{stem}_{suffix}" in reserved:
            suffix += 1
        c.name = f"{stem}_{suffix}"
        taken.add(c.name)

    # --- 2. base values --------------------------------------------------
    base_values = {}
    for c in columns:
        if c.type == "Random":
            low = float(c.min)
            high = float(c.max)
            if low == high:
                series = [low for _ in range(nrows)]
            else:
                series = [random.random() * (high - low) + low for _ in range(nrows)]
        else:
            fixed = float(c.fixed)
            series = [fixed for _ in range(nrows)]
        base_values[c.id] = series

    # --- 3. linearity blending -------------------------------------------
    final_values = {c.id: list(base_values[c.id]) for c in columns}
    for c in columns:
        lin = c.linearity
        if not (lin["enabled"] and lin["target_id"]):
            continue
        target = self.find_column(lin["target_id"])
        if not target:
            continue  # target column no longer exists

        w = max(0.0, min(1.0, float(lin["weight"])))
        smin = c.range_min()
        s_range = c.range_max() - smin
        tmin = target.range_min()
        t_range = target.range_max() - tmin

        own = base_values[c.id]
        theirs = base_values[target.id]
        if t_range == 0:
            # Constant target: there is no position to map, use the
            # midpoint of this column's range (or its single value).
            mapped = smin + (s_range * 0.5) if s_range != 0 else smin
            final_values[c.id] = [(1.0 - w) * base + w * mapped for base in own]
        else:
            final_values[c.id] = [
                (1.0 - w) * base + w * (smin + ((tval - tmin) / t_range) * s_range)
                for base, tval in zip(own, theirs)
            ]

    # --- 4. rounding and row assembly ------------------------------------
    rounded = []
    for c in columns:
        dec = int(c.decimals)
        values = final_values[c.id]
        if dec == 0:
            rounded.append([int(round(v)) for v in values])
        else:
            rounded.append([round(v, dec) for v in values])

    names = [c.name for c in columns]
    return [dict(zip(names, row_values)) for row_values in zip(*rounded)]
def update_preview_table(self, data):
    """Rebuild the preview tree so that it displays ``data``.

    Existing rows are removed and the previously configured columns are
    blanked and collapsed.  If no columns are defined nothing else happens.
    Otherwise the tree gets one centred column per entry in
    ``self.columns`` and one item per element of ``data``, tagged
    alternately ``evenrow`` / ``oddrow`` for striping.

    Args:
        data: Iterable of row dicts; each must contain a value for every
            current column name.
    """
    tree = self.preview_tree

    # Remove every item currently shown.
    for item_id in tree.get_children():
        tree.delete(item_id)

    # Neutralise the columns of the previous preview.
    for stale in tree["columns"]:
        tree.heading(stale, text="")
        tree.column(stale, width=0)

    if not self.columns:
        return

    headers = [c.name for c in self.columns]
    tree["columns"] = headers
    for header in headers:
        tree.heading(header, text=header)
        tree.column(header, width=100, minwidth=80, anchor='center')

    # Index parity picks the stripe tag for each inserted row.
    stripe_tags = ('evenrow', 'oddrow')
    for position, record in enumerate(data):
        cells = [record[header] for header in headers]
        tree.insert("", "end", values=cells, tags=(stripe_tags[position % 2],))

    tree.tag_configure('evenrow', background='#f0f0f0')
    tree.tag_configure('oddrow', background='white')
def export_csv_dialog(self):
    """Ask for a destination file and write newly generated rows as CSV.

    The row count is read from ``self.rows_var``.  Rows are produced at
    export time by ``self.generate_rows`` and written with a header taken
    from the current column names.  Outcomes are reported through message
    boxes; nothing is returned.
    """
    # presumably rows_var is a Tk numeric variable tied to an entry, so
    # reading it can fail when the field holds non-numeric text -- treat
    # that like a missing row count instead of raising inside the callback.
    try:
        nrows = int(self.rows_var.get())
    except (tk.TclError, ValueError):
        nrows = 0
    if nrows <= 0:
        messagebox.showinfo("Rows required", "Please specify a number of rows > 0.")
        return

    fname = filedialog.asksaveasfilename(
        title="Save CSV",
        defaultextension=".csv",
        filetypes=[("CSV files", "*.csv"), ("All files", "*.*")],
    )
    if not fname:
        return  # dialog cancelled

    try:
        # Generate before opening the file so a generation error does not
        # leave a truncated file behind.
        data = self.generate_rows(nrows)
        # Read the names after generation: generate_rows may rename
        # duplicate columns, and the row dicts use the final names.
        fieldnames = [c.name for c in self.columns]
        with open(fname, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(data)
        messagebox.showinfo("Exported", f"Wrote {nrows} rows to:\n{fname}")
    except Exception as e:
        # UI boundary: surface any failure to the user instead of crashing.
        messagebox.showerror("Error", str(e))