# Merge several YOLO-format datasets into one unified train/val dataset,
# remapping each source's class ids onto a shared master scheme.
import os
import shutil
|
|
|
|
|
+# Master Configuration: Maps {Original_ID: Master_ID}
|
|
|
|
|
+CONFIG = {
|
|
|
|
|
+ "suharjito_dataset": {"0": "0", "1": "1", "2": "2", "3": "3", "4": "4", "5": "5"},
|
|
|
|
|
+ "darren_dataset": {"0": "4", "1": "5", "2": "3"},
|
|
|
|
|
+ "fy_dataset": {"0": "1", "1": "3", "2": "5"},
|
|
|
|
|
+ "nazwa_dataset": {"0": "2", "1": "0", "2": "5", "3": "3", "4": "1", "5": "4"}
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+BASE_DIR = "datasets"
|
|
|
|
|
+OUTPUT_DIR = "unified_dataset"
|
|
|
|
|
+
|
|
|
|
|
+# Prepare Directories
|
|
|
|
|
+for split in ['train', 'val']:
|
|
|
|
|
+ os.makedirs(f"{OUTPUT_DIR}/images/{split}", exist_ok=True)
|
|
|
|
|
+ os.makedirs(f"{OUTPUT_DIR}/labels/{split}", exist_ok=True)
|
|
|
|
|
+
|
|
|
|
|
+def process_and_merge():
|
|
|
|
|
+ for ds_name, mapping in CONFIG.items():
|
|
|
|
|
+ print(f"Processing {ds_name}...")
|
|
|
|
|
+
|
|
|
|
|
+ # Roboflow uses 'valid', Suharjito might use 'valid' or 'test'
|
|
|
|
|
+ # Adjusting splits to unify them into train/val
|
|
|
|
|
+ splits = {
|
|
|
|
|
+ 'train': 'train',
|
|
|
|
|
+ 'valid': 'val',
|
|
|
|
|
+ 'test': 'val' # Adding test images to validation for a more robust PoC
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ for src_split, target_split in splits.items():
|
|
|
|
|
+ img_path = os.path.join(BASE_DIR, ds_name, src_split, "images")
|
|
|
|
|
+ lbl_path = os.path.join(BASE_DIR, ds_name, src_split, "labels")
|
|
|
|
|
+
|
|
|
|
|
+ if not os.path.exists(lbl_path): continue
|
|
|
|
|
+
|
|
|
|
|
+ for label_file in os.listdir(lbl_path):
|
|
|
|
|
+ if not label_file.endswith(".txt"): continue
|
|
|
|
|
+
|
|
|
|
|
+ # 1. Re-index Labels
|
|
|
|
|
+ new_lines = []
|
|
|
|
|
+ with open(os.path.join(lbl_path, label_file), 'r') as f:
|
|
|
|
|
+ for line in f:
|
|
|
|
|
+ parts = line.split()
|
|
|
|
|
+ if parts[0] in mapping:
|
|
|
|
|
+ parts[0] = mapping[parts[0]]
|
|
|
|
|
+ new_lines.append(" ".join(parts))
|
|
|
|
|
+
|
|
|
|
|
+ if new_lines:
|
|
|
|
|
+ # 2. Save new label with dataset prefix to avoid filename collisions
|
|
|
|
|
+ new_name = f"{ds_name}_{label_file}"
|
|
|
|
|
+ with open(os.path.join(OUTPUT_DIR, "labels", target_split, new_name), 'w') as f:
|
|
|
|
|
+ f.write("\n".join(new_lines))
|
|
|
|
|
+
|
|
|
|
|
+ # 3. Copy Image
|
|
|
|
|
+ img_exts = ['.jpg', '.jpeg', '.png', '.JPG']
|
|
|
|
|
+ for ext in img_exts:
|
|
|
|
|
+ img_name = label_file.replace(".txt", ext)
|
|
|
|
|
+ src_img = os.path.join(img_path, img_name)
|
|
|
|
|
+ if os.path.exists(src_img):
|
|
|
|
|
+ shutil.copy(src_img, os.path.join(OUTPUT_DIR, "images", target_split, f"{ds_name}_{img_name}"))
|
|
|
|
|
+ break
|
|
|
|
|
+
|
|
|
|
|
+process_and_merge()
|
|
|
|
|
+print("Successfully created unified_dataset!")
|