| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- import os
- import shutil
# Master configuration: for every source dataset, maps each original class ID
# to the class ID used in the merged dataset ({Original_ID: Master_ID}).
CONFIG = {
    # IDs "0" through "5" map to themselves.
    "suharjito_dataset": {str(class_id): str(class_id) for class_id in range(6)},
    "darren_dataset": {
        "0": "4",
        "1": "5",
        "2": "3",
    },
    "fy_dataset": {
        "0": "1",
        "1": "3",
        "2": "5",
    },
    "nazwa_dataset": {
        "0": "2",
        "1": "0",
        "2": "5",
        "3": "3",
        "4": "1",
        "5": "4",
    },
}

# Directory holding one sub-directory per source dataset.
BASE_DIR = "datasets"
# Directory that receives the merged images and labels.
OUTPUT_DIR = "unified_dataset"

# Create the images/ and labels/ output folders for both target splits up front.
for split in ('train', 'val'):
    for kind in ('images', 'labels'):
        os.makedirs(f"{OUTPUT_DIR}/{kind}/{split}", exist_ok=True)
def _remap_label_lines(label_path, mapping):
    """Return the lines of *label_path* with their leading class ID remapped."""
    remapped = []
    with open(label_path, 'r', encoding="utf-8") as f:
        for line in f:
            parts = line.split()
            # Blank lines have no class token; indexing parts[0] would raise.
            if not parts:
                continue
            # NOTE(review): lines whose class ID is not in the mapping are
            # dropped from the output - confirm this is the intended filtering.
            if parts[0] in mapping:
                parts[0] = mapping[parts[0]]
                remapped.append(" ".join(parts))
    return remapped


def _find_image(img_dir, stem, extensions):
    """Return the name of the first existing ``stem + ext`` in *img_dir*, or None."""
    for ext in extensions:
        candidate = stem + ext
        if os.path.isfile(os.path.join(img_dir, candidate)):
            return candidate
    return None


def process_and_merge(config=None, base_dir=None, output_dir=None):
    """Merge the configured datasets into a single train/val dataset.

    For each dataset, every ``.txt`` file found in
    ``<base_dir>/<dataset>/<split>/labels`` is read, the first
    whitespace-separated token of each line (the class ID) is translated
    through that dataset's mapping, and the result is written to
    ``<output_dir>/labels/<target_split>``. The image sharing the label file's
    stem is copied from ``<base_dir>/<dataset>/<split>/images`` to
    ``<output_dir>/images/<target_split>``. Output file names are prefixed
    with the dataset name so files from different datasets cannot collide.

    Source splits ``train``, ``valid`` and ``test`` are read; ``valid`` and
    ``test`` are both written to ``val``. Label files that end up with no
    remapped lines are skipped together with their image.

    Args:
        config: ``{dataset_name: {original_id: master_id}}`` with string IDs.
            Defaults to the module-level ``CONFIG``.
        base_dir: Directory containing the source datasets. Defaults to the
            module-level ``BASE_DIR``.
        output_dir: Directory receiving the merged dataset. Defaults to the
            module-level ``OUTPUT_DIR``.
    """
    # Resolve defaults at call time so a no-argument call keeps using the
    # module-level settings.
    if config is None:
        config = CONFIG
    if base_dir is None:
        base_dir = BASE_DIR
    if output_dir is None:
        output_dir = OUTPUT_DIR

    # Roboflow uses 'valid'; a dataset might ship 'valid' or 'test'.
    # Test images are added to validation for a more robust PoC.
    splits = {
        'train': 'train',
        'valid': 'val',
        'test': 'val',
    }
    # Upper-case variants matter on case-sensitive filesystems.
    img_exts = ('.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG')

    # Make sure the target folders exist even for a caller-supplied output_dir.
    for target_split in ('train', 'val'):
        os.makedirs(os.path.join(output_dir, "images", target_split), exist_ok=True)
        os.makedirs(os.path.join(output_dir, "labels", target_split), exist_ok=True)

    for ds_name, mapping in config.items():
        print(f"Processing {ds_name}...")

        for src_split, target_split in splits.items():
            img_path = os.path.join(base_dir, ds_name, src_split, "images")
            lbl_path = os.path.join(base_dir, ds_name, src_split, "labels")
            # Not every dataset provides every split.
            if not os.path.isdir(lbl_path):
                continue

            for label_file in sorted(os.listdir(lbl_path)):
                if not label_file.endswith(".txt"):
                    continue

                # 1. Re-index labels.
                new_lines = _remap_label_lines(
                    os.path.join(lbl_path, label_file), mapping
                )
                if not new_lines:
                    continue

                # 2. Save the new label with a dataset prefix to avoid
                #    filename collisions.
                new_name = f"{ds_name}_{label_file}"
                out_label = os.path.join(output_dir, "labels", target_split, new_name)
                with open(out_label, 'w', encoding="utf-8") as f:
                    f.write("\n".join(new_lines))

                # 3. Copy the image. Only the final ".txt" is stripped, so
                #    names containing ".txt" elsewhere keep their stem intact.
                stem = os.path.splitext(label_file)[0]
                img_name = _find_image(img_path, stem, img_exts)
                if img_name is not None:
                    shutil.copy(
                        os.path.join(img_path, img_name),
                        os.path.join(
                            output_dir, "images", target_split, f"{ds_name}_{img_name}"
                        ),
                    )
if __name__ == "__main__":
    # Run the merge only when this file is executed as a script, so importing
    # the module does not start copying files.
    process_and_merge()
    print("Successfully created unified_dataset!")
|