import json
import math
import os
import re

from models.TrainingProject import TrainingProject
from models.TrainingProjectDetails import TrainingProjectDetails
from models.Images import Image
from models.Annotation import Annotation

# Prefix Label Studio puts in front of locally served files.
_LABEL_STUDIO_LOCAL_PREFIX = '/data/local-files/?d='

# Known path prefixes stripped from image paths; the first match wins, so the
# order of this tuple matters.
_PATH_PREFIXES_TO_REMOVE = (
    '//192.168.1.19/home/kitraining/To_Annotate/',
    '192.168.1.19/home/kitraining/To_Annotate/',
    '/home/kitraining/home/kitraining/',
    'home/kitraining/To_Annotate/',
    '/home/kitraining/To_Annotate/',
)


def _value_or_default(details, key, default):
    """Return ``details[key]``, or *default* when the key is missing or None."""
    value = details.get(key)
    return default if value is None else value


def _normalize_data_dir(data_dir):
    """Return *data_dir* with backslash separators and no trailing separator.

    A value that starts with an IPv4-looking host followed by a separator
    (e.g. ``192.168.1.19\\share``) is treated as a UNC path that lost its
    leading ``\\\\`` and gets the prefix restored.
    """
    if data_dir and not data_dir.startswith(('\\\\', '/')):
        if re.match(r'^\d+\.\d+\.\d+\.\d+[/\\]', data_dir):
            data_dir = '\\\\' + data_dir
    return data_dir.rstrip('/\\').replace('/', '\\')


def _resolve_image_path(raw_path, normalized_data_dir):
    """Turn a stored image path into a backslash-separated absolute path.

    Args:
        raw_path: Path as stored on the image record; ``None`` is treated
            like an empty string instead of raising.
        normalized_data_dir: Result of ``_normalize_data_dir``.

    Returns:
        The path with ``%20`` decoded, the Label Studio prefix and known
        directory prefixes removed, and combined with *normalized_data_dir*
        unless it already looks absolute (UNC or drive letter).
    """
    file_name = (raw_path or '').replace('%20', ' ')
    if file_name.startswith(_LABEL_STUDIO_LOCAL_PREFIX):
        file_name = file_name.replace(_LABEL_STUDIO_LOCAL_PREFIX, '')

    for prefix in _PATH_PREFIXES_TO_REMOVE:
        if file_name.startswith(prefix):
            file_name = file_name[len(prefix):]
            break

    backslashed = file_name.replace('/', '\\')
    # The absolute check is made before separator normalization, so only a
    # literal leading "\\\\" or a drive letter counts as absolute.
    already_absolute = file_name.startswith('\\\\') or (
        len(file_name) > 1 and file_name[1] == ':'
    )
    if already_absolute:
        return backslashed
    if normalized_data_dir.startswith('\\\\'):
        # os.path.join may mangle the leading "\\\\" of a UNC path, so
        # concatenate manually.
        return normalized_data_dir + '\\' + backslashed
    return os.path.join(normalized_data_dir, backslashed)


def _register_category(name, category_map, coco_categories):
    """Add *name* as a new COCO category unless it is empty or already known.

    Category ids are assigned sequentially starting at 0.
    """
    if name and name not in category_map:
        new_id = len(category_map)
        category_map[name] = new_id
        coco_categories.append({'id': new_id, 'name': name, 'supercategory': ''})


def _seeded_random(seed):
    """Return a deterministic pseudo-random float in [0, 1) derived from *seed*."""
    x = math.sin(seed) * 10000
    return x - math.floor(x)


def _seeded_shuffle(array, seed):
    """Shuffle *array* in place (Fisher-Yates) using ``_seeded_random``."""
    for i in range(len(array) - 1, 0, -1):
        j = int(_seeded_random(seed + i) * (i + 1))
        array[i], array[j] = array[j], array[i]


def _split_images(images, train_percent, valid_percent):
    """Split *images* into (train, valid, test) lists.

    Train and valid sizes are the truncated percentages of the total; the
    test split receives whatever remains.
    """
    total_images = len(images)
    train_count = int(total_images * train_percent / 100)
    valid_count = int(total_images * valid_percent / 100)
    valid_end = train_count + valid_count
    return images[:train_count], images[train_count:valid_end], images[valid_end:]


def generate_training_json(training_id):
    """Generate COCO JSON for training, validation, and test sets.

    Collects the images and annotations of every annotation project attached
    to the training details, maps source classes to target classes, shuffles
    the images deterministically, splits them, and writes one COCO JSON file
    per split. Afterwards it attempts to write an inference ``exp_infer.py``;
    a failure there is printed and does not raise.

    Args:
        training_id: The ``project_details_id`` of the TrainingProjectDetails
            record.

    Raises:
        Exception: If no TrainingProjectDetails exists for *training_id*.
    """
    training_project_details = TrainingProjectDetails.query.get(training_id)
    if not training_project_details:
        raise Exception(f'No TrainingProjectDetails found for project_details_id {training_id}')

    details_obj = training_project_details.to_dict()

    # Parent project, used only for the output folder name.
    training_project = TrainingProject.query.get(details_obj['project_id'])

    from services.settings_service import get_setting
    data_dir = get_setting('yolox_data_dir', '/home/kitraining/To_Annotate/')
    # Loop-invariant, so computed once instead of per image.
    normalized_data_dir = _normalize_data_dir(data_dir)

    # Split percentages; a stored None falls back to the default as well.
    # The test split is the remainder, so no test percentage is needed.
    train_percent = _value_or_default(details_obj, 'train_percent', 85)
    valid_percent = _value_or_default(details_obj, 'valid_percent', 10)

    coco_images = []
    coco_annotations = []
    coco_categories = []
    category_map = {}
    image_id = 0
    annotation_id = 0

    # Categories from the class_map dictionary {source: target}.
    class_map = details_obj.get('class_map') or {}
    for target_class in class_map.values():
        _register_category(target_class, category_map, coco_categories)

    # Label Studio project IDs whose images feed this training.
    annotation_projects = details_obj.get('annotation_projects') or []

    # Class mappings from the database, grouped by Label Studio project.
    from models.ClassMapping import ClassMapping
    all_mappings = ClassMapping.query.filter_by(project_details_id=training_id).all()

    mappings_by_project = {}
    for mapping in all_mappings:
        project_mappings = mappings_by_project.setdefault(mapping.label_studio_project_id, {})
        project_mappings[mapping.source_class] = mapping.target_class
        _register_category(mapping.target_class, category_map, coco_categories)

    for ls_project_id in annotation_projects:
        # Same mapping for every annotation of this project.
        project_class_map = mappings_by_project.get(ls_project_id, {})
        images = Image.query.filter_by(project_id=ls_project_id).all()

        for image in images:
            image_id += 1
            file_name = _resolve_image_path(image.image_path, normalized_data_dir)

            annotations = Annotation.query.filter_by(image_id=image.image_id).all()

            coco_images.append({
                'id': image_id,
                'file_name': file_name,
                'width': image.width or 0,
                'height': image.height or 0
            })

            for annotation in annotations:
                original_class = annotation.Label
                # Unmapped labels fall through under their original name.
                mapped_class = project_class_map.get(original_class, original_class)

                # Annotations whose class is not a known category are dropped.
                if not mapped_class or mapped_class not in category_map:
                    continue

                annotation_id += 1
                area = 0
                if annotation.width and annotation.height:
                    area = annotation.width * annotation.height

                coco_annotations.append({
                    'id': annotation_id,
                    'image_id': image_id,
                    'category_id': category_map[mapped_class],
                    'bbox': [annotation.x, annotation.y, annotation.width, annotation.height],
                    'area': area,
                    'iscrowd': 0
                })

    # Deterministic shuffle; seed comes from the details, defaulting to 42.
    split_seed = int(_value_or_default(details_obj, 'seed', 42))
    _seeded_shuffle(coco_images, split_seed)

    train_images, valid_images, test_images = _split_images(
        coco_images, train_percent, valid_percent
    )

    from models.training import Training

    output_base_path = get_setting('yolox_output_path', './backend')

    if training_project and training_project.title:
        project_name = training_project.title.replace(' ', '_')
    else:
        project_name = f'project_{details_obj["project_id"]}'

    # The training record supplies the folder name and the file id.
    training_record = Training.query.filter_by(project_details_id=training_id).first()
    if training_record:
        training_folder_name = f"{training_record.exp_name or training_record.training_name or 'training'}_{training_record.id}"
        # Use training_record.id for file names to match what generate_yolox_exp expects
        training_file_id = training_record.id
    else:
        training_folder_name = str(training_id)
        training_file_id = training_id
    training_folder_name = training_folder_name.replace(' ', '_')

    annotations_dir = os.path.join(output_base_path, project_name, training_folder_name, 'annotations')
    os.makedirs(annotations_dir, exist_ok=True)

    for split_name, split_images in (
        ('train', train_images),
        ('valid', valid_images),
        ('test', test_images),
    ):
        split_image_ids = {img['id'] for img in split_images}
        split_json = {
            'images': split_images,
            'annotations': [
                ann for ann in coco_annotations if ann['image_id'] in split_image_ids
            ],
            'categories': coco_categories
        }
        split_path = os.path.join(
            annotations_dir, f'coco_project_{training_file_id}_{split_name}.json'
        )
        with open(split_path, 'w', encoding='utf-8') as f:
            json.dump(split_json, f, indent=2)

    print(f'COCO JSON splits written to {annotations_dir} for trainingId {training_id}')

    # Also generate inference exp.py
    from services.generate_yolox_exp import generate_yolox_inference_exp
    project_folder = os.path.join(output_base_path, project_name, str(training_id))
    os.makedirs(project_folder, exist_ok=True)

    inference_exp_path = os.path.join(project_folder, 'exp_infer.py')
    try:
        exp_content = generate_yolox_inference_exp(training_id)
        # Python source is decoded as UTF-8 by default, so write it that way
        # rather than in the platform's locale encoding.
        with open(inference_exp_path, 'w', encoding='utf-8') as f:
            f.write(exp_content)
        print(f'Inference exp.py written to {inference_exp_path}')
    except Exception as err:
        # Best-effort step: the COCO files are already written.
        print(f'Failed to generate inference exp.py: {err}')