Files
Abschluss-Projekt/backend/services/generate_json_yolox.py
2025-12-08 12:26:34 +01:00

297 lines
13 KiB
Python
Executable File

import json
import os
import math
from models.TrainingProject import TrainingProject
from models.TrainingProjectDetails import TrainingProjectDetails
from models.Images import Image
from models.Annotation import Annotation
def generate_training_json(training_id):
"""Generate COCO JSON for training, validation, and test sets
Args:
training_id: Can be either a Training.id or TrainingProjectDetails.id
Function will automatically detect which one and find the correct details_id
"""
from models.training import Training
# First, try to get as a Training record
training_record = Training.query.get(training_id)
if training_record:
# It's a Training.id - use its project_details_id
details_id = training_record.project_details_id
print(f'[generate_training_json] Using training_id={training_id}, mapped to project_details_id={details_id}')
else:
# Try as TrainingProjectDetails.id directly
details_id = training_id
print(f'[generate_training_json] Using training_id={training_id} as project_details_id directly')
training_project_details = TrainingProjectDetails.query.get(details_id)
if not training_project_details:
raise Exception(f'No TrainingProjectDetails found for id {training_id} (details_id: {details_id})')
details_obj = training_project_details.to_dict()
# Get parent project for name
training_project = TrainingProject.query.get(details_obj['project_id'])
# Get the data directory setting for image paths
from services.settings_service import get_setting
data_dir = get_setting('yolox_data_dir', '/home/kitraining/To_Annotate/')
# Fix UNC path if it's missing the \\ prefix
# Check if it looks like a UNC path without proper prefix (e.g., "192.168.1.19\...")
if data_dir and not data_dir.startswith('\\\\') and not data_dir.startswith('/'):
# Check if it starts with an IP address pattern
import re
if re.match(r'^\d+\.\d+\.\d+\.\d+[/\\]', data_dir):
data_dir = '\\\\' + data_dir
# Ensure data_dir ends with separator
if not data_dir.endswith(os.sep) and not data_dir.endswith('/'):
data_dir += os.sep
# Get split percentages (default values if not set)
train_percent = details_obj.get('train_percent', 85)
valid_percent = details_obj.get('valid_percent', 10)
test_percent = details_obj.get('test_percent', 5)
coco_images = []
coco_annotations = []
coco_categories = []
category_map = {}
category_id = 0
image_id = 0
annotation_id = 0
# Build category list and mapping from class_map dictionary {source: target}
class_map = details_obj.get('class_map', {})
for source_class, target_class in class_map.items():
if target_class and target_class not in category_map:
category_map[target_class] = category_id
coco_categories.append({'id': category_id, 'name': target_class, 'supercategory': ''})
category_id += 1
# Get all annotation projects (Label Studio project IDs)
annotation_projects = details_obj.get('annotation_projects', [])
# Get class mappings from database grouped by Label Studio project
from models.ClassMapping import ClassMapping
all_mappings = ClassMapping.query.filter_by(project_details_id=training_id).all()
# Group mappings by Label Studio project ID
mappings_by_project = {}
for mapping in all_mappings:
ls_proj_id = mapping.label_studio_project_id
if ls_proj_id not in mappings_by_project:
mappings_by_project[ls_proj_id] = {}
mappings_by_project[ls_proj_id][mapping.source_class] = mapping.target_class
# Also add target class to category map if not present
if mapping.target_class and mapping.target_class not in category_map:
category_map[mapping.target_class] = category_id
coco_categories.append({'id': category_id, 'name': mapping.target_class, 'supercategory': ''})
category_id += 1
# Iterate through each annotation project to collect images and annotations
for ls_project_id in annotation_projects:
# Get images for this Label Studio project
images = Image.query.filter_by(project_id=ls_project_id).all()
for image in images:
image_id += 1
file_name = image.image_path
# Clean up file path from Label Studio format
if '%20' in file_name:
file_name = file_name.replace('%20', ' ')
if file_name and file_name.startswith('/data/local-files/?d='):
file_name = file_name.replace('/data/local-files/?d=', '')
# Remove any Label Studio prefixes but keep full path
# Common Label Studio patterns
prefixes_to_remove = [
'//192.168.1.19/home/kitraining/To_Annotate/',
'192.168.1.19/home/kitraining/To_Annotate/',
'/home/kitraining/home/kitraining/',
'home/kitraining/To_Annotate/',
'/home/kitraining/To_Annotate/',
]
# Try each prefix
for prefix in prefixes_to_remove:
if file_name.startswith(prefix):
file_name = file_name[len(prefix):]
break
# Construct ABSOLUTE path using data_dir
# Detect platform for proper path handling
import platform
is_windows = platform.system() == 'Windows'
# Normalize data_dir and file_name based on platform
if is_windows:
# Windows: use backslashes
normalized_data_dir = data_dir.rstrip('/\\').replace('/', '\\')
file_name = file_name.replace('/', '\\')
else:
# Linux/Mac: use forward slashes
normalized_data_dir = data_dir.rstrip('/\\').replace('\\', '/')
file_name = file_name.replace('\\', '/')
# Check if already absolute path
is_absolute = False
if is_windows:
is_absolute = file_name.startswith('\\\\') or (len(file_name) > 1 and file_name[1] == ':')
else:
is_absolute = file_name.startswith('/')
if not is_absolute:
# It's a relative path, combine with data_dir
if is_windows and normalized_data_dir.startswith('\\\\'):
# Windows UNC path
file_name = normalized_data_dir + '\\' + file_name
else:
# Regular path (Windows or Linux)
file_name = os.path.join(normalized_data_dir, file_name)
# Get annotations for this image
annotations = Annotation.query.filter_by(image_id=image.image_id).all()
coco_images.append({
'id': image_id,
'file_name': file_name, # Use absolute path
'width': image.width or 0,
'height': image.height or 0
})
for annotation in annotations:
# Translate class name using class_map for this specific Label Studio project
original_class = annotation.Label
project_class_map = mappings_by_project.get(ls_project_id, {})
mapped_class = project_class_map.get(original_class, original_class)
# Only add annotation if mapped_class is valid
if mapped_class and mapped_class in category_map:
annotation_id += 1
area = 0
if annotation.width and annotation.height:
area = annotation.width * annotation.height
coco_annotations.append({
'id': annotation_id,
'image_id': image_id,
'category_id': category_map[mapped_class],
'bbox': [annotation.x, annotation.y, annotation.width, annotation.height],
'area': area,
'iscrowd': 0
})
# Shuffle images for random split using seed
def seeded_random(seed):
x = math.sin(seed) * 10000
return x - math.floor(x)
def shuffle(array, seed):
for i in range(len(array) - 1, 0, -1):
j = int(seeded_random(seed + i) * (i + 1))
array[i], array[j] = array[j], array[i]
# Use seed from details_obj if present, else default to 42
split_seed = details_obj.get('seed', 42)
if split_seed is not None:
split_seed = int(split_seed)
else:
split_seed = 42
shuffle(coco_images, split_seed)
# Split images
total_images = len(coco_images)
train_count = int(total_images * train_percent / 100)
valid_count = int(total_images * valid_percent / 100)
test_count = total_images - train_count - valid_count
train_images = coco_images[0:train_count]
valid_images = coco_images[train_count:train_count + valid_count]
test_images = coco_images[train_count + valid_count:]
# Helper to get image ids for each split
train_image_ids = {img['id'] for img in train_images}
valid_image_ids = {img['id'] for img in valid_images}
test_image_ids = {img['id'] for img in test_images}
# Split annotations
train_annotations = [ann for ann in coco_annotations if ann['image_id'] in train_image_ids]
valid_annotations = [ann for ann in coco_annotations if ann['image_id'] in valid_image_ids]
test_annotations = [ann for ann in coco_annotations if ann['image_id'] in test_image_ids]
# Build final COCO JSONs
def build_coco_json(images, annotations, categories):
return {
'images': images,
'annotations': annotations,
'categories': categories
}
train_json = build_coco_json(train_images, train_annotations, coco_categories)
valid_json = build_coco_json(valid_images, valid_annotations, coco_categories)
test_json = build_coco_json(test_images, test_annotations, coco_categories)
# Create output directory
from services.settings_service import get_setting
from models.training import Training
output_base_path = get_setting('yolox_output_path', './backend')
project_name = training_project.title.replace(' ', '_') if training_project and training_project.title else f'project_{details_obj["project_id"]}'
# Get training record to use its name and ID for folder and file names
# Use the same training_id that was passed in (if it was a Training.id)
# or find the first training for this details_id
if not training_record:
training_record = Training.query.filter_by(project_details_id=details_id).first()
if training_record:
training_folder_name = f"{training_record.exp_name or training_record.training_name or 'training'}_{training_record.id}"
training_folder_name = training_folder_name.replace(' ', '_')
training_file_id = training_record.id
else:
training_folder_name = str(details_id)
training_file_id = details_id
# Save annotations to the configured output folder
annotations_dir = os.path.join(output_base_path, project_name, training_folder_name, 'annotations')
os.makedirs(annotations_dir, exist_ok=True)
# Write to files
train_path = os.path.join(annotations_dir, f'coco_project_{training_file_id}_train.json')
valid_path = os.path.join(annotations_dir, f'coco_project_{training_file_id}_valid.json')
test_path = os.path.join(annotations_dir, f'coco_project_{training_file_id}_test.json')
with open(train_path, 'w') as f:
json.dump(train_json, f, indent=2)
with open(valid_path, 'w') as f:
json.dump(valid_json, f, indent=2)
with open(test_path, 'w') as f:
json.dump(test_json, f, indent=2)
print(f'COCO JSON splits written to {annotations_dir} for training_id={training_file_id} (details_id={details_id})')
# Also generate inference exp.py
from services.generate_yolox_exp import generate_yolox_inference_exp
project_folder = os.path.join(output_base_path, project_name, str(training_id))
os.makedirs(project_folder, exist_ok=True)
inference_exp_path = os.path.join(project_folder, 'exp_infer.py')
try:
exp_content = generate_yolox_inference_exp(training_id)
with open(inference_exp_path, 'w') as f:
f.write(exp_content)
print(f'Inference exp.py written to {inference_exp_path}')
except Exception as err:
print(f'Failed to generate inference exp.py: {err}')