training fix. add global settings

2025-12-02 09:31:52 +01:00
parent 55b1b2b5fe
commit c3c7e042bb
86 changed files with 77512 additions and 7054 deletions

@@ -1,179 +1,288 @@
import json
import os
import math
from models.TrainingProject import TrainingProject
from models.TrainingProjectDetails import TrainingProjectDetails
from models.Images import Image
from models.Annotation import Annotation
def generate_training_json(training_id):
"""Generate COCO JSON for training, validation, and test sets"""
# training_id is now project_details_id
training_project_details = TrainingProjectDetails.query.get(training_id)
if not training_project_details:
raise Exception(f'No TrainingProjectDetails found for project_details_id {training_id}')
details_obj = training_project_details.to_dict()
# Get parent project for name
training_project = TrainingProject.query.get(details_obj['project_id'])
# Get split percentages (default values if not set)
train_percent = details_obj.get('train_percent', 85)
valid_percent = details_obj.get('valid_percent', 10)
test_percent = details_obj.get('test_percent', 5)
coco_images = []
coco_annotations = []
coco_categories = []
category_map = {}
category_id = 0
image_id = 0
annotation_id = 0
for cls in details_obj['class_map']:
asg_map = []
list_asg = cls[1]
for asg in list_asg:
asg_map.append({'original': asg[0], 'mapped': asg[1]})
# Build category list and mapping
if asg[1] and asg[1] not in category_map:
category_map[asg[1]] = category_id
coco_categories.append({'id': category_id, 'name': asg[1], 'supercategory': ''})
category_id += 1
# Get images for this project
images = Image.query.filter_by(project_id=cls[0]).all()
for image in images:
image_id += 1
file_name = image.image_path
# Clean up file path
if '%20' in file_name:
file_name = file_name.replace('%20', ' ')
if file_name and file_name.startswith('/data/local-files/?d='):
file_name = file_name.replace('/data/local-files/?d=', '')
file_name = file_name.replace('/home/kitraining/home/kitraining/', '')
if file_name and file_name.startswith('home/kitraining/To_Annotate/'):
file_name = file_name.replace('home/kitraining/To_Annotate/', '')
# Get annotations for this image
annotations = Annotation.query.filter_by(image_id=image.image_id).all()
coco_images.append({
'id': image_id,
'file_name': file_name,
'width': image.width or 0,
'height': image.height or 0
})
for annotation in annotations:
# Translate class name using asg_map
mapped_class = annotation.Label
for map_entry in asg_map:
if annotation.Label == map_entry['original']:
mapped_class = map_entry['mapped']
break
# Only add annotation if mapped_class is valid
if mapped_class and mapped_class in category_map:
annotation_id += 1
area = 0
if annotation.width and annotation.height:
area = annotation.width * annotation.height
coco_annotations.append({
'id': annotation_id,
'image_id': image_id,
'category_id': category_map[mapped_class],
'bbox': [annotation.x, annotation.y, annotation.width, annotation.height],
'area': area,
'iscrowd': 0
})
# Shuffle images for random split using seed
def seeded_random(seed):
x = math.sin(seed) * 10000
return x - math.floor(x)
def shuffle(array, seed):
for i in range(len(array) - 1, 0, -1):
j = int(seeded_random(seed + i) * (i + 1))
array[i], array[j] = array[j], array[i]
# Use seed from details_obj if present, else default to 42
split_seed = details_obj.get('seed', 42)
if split_seed is not None:
split_seed = int(split_seed)
else:
split_seed = 42
shuffle(coco_images, split_seed)
# Split images
total_images = len(coco_images)
train_count = int(total_images * train_percent / 100)
valid_count = int(total_images * valid_percent / 100)
test_count = total_images - train_count - valid_count
train_images = coco_images[0:train_count]
valid_images = coco_images[train_count:train_count + valid_count]
test_images = coco_images[train_count + valid_count:]
# Helper to get image ids for each split
train_image_ids = {img['id'] for img in train_images}
valid_image_ids = {img['id'] for img in valid_images}
test_image_ids = {img['id'] for img in test_images}
# Split annotations
train_annotations = [ann for ann in coco_annotations if ann['image_id'] in train_image_ids]
valid_annotations = [ann for ann in coco_annotations if ann['image_id'] in valid_image_ids]
test_annotations = [ann for ann in coco_annotations if ann['image_id'] in test_image_ids]
# Build final COCO JSONs
def build_coco_json(images, annotations, categories):
return {
'images': images,
'annotations': annotations,
'categories': categories
}
train_json = build_coco_json(train_images, train_annotations, coco_categories)
valid_json = build_coco_json(valid_images, valid_annotations, coco_categories)
test_json = build_coco_json(test_images, test_annotations, coco_categories)
# Create output directory
project_name = training_project.title.replace(' ', '_') if training_project and training_project.title else f'project_{details_obj["project_id"]}'
annotations_dir = '/home/kitraining/To_Annotate/annotations'
os.makedirs(annotations_dir, exist_ok=True)
# Write to files
train_path = f'{annotations_dir}/coco_project_{training_id}_train.json'
valid_path = f'{annotations_dir}/coco_project_{training_id}_valid.json'
test_path = f'{annotations_dir}/coco_project_{training_id}_test.json'
with open(train_path, 'w') as f:
json.dump(train_json, f, indent=2)
with open(valid_path, 'w') as f:
json.dump(valid_json, f, indent=2)
with open(test_path, 'w') as f:
json.dump(test_json, f, indent=2)
print(f'COCO JSON splits written to {annotations_dir} for trainingId {training_id}')
# Also generate inference exp.py
from services.generate_yolox_exp import generate_yolox_inference_exp
project_folder = os.path.join(os.path.dirname(__file__), '..', project_name, str(training_id))
os.makedirs(project_folder, exist_ok=True)
inference_exp_path = os.path.join(project_folder, 'exp_infer.py')
try:
exp_content = generate_yolox_inference_exp(training_id)
with open(inference_exp_path, 'w') as f:
f.write(exp_content)
print(f'Inference exp.py written to {inference_exp_path}')
except Exception as err:
print(f'Failed to generate inference exp.py: {err}')
import json
import os
import math
from models.TrainingProject import TrainingProject
from models.TrainingProjectDetails import TrainingProjectDetails
from models.Images import Image
from models.Annotation import Annotation
def generate_training_json(training_id):
"""Generate COCO JSON for training, validation, and test sets"""
# training_id is now project_details_id
training_project_details = TrainingProjectDetails.query.get(training_id)
if not training_project_details:
raise Exception(f'No TrainingProjectDetails found for project_details_id {training_id}')
details_obj = training_project_details.to_dict()
# Get parent project for name
training_project = TrainingProject.query.get(details_obj['project_id'])
# Get the data directory setting for image paths
from services.settings_service import get_setting
data_dir = get_setting('yolox_data_dir', '/home/kitraining/To_Annotate/')
# Fix UNC path if it's missing the \\ prefix
# Check if it looks like a UNC path without proper prefix (e.g., "192.168.1.19\...")
if data_dir and not data_dir.startswith('\\\\') and not data_dir.startswith('/'):
# Check if it starts with an IP address pattern
import re
if re.match(r'^\d+\.\d+\.\d+\.\d+[/\\]', data_dir):
data_dir = '\\\\' + data_dir
# Ensure data_dir ends with separator
if not data_dir.endswith(os.sep) and not data_dir.endswith('/'):
data_dir += os.sep
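# e.g. '192.168.1.19/home/kitraining/To_Annotate/' -> '\\192.168.1.19/home/kitraining/To_Annotate/'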
# Get split percentages (default values if not set)
train_percent = details_obj.get('train_percent', 85)
valid_percent = details_obj.get('valid_percent', 10)
test_percent = details_obj.get('test_percent', 5)
coco_images = []
coco_annotations = []
coco_categories = []
category_map = {}
category_id = 0
image_id = 0
annotation_id = 0
# Build category list and mapping from class_map dictionary {source: target}
class_map = details_obj.get('class_map', {})
for source_class, target_class in class_map.items():
if target_class and target_class not in category_map:
category_map[target_class] = category_id
coco_categories.append({'id': category_id, 'name': target_class, 'supercategory': ''})
category_id += 1
# Get all annotation projects (Label Studio project IDs)
annotation_projects = details_obj.get('annotation_projects', [])
# Get class mappings from database grouped by Label Studio project
from models.ClassMapping import ClassMapping
all_mappings = ClassMapping.query.filter_by(project_details_id=training_id).all()
# Group mappings by Label Studio project ID
mappings_by_project = {}
for mapping in all_mappings:
ls_proj_id = mapping.label_studio_project_id
if ls_proj_id not in mappings_by_project:
mappings_by_project[ls_proj_id] = {}
mappings_by_project[ls_proj_id][mapping.source_class] = mapping.target_class
# Also add target class to category map if not present
if mapping.target_class and mapping.target_class not in category_map:
category_map[mapping.target_class] = category_id
coco_categories.append({'id': category_id, 'name': mapping.target_class, 'supercategory': ''})
category_id += 1
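# category_map now holds every target class from class_map and the ClassMapping rows,
# each with a stable integer id that becomes the COCO category_id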
# Iterate through each annotation project to collect images and annotations
for ls_project_id in annotation_projects:
# Get images for this Label Studio project
images = Image.query.filter_by(project_id=ls_project_id).all()
for image in images:
image_id += 1
file_name = image.image_path
# Clean up file path from Label Studio format
if '%20' in file_name:
file_name = file_name.replace('%20', ' ')
if file_name and file_name.startswith('/data/local-files/?d='):
file_name = file_name.replace('/data/local-files/?d=', '')
# Remove any Label Studio prefixes but keep full path
# Common Label Studio patterns
prefixes_to_remove = [
'//192.168.1.19/home/kitraining/To_Annotate/',
'192.168.1.19/home/kitraining/To_Annotate/',
'/home/kitraining/home/kitraining/',
'home/kitraining/To_Annotate/',
'/home/kitraining/To_Annotate/',
]
# Try each prefix
for prefix in prefixes_to_remove:
if file_name.startswith(prefix):
file_name = file_name[len(prefix):]
break
# Construct ABSOLUTE path using data_dir
# Detect platform for proper path handling
import platform
is_windows = platform.system() == 'Windows'
# Normalize path separators in file_name to forward slashes first (OS-agnostic)
file_name = file_name.replace('\\', '/')
# Normalize data_dir to use forward slashes
normalized_data_dir = data_dir.rstrip('/\\').replace('\\', '/')
# Check if file_name is already an absolute path
is_absolute = False
if is_windows:
# Windows: Check for drive letter (C:/) or UNC path (//server/)
is_absolute = (len(file_name) > 1 and file_name[1] == ':') or file_name.startswith('//')
else:
# Linux/Mac: Check for leading /
is_absolute = file_name.startswith('/')
if not is_absolute:
# It's a relative path, combine with data_dir
if normalized_data_dir.startswith('//'):
# UNC path on Windows
file_name = normalized_data_dir + '/' + file_name
else:
# Regular path - use os.path.join but with forward slashes
file_name = os.path.join(normalized_data_dir, file_name).replace('\\', '/')
# Final OS-specific normalization
if is_windows:
# Convert to Windows-style backslashes
file_name = file_name.replace('/', '\\')
else:
# Keep as forward slashes for Linux/Mac
file_name = file_name.replace('\\', '/')
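# file_name is now an absolute, OS-native path rooted at data_dir (unless it was already absolute)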
# Get annotations for this image
annotations = Annotation.query.filter_by(image_id=image.image_id).all()
# Ensure width and height are valid integers;
# images with missing or invalid dimensions are skipped
img_width = int(image.width) if image.width else 0
img_height = int(image.height) if image.height else 0
# Skip images with invalid dimensions
if img_width <= 0 or img_height <= 0:
print(f'Warning: Skipping image {file_name} with invalid dimensions: {img_width}x{img_height}')
continue
coco_images.append({
'id': image_id,
'file_name': file_name, # Use absolute path
'width': img_width,
'height': img_height
})
for annotation in annotations:
# Translate class name using class_map for this specific Label Studio project
original_class = annotation.Label
project_class_map = mappings_by_project.get(ls_project_id, {})
mapped_class = project_class_map.get(original_class, original_class)
# Only add annotation if mapped_class is valid
if mapped_class and mapped_class in category_map:
annotation_id += 1
area = 0
if annotation.width and annotation.height:
area = annotation.width * annotation.height
coco_annotations.append({
'id': annotation_id,
'image_id': image_id,
'category_id': category_map[mapped_class],
'bbox': [annotation.x, annotation.y, annotation.width, annotation.height],
'area': area,
'iscrowd': 0
})
# Shuffle images for random split using seed
def seeded_random(seed):
x = math.sin(seed) * 10000
return x - math.floor(x)
def shuffle(array, seed):
for i in range(len(array) - 1, 0, -1):
j = int(seeded_random(seed + i) * (i + 1))
array[i], array[j] = array[j], array[i]
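# seeded_random/shuffle form a deterministic Fisher-Yates shuffle (sin-based PRNG),
# so the same seed always yields the same train/valid/test split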
# Use seed from details_obj if present, else default to 42
split_seed = details_obj.get('seed', 42)
if split_seed is not None:
split_seed = int(split_seed)
else:
split_seed = 42
shuffle(coco_images, split_seed)
# Split images
total_images = len(coco_images)
train_count = int(total_images * train_percent / 100)
valid_count = int(total_images * valid_percent / 100)
test_count = total_images - train_count - valid_count
train_images = coco_images[0:train_count]
valid_images = coco_images[train_count:train_count + valid_count]
test_images = coco_images[train_count + valid_count:]
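# Example: 200 images at 85/10/5 -> train=170, valid=20, test=10 (test takes the rounding remainder)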
# Helper to get image ids for each split
train_image_ids = {img['id'] for img in train_images}
valid_image_ids = {img['id'] for img in valid_images}
test_image_ids = {img['id'] for img in test_images}
# Split annotations
train_annotations = [ann for ann in coco_annotations if ann['image_id'] in train_image_ids]
valid_annotations = [ann for ann in coco_annotations if ann['image_id'] in valid_image_ids]
test_annotations = [ann for ann in coco_annotations if ann['image_id'] in test_image_ids]
# Build final COCO JSONs
def build_coco_json(images, annotations, categories):
return {
'images': images,
'annotations': annotations,
'categories': categories
}
train_json = build_coco_json(train_images, train_annotations, coco_categories)
valid_json = build_coco_json(valid_images, valid_annotations, coco_categories)
test_json = build_coco_json(test_images, test_annotations, coco_categories)
# Create output directory
from models.training import Training
output_base_path = get_setting('yolox_output_path', './backend')
project_name = training_project.title.replace(' ', '_') if training_project and training_project.title else f'project_{details_obj["project_id"]}'
# Get training record to use its name for folder
training_record = Training.query.filter_by(project_details_id=training_id).first()
training_folder_name = f"{training_record.exp_name or training_record.training_name or 'training'}_{training_record.id}" if training_record else str(training_id)
training_folder_name = training_folder_name.replace(' ', '_')
# Use training_record.id for file names to match what generate_yolox_exp expects
training_file_id = training_record.id if training_record else training_id
# Save annotations to the configured output folder
annotations_dir = os.path.join(output_base_path, project_name, training_folder_name, 'annotations')
os.makedirs(annotations_dir, exist_ok=True)
# Write to files
train_path = os.path.join(annotations_dir, f'coco_project_{training_file_id}_train.json')
valid_path = os.path.join(annotations_dir, f'coco_project_{training_file_id}_valid.json')
test_path = os.path.join(annotations_dir, f'coco_project_{training_file_id}_test.json')
with open(train_path, 'w') as f:
json.dump(train_json, f, indent=2)
with open(valid_path, 'w') as f:
json.dump(valid_json, f, indent=2)
with open(test_path, 'w') as f:
json.dump(test_json, f, indent=2)
print(f'COCO JSON splits written to {annotations_dir} for project_details_id {training_id}')
# Also generate inference exp.py
from services.generate_yolox_exp import generate_yolox_inference_exp
project_folder = os.path.join(output_base_path, project_name, str(training_id))
os.makedirs(project_folder, exist_ok=True)
inference_exp_path = os.path.join(project_folder, 'exp_infer.py')
try:
exp_content = generate_yolox_inference_exp(training_id)
with open(inference_exp_path, 'w') as f:
f.write(exp_content)
print(f'Inference exp.py written to {inference_exp_path}')
except Exception as err:
print(f'Failed to generate inference exp.py: {err}')
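
Below is a minimal, standalone sketch (not part of this commit) that mirrors the seeded shuffle and split arithmetic used above; the helper names and the 200-image example are illustrative only. It can be handy for sanity-checking how many images land in each split for a given seed and percentage settings.

import math

def seeded_random(seed):
    # Fractional part of a scaled sine, same scheme as generate_training_json
    x = math.sin(seed) * 10000
    return x - math.floor(x)

def seeded_shuffle(items, seed):
    # Deterministic in-place Fisher-Yates shuffle
    for i in range(len(items) - 1, 0, -1):
        j = int(seeded_random(seed + i) * (i + 1))
        items[i], items[j] = items[j], items[i]

def split_counts(total, train_percent, valid_percent):
    train = int(total * train_percent / 100)
    valid = int(total * valid_percent / 100)
    test = total - train - valid  # test takes the rounding remainder
    return train, valid, test

if __name__ == '__main__':
    image_ids = list(range(1, 201))  # 200 hypothetical image ids
    seeded_shuffle(image_ids, seed=42)
    print(split_counts(len(image_ids), 85, 10))  # (170, 20, 10)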