260 lines
12 KiB
Python
260 lines
12 KiB
Python
import json
|
|
import os
|
|
import math
|
|
from models.TrainingProject import TrainingProject
|
|
from models.TrainingProjectDetails import TrainingProjectDetails
|
|
from models.Images import Image
|
|
from models.Annotation import Annotation
|
|
|
|
def generate_training_json(training_id):
    """Generate COCO JSON files for the train, validation and test splits.

    Collects every image (and its annotations) belonging to the Label Studio
    projects listed in the training details, maps source labels to target
    classes, shuffles the images deterministically from the configured seed,
    splits them by the configured percentages (test receives the remainder)
    and writes one COCO JSON file per split below the configured output
    path. Afterwards it makes a best-effort attempt to write an inference
    ``exp_infer.py``; a failure there is printed, not raised.

    Args:
        training_id: Primary key of the ``TrainingProjectDetails`` row
            (the "project_details_id").

    Raises:
        Exception: If no ``TrainingProjectDetails`` row exists for
            ``training_id``.
    """
    # training_id is now project_details_id
    training_project_details = TrainingProjectDetails.query.get(training_id)
    if not training_project_details:
        raise Exception(f'No TrainingProjectDetails found for project_details_id {training_id}')

    details_obj = training_project_details.to_dict()

    # Parent project is only needed for the output folder name.
    training_project = TrainingProject.query.get(details_obj['project_id'])

    # Root directory that relative image paths are resolved against.
    from services.settings_service import get_setting
    data_root = _normalize_data_dir(
        get_setting('yolox_data_dir', '/home/kitraining/To_Annotate/')
    )

    # Split percentages; a stored None falls back to the default as well.
    # The test split is whatever remains after train and valid.
    train_percent = _value_or_default(details_obj, 'train_percent', 85)
    valid_percent = _value_or_default(details_obj, 'valid_percent', 10)

    coco_images = []
    coco_annotations = []
    coco_categories = []
    category_map = {}

    # Seed the category list from the class_map dictionary {source: target}.
    for target_class in (details_obj.get('class_map') or {}).values():
        _register_category(target_class, category_map, coco_categories)

    # Label Studio project IDs that contribute images to this training.
    annotation_projects = details_obj.get('annotation_projects') or []

    # Class mappings from the database, grouped by Label Studio project.
    from models.ClassMapping import ClassMapping
    all_mappings = ClassMapping.query.filter_by(project_details_id=training_id).all()

    mappings_by_project = {}
    for mapping in all_mappings:
        project_mappings = mappings_by_project.setdefault(mapping.label_studio_project_id, {})
        project_mappings[mapping.source_class] = mapping.target_class
        # Targets that only appear in the database still need a category.
        _register_category(mapping.target_class, category_map, coco_categories)

    image_id = 0
    annotation_id = 0

    for ls_project_id in annotation_projects:
        # The label translation table is constant for the whole project.
        project_class_map = mappings_by_project.get(ls_project_id, {})
        images = Image.query.filter_by(project_id=ls_project_id).all()

        for image in images:
            if not image.image_path:
                # Without a path the entry could never be loaded for training.
                print(f'Skipping image {image.image_id} without image_path')
                continue

            image_id += 1
            coco_images.append({
                'id': image_id,
                'file_name': _resolve_image_path(image.image_path, data_root),  # absolute path
                'width': image.width or 0,
                'height': image.height or 0,
            })

            annotations = Annotation.query.filter_by(image_id=image.image_id).all()
            for annotation in annotations:
                # Unmapped labels pass through unchanged and are kept only
                # if they happen to match a known category.
                original_class = annotation.Label
                mapped_class = project_class_map.get(original_class, original_class)
                if not mapped_class or mapped_class not in category_map:
                    continue

                annotation_id += 1
                area = 0
                if annotation.width and annotation.height:
                    area = annotation.width * annotation.height

                coco_annotations.append({
                    'id': annotation_id,
                    'image_id': image_id,
                    'category_id': category_map[mapped_class],
                    'bbox': [annotation.x, annotation.y, annotation.width, annotation.height],
                    'area': area,
                    'iscrowd': 0,
                })

    # Use seed from details_obj if present (and not None), else default to 42.
    split_seed = details_obj.get('seed', 42)
    split_seed = 42 if split_seed is None else int(split_seed)
    _seeded_shuffle(coco_images, split_seed)

    total_images = len(coco_images)
    train_count = int(total_images * train_percent / 100)
    valid_end = train_count + int(total_images * valid_percent / 100)
    split_images = {
        'train': coco_images[:train_count],
        'valid': coco_images[train_count:valid_end],
        'test': coco_images[valid_end:],
    }

    from models.training import Training

    output_base_path = get_setting('yolox_output_path', './backend')

    if training_project and training_project.title:
        project_name = training_project.title.replace(' ', '_')
    else:
        project_name = f'project_{details_obj["project_id"]}'

    # The training record (if any) supplies the folder name and the file id.
    training_record = Training.query.filter_by(project_details_id=training_id).first()
    if training_record:
        base_name = training_record.exp_name or training_record.training_name or 'training'
        training_folder_name = f'{base_name}_{training_record.id}'
        # Use training_record.id for file names to match what generate_yolox_exp expects
        training_file_id = training_record.id
    else:
        training_folder_name = str(training_id)
        training_file_id = training_id
    training_folder_name = training_folder_name.replace(' ', '_')

    annotations_dir = os.path.join(output_base_path, project_name, training_folder_name, 'annotations')
    os.makedirs(annotations_dir, exist_ok=True)

    for split_name, images_in_split in split_images.items():
        split_ids = {img['id'] for img in images_in_split}
        split_json = {
            'images': images_in_split,
            'annotations': [ann for ann in coco_annotations if ann['image_id'] in split_ids],
            'categories': coco_categories,
        }
        split_path = os.path.join(
            annotations_dir, f'coco_project_{training_file_id}_{split_name}.json'
        )
        with open(split_path, 'w', encoding='utf-8') as f:
            json.dump(split_json, f, indent=2)

    print(f'COCO JSON splits written to {annotations_dir} for trainingId {training_id}')

    # Also generate inference exp.py
    from services.generate_yolox_exp import generate_yolox_inference_exp

    # NOTE(review): this folder is keyed by training_id, not by
    # training_folder_name used for the annotations above -- confirm intended.
    project_folder = os.path.join(output_base_path, project_name, str(training_id))
    os.makedirs(project_folder, exist_ok=True)

    inference_exp_path = os.path.join(project_folder, 'exp_infer.py')
    try:
        exp_content = generate_yolox_inference_exp(training_id)
        with open(inference_exp_path, 'w', encoding='utf-8') as f:
            f.write(exp_content)
        print(f'Inference exp.py written to {inference_exp_path}')
    except Exception as err:
        # Best effort: the COCO files are already written, so only report.
        print(f'Failed to generate inference exp.py: {err}')


# Prefix Label Studio puts in front of locally served files.
_LOCAL_FILES_PREFIX = '/data/local-files/?d='

# Storage-root spellings stripped from image paths; the first match wins,
# so the order is significant.
_STORAGE_PREFIXES = (
    '//192.168.1.19/home/kitraining/To_Annotate/',
    '192.168.1.19/home/kitraining/To_Annotate/',
    '/home/kitraining/home/kitraining/',
    'home/kitraining/To_Annotate/',
    '/home/kitraining/To_Annotate/',
)


def _value_or_default(mapping, key, default):
    """Return ``mapping[key]``, or ``default`` when it is missing or None."""
    value = mapping.get(key)
    return default if value is None else value


def _register_category(name, category_map, coco_categories):
    """Append a COCO category for ``name`` unless it is empty or already known."""
    if name and name not in category_map:
        # Ids are handed out sequentially from 0 in order of first appearance.
        category_id = len(coco_categories)
        category_map[name] = category_id
        coco_categories.append({'id': category_id, 'name': name, 'supercategory': ''})


def _normalize_data_dir(data_dir):
    """Return ``data_dir`` with backslash separators and no trailing separator.

    A value that starts with an IPv4-looking host but lacks the leading
    ``\\\\`` (e.g. ``192.168.1.19\\share``) gets the UNC prefix added.
    """
    import re

    data_dir = data_dir or ''
    if data_dir and not data_dir.startswith(('\\\\', '/')):
        if re.match(r'^\d+\.\d+\.\d+\.\d+[/\\]', data_dir):
            data_dir = '\\\\' + data_dir
    return data_dir.rstrip('/\\').replace('/', '\\')


def _resolve_image_path(raw_path, data_root):
    """Turn a stored Label Studio image path into a backslash-separated path.

    Decodes ``%20``, drops the local-files prefix and one known storage-root
    prefix, then joins the remainder onto ``data_root`` unless it is already
    a UNC (``\\\\...``) or drive-letter (``X:...``) path.
    """
    path = raw_path.replace('%20', ' ')
    if path.startswith(_LOCAL_FILES_PREFIX):
        path = path[len(_LOCAL_FILES_PREFIX):]

    for prefix in _STORAGE_PREFIXES:
        if path.startswith(prefix):
            path = path[len(prefix):]
            break

    already_absolute = path.startswith('\\\\') or (len(path) > 1 and path[1] == ':')
    windows_path = path.replace('/', '\\')
    if already_absolute:
        return windows_path
    if data_root.startswith('\\\\'):
        # Concatenate manually so the leading \\ of a UNC root is preserved.
        return data_root + '\\' + windows_path
    return os.path.join(data_root, windows_path)


def _seeded_random(seed):
    """Return a deterministic value in [0, 1) derived from ``sin(seed)``."""
    x = math.sin(seed) * 10000
    return x - math.floor(x)


def _seeded_shuffle(items, seed):
    """Shuffle ``items`` in place, reproducibly for a given ``seed``."""
    for i in range(len(items) - 1, 0, -1):
        # Clamp guards against the fraction rounding up to exactly 1.0.
        j = min(int(_seeded_random(seed + i) * (i + 1)), i)
        items[i], items[j] = items[j], items[i]
|