training fix. add global settings

2025-12-02 09:31:52 +01:00
parent 55b1b2b5fe
commit c3c7e042bb
86 changed files with 77512 additions and 7054 deletions

@@ -1,179 +1,288 @@
import json
import os
import math
from models.TrainingProject import TrainingProject
from models.TrainingProjectDetails import TrainingProjectDetails
from models.Images import Image
from models.Annotation import Annotation
def generate_training_json(training_id):
"""Generate COCO JSON for training, validation, and test sets"""
# training_id is now project_details_id
training_project_details = TrainingProjectDetails.query.get(training_id)
if not training_project_details:
raise Exception(f'No TrainingProjectDetails found for project_details_id {training_id}')
details_obj = training_project_details.to_dict()
# Get parent project for name
training_project = TrainingProject.query.get(details_obj['project_id'])
# Get split percentages (default values if not set)
train_percent = details_obj.get('train_percent', 85)
valid_percent = details_obj.get('valid_percent', 10)
test_percent = details_obj.get('test_percent', 5)
coco_images = []
coco_annotations = []
coco_categories = []
category_map = {}
category_id = 0
image_id = 0
annotation_id = 0
for cls in details_obj['class_map']:
asg_map = []
list_asg = cls[1]
for asg in list_asg:
asg_map.append({'original': asg[0], 'mapped': asg[1]})
# Build category list and mapping
if asg[1] and asg[1] not in category_map:
category_map[asg[1]] = category_id
coco_categories.append({'id': category_id, 'name': asg[1], 'supercategory': ''})
category_id += 1
# Get images for this project
images = Image.query.filter_by(project_id=cls[0]).all()
for image in images:
image_id += 1
file_name = image.image_path
# Clean up file path
if '%20' in file_name:
file_name = file_name.replace('%20', ' ')
if file_name and file_name.startswith('/data/local-files/?d='):
file_name = file_name.replace('/data/local-files/?d=', '')
file_name = file_name.replace('/home/kitraining/home/kitraining/', '')
if file_name and file_name.startswith('home/kitraining/To_Annotate/'):
file_name = file_name.replace('home/kitraining/To_Annotate/', '')
# Get annotations for this image
annotations = Annotation.query.filter_by(image_id=image.image_id).all()
coco_images.append({
'id': image_id,
'file_name': file_name,
'width': image.width or 0,
'height': image.height or 0
})
for annotation in annotations:
# Translate class name using asg_map
mapped_class = annotation.Label
for map_entry in asg_map:
if annotation.Label == map_entry['original']:
mapped_class = map_entry['mapped']
break
# Only add annotation if mapped_class is valid
if mapped_class and mapped_class in category_map:
annotation_id += 1
area = 0
if annotation.width and annotation.height:
area = annotation.width * annotation.height
coco_annotations.append({
'id': annotation_id,
'image_id': image_id,
'category_id': category_map[mapped_class],
'bbox': [annotation.x, annotation.y, annotation.width, annotation.height],
'area': area,
'iscrowd': 0
})
# Shuffle images for random split using seed
def seeded_random(seed):
x = math.sin(seed) * 10000
return x - math.floor(x)
def shuffle(array, seed):
for i in range(len(array) - 1, 0, -1):
j = int(seeded_random(seed + i) * (i + 1))
array[i], array[j] = array[j], array[i]
# Use seed from details_obj if present, else default to 42
split_seed = details_obj.get('seed', 42)
if split_seed is not None:
split_seed = int(split_seed)
else:
split_seed = 42
shuffle(coco_images, split_seed)
# Split images
total_images = len(coco_images)
train_count = int(total_images * train_percent / 100)
valid_count = int(total_images * valid_percent / 100)
test_count = total_images - train_count - valid_count
train_images = coco_images[0:train_count]
valid_images = coco_images[train_count:train_count + valid_count]
test_images = coco_images[train_count + valid_count:]
# Helper to get image ids for each split
train_image_ids = {img['id'] for img in train_images}
valid_image_ids = {img['id'] for img in valid_images}
test_image_ids = {img['id'] for img in test_images}
# Split annotations
train_annotations = [ann for ann in coco_annotations if ann['image_id'] in train_image_ids]
valid_annotations = [ann for ann in coco_annotations if ann['image_id'] in valid_image_ids]
test_annotations = [ann for ann in coco_annotations if ann['image_id'] in test_image_ids]
# Build final COCO JSONs
def build_coco_json(images, annotations, categories):
return {
'images': images,
'annotations': annotations,
'categories': categories
}
train_json = build_coco_json(train_images, train_annotations, coco_categories)
valid_json = build_coco_json(valid_images, valid_annotations, coco_categories)
test_json = build_coco_json(test_images, test_annotations, coco_categories)
# Create output directory
project_name = training_project.title.replace(' ', '_') if training_project and training_project.title else f'project_{details_obj["project_id"]}'
annotations_dir = '/home/kitraining/To_Annotate/annotations'
os.makedirs(annotations_dir, exist_ok=True)
# Write to files
train_path = f'{annotations_dir}/coco_project_{training_id}_train.json'
valid_path = f'{annotations_dir}/coco_project_{training_id}_valid.json'
test_path = f'{annotations_dir}/coco_project_{training_id}_test.json'
with open(train_path, 'w') as f:
json.dump(train_json, f, indent=2)
with open(valid_path, 'w') as f:
json.dump(valid_json, f, indent=2)
with open(test_path, 'w') as f:
json.dump(test_json, f, indent=2)
print(f'COCO JSON splits written to {annotations_dir} for trainingId {training_id}')
# Also generate inference exp.py
from services.generate_yolox_exp import generate_yolox_inference_exp
project_folder = os.path.join(os.path.dirname(__file__), '..', project_name, str(training_id))
os.makedirs(project_folder, exist_ok=True)
inference_exp_path = os.path.join(project_folder, 'exp_infer.py')
try:
exp_content = generate_yolox_inference_exp(training_id)
with open(inference_exp_path, 'w') as f:
f.write(exp_content)
print(f'Inference exp.py written to {inference_exp_path}')
except Exception as err:
print(f'Failed to generate inference exp.py: {err}')
import json
import os
import math
from models.TrainingProject import TrainingProject
from models.TrainingProjectDetails import TrainingProjectDetails
from models.Images import Image
from models.Annotation import Annotation
def generate_training_json(training_id):
"""Generate COCO JSON for training, validation, and test sets"""
# training_id is now project_details_id
training_project_details = TrainingProjectDetails.query.get(training_id)
if not training_project_details:
raise Exception(f'No TrainingProjectDetails found for project_details_id {training_id}')
details_obj = training_project_details.to_dict()
# Get parent project for name
training_project = TrainingProject.query.get(details_obj['project_id'])
# Get the data directory setting for image paths
from services.settings_service import get_setting
data_dir = get_setting('yolox_data_dir', '/home/kitraining/To_Annotate/')
# Fix UNC path if it's missing the \\ prefix
# Check if it looks like a UNC path without proper prefix (e.g., "192.168.1.19\...")
if data_dir and not data_dir.startswith('\\\\') and not data_dir.startswith('/'):
# Check if it starts with an IP address pattern
import re
if re.match(r'^\d+\.\d+\.\d+\.\d+[/\\]', data_dir):
data_dir = '\\\\' + data_dir
# Ensure data_dir ends with separator
if not data_dir.endswith(os.sep) and not data_dir.endswith('/'):
data_dir += os.sep
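# e.g. '192.168.1.19/home/kitraining/To_Annotate/' -> '\\192.168.1.19/home/kitraining/To_Annotate/'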
# Get split percentages (default values if not set)
train_percent = details_obj.get('train_percent', 85)
valid_percent = details_obj.get('valid_percent', 10)
test_percent = details_obj.get('test_percent', 5)
coco_images = []
coco_annotations = []
coco_categories = []
category_map = {}
category_id = 0
image_id = 0
annotation_id = 0
# Build category list and mapping from class_map dictionary {source: target}
class_map = details_obj.get('class_map', {})
for source_class, target_class in class_map.items():
if target_class and target_class not in category_map:
category_map[target_class] = category_id
coco_categories.append({'id': category_id, 'name': target_class, 'supercategory': ''})
category_id += 1
# Get all annotation projects (Label Studio project IDs)
annotation_projects = details_obj.get('annotation_projects', [])
# Get class mappings from database grouped by Label Studio project
from models.ClassMapping import ClassMapping
all_mappings = ClassMapping.query.filter_by(project_details_id=training_id).all()
# Group mappings by Label Studio project ID
mappings_by_project = {}
for mapping in all_mappings:
ls_proj_id = mapping.label_studio_project_id
if ls_proj_id not in mappings_by_project:
mappings_by_project[ls_proj_id] = {}
mappings_by_project[ls_proj_id][mapping.source_class] = mapping.target_class
# Also add target class to category map if not present
if mapping.target_class and mapping.target_class not in category_map:
category_map[mapping.target_class] = category_id
coco_categories.append({'id': category_id, 'name': mapping.target_class, 'supercategory': ''})
category_id += 1
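# category_map now holds every target class from class_map and the ClassMapping rows,
# each with a stable integer id that becomes the COCO category_id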
# Iterate through each annotation project to collect images and annotations
for ls_project_id in annotation_projects:
# Get images for this Label Studio project
images = Image.query.filter_by(project_id=ls_project_id).all()
for image in images:
image_id += 1
file_name = image.image_path
# Clean up file path from Label Studio format
if '%20' in file_name:
file_name = file_name.replace('%20', ' ')
if file_name and file_name.startswith('/data/local-files/?d='):
file_name = file_name.replace('/data/local-files/?d=', '')
# Remove any Label Studio prefixes but keep full path
# Common Label Studio patterns
prefixes_to_remove = [
'//192.168.1.19/home/kitraining/To_Annotate/',
'192.168.1.19/home/kitraining/To_Annotate/',
'/home/kitraining/home/kitraining/',
'home/kitraining/To_Annotate/',
'/home/kitraining/To_Annotate/',
]
# Try each prefix
for prefix in prefixes_to_remove:
if file_name.startswith(prefix):
file_name = file_name[len(prefix):]
break
# Construct ABSOLUTE path using data_dir
# Detect platform for proper path handling
import platform
is_windows = platform.system() == 'Windows'
# Normalize path separators in file_name to forward slashes first (OS-agnostic)
file_name = file_name.replace('\\', '/')
# Normalize data_dir to use forward slashes
normalized_data_dir = data_dir.rstrip('/\\').replace('\\', '/')
# Check if file_name is already an absolute path
is_absolute = False
if is_windows:
# Windows: Check for drive letter (C:/) or UNC path (//server/)
is_absolute = (len(file_name) > 1 and file_name[1] == ':') or file_name.startswith('//')
else:
# Linux/Mac: Check for leading /
is_absolute = file_name.startswith('/')
if not is_absolute:
# It's a relative path, combine with data_dir
if normalized_data_dir.startswith('//'):
# UNC path on Windows
file_name = normalized_data_dir + '/' + file_name
else:
# Regular path - use os.path.join but with forward slashes
file_name = os.path.join(normalized_data_dir, file_name).replace('\\', '/')
# Final OS-specific normalization
if is_windows:
# Convert to Windows-style backslashes
file_name = file_name.replace('/', '\\')
else:
# Keep as forward slashes for Linux/Mac
file_name = file_name.replace('\\', '/')
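# file_name is now an absolute, OS-native path rooted at data_dir (unless it was already absolute)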
# Get annotations for this image
annotations = Annotation.query.filter_by(image_id=image.image_id).all()
# Ensure width and height are valid integers;
# images with missing or invalid dimensions are skipped
img_width = int(image.width) if image.width else 0
img_height = int(image.height) if image.height else 0
# Skip images with invalid dimensions
if img_width <= 0 or img_height <= 0:
print(f'Warning: Skipping image {file_name} with invalid dimensions: {img_width}x{img_height}')
continue
coco_images.append({
'id': image_id,
'file_name': file_name, # Use absolute path
'width': img_width,
'height': img_height
})
for annotation in annotations:
# Translate class name using class_map for this specific Label Studio project
original_class = annotation.Label
project_class_map = mappings_by_project.get(ls_project_id, {})
mapped_class = project_class_map.get(original_class, original_class)
# Only add annotation if mapped_class is valid
if mapped_class and mapped_class in category_map:
annotation_id += 1
area = 0
if annotation.width and annotation.height:
area = annotation.width * annotation.height
coco_annotations.append({
'id': annotation_id,
'image_id': image_id,
'category_id': category_map[mapped_class],
'bbox': [annotation.x, annotation.y, annotation.width, annotation.height],
'area': area,
'iscrowd': 0
})
# Shuffle images for random split using seed
def seeded_random(seed):
x = math.sin(seed) * 10000
return x - math.floor(x)
def shuffle(array, seed):
for i in range(len(array) - 1, 0, -1):
j = int(seeded_random(seed + i) * (i + 1))
array[i], array[j] = array[j], array[i]
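# seeded_random/shuffle form a deterministic Fisher-Yates shuffle (sin-based PRNG),
# so the same seed always yields the same train/valid/test split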
# Use seed from details_obj if present, else default to 42
split_seed = details_obj.get('seed', 42)
if split_seed is not None:
split_seed = int(split_seed)
else:
split_seed = 42
shuffle(coco_images, split_seed)
# Split images
total_images = len(coco_images)
train_count = int(total_images * train_percent / 100)
valid_count = int(total_images * valid_percent / 100)
test_count = total_images - train_count - valid_count
train_images = coco_images[0:train_count]
valid_images = coco_images[train_count:train_count + valid_count]
test_images = coco_images[train_count + valid_count:]
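# Example: 200 images at 85/10/5 -> train=170, valid=20, test=10 (test takes the rounding remainder)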
# Helper to get image ids for each split
train_image_ids = {img['id'] for img in train_images}
valid_image_ids = {img['id'] for img in valid_images}
test_image_ids = {img['id'] for img in test_images}
# Split annotations
train_annotations = [ann for ann in coco_annotations if ann['image_id'] in train_image_ids]
valid_annotations = [ann for ann in coco_annotations if ann['image_id'] in valid_image_ids]
test_annotations = [ann for ann in coco_annotations if ann['image_id'] in test_image_ids]
# Build final COCO JSONs
def build_coco_json(images, annotations, categories):
return {
'images': images,
'annotations': annotations,
'categories': categories
}
train_json = build_coco_json(train_images, train_annotations, coco_categories)
valid_json = build_coco_json(valid_images, valid_annotations, coco_categories)
test_json = build_coco_json(test_images, test_annotations, coco_categories)
# Create output directory
from models.training import Training
output_base_path = get_setting('yolox_output_path', './backend')
project_name = training_project.title.replace(' ', '_') if training_project and training_project.title else f'project_{details_obj["project_id"]}'
# Get training record to use its name for folder
training_record = Training.query.filter_by(project_details_id=training_id).first()
training_folder_name = f"{training_record.exp_name or training_record.training_name or 'training'}_{training_record.id}" if training_record else str(training_id)
training_folder_name = training_folder_name.replace(' ', '_')
# Use training_record.id for file names to match what generate_yolox_exp expects
training_file_id = training_record.id if training_record else training_id
# Save annotations to the configured output folder
annotations_dir = os.path.join(output_base_path, project_name, training_folder_name, 'annotations')
os.makedirs(annotations_dir, exist_ok=True)
# Write to files
train_path = os.path.join(annotations_dir, f'coco_project_{training_file_id}_train.json')
valid_path = os.path.join(annotations_dir, f'coco_project_{training_file_id}_valid.json')
test_path = os.path.join(annotations_dir, f'coco_project_{training_file_id}_test.json')
with open(train_path, 'w') as f:
json.dump(train_json, f, indent=2)
with open(valid_path, 'w') as f:
json.dump(valid_json, f, indent=2)
with open(test_path, 'w') as f:
json.dump(test_json, f, indent=2)
print(f'COCO JSON splits written to {annotations_dir} for project_details_id {training_id}')
# Also generate inference exp.py
from services.generate_yolox_exp import generate_yolox_inference_exp
project_folder = os.path.join(output_base_path, project_name, str(training_id))
os.makedirs(project_folder, exist_ok=True)
inference_exp_path = os.path.join(project_folder, 'exp_infer.py')
try:
exp_content = generate_yolox_inference_exp(training_id)
with open(inference_exp_path, 'w') as f:
f.write(exp_content)
print(f'Inference exp.py written to {inference_exp_path}')
except Exception as err:
print(f'Failed to generate inference exp.py: {err}')
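
Below is a minimal, standalone sketch (not part of this commit) that mirrors the seeded shuffle and split arithmetic used above; the helper names and the 200-image example are illustrative only. It can be handy for sanity-checking how many images land in each split for a given seed and percentage settings.

import math

def seeded_random(seed):
    # Fractional part of a scaled sine, same scheme as generate_training_json
    x = math.sin(seed) * 10000
    return x - math.floor(x)

def seeded_shuffle(items, seed):
    # Deterministic in-place Fisher-Yates shuffle
    for i in range(len(items) - 1, 0, -1):
        j = int(seeded_random(seed + i) * (i + 1))
        items[i], items[j] = items[j], items[i]

def split_counts(total, train_percent, valid_percent):
    train = int(total * train_percent / 100)
    valid = int(total * valid_percent / 100)
    test = total - train - valid  # test takes the rounding remainder
    return train, valid, test

if __name__ == '__main__':
    image_ids = list(range(1, 201))  # 200 hypothetical image ids
    seeded_shuffle(image_ids, seed=42)
    print(split_counts(len(image_ids), 85, 10))  # (170, 20, 10)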