cleanup: add training bell

2025-12-08 12:26:34 +01:00
parent 036f3b178a
commit ccfb40a2b3
3070 changed files with 671040 additions and 68602 deletions

0
backend/services/__init__.py Normal file → Executable file

0
backend/services/fetch_labelstudio.py Normal file → Executable file

85
backend/services/generate_json_yolox.py Normal file → Executable file

@@ -7,12 +7,30 @@ from models.Images import Image
from models.Annotation import Annotation
def generate_training_json(training_id):
"""Generate COCO JSON for training, validation, and test sets"""
# training_id is now project_details_id
training_project_details = TrainingProjectDetails.query.get(training_id)
"""Generate COCO JSON for training, validation, and test sets
Args:
training_id: Can be either a Training.id or TrainingProjectDetails.id
Function will automatically detect which one and find the correct details_id
"""
from models.training import Training
# First, try to get as a Training record
training_record = Training.query.get(training_id)
if training_record:
# It's a Training.id - use its project_details_id
details_id = training_record.project_details_id
print(f'[generate_training_json] Using training_id={training_id}, mapped to project_details_id={details_id}')
else:
# Try as TrainingProjectDetails.id directly
details_id = training_id
print(f'[generate_training_json] Using training_id={training_id} as project_details_id directly')
training_project_details = TrainingProjectDetails.query.get(details_id)
if not training_project_details:
raise Exception(f'No TrainingProjectDetails found for project_details_id {training_id}')
raise Exception(f'No TrainingProjectDetails found for id {training_id} (details_id: {details_id})')
details_obj = training_project_details.to_dict()
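In short, the new lookup tries Training.id first and falls back to treating the value as a TrainingProjectDetails.id. A minimal sketch of that resolution as a standalone helper, assuming the SQLAlchemy models imported in the diff (resolve_details_id is a hypothetical name):

def resolve_details_id(training_id):
    """Map an ambiguous id to a TrainingProjectDetails.id.

    Tries Training.id first; falls back to treating the value as a
    TrainingProjectDetails.id. Raises if neither resolves.
    """
    from models.training import Training
    from models.TrainingProjectDetails import TrainingProjectDetails

    training = Training.query.get(training_id)
    details_id = training.project_details_id if training else training_id
    details = TrainingProjectDetails.query.get(details_id)
    if details is None:
        raise Exception(
            f'No TrainingProjectDetails found for id {training_id} '
            f'(details_id: {details_id})'
        )
    return details_id, details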
@@ -110,22 +128,35 @@ def generate_training_json(training_id):
break
# Construct ABSOLUTE path using data_dir
# Normalize data_dir - ensure it uses backslashes for Windows
normalized_data_dir = data_dir.rstrip('/\\').replace('/', '\\')
# Detect platform for proper path handling
import platform
is_windows = platform.system() == 'Windows'
# Normalize data_dir and file_name based on platform
if is_windows:
# Windows: use backslashes
normalized_data_dir = data_dir.rstrip('/\\').replace('/', '\\')
file_name = file_name.replace('/', '\\')
else:
# Linux/Mac: use forward slashes
normalized_data_dir = data_dir.rstrip('/\\').replace('\\', '/')
file_name = file_name.replace('\\', '/')
# Check if already absolute path
if not (file_name.startswith('\\\\') or (len(file_name) > 1 and file_name[1] == ':')):
# It's a relative path, combine with data_dir
# For UNC paths, we need to manually concatenate to preserve \\
if normalized_data_dir.startswith('\\\\'):
# UNC path
file_name = normalized_data_dir + '\\' + file_name.replace('/', '\\')
else:
# Regular path
file_name = os.path.join(normalized_data_dir, file_name.replace('/', '\\'))
else:
# Already absolute, just normalize separators
file_name = file_name.replace('/', '\\')
is_absolute = False
if is_windows:
is_absolute = file_name.startswith('\\\\') or (len(file_name) > 1 and file_name[1] == ':')
else:
is_absolute = file_name.startswith('/')
if not is_absolute:
# It's a relative path, combine with data_dir
if is_windows and normalized_data_dir.startswith('\\\\'):
# Windows UNC path
file_name = normalized_data_dir + '\\' + file_name
else:
# Regular path (Windows or Linux)
file_name = os.path.join(normalized_data_dir, file_name)
# Get annotations for this image
annotations = Annotation.query.filter_by(image_id=image.image_id).all()
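Pulled out of the loop, the platform-aware branch above amounts to a pure function; a sketch for illustration only, with normalize_image_path as a hypothetical name:

import os
import platform

def normalize_image_path(file_name, data_dir):
    """Join a possibly-relative file_name onto data_dir with
    platform-appropriate separators, preserving Windows UNC prefixes."""
    is_windows = platform.system() == 'Windows'
    if is_windows:
        data_dir = data_dir.rstrip('/\\').replace('/', '\\')
        file_name = file_name.replace('/', '\\')
        is_absolute = file_name.startswith('\\\\') or (len(file_name) > 1 and file_name[1] == ':')
    else:
        data_dir = data_dir.rstrip('/\\').replace('\\', '/')
        file_name = file_name.replace('\\', '/')
        is_absolute = file_name.startswith('/')
    if not is_absolute:
        if is_windows and data_dir.startswith('\\\\'):
            # manual concatenation keeps the leading \\ of a UNC share (the diff's rationale)
            file_name = data_dir + '\\' + file_name
        else:
            file_name = os.path.join(data_dir, file_name)
    return file_name

# e.g. on Linux: normalize_image_path('imgs\\a.jpg', '/data/') -> '/data/imgs/a.jpg'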
@@ -218,13 +249,19 @@ def generate_training_json(training_id):
project_name = training_project.title.replace(' ', '_') if training_project and training_project.title else f'project_{details_obj["project_id"]}'
# Get training record to use its name for folder
training_record = Training.query.filter_by(project_details_id=training_id).first()
training_folder_name = f"{training_record.exp_name or training_record.training_name or 'training'}_{training_record.id}" if training_record else str(training_id)
training_folder_name = training_folder_name.replace(' ', '_')
# Get training record to use its name and ID for folder and file names
# Use the same training_id that was passed in (if it was a Training.id)
# or find the first training for this details_id
if not training_record:
training_record = Training.query.filter_by(project_details_id=details_id).first()
# Use training_record.id for file names to match what generate_yolox_exp expects
training_file_id = training_record.id if training_record else training_id
if training_record:
training_folder_name = f"{training_record.exp_name or training_record.training_name or 'training'}_{training_record.id}"
training_folder_name = training_folder_name.replace(' ', '_')
training_file_id = training_record.id
else:
training_folder_name = str(details_id)
training_file_id = details_id
# Save annotations to the configured output folder
annotations_dir = os.path.join(output_base_path, project_name, training_folder_name, 'annotations')
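The folder and file-id derivation reduces to a small helper; a sketch assuming a Training row with exp_name, training_name, and id attributes, as in the diff:

def build_training_folder_name(training_record, details_id):
    """Return (folder_name, file_id); fall back to details_id when
    no Training row exists for this project_details_id."""
    if training_record is None:
        return str(details_id), details_id
    base = training_record.exp_name or training_record.training_name or 'training'
    return f'{base}_{training_record.id}'.replace(' ', '_'), training_record.id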
@@ -242,7 +279,7 @@ def generate_training_json(training_id):
with open(test_path, 'w') as f:
json.dump(test_json, f, indent=2)
print(f'COCO JSON splits written to {annotations_dir} for trainingId {training_id}')
print(f'COCO JSON splits written to {annotations_dir} for training_id={training_file_id} (details_id={details_id})')
# Also generate inference exp.py
from services.generate_yolox_exp import generate_yolox_inference_exp
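For context, the write step is three json.dump calls into that directory; a hedged sketch (train_json, valid_json, and test_json are assumed split payloads, and the file-name pattern is taken from validate_dataset.py below):

import json
import os

os.makedirs(annotations_dir, exist_ok=True)
for split, payload in (('train', train_json), ('valid', valid_json), ('test', test_json)):
    path = os.path.join(annotations_dir, f'coco_project_{training_file_id}_{split}.json')
    with open(path, 'w') as f:
        json.dump(payload, f, indent=2)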

68
backend/services/generate_yolox_exp.py Normal file → Executable file

@@ -220,6 +220,10 @@ def generate_yolox_inference_exp(training_id, options=None, use_base_config=Fals
annotations_parent_dir = os.path.join(output_base_path, project_name, training_folder_name)
annotations_parent_escaped = annotations_parent_dir.replace('\\', '\\\\')
# Set output directory for checkpoints - models subdirectory
models_dir = os.path.join(annotations_parent_dir, 'models')
models_dir_escaped = models_dir.replace('\\', '\\\\')
# Build exp content
exp_content = f'''#!/usr/bin/env python3
# -*- coding:utf-8 -*-
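The *_escaped variables exist because these Windows paths are spliced into generated Python source, where a single backslash would start an escape sequence; a tiny illustration:

models_dir = r'\\nas\share\proj\models'        # hypothetical UNC path
models_dir_escaped = models_dir.replace('\\', '\\\\')
print(f'self.output_dir = "{models_dir_escaped}"')
# prints: self.output_dir = "\\\\nas\\share\\proj\\models"
# which, parsed as Python in the generated exp.py, evaluates back to \\nas\share\proj\models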
@@ -235,6 +239,7 @@ class Exp(MyExp):
super(Exp, self).__init__()
self.data_dir = "{data_dir_escaped}" # Where images are located
self.annotations_dir = "{annotations_parent_escaped}" # Where annotation JSONs are located
self.output_dir = "{models_dir_escaped}" # Where checkpoints will be saved
self.train_ann = "{train_ann}"
self.val_ann = "{val_ann}"
self.test_ann = "{test_ann}"
@@ -252,21 +257,46 @@ class Exp(MyExp):
if selected_model:
exp_content += f" self.pretrained_ckpt = r'{yolox_base_dir}/pretrained/{selected_model}.pth'\n"
# Format arrays
def format_value(val):
# Format arrays and values for Python code generation
# Integer-only parameters (sizes, epochs, intervals)
integer_params = {
'input_size', 'test_size', 'random_size', 'max_epoch', 'warmup_epochs',
'no_aug_epochs', 'print_interval', 'eval_interval', 'multiscale_range',
'data_num_workers', 'num_classes'
}
def format_value(val, param_name=''):
if isinstance(val, (list, tuple)):
return '(' + ', '.join(map(str, val)) + ')'
# Check if this parameter should have integer values
if param_name in integer_params:
# Convert all values to integers
formatted_items = [str(int(float(item))) if isinstance(item, (int, float)) else str(item) for item in val]
else:
# Keep as floats or original type
formatted_items = []
for item in val:
if isinstance(item, float):
formatted_items.append(str(item))
elif isinstance(item, int):
formatted_items.append(str(item))
else:
formatted_items.append(str(item))
return '(' + ', '.join(formatted_items) + ')'
elif isinstance(val, bool):
return str(val)
elif isinstance(val, str):
return f'"{val}"'
elif isinstance(val, int):
return str(val)
elif isinstance(val, float):
return str(val)
else:
return str(val)
# Add all config parameters to exp
for key, value in config.items():
if key not in ['exp_name']: # exp_name is handled separately
exp_content += f" self.{key} = {format_value(value)}\n"
exp_content += f" self.{key} = {format_value(value, key)}\n"
# Add get_dataset override using name parameter for image directory
exp_content += '''
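The effect of the param-aware formatter is easiest to see on concrete inputs; expected results, assuming the format_value defined above:

format_value((640.0, 640.0), 'input_size')  # '(640, 640)'   -> coerced to ints
format_value((0.5, 1.5), 'mixup_scale')     # '(0.5, 1.5)'   -> floats preserved
format_value(True)                          # 'True'
format_value('silu')                        # '"silu"'       -> quoted for codegen
format_value(0.01)                          # '0.01'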
@@ -289,7 +319,7 @@ class Exp(MyExp):
def get_eval_dataset(self, **kwargs):
"""Override eval dataset using name parameter"""
from yolox.data import COCODataset
from yolox.data import COCODataset, ValTransform
testdev = kwargs.get("testdev", False)
legacy = kwargs.get("legacy", False)
@@ -299,8 +329,34 @@ class Exp(MyExp):
json_file=self.val_ann if not testdev else self.test_ann,
name="",
img_size=self.test_size,
preproc=None, # No preprocessing for evaluation
preproc=ValTransform(legacy=legacy), # Use proper validation transform
)
def get_eval_loader(self, batch_size, is_distributed, **kwargs):
"""Standard YOLOX eval loader - matches official implementation"""
import torch
import torch.distributed as dist
from torch.utils.data import DataLoader
valdataset = self.get_eval_dataset(**kwargs)
if is_distributed:
batch_size = batch_size // dist.get_world_size()
sampler = torch.utils.data.distributed.DistributedSampler(
valdataset, shuffle=False
)
else:
sampler = torch.utils.data.SequentialSampler(valdataset)
dataloader_kwargs = {
"num_workers": self.data_num_workers,
"pin_memory": True,
"sampler": sampler,
}
dataloader_kwargs["batch_size"] = batch_size
val_loader = DataLoader(valdataset, **dataloader_kwargs)
return val_loader
'''
# Add exp_name at the end (uses dynamic path)
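To sanity-check a generated file end to end, it can be loaded the way YOLOX's own tools load experiment files; a sketch assuming a pip-installed yolox and the attributes emitted above:

from yolox.exp import get_exp

exp = get_exp('path/to/generated/exp.py', None)   # placeholder path
print(exp.output_dir, exp.train_ann, exp.test_size)
val_loader = exp.get_eval_loader(batch_size=8, is_distributed=False)
print(f'{len(val_loader.dataset)} eval images')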

0
backend/services/push_yolox_exp.py Normal file → Executable file

0
backend/services/seed_label_studio.py Normal file → Executable file

0
backend/services/settings_service.py Normal file → Executable file

46
backend/services/training_queue.py Normal file → Executable file

@@ -112,16 +112,35 @@ class TrainingQueueManager:
if line:
print(line.strip())
# Parse iteration from YOLOX output
# Example: "2025-12-02 07:30:15 | INFO | yolox.core.trainer:78 - Epoch: [5/300]"
match = re.search(r'Epoch:\s*\[(\d+)/(\d+)\]', line)
if match:
current_epoch = int(match.group(1))
total_epochs = int(match.group(2))
# Parse epoch and iteration from YOLOX output
# Example: "epoch: 3/300, iter: 90/101"
epoch_match = re.search(r'epoch:\s*(\d+)/(\d+)', line, re.IGNORECASE)
iter_match = re.search(r'iter:\s*(\d+)/(\d+)', line, re.IGNORECASE)
if epoch_match:
current_epoch = int(epoch_match.group(1))
total_epochs = int(epoch_match.group(2))
if self.current_training:
self.current_training['iteration'] = current_epoch
self.current_training['current_epoch'] = current_epoch
self.current_training['max_epoch'] = total_epochs
print(f'Progress: {current_epoch}/{total_epochs}')
# Debug log
print(f'[PROGRESS] Parsed epoch: {current_epoch}/{total_epochs}')
if iter_match:
current_iter = int(iter_match.group(1))
total_iters = int(iter_match.group(2))
if self.current_training:
self.current_training['current_iter'] = current_iter
self.current_training['total_iters'] = total_iters
# Calculate overall progress percentage
if 'current_epoch' in self.current_training and 'max_epoch' in self.current_training:
epoch_progress = (self.current_training['current_epoch'] - 1) / self.current_training['max_epoch']
iter_progress = current_iter / total_iters / self.current_training['max_epoch']
total_progress = (epoch_progress + iter_progress) * 100
self.current_training['progress'] = round(total_progress, 2)
# Debug log
print(f'[PROGRESS] Epoch {self.current_training["current_epoch"]}/{self.current_training["max_epoch"]}, Iter {current_iter}/{total_iters}, Progress: {self.current_training["progress"]}%')
# Wait for completion
self.current_process.wait()
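The two regexes and the weighting formula can be verified in isolation; a self-contained check against a sample trainer line:

import re

line = 'epoch: 3/300, iter: 90/101'  # sample YOLOX trainer output
current_epoch, max_epoch = map(int, re.search(r'epoch:\s*(\d+)/(\d+)', line, re.I).groups())
current_iter, total_iters = map(int, re.search(r'iter:\s*(\d+)/(\d+)', line, re.I).groups())

# Completed epochs plus the fraction of the epoch currently running.
epoch_progress = (current_epoch - 1) / max_epoch
iter_progress = current_iter / total_iters / max_epoch
print(round((epoch_progress + iter_progress) * 100, 2))  # 0.96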
@@ -158,11 +177,18 @@ class TrainingQueueManager:
}
if self.current_training:
current_epoch = self.current_training.get('current_epoch', 0)
max_epoch = self.current_training.get('max_epoch', 300)
result['current'] = {
'training_id': self.current_training['training_id'],
'name': self.current_training.get('name', f'Training {self.current_training["training_id"]}'),
'iteration': self.current_training.get('iteration', 0),
'max_epoch': self.current_training.get('max_epoch', 300)
'epoch': current_epoch, # For backward compatibility
'current_epoch': current_epoch,
'max_epoch': max_epoch,
'current_iter': self.current_training.get('current_iter', 0),
'total_iters': self.current_training.get('total_iters', 0),
'progress': self.current_training.get('progress', 0.0),
'iteration': current_epoch # For backward compatibility
}
return result
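A client polling the queue status would now receive a current entry shaped like this (values illustrative):

{
    'training_id': 42,
    'name': 'Training 42',
    'current_epoch': 3,
    'max_epoch': 300,
    'current_iter': 90,
    'total_iters': 101,
    'progress': 0.96,
    'epoch': 3,      # backward compatibility
    'iteration': 3,  # backward compatibility
}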

244
backend/services/validate_dataset.py Normal file

@@ -0,0 +1,244 @@
"""
Validate dataset for training - check for problematic images and annotations
"""
import os
import json
from PIL import Image
def validate_coco_json(json_path, data_dir):
"""
Validate a COCO JSON file and check all images
Args:
json_path: Path to COCO JSON file
data_dir: Directory where images are located
Returns:
dict with validation results
"""
print(f"\n{'='*60}")
print(f"Validating: {json_path}")
print(f"{'='*60}\n")
issues = {
'missing_images': [],
'corrupted_images': [],
'zero_dimension_images': [],
'invalid_annotations': [],
'zero_area_boxes': []
}
try:
with open(json_path, 'r') as f:
coco_data = json.load(f)
except Exception as e:
print(f"❌ Failed to load JSON: {e}")
return issues
images = coco_data.get('images', [])
annotations = coco_data.get('annotations', [])
print(f"📊 Dataset Stats:")
print(f" Images: {len(images)}")
print(f" Annotations: {len(annotations)}")
print(f" Categories: {len(coco_data.get('categories', []))}")
print()
# Validate images
print("🔍 Validating images...")
for idx, img_info in enumerate(images):
img_id = img_info.get('id')
file_name = img_info.get('file_name', '')
width = img_info.get('width', 0)
height = img_info.get('height', 0)
# Check if image file exists
# Try to construct the full path
if os.path.isabs(file_name):
img_path = file_name
else:
img_path = os.path.join(data_dir, file_name)
if not os.path.exists(img_path):
issues['missing_images'].append({
'id': img_id,
'file_name': file_name,
'expected_path': img_path
})
continue
# Check if image can be loaded
try:
# Try with PIL
with Image.open(img_path) as pil_img:
pil_width, pil_height = pil_img.size
# Check if dimensions match JSON
if pil_width != width or pil_height != height:
print(f"⚠️ Image {img_id}: Dimension mismatch - JSON: {width}x{height}, Actual: {pil_width}x{pil_height}")
# Check for zero dimensions
if pil_width == 0 or pil_height == 0:
issues['zero_dimension_images'].append({
'id': img_id,
'file_name': file_name,
'dimensions': f"{pil_width}x{pil_height}"
})
except Exception as e:
issues['corrupted_images'].append({
'id': img_id,
'file_name': file_name,
'error': str(e)
})
# Progress indicator
if (idx + 1) % 100 == 0:
print(f" Checked {idx + 1}/{len(images)} images...")
print(f"✅ Image validation complete\n")
# Validate annotations
print("🔍 Validating annotations...")
for idx, ann in enumerate(annotations):
ann_id = ann.get('id')
img_id = ann.get('image_id')
bbox = ann.get('bbox', [])
if len(bbox) != 4:
issues['invalid_annotations'].append({
'id': ann_id,
'image_id': img_id,
'reason': f'Invalid bbox length: {len(bbox)}'
})
continue
x, y, w, h = bbox
# Check for zero or negative dimensions
if w <= 0 or h <= 0:
issues['zero_area_boxes'].append({
'id': ann_id,
'image_id': img_id,
'bbox': bbox,
'reason': f'Zero or negative dimensions: w={w}, h={h}'
})
continue  # skip the tiny-box check so the same box is not reported twice
# Check for extremely small boxes (potential issue with mixup)
if w < 1 or h < 1:
issues['zero_area_boxes'].append({
'id': ann_id,
'image_id': img_id,
'bbox': bbox,
'reason': f'Extremely small box: w={w}, h={h}'
})
# Progress indicator
if (idx + 1) % 1000 == 0:
print(f" Checked {idx + 1}/{len(annotations)} annotations...")
print(f"✅ Annotation validation complete\n")
# Print summary
print(f"\n{'='*60}")
print("VALIDATION SUMMARY")
print(f"{'='*60}\n")
total_issues = sum(len(v) for v in issues.values())
if total_issues == 0:
print("✅ No issues found! Dataset is ready for training.")
else:
print(f"⚠️ Found {total_issues} total issues:\n")
if issues['missing_images']:
print(f" ❌ Missing images: {len(issues['missing_images'])}")
for item in issues['missing_images'][:5]: # Show first 5
print(f" - {item['file_name']}")
if len(issues['missing_images']) > 5:
print(f" ... and {len(issues['missing_images']) - 5} more")
if issues['corrupted_images']:
print(f" ❌ Corrupted images: {len(issues['corrupted_images'])}")
for item in issues['corrupted_images'][:5]:
print(f" - {item['file_name']}: {item['error']}")
if len(issues['corrupted_images']) > 5:
print(f" ... and {len(issues['corrupted_images']) - 5} more")
if issues['zero_dimension_images']:
print(f" ❌ Zero dimension images: {len(issues['zero_dimension_images'])}")
for item in issues['zero_dimension_images'][:5]:
print(f" - {item['file_name']}: {item['dimensions']}")
if len(issues['zero_dimension_images']) > 5:
print(f" ... and {len(issues['zero_dimension_images']) - 5} more")
if issues['invalid_annotations']:
print(f" ❌ Invalid annotations: {len(issues['invalid_annotations'])}")
for item in issues['invalid_annotations'][:5]:
print(f" - Ann ID {item['id']}: {item['reason']}")
if len(issues['invalid_annotations']) > 5:
print(f" ... and {len(issues['invalid_annotations']) - 5} more")
if issues['zero_area_boxes']:
print(f" ⚠️ Zero/tiny area boxes: {len(issues['zero_area_boxes'])}")
print(f" These may cause issues with mixup augmentation!")
for item in issues['zero_area_boxes'][:5]:
print(f" - Ann ID {item['id']}, bbox: {item['bbox']}")
if len(issues['zero_area_boxes']) > 5:
print(f" ... and {len(issues['zero_area_boxes']) - 5} more")
print()
return issues
def validate_training_dataset(training_id):
"""
Validate all COCO JSON files for a training
Args:
training_id: The training ID to validate
"""
from models.training import Training
from models.TrainingProject import TrainingProject
from services.settings_service import get_setting
training = Training.query.get(training_id)
if not training:
print(f"❌ Training {training_id} not found")
return
# Get paths
from models.TrainingProjectDetails import TrainingProjectDetails
details = TrainingProjectDetails.query.get(training.project_details_id)
training_project = TrainingProject.query.get(details.project_id)
project_name = training_project.title.replace(' ', '_') if training_project else f'project_{details.project_id}'
training_folder_name = f"{training.exp_name or training.training_name or 'training'}_{training_id}"
training_folder_name = training_folder_name.replace(' ', '_')
output_base_path = get_setting('yolox_output_path', './backend')
data_dir = get_setting('yolox_data_dir', '/home/kitraining/To_Annotate/')
annotations_dir = os.path.join(output_base_path, project_name, training_folder_name, 'annotations')
# Validate each split
splits = ['train', 'valid', 'test']
all_issues = {}
for split in splits:
json_file = os.path.join(annotations_dir, f'coco_project_{training_id}_{split}.json')
if os.path.exists(json_file):
all_issues[split] = validate_coco_json(json_file, data_dir)
else:
print(f"⚠️ JSON file not found: {json_file}")
return all_issues
if __name__ == '__main__':
import sys
if len(sys.argv) > 1:
training_id = int(sys.argv[1])
validate_training_dataset(training_id)
else:
print("Usage: python validate_dataset.py <training_id>")