first push

This commit is contained in:
Philipp
2025-11-28 12:50:27 +01:00
parent 471ea10341
commit 5220ffbe46
84 changed files with 1857 additions and 1527 deletions

View File

@@ -0,0 +1 @@
# Services module

View File

@@ -0,0 +1,85 @@
import requests
import time
# Base URL of the Label Studio API on the local network.
API_URL = 'http://192.168.1.19:8080/api'
# NOTE(review): hardcoded credential committed to source — should be moved to
# an environment variable or config file and the token rotated.
API_TOKEN = 'c1cef980b7c73004f4ee880a42839313b863869f'
def fetch_label_studio_project(project_id):
    """Fetch a Label Studio project's annotations via the JSON_MIN export.

    Triggers the export endpoint; if the response is already the data list it
    is returned directly, otherwise the endpoint is polled (up to 20 tries,
    2 s apart) for a download URL, and the export file is then downloaded.

    Args:
        project_id: Label Studio project id.

    Returns:
        list: Parsed JSON_MIN annotation records.

    Raises:
        Exception: If any HTTP call fails or the export never becomes ready.
    """
    export_url = f'{API_URL}/projects/{project_id}/export?exportType=JSON_MIN'
    headers = {'Authorization': f'Token {API_TOKEN}'}
    # Trigger export. Timeouts prevent hanging forever on an unreachable host.
    res = requests.get(export_url, headers=headers, timeout=60)
    if not res.ok:
        error_text = res.text if res.text else ''
        print(f'Failed to trigger export: {res.status_code} {res.reason} - {error_text}')
        raise Exception(f'Failed to trigger export: {res.status_code} {res.reason}')
    data = res.json()
    # If data is an array, it's ready
    if isinstance(data, list):
        return data
    # If not, poll for the export file
    file_url = data.get('download_url') or data.get('url')
    tries = 0
    while not file_url and tries < 20:
        time.sleep(2)
        res = requests.get(export_url, headers=headers, timeout=60)
        if not res.ok:
            error_text = res.text if res.text else ''
            print(f'Failed to poll export: {res.status_code} {res.reason} - {error_text}')
            raise Exception(f'Failed to poll export: {res.status_code} {res.reason}')
        data = res.json()
        # Bug fix: while polling, the export may finish and the server then
        # returns the data list itself — the old code called .get() on it and
        # crashed with AttributeError.
        if isinstance(data, list):
            return data
        file_url = data.get('download_url') or data.get('url')
        tries += 1
    if not file_url:
        raise Exception('Label Studio export did not become ready')
    # Download the export file; relative URLs are resolved against the host.
    full_url = file_url if file_url.startswith('http') else f"{API_URL.replace('/api', '')}{file_url}"
    res = requests.get(full_url, headers=headers, timeout=60)
    if not res.ok:
        error_text = res.text if res.text else ''
        print(f'Failed to download export: {res.status_code} {res.reason} - {error_text}')
        raise Exception(f'Failed to download export: {res.status_code} {res.reason}')
    return res.json()
def fetch_project_ids_and_titles():
    """Fetch all Label Studio project IDs and titles.

    Returns:
        list[dict]: One ``{'id': ..., 'title': ...}`` entry per project, or
        an empty list if the request or the response parsing fails (errors
        are printed, not raised).
    """
    try:
        # Timeout prevents hanging forever on an unreachable host.
        response = requests.get(
            f'{API_URL}/projects/',
            headers={
                'Authorization': f'Token {API_TOKEN}',
                'Content-Type': 'application/json'
            },
            timeout=60
        )
        if not response.ok:
            error_text = response.text if response.text else ''
            print(f'Failed to fetch projects: {response.status_code} {response.reason} - {error_text}')
            raise Exception(f'HTTP error! status: {response.status_code}')
        data = response.json()
        if 'results' not in data or not isinstance(data['results'], list):
            raise Exception('API response does not contain results array')
        # Extract id and title from each project
        projects = [
            {'id': project['id'], 'title': project['title']}
            for project in data['results']
        ]
        print(projects)
        return projects
    except Exception as error:
        # Deliberate best-effort: callers get [] instead of an exception.
        print(f'Failed to fetch projects: {error}')
        return []

View File

@@ -0,0 +1,179 @@
import json
import os
import math
from models.TrainingProject import TrainingProject
from models.TrainingProjectDetails import TrainingProjectDetails
from models.Images import Image
from models.Annotation import Annotation
def generate_training_json(training_id):
    """Generate COCO-format JSON files for the train/valid/test splits.

    Looks up the TrainingProjectDetails row for *training_id*, collects the
    images and annotations of every source project listed in its class_map
    (remapping class names per that map), shuffles the images with a seeded
    deterministic generator, splits them by the configured percentages, and
    writes one COCO JSON file per split. Finally writes an inference exp.py
    into the project folder (best-effort; failures are printed).

    Args:
        training_id: The project_details_id identifying the training config.

    Raises:
        Exception: If no TrainingProjectDetails row exists for training_id.
    """
    # training_id is the project_details_id
    training_project_details = TrainingProjectDetails.query.get(training_id)
    if not training_project_details:
        raise Exception(f'No TrainingProjectDetails found for project_details_id {training_id}')
    details_obj = training_project_details.to_dict()
    # Parent project supplies the human-readable name for the output folder.
    training_project = TrainingProject.query.get(details_obj['project_id'])
    # Split percentages (defaults if unset). The test split is simply the
    # remainder, so a separate test_percent value is not needed here.
    train_percent = details_obj.get('train_percent', 85)
    valid_percent = details_obj.get('valid_percent', 10)
    coco_images = []
    coco_annotations = []
    coco_categories = []
    category_map = {}  # mapped class name -> COCO category id
    category_id = 0
    image_id = 0
    annotation_id = 0
    for cls in details_obj['class_map']:
        # cls pairs a source project id with its (original, mapped) label list.
        asg_map = []
        list_asg = cls[1]
        for asg in list_asg:
            asg_map.append({'original': asg[0], 'mapped': asg[1]})
            # Register each non-empty mapped label as a category exactly once.
            if asg[1] and asg[1] not in category_map:
                category_map[asg[1]] = category_id
                coco_categories.append({'id': category_id, 'name': asg[1], 'supercategory': ''})
                category_id += 1
        # Get images for this source project
        images = Image.query.filter_by(project_id=cls[0]).all()
        for image in images:
            image_id += 1
            file_name = image.image_path
            # Normalize Label Studio / local-storage path artifacts.
            if '%20' in file_name:
                file_name = file_name.replace('%20', ' ')
            if file_name and file_name.startswith('/data/local-files/?d='):
                file_name = file_name.replace('/data/local-files/?d=', '')
                file_name = file_name.replace('/home/kitraining/home/kitraining/', '')
            if file_name and file_name.startswith('home/kitraining/To_Annotate/'):
                file_name = file_name.replace('home/kitraining/To_Annotate/', '')
            # Get annotations for this image
            annotations = Annotation.query.filter_by(image_id=image.image_id).all()
            coco_images.append({
                'id': image_id,
                'file_name': file_name,
                'width': image.width or 0,
                'height': image.height or 0
            })
            for annotation in annotations:
                # Translate the stored label via asg_map; first match wins.
                mapped_class = annotation.Label
                for map_entry in asg_map:
                    if annotation.Label == map_entry['original']:
                        mapped_class = map_entry['mapped']
                        break
                # Drop annotations whose label is not an exported category.
                if mapped_class and mapped_class in category_map:
                    annotation_id += 1
                    area = 0
                    if annotation.width and annotation.height:
                        area = annotation.width * annotation.height
                    coco_annotations.append({
                        'id': annotation_id,
                        'image_id': image_id,
                        'category_id': category_map[mapped_class],
                        'bbox': [annotation.x, annotation.y, annotation.width, annotation.height],
                        'area': area,
                        'iscrowd': 0
                    })

    def seeded_random(seed):
        # Deterministic pseudo-random float in [0, 1) derived from the seed.
        x = math.sin(seed) * 10000
        return x - math.floor(x)

    def shuffle(array, seed):
        # In-place Fisher-Yates shuffle driven by the seeded generator above,
        # so the same seed always produces the same split.
        for i in range(len(array) - 1, 0, -1):
            j = int(seeded_random(seed + i) * (i + 1))
            array[i], array[j] = array[j], array[i]

    # Use seed from details_obj if present, else default to 42.
    split_seed = details_obj.get('seed', 42)
    split_seed = int(split_seed) if split_seed is not None else 42
    shuffle(coco_images, split_seed)
    # Split images; test receives whatever remains after train and valid.
    total_images = len(coco_images)
    train_count = int(total_images * train_percent / 100)
    valid_count = int(total_images * valid_percent / 100)
    train_images = coco_images[0:train_count]
    valid_images = coco_images[train_count:train_count + valid_count]
    test_images = coco_images[train_count + valid_count:]
    # Partition annotations by the split their image landed in.
    train_image_ids = {img['id'] for img in train_images}
    valid_image_ids = {img['id'] for img in valid_images}
    test_image_ids = {img['id'] for img in test_images}
    train_annotations = [ann for ann in coco_annotations if ann['image_id'] in train_image_ids]
    valid_annotations = [ann for ann in coco_annotations if ann['image_id'] in valid_image_ids]
    test_annotations = [ann for ann in coco_annotations if ann['image_id'] in test_image_ids]

    def build_coco_json(images, annotations, categories):
        # Minimal COCO object-detection document.
        return {
            'images': images,
            'annotations': annotations,
            'categories': categories
        }

    train_json = build_coco_json(train_images, train_annotations, coco_categories)
    valid_json = build_coco_json(valid_images, valid_annotations, coco_categories)
    test_json = build_coco_json(test_images, test_annotations, coco_categories)
    # Create output directory and write the three split files.
    project_name = training_project.title.replace(' ', '_') if training_project and training_project.title else f'project_{details_obj["project_id"]}'
    annotations_dir = '/home/kitraining/To_Annotate/annotations'
    os.makedirs(annotations_dir, exist_ok=True)
    train_path = f'{annotations_dir}/coco_project_{training_id}_train.json'
    valid_path = f'{annotations_dir}/coco_project_{training_id}_valid.json'
    test_path = f'{annotations_dir}/coco_project_{training_id}_test.json'
    with open(train_path, 'w') as f:
        json.dump(train_json, f, indent=2)
    with open(valid_path, 'w') as f:
        json.dump(valid_json, f, indent=2)
    with open(test_path, 'w') as f:
        json.dump(test_json, f, indent=2)
    print(f'COCO JSON splits written to {annotations_dir} for trainingId {training_id}')
    # Best-effort: also generate the inference exp.py in the project folder.
    # Imported here to avoid a circular import at module load time.
    from services.generate_yolox_exp import generate_yolox_inference_exp
    project_folder = os.path.join(os.path.dirname(__file__), '..', project_name, str(training_id))
    os.makedirs(project_folder, exist_ok=True)
    inference_exp_path = os.path.join(project_folder, 'exp_infer.py')
    try:
        exp_content = generate_yolox_inference_exp(training_id)
        with open(inference_exp_path, 'w') as f:
            f.write(exp_content)
        print(f'Inference exp.py written to {inference_exp_path}')
    except Exception as err:
        print(f'Failed to generate inference exp.py: {err}')

View File

@@ -0,0 +1,152 @@
import os
import shutil
from models.training import Training
from models.TrainingProject import TrainingProject
def generate_yolox_exp(training_id):
    """Generate YOLOX exp.py file.

    For 'coco' transfer learning the stock exp file for the selected model is
    copied into the project folder; for 'sketch' a custom exp source string is
    generated. Returns a dict with 'type' and either 'expPath' or 'expContent'.
    """
    # Fetch training row from DB; fall back to matching by project_details_id
    # when training_id is not the Training primary key.
    training = Training.query.get(training_id)
    if not training:
        training = Training.query.filter_by(project_details_id=training_id).first()
    if not training:
        raise Exception(f'Training not found for trainingId or project_details_id: {training_id}')
    # If transfer_learning is 'coco', copy default exp.py
    if training.transfer_learning == 'coco':
        # Model names like 'yolox-s' map to files like 'yolox_s.py'.
        selected_model = training.selected_model.lower().replace('-', '_')
        exp_source_path = f'/home/kitraining/Yolox/YOLOX-main/exps/default/{selected_model}.py'
        if not os.path.exists(exp_source_path):
            raise Exception(f'Default exp.py not found for model: {selected_model} at {exp_source_path}')
        # Copy to project folder
        project_details_id = training.project_details_id
        # NOTE(review): destination folder is hard-coded to 'project_23' —
        # looks like it should be derived from the actual project; confirm
        # before relying on this path for other projects.
        project_folder = os.path.join(os.path.dirname(__file__), '..', f'project_23/{project_details_id}')
        os.makedirs(project_folder, exist_ok=True)
        exp_dest_path = os.path.join(project_folder, 'exp.py')
        shutil.copyfile(exp_source_path, exp_dest_path)
        return {'type': 'default', 'expPath': exp_dest_path}
    # If transfer_learning is 'sketch', generate custom exp.py
    if training.transfer_learning == 'sketch':
        exp_content = generate_yolox_inference_exp(training_id)
        return {'type': 'custom', 'expContent': exp_content}
    raise Exception(f'Unknown transfer_learning type: {training.transfer_learning}')
def save_yolox_exp(training_id, out_path):
    """Materialize the YOLOX exp.py for *training_id* at *out_path*.

    Delegates to generate_yolox_exp and either writes the generated source
    (custom exp) or copies the prepared file (default exp). Returns out_path.
    """
    result = generate_yolox_exp(training_id)
    kind = result['type']
    if kind == 'custom' and 'expContent' in result:
        with open(out_path, 'w') as destination:
            destination.write(result['expContent'])
        return out_path
    if kind == 'default' and 'expPath' in result:
        # Copy only when the prepared file is not already at the target.
        if result['expPath'] != out_path:
            shutil.copyfile(result['expPath'], out_path)
        return out_path
    raise Exception('Unknown expResult type or missing content')
def generate_yolox_inference_exp(training_id, options=None):
    """Generate the source of a YOLOX inference exp.py from DB values.

    Args:
        training_id: Training id (or project_details_id) used to look up the
            Training row and to name the annotation files.
        options: Optional dict overriding data_dir, train_ann, val_ann,
            test_ann, depth, width, input_size, mosaic_scale, random_size,
            test_size, exp_name and enable_mixup.

    Returns:
        str: Python source code for the exp.py file.

    Raises:
        Exception: If no Training row matches training_id.
    """
    if options is None:
        options = {}
    training = Training.query.get(training_id)
    if not training:
        training = Training.query.filter_by(project_details_id=training_id).first()
    if not training:
        raise Exception(f'Training not found for trainingId or project_details_id: {training_id}')
    # Annotation file names are keyed by training_id (the project_details_id).
    project_details_id = training.project_details_id
    data_dir = options.get('data_dir', '/home/kitraining/To_Annotate/')
    train_ann = options.get('train_ann', f'coco_project_{training_id}_train.json')
    val_ann = options.get('val_ann', f'coco_project_{training_id}_valid.json')
    test_ann = options.get('test_ann', f'coco_project_{training_id}_test.json')
    # Derive num_classes from TrainingProject.classes (JSON list or dict).
    # NOTE(review): the lookup uses project_details_id as the TrainingProject
    # primary key — confirm these ids actually coincide.
    num_classes = 80  # COCO default when classes cannot be determined
    try:
        training_project = TrainingProject.query.get(project_details_id)
        if training_project and training_project.classes:
            classes_arr = training_project.classes
            if isinstance(classes_arr, str):
                import json
                classes_arr = json.loads(classes_arr)
            if isinstance(classes_arr, list):
                num_classes = len([c for c in classes_arr if c not in [None, '']])
            elif isinstance(classes_arr, dict):
                num_classes = len([k for k, v in classes_arr.items() if v not in [None, '']])
    except Exception as e:
        print(f'Could not determine num_classes from TrainingProject.classes: {e}')
    # Hyperparameters: explicit option > DB value > YOLOX default.
    depth = options.get('depth', training.depth or 1.00)
    width = options.get('width', training.width or 1.00)
    input_size = options.get('input_size', training.input_size or [640, 640])
    mosaic_scale = options.get('mosaic_scale', training.mosaic_scale or [0.1, 2])
    random_size = options.get('random_size', [10, 20])
    test_size = options.get('test_size', training.test_size or [640, 640])
    exp_name = options.get('exp_name', 'inference_exp')
    enable_mixup = options.get('enable_mixup', False)
    # Build exp content. Bug fix: interpolate test_ann instead of a hard-coded
    # file name, so an options['test_ann'] override is actually honored
    # (the default value is identical, keeping existing behavior unchanged).
    exp_content = f'''#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
import os
from yolox.exp import Exp as MyExp
class Exp(MyExp):
    def __init__(self):
        super(Exp, self).__init__()
        self.data_dir = "{data_dir}"
        self.train_ann = "{train_ann}"
        self.val_ann = "{val_ann}"
        self.test_ann = "{test_ann}"
        self.num_classes = {num_classes}
'''
    # Point at the pretrained checkpoint when doing COCO transfer learning.
    if training.transfer_learning and isinstance(training.transfer_learning, str) and training.transfer_learning.lower() == 'coco':
        yolox_base_dir = '/home/kitraining/Yolox/YOLOX-main'
        selected_model = training.selected_model.replace('.pth', '') if training.selected_model else ''
        if selected_model:
            exp_content += f"        self.pretrained_ckpt = r'{yolox_base_dir}/pretrained/{selected_model}.pth'\n"
    # Render list-valued settings as tuple literals in the generated source.
    input_size_str = ', '.join(map(str, input_size)) if isinstance(input_size, list) else str(input_size)
    mosaic_scale_str = ', '.join(map(str, mosaic_scale)) if isinstance(mosaic_scale, list) else str(mosaic_scale)
    random_size_str = ', '.join(map(str, random_size)) if isinstance(random_size, list) else str(random_size)
    test_size_str = ', '.join(map(str, test_size)) if isinstance(test_size, list) else str(test_size)
    exp_content += f'''        self.depth = {depth}
        self.width = {width}
        self.input_size = ({input_size_str})
        self.mosaic_scale = ({mosaic_scale_str})
        self.random_size = ({random_size_str})
        self.test_size = ({test_size_str})
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        self.enable_mixup = {str(enable_mixup)}
'''
    return exp_content
def save_yolox_inference_exp(training_id, out_path, options=None):
    """Generate the inference exp.py for *training_id* and write it to *out_path*.

    Any *options* are forwarded to generate_yolox_inference_exp. Returns the
    path that was written.
    """
    content = generate_yolox_inference_exp(training_id, options)
    with open(out_path, 'w') as destination:
        destination.write(content)
    return out_path

View File

@@ -0,0 +1,36 @@
from models.training import Training
from models.TrainingProjectDetails import TrainingProjectDetails
from database.database import db
def push_yolox_exp_to_db(settings):
    """Save YOLOX settings to database"""
    # Work on a copy so the caller's dict is never mutated.
    normalized = dict(settings)
    # The frontend sends 'act'; the DB column is 'activation'.
    if 'act' in normalized:
        normalized['activation'] = normalized.pop('act')
    # Checkbox values arrive as 'on'/'off' strings; store a boolean.
    flag = normalized.get('save_history_ckpt')
    if isinstance(flag, str):
        normalized['save_history_ckpt'] = (flag == 'on')
    # Size/scale fields may arrive as comma-separated strings; parse to floats
    # (a single value stays scalar, multiple values become a list).
    for key in ('input_size', 'test_size', 'mosaic_scale', 'mixup_scale'):
        raw = normalized.get(key)
        if isinstance(raw, str):
            values = [float(part.strip()) for part in raw.split(',')]
            normalized[key] = values[0] if len(values) == 1 else values
    # Resolve the owning TrainingProjectDetails row for this project.
    details = TrainingProjectDetails.query.filter_by(project_id=normalized['project_id']).first()
    if details is None:
        raise Exception(f'TrainingProjectDetails not found for project_id {normalized["project_id"]}')
    normalized['project_details_id'] = details.id
    # Persist the new Training row.
    training = Training(**normalized)
    db.session.add(training)
    db.session.commit()
    return training

View File

@@ -0,0 +1,149 @@
from database.database import db
from models.LabelStudioProject import LabelStudioProject
from models.Images import Image
from models.Annotation import Annotation
from services.fetch_labelstudio import fetch_label_studio_project, fetch_project_ids_and_titles
# Module-level status flag: seed_label_studio sets 'running' to True while it
# works and back to False when done (presumably read elsewhere to report
# progress — verify against callers).
update_status = {"running": False}
def seed_label_studio():
    """Seed the database with Label Studio project data.

    For every Label Studio project: upsert the project row, fetch its
    JSON_MIN export, delete previously imported images/annotations for the
    project, and insert the fresh ones. Sets update_status["running"] for the
    duration of the run.

    Returns:
        dict: {'success': bool, 'message': str}; on failure the session is
        rolled back and the error message is returned.
    """

    def _extract_label(ann_detail):
        # rectanglelabels may be a non-empty list, a bare string, or missing.
        rectanglelabels = ann_detail.get('rectanglelabels', [])
        if isinstance(rectanglelabels, list) and len(rectanglelabels) > 0:
            return rectanglelabels[0]
        if isinstance(rectanglelabels, str):
            return rectanglelabels
        return 'unknown'

    def _scaled_annotations(ann, key, width, height):
        # Convert the percentage-based rectangles under ann[key] into pixel
        # coordinates. This replaces the previously duplicated code for the
        # 'label_rectangles' and 'label' export variants.
        results = []
        for ann_detail in ann[key]:
            results.append({
                'image_path': ann.get('image'),
                'x': (ann_detail['x'] * width) / 100,
                'y': (ann_detail['y'] * height) / 100,
                'width': (ann_detail['width'] * width) / 100,
                'height': (ann_detail['height'] * height) / 100,
                'Label': _extract_label(ann_detail)
            })
        return results

    update_status["running"] = True
    print('Seeding started')
    try:
        projects = fetch_project_ids_and_titles()
        for project in projects:
            print(f"Processing project {project['id']} ({project['title']})")
            # Upsert project in DB
            existing_project = LabelStudioProject.query.filter_by(project_id=project['id']).first()
            if existing_project:
                existing_project.title = project['title']
            else:
                new_project = LabelStudioProject(project_id=project['id'], title=project['title'])
                db.session.add(new_project)
            db.session.commit()
            # Fetch project data (annotations array)
            data = fetch_label_studio_project(project['id'])
            if not isinstance(data, list) or len(data) == 0:
                print(f"No annotation data for project {project['id']}")
                continue
            # Remove old images and annotations for this project
            old_images = Image.query.filter_by(project_id=project['id']).all()
            old_image_ids = [img.image_id for img in old_images]
            if old_image_ids:
                Annotation.query.filter(Annotation.image_id.in_(old_image_ids)).delete(synchronize_session=False)
                Image.query.filter_by(project_id=project['id']).delete()
                db.session.commit()
                print(f"Deleted {len(old_image_ids)} old images and their annotations for project {project['id']}")
            # Build bulk insert payloads.
            images_bulk = []
            anns_bulk = []
            for ann in data:
                # Image dimensions come from the first rectangle entry of
                # whichever export variant is present.
                width = None
                height = None
                if isinstance(ann.get('label_rectangles'), list) and len(ann['label_rectangles']) > 0:
                    width = ann['label_rectangles'][0].get('original_width')
                    height = ann['label_rectangles'][0].get('original_height')
                elif isinstance(ann.get('label'), list) and len(ann['label']) > 0:
                    if ann['label'][0].get('original_width') and ann['label'][0].get('original_height'):
                        width = ann['label'][0]['original_width']
                        height = ann['label'][0]['original_height']
                # Only process records with valid dimensions.
                if width and height:
                    images_bulk.append({
                        'project_id': project['id'],
                        'image_path': ann.get('image'),
                        'width': width,
                        'height': height
                    })
                    # Handle multiple annotations per image.
                    if isinstance(ann.get('label_rectangles'), list):
                        anns_bulk.extend(_scaled_annotations(ann, 'label_rectangles', width, height))
                    elif isinstance(ann.get('label'), list):
                        anns_bulk.extend(_scaled_annotations(ann, 'label', width, height))
            # Insert images; flush after each add to obtain generated IDs.
            inserted_images = []
            for img_data in images_bulk:
                new_image = Image(**img_data)
                db.session.add(new_image)
                db.session.flush()
                inserted_images.append(new_image)
            db.session.commit()
            # Map image_path -> image_id so annotations can reference rows.
            image_map = {img.image_path: img.image_id for img in inserted_images}
            for ann_data in anns_bulk:
                ann_data['image_id'] = image_map.get(ann_data['image_path'])
                del ann_data['image_path']
            # Insert annotations
            for ann_data in anns_bulk:
                new_annotation = Annotation(**ann_data)
                db.session.add(new_annotation)
            db.session.commit()
            print(f"Inserted {len(images_bulk)} images and {len(anns_bulk)} annotations for project {project['id']}")
        print('Seeding done')
        return {'success': True, 'message': 'Data inserted successfully!'}
    except Exception as error:
        print(f'Error inserting data: {error}')
        db.session.rollback()
        return {'success': False, 'message': str(error)}
    finally:
        update_status["running"] = False
        print('updateStatus.running set to false')