first push

This commit is contained in:
Philipp
2025-11-28 12:50:27 +01:00
parent 471ea10341
commit 5220ffbe46
84 changed files with 1857 additions and 1527 deletions

View File

@@ -0,0 +1 @@
# Services module

View File

@@ -0,0 +1,85 @@
import requests
import time
# Base URL of the Label Studio API on the local network.
API_URL = 'http://192.168.1.19:8080/api'
# NOTE(review): hardcoded credential committed to source — should be moved to
# an environment variable or config file and the token rotated.
API_TOKEN = 'c1cef980b7c73004f4ee880a42839313b863869f'
def fetch_label_studio_project(project_id):
    """Fetch a Label Studio project's annotations via the JSON_MIN export.

    Triggers the export endpoint; if the response is already the data list it
    is returned directly, otherwise the endpoint is polled (up to 20 tries,
    2 s apart) for a download URL, and the export file is then downloaded.

    Args:
        project_id: Label Studio project id.

    Returns:
        list: Parsed JSON_MIN annotation records.

    Raises:
        Exception: If any HTTP call fails or the export never becomes ready.
    """
    export_url = f'{API_URL}/projects/{project_id}/export?exportType=JSON_MIN'
    headers = {'Authorization': f'Token {API_TOKEN}'}
    # Trigger export. Timeouts prevent hanging forever on an unreachable host.
    res = requests.get(export_url, headers=headers, timeout=60)
    if not res.ok:
        error_text = res.text if res.text else ''
        print(f'Failed to trigger export: {res.status_code} {res.reason} - {error_text}')
        raise Exception(f'Failed to trigger export: {res.status_code} {res.reason}')
    data = res.json()
    # If data is an array, it's ready
    if isinstance(data, list):
        return data
    # If not, poll for the export file
    file_url = data.get('download_url') or data.get('url')
    tries = 0
    while not file_url and tries < 20:
        time.sleep(2)
        res = requests.get(export_url, headers=headers, timeout=60)
        if not res.ok:
            error_text = res.text if res.text else ''
            print(f'Failed to poll export: {res.status_code} {res.reason} - {error_text}')
            raise Exception(f'Failed to poll export: {res.status_code} {res.reason}')
        data = res.json()
        # Bug fix: while polling, the export may finish and the server then
        # returns the data list itself — the old code called .get() on it and
        # crashed with AttributeError.
        if isinstance(data, list):
            return data
        file_url = data.get('download_url') or data.get('url')
        tries += 1
    if not file_url:
        raise Exception('Label Studio export did not become ready')
    # Download the export file; relative URLs are resolved against the host.
    full_url = file_url if file_url.startswith('http') else f"{API_URL.replace('/api', '')}{file_url}"
    res = requests.get(full_url, headers=headers, timeout=60)
    if not res.ok:
        error_text = res.text if res.text else ''
        print(f'Failed to download export: {res.status_code} {res.reason} - {error_text}')
        raise Exception(f'Failed to download export: {res.status_code} {res.reason}')
    return res.json()
def fetch_project_ids_and_titles():
    """Fetch all Label Studio project IDs and titles.

    Returns:
        list[dict]: One ``{'id': ..., 'title': ...}`` entry per project, or
        an empty list if the request or the response parsing fails (errors
        are printed, not raised).
    """
    try:
        # Timeout prevents hanging forever on an unreachable host.
        response = requests.get(
            f'{API_URL}/projects/',
            headers={
                'Authorization': f'Token {API_TOKEN}',
                'Content-Type': 'application/json'
            },
            timeout=60
        )
        if not response.ok:
            error_text = response.text if response.text else ''
            print(f'Failed to fetch projects: {response.status_code} {response.reason} - {error_text}')
            raise Exception(f'HTTP error! status: {response.status_code}')
        data = response.json()
        if 'results' not in data or not isinstance(data['results'], list):
            raise Exception('API response does not contain results array')
        # Extract id and title from each project
        projects = [
            {'id': project['id'], 'title': project['title']}
            for project in data['results']
        ]
        print(projects)
        return projects
    except Exception as error:
        # Deliberate best-effort: callers get [] instead of an exception.
        print(f'Failed to fetch projects: {error}')
        return []

View File

@@ -0,0 +1,179 @@
import json
import os
import math
from models.TrainingProject import TrainingProject
from models.TrainingProjectDetails import TrainingProjectDetails
from models.Images import Image
from models.Annotation import Annotation
def generate_training_json(training_id):
    """Generate COCO-format JSON files for the train/valid/test splits.

    Looks up the TrainingProjectDetails row for *training_id*, collects the
    images and annotations of every source project listed in its class_map
    (remapping class names per that map), shuffles the images with a seeded
    deterministic generator, splits them by the configured percentages, and
    writes one COCO JSON file per split. Finally writes an inference exp.py
    into the project folder (best-effort; failures are printed).

    Args:
        training_id: The project_details_id identifying the training config.

    Raises:
        Exception: If no TrainingProjectDetails row exists for training_id.
    """
    # training_id is the project_details_id
    training_project_details = TrainingProjectDetails.query.get(training_id)
    if not training_project_details:
        raise Exception(f'No TrainingProjectDetails found for project_details_id {training_id}')
    details_obj = training_project_details.to_dict()
    # Parent project supplies the human-readable name for the output folder.
    training_project = TrainingProject.query.get(details_obj['project_id'])
    # Split percentages (defaults if unset). The test split is simply the
    # remainder, so a separate test_percent value is not needed here.
    train_percent = details_obj.get('train_percent', 85)
    valid_percent = details_obj.get('valid_percent', 10)
    coco_images = []
    coco_annotations = []
    coco_categories = []
    category_map = {}  # mapped class name -> COCO category id
    category_id = 0
    image_id = 0
    annotation_id = 0
    for cls in details_obj['class_map']:
        # cls pairs a source project id with its (original, mapped) label list.
        asg_map = []
        list_asg = cls[1]
        for asg in list_asg:
            asg_map.append({'original': asg[0], 'mapped': asg[1]})
            # Register each non-empty mapped label as a category exactly once.
            if asg[1] and asg[1] not in category_map:
                category_map[asg[1]] = category_id
                coco_categories.append({'id': category_id, 'name': asg[1], 'supercategory': ''})
                category_id += 1
        # Get images for this source project
        images = Image.query.filter_by(project_id=cls[0]).all()
        for image in images:
            image_id += 1
            file_name = image.image_path
            # Normalize Label Studio / local-storage path artifacts.
            if '%20' in file_name:
                file_name = file_name.replace('%20', ' ')
            if file_name and file_name.startswith('/data/local-files/?d='):
                file_name = file_name.replace('/data/local-files/?d=', '')
                file_name = file_name.replace('/home/kitraining/home/kitraining/', '')
            if file_name and file_name.startswith('home/kitraining/To_Annotate/'):
                file_name = file_name.replace('home/kitraining/To_Annotate/', '')
            # Get annotations for this image
            annotations = Annotation.query.filter_by(image_id=image.image_id).all()
            coco_images.append({
                'id': image_id,
                'file_name': file_name,
                'width': image.width or 0,
                'height': image.height or 0
            })
            for annotation in annotations:
                # Translate the stored label via asg_map; first match wins.
                mapped_class = annotation.Label
                for map_entry in asg_map:
                    if annotation.Label == map_entry['original']:
                        mapped_class = map_entry['mapped']
                        break
                # Drop annotations whose label is not an exported category.
                if mapped_class and mapped_class in category_map:
                    annotation_id += 1
                    area = 0
                    if annotation.width and annotation.height:
                        area = annotation.width * annotation.height
                    coco_annotations.append({
                        'id': annotation_id,
                        'image_id': image_id,
                        'category_id': category_map[mapped_class],
                        'bbox': [annotation.x, annotation.y, annotation.width, annotation.height],
                        'area': area,
                        'iscrowd': 0
                    })

    def seeded_random(seed):
        # Deterministic pseudo-random float in [0, 1) derived from the seed.
        x = math.sin(seed) * 10000
        return x - math.floor(x)

    def shuffle(array, seed):
        # In-place Fisher-Yates shuffle driven by the seeded generator above,
        # so the same seed always produces the same split.
        for i in range(len(array) - 1, 0, -1):
            j = int(seeded_random(seed + i) * (i + 1))
            array[i], array[j] = array[j], array[i]

    # Use seed from details_obj if present, else default to 42.
    split_seed = details_obj.get('seed', 42)
    split_seed = int(split_seed) if split_seed is not None else 42
    shuffle(coco_images, split_seed)
    # Split images; test receives whatever remains after train and valid.
    total_images = len(coco_images)
    train_count = int(total_images * train_percent / 100)
    valid_count = int(total_images * valid_percent / 100)
    train_images = coco_images[0:train_count]
    valid_images = coco_images[train_count:train_count + valid_count]
    test_images = coco_images[train_count + valid_count:]
    # Partition annotations by the split their image landed in.
    train_image_ids = {img['id'] for img in train_images}
    valid_image_ids = {img['id'] for img in valid_images}
    test_image_ids = {img['id'] for img in test_images}
    train_annotations = [ann for ann in coco_annotations if ann['image_id'] in train_image_ids]
    valid_annotations = [ann for ann in coco_annotations if ann['image_id'] in valid_image_ids]
    test_annotations = [ann for ann in coco_annotations if ann['image_id'] in test_image_ids]

    def build_coco_json(images, annotations, categories):
        # Minimal COCO object-detection document.
        return {
            'images': images,
            'annotations': annotations,
            'categories': categories
        }

    train_json = build_coco_json(train_images, train_annotations, coco_categories)
    valid_json = build_coco_json(valid_images, valid_annotations, coco_categories)
    test_json = build_coco_json(test_images, test_annotations, coco_categories)
    # Create output directory and write the three split files.
    project_name = training_project.title.replace(' ', '_') if training_project and training_project.title else f'project_{details_obj["project_id"]}'
    annotations_dir = '/home/kitraining/To_Annotate/annotations'
    os.makedirs(annotations_dir, exist_ok=True)
    train_path = f'{annotations_dir}/coco_project_{training_id}_train.json'
    valid_path = f'{annotations_dir}/coco_project_{training_id}_valid.json'
    test_path = f'{annotations_dir}/coco_project_{training_id}_test.json'
    with open(train_path, 'w') as f:
        json.dump(train_json, f, indent=2)
    with open(valid_path, 'w') as f:
        json.dump(valid_json, f, indent=2)
    with open(test_path, 'w') as f:
        json.dump(test_json, f, indent=2)
    print(f'COCO JSON splits written to {annotations_dir} for trainingId {training_id}')
    # Best-effort: also generate the inference exp.py in the project folder.
    # Imported here to avoid a circular import at module load time.
    from services.generate_yolox_exp import generate_yolox_inference_exp
    project_folder = os.path.join(os.path.dirname(__file__), '..', project_name, str(training_id))
    os.makedirs(project_folder, exist_ok=True)
    inference_exp_path = os.path.join(project_folder, 'exp_infer.py')
    try:
        exp_content = generate_yolox_inference_exp(training_id)
        with open(inference_exp_path, 'w') as f:
            f.write(exp_content)
        print(f'Inference exp.py written to {inference_exp_path}')
    except Exception as err:
        print(f'Failed to generate inference exp.py: {err}')

View File

@@ -0,0 +1,152 @@
import os
import shutil
from models.training import Training
from models.TrainingProject import TrainingProject
def generate_yolox_exp(training_id):
    """Generate YOLOX exp.py file.

    For 'coco' transfer learning the stock exp file for the selected model is
    copied into the project folder; for 'sketch' a custom exp source string is
    generated. Returns a dict with 'type' and either 'expPath' or 'expContent'.
    """
    # Fetch training row from DB; fall back to matching by project_details_id
    # when training_id is not the Training primary key.
    training = Training.query.get(training_id)
    if not training:
        training = Training.query.filter_by(project_details_id=training_id).first()
    if not training:
        raise Exception(f'Training not found for trainingId or project_details_id: {training_id}')
    # If transfer_learning is 'coco', copy default exp.py
    if training.transfer_learning == 'coco':
        # Model names like 'yolox-s' map to files like 'yolox_s.py'.
        selected_model = training.selected_model.lower().replace('-', '_')
        exp_source_path = f'/home/kitraining/Yolox/YOLOX-main/exps/default/{selected_model}.py'
        if not os.path.exists(exp_source_path):
            raise Exception(f'Default exp.py not found for model: {selected_model} at {exp_source_path}')
        # Copy to project folder
        project_details_id = training.project_details_id
        # NOTE(review): destination folder is hard-coded to 'project_23' —
        # looks like it should be derived from the actual project; confirm
        # before relying on this path for other projects.
        project_folder = os.path.join(os.path.dirname(__file__), '..', f'project_23/{project_details_id}')
        os.makedirs(project_folder, exist_ok=True)
        exp_dest_path = os.path.join(project_folder, 'exp.py')
        shutil.copyfile(exp_source_path, exp_dest_path)
        return {'type': 'default', 'expPath': exp_dest_path}
    # If transfer_learning is 'sketch', generate custom exp.py
    if training.transfer_learning == 'sketch':
        exp_content = generate_yolox_inference_exp(training_id)
        return {'type': 'custom', 'expContent': exp_content}
    raise Exception(f'Unknown transfer_learning type: {training.transfer_learning}')
def save_yolox_exp(training_id, out_path):
    """Materialize the YOLOX exp.py for *training_id* at *out_path*.

    Delegates to generate_yolox_exp and either writes the generated source
    (custom exp) or copies the prepared file (default exp). Returns out_path.
    """
    result = generate_yolox_exp(training_id)
    kind = result['type']
    if kind == 'custom' and 'expContent' in result:
        with open(out_path, 'w') as destination:
            destination.write(result['expContent'])
        return out_path
    if kind == 'default' and 'expPath' in result:
        # Copy only when the prepared file is not already at the target.
        if result['expPath'] != out_path:
            shutil.copyfile(result['expPath'], out_path)
        return out_path
    raise Exception('Unknown expResult type or missing content')
def generate_yolox_inference_exp(training_id, options=None):
    """Generate the source of a YOLOX inference exp.py from DB values.

    Args:
        training_id: Training id (or project_details_id) used to look up the
            Training row and to name the annotation files.
        options: Optional dict overriding data_dir, train_ann, val_ann,
            test_ann, depth, width, input_size, mosaic_scale, random_size,
            test_size, exp_name and enable_mixup.

    Returns:
        str: Python source code for the exp.py file.

    Raises:
        Exception: If no Training row matches training_id.
    """
    if options is None:
        options = {}
    training = Training.query.get(training_id)
    if not training:
        training = Training.query.filter_by(project_details_id=training_id).first()
    if not training:
        raise Exception(f'Training not found for trainingId or project_details_id: {training_id}')
    # Annotation file names are keyed by training_id (the project_details_id).
    project_details_id = training.project_details_id
    data_dir = options.get('data_dir', '/home/kitraining/To_Annotate/')
    train_ann = options.get('train_ann', f'coco_project_{training_id}_train.json')
    val_ann = options.get('val_ann', f'coco_project_{training_id}_valid.json')
    test_ann = options.get('test_ann', f'coco_project_{training_id}_test.json')
    # Derive num_classes from TrainingProject.classes (JSON list or dict).
    # NOTE(review): the lookup uses project_details_id as the TrainingProject
    # primary key — confirm these ids actually coincide.
    num_classes = 80  # COCO default when classes cannot be determined
    try:
        training_project = TrainingProject.query.get(project_details_id)
        if training_project and training_project.classes:
            classes_arr = training_project.classes
            if isinstance(classes_arr, str):
                import json
                classes_arr = json.loads(classes_arr)
            if isinstance(classes_arr, list):
                num_classes = len([c for c in classes_arr if c not in [None, '']])
            elif isinstance(classes_arr, dict):
                num_classes = len([k for k, v in classes_arr.items() if v not in [None, '']])
    except Exception as e:
        print(f'Could not determine num_classes from TrainingProject.classes: {e}')
    # Hyperparameters: explicit option > DB value > YOLOX default.
    depth = options.get('depth', training.depth or 1.00)
    width = options.get('width', training.width or 1.00)
    input_size = options.get('input_size', training.input_size or [640, 640])
    mosaic_scale = options.get('mosaic_scale', training.mosaic_scale or [0.1, 2])
    random_size = options.get('random_size', [10, 20])
    test_size = options.get('test_size', training.test_size or [640, 640])
    exp_name = options.get('exp_name', 'inference_exp')
    enable_mixup = options.get('enable_mixup', False)
    # Build exp content. Bug fix: interpolate test_ann instead of a hard-coded
    # file name, so an options['test_ann'] override is actually honored
    # (the default value is identical, keeping existing behavior unchanged).
    exp_content = f'''#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.
import os
from yolox.exp import Exp as MyExp
class Exp(MyExp):
    def __init__(self):
        super(Exp, self).__init__()
        self.data_dir = "{data_dir}"
        self.train_ann = "{train_ann}"
        self.val_ann = "{val_ann}"
        self.test_ann = "{test_ann}"
        self.num_classes = {num_classes}
'''
    # Point at the pretrained checkpoint when doing COCO transfer learning.
    if training.transfer_learning and isinstance(training.transfer_learning, str) and training.transfer_learning.lower() == 'coco':
        yolox_base_dir = '/home/kitraining/Yolox/YOLOX-main'
        selected_model = training.selected_model.replace('.pth', '') if training.selected_model else ''
        if selected_model:
            exp_content += f"        self.pretrained_ckpt = r'{yolox_base_dir}/pretrained/{selected_model}.pth'\n"
    # Render list-valued settings as tuple literals in the generated source.
    input_size_str = ', '.join(map(str, input_size)) if isinstance(input_size, list) else str(input_size)
    mosaic_scale_str = ', '.join(map(str, mosaic_scale)) if isinstance(mosaic_scale, list) else str(mosaic_scale)
    random_size_str = ', '.join(map(str, random_size)) if isinstance(random_size, list) else str(random_size)
    test_size_str = ', '.join(map(str, test_size)) if isinstance(test_size, list) else str(test_size)
    exp_content += f'''        self.depth = {depth}
        self.width = {width}
        self.input_size = ({input_size_str})
        self.mosaic_scale = ({mosaic_scale_str})
        self.random_size = ({random_size_str})
        self.test_size = ({test_size_str})
        self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0]
        self.enable_mixup = {str(enable_mixup)}
'''
    return exp_content
def save_yolox_inference_exp(training_id, out_path, options=None):
    """Generate the inference exp.py for *training_id* and write it to *out_path*.

    Any *options* are forwarded to generate_yolox_inference_exp. Returns the
    path that was written.
    """
    content = generate_yolox_inference_exp(training_id, options)
    with open(out_path, 'w') as destination:
        destination.write(content)
    return out_path

View File

@@ -0,0 +1,36 @@
from models.training import Training
from models.TrainingProjectDetails import TrainingProjectDetails
from database.database import db
def push_yolox_exp_to_db(settings):
    """Save YOLOX settings to database"""
    # Work on a copy so the caller's dict is never mutated.
    normalized = dict(settings)
    # The frontend sends 'act'; the DB column is 'activation'.
    if 'act' in normalized:
        normalized['activation'] = normalized.pop('act')
    # Checkbox values arrive as 'on'/'off' strings; store a boolean.
    flag = normalized.get('save_history_ckpt')
    if isinstance(flag, str):
        normalized['save_history_ckpt'] = (flag == 'on')
    # Size/scale fields may arrive as comma-separated strings; parse to floats
    # (a single value stays scalar, multiple values become a list).
    for key in ('input_size', 'test_size', 'mosaic_scale', 'mixup_scale'):
        raw = normalized.get(key)
        if isinstance(raw, str):
            values = [float(part.strip()) for part in raw.split(',')]
            normalized[key] = values[0] if len(values) == 1 else values
    # Resolve the owning TrainingProjectDetails row for this project.
    details = TrainingProjectDetails.query.filter_by(project_id=normalized['project_id']).first()
    if details is None:
        raise Exception(f'TrainingProjectDetails not found for project_id {normalized["project_id"]}')
    normalized['project_details_id'] = details.id
    # Persist the new Training row.
    training = Training(**normalized)
    db.session.add(training)
    db.session.commit()
    return training

View File

@@ -0,0 +1,149 @@
from database.database import db
from models.LabelStudioProject import LabelStudioProject
from models.Images import Image
from models.Annotation import Annotation
from services.fetch_labelstudio import fetch_label_studio_project, fetch_project_ids_and_titles
# Module-level status flag: seed_label_studio sets 'running' to True while it
# works and back to False when done (presumably read elsewhere to report
# progress — verify against callers).
update_status = {"running": False}
def seed_label_studio():
    """Seed the database with Label Studio project data.

    For every Label Studio project: upsert the project row, fetch its
    JSON_MIN export, delete previously imported images/annotations for the
    project, and insert the fresh ones. Sets update_status["running"] for the
    duration of the run.

    Returns:
        dict: {'success': bool, 'message': str}; on failure the session is
        rolled back and the error message is returned.
    """

    def _extract_label(ann_detail):
        # rectanglelabels may be a non-empty list, a bare string, or missing.
        rectanglelabels = ann_detail.get('rectanglelabels', [])
        if isinstance(rectanglelabels, list) and len(rectanglelabels) > 0:
            return rectanglelabels[0]
        if isinstance(rectanglelabels, str):
            return rectanglelabels
        return 'unknown'

    def _scaled_annotations(ann, key, width, height):
        # Convert the percentage-based rectangles under ann[key] into pixel
        # coordinates. This replaces the previously duplicated code for the
        # 'label_rectangles' and 'label' export variants.
        results = []
        for ann_detail in ann[key]:
            results.append({
                'image_path': ann.get('image'),
                'x': (ann_detail['x'] * width) / 100,
                'y': (ann_detail['y'] * height) / 100,
                'width': (ann_detail['width'] * width) / 100,
                'height': (ann_detail['height'] * height) / 100,
                'Label': _extract_label(ann_detail)
            })
        return results

    update_status["running"] = True
    print('Seeding started')
    try:
        projects = fetch_project_ids_and_titles()
        for project in projects:
            print(f"Processing project {project['id']} ({project['title']})")
            # Upsert project in DB
            existing_project = LabelStudioProject.query.filter_by(project_id=project['id']).first()
            if existing_project:
                existing_project.title = project['title']
            else:
                new_project = LabelStudioProject(project_id=project['id'], title=project['title'])
                db.session.add(new_project)
            db.session.commit()
            # Fetch project data (annotations array)
            data = fetch_label_studio_project(project['id'])
            if not isinstance(data, list) or len(data) == 0:
                print(f"No annotation data for project {project['id']}")
                continue
            # Remove old images and annotations for this project
            old_images = Image.query.filter_by(project_id=project['id']).all()
            old_image_ids = [img.image_id for img in old_images]
            if old_image_ids:
                Annotation.query.filter(Annotation.image_id.in_(old_image_ids)).delete(synchronize_session=False)
                Image.query.filter_by(project_id=project['id']).delete()
                db.session.commit()
                print(f"Deleted {len(old_image_ids)} old images and their annotations for project {project['id']}")
            # Build bulk insert payloads.
            images_bulk = []
            anns_bulk = []
            for ann in data:
                # Image dimensions come from the first rectangle entry of
                # whichever export variant is present.
                width = None
                height = None
                if isinstance(ann.get('label_rectangles'), list) and len(ann['label_rectangles']) > 0:
                    width = ann['label_rectangles'][0].get('original_width')
                    height = ann['label_rectangles'][0].get('original_height')
                elif isinstance(ann.get('label'), list) and len(ann['label']) > 0:
                    if ann['label'][0].get('original_width') and ann['label'][0].get('original_height'):
                        width = ann['label'][0]['original_width']
                        height = ann['label'][0]['original_height']
                # Only process records with valid dimensions.
                if width and height:
                    images_bulk.append({
                        'project_id': project['id'],
                        'image_path': ann.get('image'),
                        'width': width,
                        'height': height
                    })
                    # Handle multiple annotations per image.
                    if isinstance(ann.get('label_rectangles'), list):
                        anns_bulk.extend(_scaled_annotations(ann, 'label_rectangles', width, height))
                    elif isinstance(ann.get('label'), list):
                        anns_bulk.extend(_scaled_annotations(ann, 'label', width, height))
            # Insert images; flush after each add to obtain generated IDs.
            inserted_images = []
            for img_data in images_bulk:
                new_image = Image(**img_data)
                db.session.add(new_image)
                db.session.flush()
                inserted_images.append(new_image)
            db.session.commit()
            # Map image_path -> image_id so annotations can reference rows.
            image_map = {img.image_path: img.image_id for img in inserted_images}
            for ann_data in anns_bulk:
                ann_data['image_id'] = image_map.get(ann_data['image_path'])
                del ann_data['image_path']
            # Insert annotations
            for ann_data in anns_bulk:
                new_annotation = Annotation(**ann_data)
                db.session.add(new_annotation)
            db.session.commit()
            print(f"Inserted {len(images_bulk)} images and {len(anns_bulk)} annotations for project {project['id']}")
        print('Seeding done')
        return {'success': True, 'message': 'Data inserted successfully!'}
    except Exception as error:
        print(f'Error inserting data: {error}')
        db.session.rollback()
        return {'success': False, 'message': str(error)}
    finally:
        update_status["running"] = False
        print('updateStatus.running set to false')