// Files
// Trackeep/backend/services/computer_vision_service.go
// Tomas Dvorak d27cf14110 first test
// 2026-02-08 14:14:55 +01:00
//
// 462 lines
// 14 KiB
// Go

package services
import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"image"
	_ "image/gif"  // register the GIF decoder for image.Decode
	_ "image/jpeg" // register the JPEG decoder for image.Decode
	_ "image/png"  // register the PNG decoder for image.Decode
	"regexp"
	"strings"
	"time"

	"github.com/trackeep/backend/models"
	"gorm.io/gorm"
)
// ComputerVisionService groups the image- and document-analysis helpers in
// this file around a shared database handle.
type ComputerVisionService struct {
	// db is the GORM handle used when analysis results are persisted.
	db *gorm.DB
}
// NewComputerVisionService returns a ComputerVisionService that uses db for
// persistence.
func NewComputerVisionService(db *gorm.DB) *ComputerVisionService {
	svc := new(ComputerVisionService)
	svc.db = db
	return svc
}
// ImageAnalysisRequest is the input accepted by AnalyzeImage.
type ImageAnalysisRequest struct {
	// ImageData is the image, base64 encoded.
	ImageData string `json:"image_data" binding:"required"`
	// AnalysisType selects the analysis to run: ocr, objects, text, faces or all.
	AnalysisType string `json:"analysis_type" binding:"required"`
	// FileID is optional; when set, the analysis result is also saved for
	// that file.
	FileID *uint `json:"file_id,omitempty"`
}
// ImageAnalysisResponse carries everything AnalyzeImage produced for one image.
type ImageAnalysisResponse struct {
	// Success is set to true by AnalyzeImage when it returns a response.
	Success bool `json:"success"`
	// Analysis is a free-form map of per-analysis results and counters.
	Analysis map[string]interface{} `json:"analysis"`
	// Text is the OCR text; omitted from JSON when empty.
	Text string `json:"text,omitempty"`
	// Objects lists detected objects; omitted from JSON when empty.
	Objects []ObjectDetection `json:"objects,omitempty"`
	// Faces lists detected faces; omitted from JSON when empty.
	Faces []FaceDetection `json:"faces,omitempty"`
	// Metadata describes the analyzed image itself.
	Metadata ImageMetadata `json:"metadata"`
}
// ObjectDetection describes one object found in an image.
type ObjectDetection struct {
	// Name is the label of the detected object.
	Name string `json:"name"`
	// Confidence is the detector's confidence score for this object.
	Confidence float64 `json:"confidence"`
	// BoundingBox locates the object within the image.
	BoundingBox BoundingBox `json:"bounding_box"`
}
// FaceDetection describes one face found in an image. The pointer fields are
// optional attributes and are left out of the JSON encoding when nil.
type FaceDetection struct {
	// Confidence is the detector's confidence score for this face.
	Confidence float64 `json:"confidence"`
	// BoundingBox locates the face within the image.
	BoundingBox BoundingBox `json:"bounding_box"`
	// Age is the estimated age, if available.
	Age *int `json:"age,omitempty"`
	// Gender is the estimated gender, if available.
	Gender *string `json:"gender,omitempty"`
	// Emotion is the estimated emotion, if available.
	Emotion *string `json:"emotion,omitempty"`
}
// BoundingBox is the rectangle that encloses a detection, given as an X/Y
// position plus a width and height.
type BoundingBox struct {
	// X is the horizontal coordinate of the box.
	X int `json:"x"`
	// Y is the vertical coordinate of the box.
	Y int `json:"y"`
	// Width is the horizontal extent of the box.
	Width int `json:"width"`
	// Height is the vertical extent of the box.
	Height int `json:"height"`
}
// ImageMetadata holds the basic properties of an analyzed image.
type ImageMetadata struct {
	// Width is the image width taken from the decoded image bounds.
	Width int `json:"width"`
	// Height is the image height taken from the decoded image bounds.
	Height int `json:"height"`
	// Format is the format name reported by the image decoder.
	Format string `json:"format"`
	// SizeBytes is the length of the decoded (binary) image data.
	SizeBytes int `json:"size_bytes"`
	// ColorSpace names the color space of the image.
	ColorSpace string `json:"color_space"`
	// DominantColors lists the dominant colors as "#RRGGBB" strings.
	DominantColors []string `json:"dominant_colors"`
	// TextDensity relates the amount of extracted text to the image area.
	TextDensity float64 `json:"text_density"`
}
// AnalyzeImage decodes a base64-encoded image and runs the requested analyses.
//
// req.ImageData must be standard base64. req.AnalysisType selects what runs:
// "ocr", "objects", "faces", "text" or "all"; any other value yields a
// successful response that carries only the image metadata and dominant
// colors.
//
// The image is parsed with image.Decode, which only recognises formats whose
// decoders have been registered (for example through blank imports of
// image/png, image/jpeg and image/gif).
//
// When req.FileID is set the result is also saved to the database. Saving is
// best effort: a failure does not fail the call, but Analysis["persisted"]
// reports whether the save succeeded.
//
// An error is returned when the base64 payload or the image itself cannot be
// decoded; the underlying error is wrapped and available to errors.Is/As.
func (s *ComputerVisionService) AnalyzeImage(req ImageAnalysisRequest) (*ImageAnalysisResponse, error) {
	imageData, err := base64.StdEncoding.DecodeString(req.ImageData)
	if err != nil {
		return nil, fmt.Errorf("invalid base64 image data: %w", err)
	}

	// The decoded image is only needed for its dimensions and format name.
	img, format, err := image.Decode(bytes.NewReader(imageData))
	if err != nil {
		return nil, fmt.Errorf("failed to decode image: %w", err)
	}
	bounds := img.Bounds()

	response := &ImageAnalysisResponse{
		Success:  true,
		Analysis: make(map[string]interface{}),
		Metadata: ImageMetadata{
			Width:      bounds.Dx(),
			Height:     bounds.Dy(),
			Format:     format,
			SizeBytes:  len(imageData),
			ColorSpace: "RGB", // Simplified
		},
	}

	all := req.AnalysisType == "all"
	wantOCR := all || req.AnalysisType == "ocr"
	wantText := all || req.AnalysisType == "text"

	// "ocr" and "text" are both fed by the same extraction, so run it once
	// even when both are requested. An extraction error simply leaves the
	// text-related fields unset.
	if wantOCR || wantText {
		if text, err := s.extractText(imageData); err == nil {
			if wantOCR {
				response.Text = text
				response.Analysis["text"] = text
				response.Analysis["word_count"] = len(strings.Fields(text))
				// Skip the ratio for a zero-area image: dividing by zero
				// would produce Inf/NaN, which cannot be JSON-encoded.
				if area := bounds.Dx() * bounds.Dy(); area > 0 {
					response.Metadata.TextDensity = float64(len(text)) / float64(area) * 1000
				}
			}
			if wantText {
				response.Analysis["readable_text"] = text
				response.Analysis["has_text"] = len(strings.TrimSpace(text)) > 0
			}
		}
	}

	if all || req.AnalysisType == "objects" {
		objects := s.detectObjects(imageData)
		response.Objects = objects
		response.Analysis["objects"] = objects
		response.Analysis["object_count"] = len(objects)
	}

	if all || req.AnalysisType == "faces" {
		faces := s.detectFaces(imageData)
		response.Faces = faces
		response.Analysis["faces"] = faces
		response.Analysis["face_count"] = len(faces)
	}

	// Dominant colors are always part of the metadata.
	response.Metadata.DominantColors = s.extractDominantColors(imageData)

	// Persist when a file is referenced. The analysis itself succeeded, so a
	// storage failure is reported through the response instead of being
	// dropped silently or turned into a hard error.
	if req.FileID != nil {
		saveErr := s.saveImageAnalysis(*req.FileID, response)
		response.Analysis["persisted"] = saveErr == nil
	}

	return response, nil
}
// extractText is the OCR entry point. It is currently a placeholder: the image
// bytes are ignored and a fixed sample text is returned with a nil error.
//
// A real implementation would delegate to an OCR engine or service such as
// Tesseract OCR, Google Cloud Vision API, Azure Computer Vision or AWS
// Textract.
func (s *ComputerVisionService) extractText(imageData []byte) (string, error) {
	// Canned result standing in for real OCR output; surrounding whitespace
	// is stripped before it is returned.
	const sampleText = `
This is sample OCR text extracted from the image.
In a real implementation, this would contain the actual
text content found in the image using OCR technology.
Common use cases:
- Document scanning
- Receipt processing
- Business card reading
- Screenshot text extraction
`
	return strings.TrimSpace(sampleText), nil
}
// detectObjects is the object-detection entry point. It is currently a
// placeholder: the image bytes are ignored and the same three simulated
// detections (document, text, logo) are returned on every call.
//
// A real implementation would use a detector such as YOLO, the TensorFlow
// Object Detection API, OpenCV DNN or a cloud vision service.
func (s *ComputerVisionService) detectObjects(imageData []byte) []ObjectDetection {
	detections := make([]ObjectDetection, 0, 3)

	detections = append(detections, ObjectDetection{
		Name:        "document",
		Confidence:  0.95,
		BoundingBox: BoundingBox{X: 10, Y: 10, Width: 300, Height: 400},
	})
	detections = append(detections, ObjectDetection{
		Name:        "text",
		Confidence:  0.88,
		BoundingBox: BoundingBox{X: 20, Y: 30, Width: 280, Height: 200},
	})
	detections = append(detections, ObjectDetection{
		Name:        "logo",
		Confidence:  0.72,
		BoundingBox: BoundingBox{X: 250, Y: 20, Width: 50, Height: 50},
	})

	return detections
}
// detectFaces is the face-detection entry point. It is currently a
// placeholder: the image bytes are ignored and one simulated face is returned
// on every call.
//
// A real implementation would use something like OpenCV face detection, Dlib,
// FaceNet or a cloud face-detection service.
func (s *ComputerVisionService) detectFaces(imageData []byte) []FaceDetection {
	// The optional attributes are pointers, so give each one its own
	// addressable variable.
	age := 28
	gender := "male"
	emotion := "happy"

	simulated := FaceDetection{
		Confidence:  0.92,
		BoundingBox: BoundingBox{X: 100, Y: 80, Width: 120, Height: 150},
		Age:         &age,
		Gender:      &gender,
		Emotion:     &emotion,
	}
	return []FaceDetection{simulated}
}
// extractDominantColors is the color-analysis entry point. It is currently a
// placeholder: the image bytes are ignored and a fixed five-color palette is
// returned as "#RRGGBB" strings.
//
// A real implementation would use k-means clustering, color histogram
// analysis or the median cut algorithm.
func (s *ComputerVisionService) extractDominantColors(imageData []byte) []string {
	palette := [...]string{
		"#FFFFFF", // White
		"#333333", // Dark gray
		"#0066CC", // Blue
		"#FF6600", // Orange
		"#00CC66", // Green
	}
	return palette[:]
}
// saveImageAnalysis stores a summary of analysis for fileID, creating the
// FileAnalysis record if none exists for that file and updating it otherwise.
//
// The stored Results value is a JSON document with the keys "text",
// "object_count", "face_count" and "metadata". It is produced with
// encoding/json so that quotes and newlines in the text are escaped and the
// metadata is encoded as JSON rather than as Go's %+v rendering.
//
// It returns an error when analysis is nil, when the summary cannot be
// encoded, or when the database lookup or write fails.
//
// NOTE(review): the lookup matches on file_id only, so an existing record of
// a different analysis type for the same file would be overwritten — confirm
// against the FileAnalysis model whether that is intended.
func (s *ComputerVisionService) saveImageAnalysis(fileID uint, analysis *ImageAnalysisResponse) error {
	if analysis == nil {
		return errors.New("image analysis is nil")
	}

	summary := struct {
		Text        string        `json:"text"`
		ObjectCount int           `json:"object_count"`
		FaceCount   int           `json:"face_count"`
		Metadata    ImageMetadata `json:"metadata"`
	}{
		Text:        analysis.Text,
		ObjectCount: len(analysis.Objects),
		FaceCount:   len(analysis.Faces),
		Metadata:    analysis.Metadata,
	}
	encoded, err := json.Marshal(summary)
	if err != nil {
		return fmt.Errorf("encoding image analysis for file %d: %w", fileID, err)
	}
	analysisJSON := string(encoded)
	now := time.Now()

	var fileAnalysis models.FileAnalysis
	err = s.db.Where("file_id = ?", fileID).First(&fileAnalysis).Error
	switch {
	case errors.Is(err, gorm.ErrRecordNotFound):
		// No record yet for this file: create one.
		fileAnalysis = models.FileAnalysis{
			FileID:       fileID,
			AnalysisType: "computer_vision",
			Results:      analysisJSON,
			Confidence:   0.85,
			ProcessedAt:  &now,
		}
		return s.db.Create(&fileAnalysis).Error
	case err != nil:
		// Any other lookup failure is returned unchanged.
		return err
	}

	// A record already exists: refresh its results and timestamp.
	fileAnalysis.Results = analysisJSON
	fileAnalysis.ProcessedAt = &now
	return s.db.Save(&fileAnalysis).Error
}
// ProcessDocumentImage extracts the text of a document image and derives a
// structural analysis from it: word and line counts, detected language and
// document type, sections, tables, links, e-mail addresses and phone numbers.
//
// LineCount is the number of newline-separated lines in the extracted text
// and is 0 when no text was extracted.
//
// It returns an error, wrapping the underlying one, when text extraction
// fails.
func (s *ComputerVisionService) ProcessDocumentImage(imageData []byte) (*DocumentAnalysis, error) {
	text, err := s.extractText(imageData)
	if err != nil {
		return nil, fmt.Errorf("extracting text from document image: %w", err)
	}

	// Splitting an empty string yields one (empty) element, which would
	// report a single line for a document without any text.
	lineCount := 0
	if text != "" {
		lineCount = strings.Count(text, "\n") + 1
	}

	return &DocumentAnalysis{
		Text:         text,
		WordCount:    len(strings.Fields(text)),
		LineCount:    lineCount,
		Language:     s.detectLanguage(text),
		DocumentType: s.detectDocumentType(text),
		Sections:     s.extractSections(text),
		Tables:       s.extractTables(text),
		Links:        s.extractLinks(text),
		Emails:       s.extractEmails(text),
		PhoneNumbers: s.extractPhoneNumbers(text),
	}, nil
}
// DocumentAnalysis is the result of ProcessDocumentImage: the extracted text
// together with the counts, classifications and entities derived from it.
type DocumentAnalysis struct {
	// Text is the text extracted from the document image.
	Text string `json:"text"`
	// WordCount is the number of whitespace-separated words in Text.
	WordCount int `json:"word_count"`
	// LineCount is the number of lines in Text.
	LineCount int `json:"line_count"`
	// Language is the detected language code, or "unknown".
	Language string `json:"language"`
	// DocumentType is the detected document category.
	DocumentType string `json:"document_type"`
	// Sections lists the section headings found in Text.
	Sections []DocumentSection `json:"sections"`
	// Tables lists the tab-delimited tables found in Text.
	Tables []DocumentTable `json:"tables"`
	// Links lists the URLs found in Text.
	Links []string `json:"links"`
	// Emails lists the e-mail addresses found in Text.
	Emails []string `json:"emails"`
	// PhoneNumbers lists the phone numbers found in Text.
	PhoneNumbers []string `json:"phone_numbers"`
}
// DocumentSection is one section of a document, identified by its heading.
type DocumentSection struct {
	// Title is the heading line of the section.
	Title string `json:"title"`
	// Content is the body text belonging to the section.
	Content string `json:"content"`
	// Level is the nesting depth of the heading.
	Level int `json:"level"`
}
// DocumentTable is a table recovered from document text.
type DocumentTable struct {
	// Headers holds the cells of the header row.
	Headers []string `json:"headers"`
	// Rows holds the cells of each data row.
	Rows [][]string `json:"rows"`
}
// detectLanguage makes a rough guess at the language of text and returns
// "en", "es", "de" or "unknown".
//
// The guess is a keyword heuristic: a language is reported when both of its
// marker strings occur somewhere in the text. Matching is case-insensitive
// for every language, so capitalised or upper-case text ("Der", "UND") is
// recognised as well. Markers are matched as substrings, not whole words, and
// the languages are tried in the order English, Spanish, German, so
// misclassification is possible; a real implementation would use a language
// detection library, a machine learning model or a cloud service.
func (s *ComputerVisionService) detectLanguage(text string) string {
	// Fold case once and reuse it for every marker check.
	lowered := strings.ToLower(text)
	hasBoth := func(first, second string) bool {
		return strings.Contains(lowered, first) && strings.Contains(lowered, second)
	}

	switch {
	case hasBoth("the", "and"):
		return "en"
	case hasBoth("est", "que"):
		return "es"
	case hasBoth("und", "der"):
		return "de"
	}
	return "unknown"
}
// detectDocumentType classifies text by keyword and returns one of "invoice",
// "receipt", "resume", "contract", "report" or "general".
//
// Matching is case-insensitive and substring based. Categories are tried in
// the order listed above and the first one with a matching keyword wins;
// "general" is returned when nothing matches.
func (s *ComputerVisionService) detectDocumentType(text string) string {
	lowered := strings.ToLower(text)

	// Ordered rule table: the position of a rule decides its precedence.
	rules := []struct {
		label    string
		keywords [2]string
	}{
		{"invoice", [2]string{"invoice", "bill"}},
		{"receipt", [2]string{"receipt", "purchase"}},
		{"resume", [2]string{"resume", "curriculum"}},
		{"contract", [2]string{"contract", "agreement"}},
		{"report", [2]string{"report", "analysis"}},
	}

	for _, rule := range rules {
		for _, keyword := range rule.keywords {
			if strings.Contains(lowered, keyword) {
				return rule.label
			}
		}
	}
	return "general"
}
// extractSections splits text into sections, one per heading line.
//
// A line (after trimming surrounding whitespace) is a heading when it is
// shorter than 100 bytes and either ends with ":" or is written entirely in
// upper case. "Entirely in upper case" requires at least one cased letter, so
// lines made up only of digits or punctuation are not mistaken for headings.
//
// Each section's Content is the non-blank lines that follow its heading, up
// to the next heading, trimmed and joined with "\n". Lines that precede the
// first heading belong to no section and are dropped. Every section gets
// Level 1. The result is nil when the text contains no heading.
func (s *ComputerVisionService) extractSections(text string) []DocumentSection {
	isHeading := func(line string) bool {
		if len(line) >= 100 {
			return false
		}
		if strings.HasSuffix(line, ":") {
			return true
		}
		// Unchanged by upper-casing but changed by lower-casing: the line
		// has letters and all of them are upper case.
		return strings.ToUpper(line) == line && strings.ToLower(line) != line
	}

	var sections []DocumentSection
	var body []string // content lines of the section currently being collected

	// closeSection attaches the collected body to the most recent section.
	closeSection := func() {
		if len(sections) > 0 {
			sections[len(sections)-1].Content = strings.Join(body, "\n")
		}
		body = body[:0]
	}

	for _, line := range strings.Split(text, "\n") {
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		if isHeading(line) {
			closeSection()
			sections = append(sections, DocumentSection{Title: line, Level: 1})
			continue
		}
		if len(sections) > 0 {
			body = append(body, line)
		}
	}
	closeSection()

	return sections
}
// extractTables recovers tab-delimited tables from text.
//
// Every run of two or more consecutive lines that contain a tab character is
// reported as one table: the first line of the run supplies the headers and
// each following line one row, with cells split on tabs. A single
// tab-containing line on its own is not treated as a table, and lines without
// a tab end the current run. The result is nil when no table is found.
//
// This is a simplified extraction; real table detection would need to be much
// more sophisticated.
func (s *ComputerVisionService) extractTables(text string) []DocumentTable {
	var tables []DocumentTable
	var run [][]string // cells of the consecutive tab-delimited lines seen so far

	// closeRun turns the collected run into a table when it is long enough.
	closeRun := func() {
		if len(run) >= 2 {
			tables = append(tables, DocumentTable{Headers: run[0], Rows: run[1:]})
		}
		run = nil
	}

	for _, line := range strings.Split(text, "\n") {
		if strings.Contains(line, "\t") {
			run = append(run, strings.Split(line, "\t"))
			continue
		}
		closeRun()
	}
	closeRun()

	return tables
}
// cvLinkPattern matches an http or https URL up to the next whitespace. It is
// compiled once at package initialisation instead of on every call.
var cvLinkPattern = regexp.MustCompile(`https?://[^\s]+`)

// extractLinks returns the http/https URLs found in text, in order of
// appearance. Sentence punctuation that directly follows a URL (such as a
// trailing "." or ",") is stripped from the match. The result is nil when the
// text contains no URL.
func (s *ComputerVisionService) extractLinks(text string) []string {
	links := cvLinkPattern.FindAllString(text, -1)
	for i, link := range links {
		// The pattern runs to the next whitespace, so it swallows the
		// punctuation of the surrounding sentence; drop it again.
		links[i] = strings.TrimRight(link, `.,;:!?"'`)
	}
	return links
}
// cvEmailPattern matches an e-mail address of the form local@domain.tld. It is
// compiled once at package initialisation instead of on every call.
var cvEmailPattern = regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`)

// extractEmails returns the e-mail addresses found in text, in order of
// appearance. The result is nil when the text contains none.
func (s *ComputerVisionService) extractEmails(text string) []string {
	return cvEmailPattern.FindAllString(text, -1)
}
// cvPhonePattern matches a ten-digit phone number written as 3-3-4 digit
// groups, optionally separated by "-" or ".". It is compiled once at package
// initialisation instead of on every call.
var cvPhonePattern = regexp.MustCompile(`\b\d{3}[-.]?\d{3}[-.]?\d{4}\b`)

// extractPhoneNumbers returns the phone numbers found in text, in order of
// appearance. The result is nil when the text contains none.
func (s *ComputerVisionService) extractPhoneNumbers(text string) []string {
	return cvPhonePattern.FindAllString(text, -1)
}
// CreateFileAnalysis inserts a new FileAnalysis record for fileID with the
// given analysis type, results and confidence, stamped with the current time
// as its processing time. It returns the error reported by the database
// insert, if any.
func (s *ComputerVisionService) CreateFileAnalysis(fileID uint, analysisType, results string, confidence float64) error {
	processedAt := time.Now()
	record := &models.FileAnalysis{
		FileID:       fileID,
		AnalysisType: analysisType,
		Results:      results,
		Confidence:   confidence,
		ProcessedAt:  &processedAt,
	}

	outcome := s.db.Create(record)
	return outcome.Error
}