mirror of
https://github.com/Dvorinka/Trackeep.git
synced 2026-06-03 20:12:58 +00:00
462 lines
14 KiB
Go
462 lines
14 KiB
Go
package services
|
|
|
|
import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"image"
	"regexp"
	"strings"
	"time"

	"github.com/trackeep/backend/models"
	"gorm.io/gorm"
)
|
|
|
|
// ComputerVisionService provides computer vision capabilities
|
|
type ComputerVisionService struct {
|
|
db *gorm.DB
|
|
}
|
|
|
|
// NewComputerVisionService creates a new computer vision service
|
|
func NewComputerVisionService(db *gorm.DB) *ComputerVisionService {
|
|
return &ComputerVisionService{db: db}
|
|
}
|
|
|
|
// ImageAnalysisRequest is the input accepted by AnalyzeImage.
type ImageAnalysisRequest struct {
	// ImageData is the base64-encoded image.
	ImageData string `json:"image_data" binding:"required"`
	// AnalysisType selects what to run: ocr, objects, text, faces or all.
	AnalysisType string `json:"analysis_type" binding:"required"`
	// FileID is optional; when non-nil AnalyzeImage stores the result for it.
	FileID *uint `json:"file_id,omitempty"`
}
|
|
|
|
// ImageAnalysisResponse represents the result of image analysis
|
|
type ImageAnalysisResponse struct {
|
|
Success bool `json:"success"`
|
|
Analysis map[string]interface{} `json:"analysis"`
|
|
Text string `json:"text,omitempty"`
|
|
Objects []ObjectDetection `json:"objects,omitempty"`
|
|
Faces []FaceDetection `json:"faces,omitempty"`
|
|
Metadata ImageMetadata `json:"metadata"`
|
|
}
|
|
|
|
// ObjectDetection represents a detected object
|
|
type ObjectDetection struct {
|
|
Name string `json:"name"`
|
|
Confidence float64 `json:"confidence"`
|
|
BoundingBox BoundingBox `json:"bounding_box"`
|
|
}
|
|
|
|
// FaceDetection represents a detected face
|
|
type FaceDetection struct {
|
|
Confidence float64 `json:"confidence"`
|
|
BoundingBox BoundingBox `json:"bounding_box"`
|
|
Age *int `json:"age,omitempty"`
|
|
Gender *string `json:"gender,omitempty"`
|
|
Emotion *string `json:"emotion,omitempty"`
|
|
}
|
|
|
|
// BoundingBox is the rectangle attached to a detection: an origin (X, Y) and
// a size (Width, Height). Units are presumably image pixels (TODO confirm).
type BoundingBox struct {
	X      int `json:"x"`
	Y      int `json:"y"`
	Width  int `json:"width"`
	Height int `json:"height"`
}
|
|
|
|
// ImageMetadata describes the image that was analyzed.
type ImageMetadata struct {
	// Width and Height are the dimensions of the decoded image bounds.
	Width  int `json:"width"`
	Height int `json:"height"`
	// Format is the format name reported by image.Decode.
	Format string `json:"format"`
	// SizeBytes is the length of the decoded (non-base64) image data.
	SizeBytes int `json:"size_bytes"`
	// ColorSpace is a label for the color space; AnalyzeImage always sets "RGB".
	ColorSpace string `json:"color_space"`
	// DominantColors holds the colors returned by extractDominantColors.
	DominantColors []string `json:"dominant_colors"`
	// TextDensity is OCR text length per 1000 pixels, set by AnalyzeImage.
	TextDensity float64 `json:"text_density"`
}
|
|
|
|
// AnalyzeImage performs computer vision analysis on an image
|
|
func (s *ComputerVisionService) AnalyzeImage(req ImageAnalysisRequest) (*ImageAnalysisResponse, error) {
|
|
// Decode base64 image
|
|
imageData, err := base64.StdEncoding.DecodeString(req.ImageData)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid base64 image data: %v", err)
|
|
}
|
|
|
|
// Parse image to get metadata
|
|
img, format, err := image.Decode(bytes.NewReader(imageData))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to decode image: %v", err)
|
|
}
|
|
|
|
bounds := img.Bounds()
|
|
response := &ImageAnalysisResponse{
|
|
Success: true,
|
|
Analysis: make(map[string]interface{}),
|
|
Metadata: ImageMetadata{
|
|
Width: bounds.Dx(),
|
|
Height: bounds.Dy(),
|
|
Format: format,
|
|
SizeBytes: len(imageData),
|
|
ColorSpace: "RGB", // Simplified
|
|
},
|
|
}
|
|
|
|
// Perform requested analysis types
|
|
if req.AnalysisType == "ocr" || req.AnalysisType == "all" {
|
|
text, err := s.extractText(imageData)
|
|
if err == nil {
|
|
response.Text = text
|
|
response.Analysis["text"] = text
|
|
response.Analysis["word_count"] = len(strings.Fields(text))
|
|
response.Metadata.TextDensity = float64(len(text)) / float64(bounds.Dx()*bounds.Dy()) * 1000
|
|
}
|
|
}
|
|
|
|
if req.AnalysisType == "objects" || req.AnalysisType == "all" {
|
|
objects := s.detectObjects(imageData)
|
|
response.Objects = objects
|
|
response.Analysis["objects"] = objects
|
|
response.Analysis["object_count"] = len(objects)
|
|
}
|
|
|
|
if req.AnalysisType == "faces" || req.AnalysisType == "all" {
|
|
faces := s.detectFaces(imageData)
|
|
response.Faces = faces
|
|
response.Analysis["faces"] = faces
|
|
response.Analysis["face_count"] = len(faces)
|
|
}
|
|
|
|
if req.AnalysisType == "text" || req.AnalysisType == "all" {
|
|
// Extract readable text from image
|
|
text, err := s.extractText(imageData)
|
|
if err == nil {
|
|
response.Analysis["readable_text"] = text
|
|
response.Analysis["has_text"] = len(strings.TrimSpace(text)) > 0
|
|
}
|
|
}
|
|
|
|
// Extract dominant colors
|
|
colors := s.extractDominantColors(imageData)
|
|
response.Metadata.DominantColors = colors
|
|
|
|
// Save analysis to database if file ID is provided
|
|
if req.FileID != nil {
|
|
s.saveImageAnalysis(*req.FileID, response)
|
|
}
|
|
|
|
return response, nil
|
|
}
|
|
|
|
// extractText performs OCR on the image (simplified implementation)
|
|
func (s *ComputerVisionService) extractText(imageData []byte) (string, error) {
|
|
// This is a simplified OCR implementation
|
|
// In a real implementation, you would use:
|
|
// - Tesseract OCR
|
|
// - Google Cloud Vision API
|
|
// - Azure Computer Vision
|
|
// - AWS Textract
|
|
|
|
// For demo purposes, we'll extract text from common patterns
|
|
// This is just a placeholder implementation
|
|
|
|
// Try to detect common text patterns in the image
|
|
// In reality, this would require actual OCR processing
|
|
|
|
// Simulate OCR by returning sample text based on image analysis
|
|
text := `
|
|
This is sample OCR text extracted from the image.
|
|
In a real implementation, this would contain the actual
|
|
text content found in the image using OCR technology.
|
|
|
|
Common use cases:
|
|
- Document scanning
|
|
- Receipt processing
|
|
- Business card reading
|
|
- Screenshot text extraction
|
|
`
|
|
|
|
return strings.TrimSpace(text), nil
|
|
}
|
|
|
|
// detectObjects performs object detection on the image
|
|
func (s *ComputerVisionService) detectObjects(imageData []byte) []ObjectDetection {
|
|
// This is a simplified object detection implementation
|
|
// In a real implementation, you would use:
|
|
// - YOLO (You Only Look Once)
|
|
// - TensorFlow Object Detection API
|
|
// - OpenCV DNN
|
|
// - Cloud vision services
|
|
|
|
// Simulate object detection with common objects
|
|
objects := []ObjectDetection{
|
|
{
|
|
Name: "document",
|
|
Confidence: 0.95,
|
|
BoundingBox: BoundingBox{X: 10, Y: 10, Width: 300, Height: 400},
|
|
},
|
|
{
|
|
Name: "text",
|
|
Confidence: 0.88,
|
|
BoundingBox: BoundingBox{X: 20, Y: 30, Width: 280, Height: 200},
|
|
},
|
|
{
|
|
Name: "logo",
|
|
Confidence: 0.72,
|
|
BoundingBox: BoundingBox{X: 250, Y: 20, Width: 50, Height: 50},
|
|
},
|
|
}
|
|
|
|
return objects
|
|
}
|
|
|
|
// detectFaces performs face detection on the image
|
|
func (s *ComputerVisionService) detectFaces(imageData []byte) []FaceDetection {
|
|
// This is a simplified face detection implementation
|
|
// In a real implementation, you would use:
|
|
// - OpenCV Face Detection
|
|
// - Dlib
|
|
// - FaceNet
|
|
// - Cloud face detection services
|
|
|
|
// Simulate face detection
|
|
faces := []FaceDetection{
|
|
{
|
|
Confidence: 0.92,
|
|
BoundingBox: BoundingBox{X: 100, Y: 80, Width: 120, Height: 150},
|
|
Age: func() *int { age := 28; return &age }(),
|
|
Gender: func() *string { gender := "male"; return &gender }(),
|
|
Emotion: func() *string { emotion := "happy"; return &emotion }(),
|
|
},
|
|
}
|
|
|
|
return faces
|
|
}
|
|
|
|
// extractDominantColors extracts the dominant colors from the image
|
|
func (s *ComputerVisionService) extractDominantColors(imageData []byte) []string {
|
|
// This is a simplified color extraction
|
|
// In a real implementation, you would use:
|
|
// - K-means clustering
|
|
// - Color histogram analysis
|
|
// - Median cut algorithm
|
|
|
|
// Simulate dominant colors
|
|
colors := []string{
|
|
"#FFFFFF", // White
|
|
"#333333", // Dark gray
|
|
"#0066CC", // Blue
|
|
"#FF6600", // Orange
|
|
"#00CC66", // Green
|
|
}
|
|
|
|
return colors
|
|
}
|
|
|
|
// saveImageAnalysis saves the analysis results to the database
|
|
func (s *ComputerVisionService) saveImageAnalysis(fileID uint, analysis *ImageAnalysisResponse) error {
|
|
// Convert analysis to JSON for storage
|
|
analysisJSON := fmt.Sprintf(`{
|
|
"text": "%s",
|
|
"object_count": %d,
|
|
"face_count": %d,
|
|
"metadata": %+v
|
|
}`, analysis.Text, len(analysis.Objects), len(analysis.Faces), analysis.Metadata)
|
|
|
|
// Create or update file analysis record
|
|
var fileAnalysis models.FileAnalysis
|
|
err := s.db.Where("file_id = ?", fileID).First(&fileAnalysis).Error
|
|
|
|
if err == gorm.ErrRecordNotFound {
|
|
// Create new analysis record
|
|
now := time.Now()
|
|
fileAnalysis = models.FileAnalysis{
|
|
FileID: fileID,
|
|
AnalysisType: "computer_vision",
|
|
Results: analysisJSON,
|
|
Confidence: 0.85,
|
|
ProcessedAt: &now,
|
|
}
|
|
return s.db.Create(&fileAnalysis).Error
|
|
} else if err == nil {
|
|
// Update existing record
|
|
fileAnalysis.Results = analysisJSON
|
|
now := time.Now()
|
|
fileAnalysis.ProcessedAt = &now
|
|
return s.db.Save(&fileAnalysis).Error
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
// ProcessDocumentImage processes a document image for text extraction and structure
|
|
func (s *ComputerVisionService) ProcessDocumentImage(imageData []byte) (*DocumentAnalysis, error) {
|
|
// Extract text using OCR
|
|
text, err := s.extractText(imageData)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Analyze document structure
|
|
analysis := &DocumentAnalysis{
|
|
Text: text,
|
|
WordCount: len(strings.Fields(text)),
|
|
LineCount: len(strings.Split(text, "\n")),
|
|
Language: s.detectLanguage(text),
|
|
DocumentType: s.detectDocumentType(text),
|
|
Sections: s.extractSections(text),
|
|
Tables: s.extractTables(text),
|
|
Links: s.extractLinks(text),
|
|
Emails: s.extractEmails(text),
|
|
PhoneNumbers: s.extractPhoneNumbers(text),
|
|
}
|
|
|
|
return analysis, nil
|
|
}
|
|
|
|
// DocumentAnalysis represents the analysis of a document image
|
|
type DocumentAnalysis struct {
|
|
Text string `json:"text"`
|
|
WordCount int `json:"word_count"`
|
|
LineCount int `json:"line_count"`
|
|
Language string `json:"language"`
|
|
DocumentType string `json:"document_type"`
|
|
Sections []DocumentSection `json:"sections"`
|
|
Tables []DocumentTable `json:"tables"`
|
|
Links []string `json:"links"`
|
|
Emails []string `json:"emails"`
|
|
PhoneNumbers []string `json:"phone_numbers"`
|
|
}
|
|
|
|
// DocumentSection is one section found by extractSections.
type DocumentSection struct {
	// Title is the heading line of the section.
	Title string `json:"title"`
	// Content is the body text that belongs to the section.
	Content string `json:"content"`
	// Level is the heading depth; extractSections always reports 1.
	Level int `json:"level"`
}
|
|
|
|
// DocumentTable is one table found by extractTables.
type DocumentTable struct {
	// Headers holds the cells of the table's first line.
	Headers []string `json:"headers"`
	// Rows holds the cells of each following line, one slice per line.
	Rows [][]string `json:"rows"`
}
|
|
|
|
// detectLanguage detects the language of the text
|
|
func (s *ComputerVisionService) detectLanguage(text string) string {
|
|
// Simplified language detection
|
|
// In a real implementation, you would use:
|
|
// - Language detection libraries
|
|
// - Machine learning models
|
|
// - Cloud language detection services
|
|
|
|
if strings.Contains(strings.ToLower(text), "the") && strings.Contains(strings.ToLower(text), "and") {
|
|
return "en"
|
|
} else if strings.Contains(text, "est") && strings.Contains(text, "que") {
|
|
return "es"
|
|
} else if strings.Contains(text, "und") && strings.Contains(text, "der") {
|
|
return "de"
|
|
}
|
|
|
|
return "unknown"
|
|
}
|
|
|
|
// detectDocumentType detects the type of document
|
|
func (s *ComputerVisionService) detectDocumentType(text string) string {
|
|
text = strings.ToLower(text)
|
|
|
|
if strings.Contains(text, "invoice") || strings.Contains(text, "bill") {
|
|
return "invoice"
|
|
} else if strings.Contains(text, "receipt") || strings.Contains(text, "purchase") {
|
|
return "receipt"
|
|
} else if strings.Contains(text, "resume") || strings.Contains(text, "curriculum") {
|
|
return "resume"
|
|
} else if strings.Contains(text, "contract") || strings.Contains(text, "agreement") {
|
|
return "contract"
|
|
} else if strings.Contains(text, "report") || strings.Contains(text, "analysis") {
|
|
return "report"
|
|
}
|
|
|
|
return "general"
|
|
}
|
|
|
|
// extractSections extracts document sections
|
|
func (s *ComputerVisionService) extractSections(text string) []DocumentSection {
|
|
var sections []DocumentSection
|
|
lines := strings.Split(text, "\n")
|
|
|
|
for _, line := range lines {
|
|
line = strings.TrimSpace(line)
|
|
if line == "" {
|
|
continue
|
|
}
|
|
|
|
// Simple section detection (headers followed by content)
|
|
if len(line) < 100 && (strings.HasSuffix(line, ":") || strings.ToUpper(line) == line) {
|
|
sections = append(sections, DocumentSection{
|
|
Title: line,
|
|
Content: "",
|
|
Level: 1,
|
|
})
|
|
}
|
|
}
|
|
|
|
return sections
|
|
}
|
|
|
|
// extractTables extracts tables from the text
|
|
func (s *ComputerVisionService) extractTables(text string) []DocumentTable {
|
|
// Simplified table extraction
|
|
// In a real implementation, this would be much more sophisticated
|
|
var tables []DocumentTable
|
|
|
|
// Look for tabular data patterns
|
|
lines := strings.Split(text, "\n")
|
|
for i, line := range lines {
|
|
if strings.Contains(line, "\t") || strings.Contains(line, " ") {
|
|
// Potential table row
|
|
if i > 0 && strings.Contains(lines[i-1], "\t") {
|
|
// Multiple consecutive rows with tabs - likely a table
|
|
table := DocumentTable{
|
|
Headers: strings.Split(lines[i-1], "\t"),
|
|
Rows: [][]string{strings.Split(line, "\t")},
|
|
}
|
|
tables = append(tables, table)
|
|
}
|
|
}
|
|
}
|
|
|
|
return tables
|
|
}
|
|
|
|
// extractLinks extracts URLs from the text
|
|
func (s *ComputerVisionService) extractLinks(text string) []string {
|
|
urlRegex := regexp.MustCompile(`https?://[^\s]+`)
|
|
return urlRegex.FindAllString(text, -1)
|
|
}
|
|
|
|
// extractEmails extracts email addresses from the text
|
|
func (s *ComputerVisionService) extractEmails(text string) []string {
|
|
emailRegex := regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`)
|
|
return emailRegex.FindAllString(text, -1)
|
|
}
|
|
|
|
// extractPhoneNumbers extracts phone numbers from the text
|
|
func (s *ComputerVisionService) extractPhoneNumbers(text string) []string {
|
|
phoneRegex := regexp.MustCompile(`\b\d{3}[-.]?\d{3}[-.]?\d{4}\b`)
|
|
return phoneRegex.FindAllString(text, -1)
|
|
}
|
|
|
|
// CreateFileAnalysis creates a file analysis record
|
|
func (s *ComputerVisionService) CreateFileAnalysis(fileID uint, analysisType, results string, confidence float64) error {
|
|
now := time.Now()
|
|
fileAnalysis := models.FileAnalysis{
|
|
FileID: fileID,
|
|
AnalysisType: analysisType,
|
|
Results: results,
|
|
Confidence: confidence,
|
|
ProcessedAt: &now,
|
|
}
|
|
|
|
return s.db.Create(&fileAnalysis).Error
|
|
}
|