mirror of
https://github.com/Dvorinka/Trackeep.git
synced 2026-06-03 20:12:58 +00:00
first test
This commit is contained in:
@@ -0,0 +1,461 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"image"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/trackeep/backend/models"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
// ComputerVisionService provides computer vision capabilities
|
||||
type ComputerVisionService struct {
|
||||
db *gorm.DB
|
||||
}
|
||||
|
||||
// NewComputerVisionService creates a new computer vision service
|
||||
func NewComputerVisionService(db *gorm.DB) *ComputerVisionService {
|
||||
return &ComputerVisionService{db: db}
|
||||
}
|
||||
|
||||
// ImageAnalysisRequest represents a request for image analysis
|
||||
type ImageAnalysisRequest struct {
|
||||
ImageData string `json:"image_data" binding:"required"` // Base64 encoded image
|
||||
AnalysisType string `json:"analysis_type" binding:"required"` // ocr, objects, text, faces, all
|
||||
FileID *uint `json:"file_id,omitempty"`
|
||||
}
|
||||
|
||||
// ImageAnalysisResponse represents the result of image analysis
|
||||
type ImageAnalysisResponse struct {
|
||||
Success bool `json:"success"`
|
||||
Analysis map[string]interface{} `json:"analysis"`
|
||||
Text string `json:"text,omitempty"`
|
||||
Objects []ObjectDetection `json:"objects,omitempty"`
|
||||
Faces []FaceDetection `json:"faces,omitempty"`
|
||||
Metadata ImageMetadata `json:"metadata"`
|
||||
}
|
||||
|
||||
// ObjectDetection represents a detected object
|
||||
type ObjectDetection struct {
|
||||
Name string `json:"name"`
|
||||
Confidence float64 `json:"confidence"`
|
||||
BoundingBox BoundingBox `json:"bounding_box"`
|
||||
}
|
||||
|
||||
// FaceDetection represents a detected face
|
||||
type FaceDetection struct {
|
||||
Confidence float64 `json:"confidence"`
|
||||
BoundingBox BoundingBox `json:"bounding_box"`
|
||||
Age *int `json:"age,omitempty"`
|
||||
Gender *string `json:"gender,omitempty"`
|
||||
Emotion *string `json:"emotion,omitempty"`
|
||||
}
|
||||
|
||||
// BoundingBox represents coordinates of a detected object
|
||||
type BoundingBox struct {
|
||||
X int `json:"x"`
|
||||
Y int `json:"y"`
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
}
|
||||
|
||||
// ImageMetadata represents metadata about the analyzed image
|
||||
type ImageMetadata struct {
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
Format string `json:"format"`
|
||||
SizeBytes int `json:"size_bytes"`
|
||||
ColorSpace string `json:"color_space"`
|
||||
DominantColors []string `json:"dominant_colors"`
|
||||
TextDensity float64 `json:"text_density"`
|
||||
}
|
||||
|
||||
// AnalyzeImage performs computer vision analysis on an image
|
||||
func (s *ComputerVisionService) AnalyzeImage(req ImageAnalysisRequest) (*ImageAnalysisResponse, error) {
|
||||
// Decode base64 image
|
||||
imageData, err := base64.StdEncoding.DecodeString(req.ImageData)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid base64 image data: %v", err)
|
||||
}
|
||||
|
||||
// Parse image to get metadata
|
||||
img, format, err := image.Decode(bytes.NewReader(imageData))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode image: %v", err)
|
||||
}
|
||||
|
||||
bounds := img.Bounds()
|
||||
response := &ImageAnalysisResponse{
|
||||
Success: true,
|
||||
Analysis: make(map[string]interface{}),
|
||||
Metadata: ImageMetadata{
|
||||
Width: bounds.Dx(),
|
||||
Height: bounds.Dy(),
|
||||
Format: format,
|
||||
SizeBytes: len(imageData),
|
||||
ColorSpace: "RGB", // Simplified
|
||||
},
|
||||
}
|
||||
|
||||
// Perform requested analysis types
|
||||
if req.AnalysisType == "ocr" || req.AnalysisType == "all" {
|
||||
text, err := s.extractText(imageData)
|
||||
if err == nil {
|
||||
response.Text = text
|
||||
response.Analysis["text"] = text
|
||||
response.Analysis["word_count"] = len(strings.Fields(text))
|
||||
response.Metadata.TextDensity = float64(len(text)) / float64(bounds.Dx()*bounds.Dy()) * 1000
|
||||
}
|
||||
}
|
||||
|
||||
if req.AnalysisType == "objects" || req.AnalysisType == "all" {
|
||||
objects := s.detectObjects(imageData)
|
||||
response.Objects = objects
|
||||
response.Analysis["objects"] = objects
|
||||
response.Analysis["object_count"] = len(objects)
|
||||
}
|
||||
|
||||
if req.AnalysisType == "faces" || req.AnalysisType == "all" {
|
||||
faces := s.detectFaces(imageData)
|
||||
response.Faces = faces
|
||||
response.Analysis["faces"] = faces
|
||||
response.Analysis["face_count"] = len(faces)
|
||||
}
|
||||
|
||||
if req.AnalysisType == "text" || req.AnalysisType == "all" {
|
||||
// Extract readable text from image
|
||||
text, err := s.extractText(imageData)
|
||||
if err == nil {
|
||||
response.Analysis["readable_text"] = text
|
||||
response.Analysis["has_text"] = len(strings.TrimSpace(text)) > 0
|
||||
}
|
||||
}
|
||||
|
||||
// Extract dominant colors
|
||||
colors := s.extractDominantColors(imageData)
|
||||
response.Metadata.DominantColors = colors
|
||||
|
||||
// Save analysis to database if file ID is provided
|
||||
if req.FileID != nil {
|
||||
s.saveImageAnalysis(*req.FileID, response)
|
||||
}
|
||||
|
||||
return response, nil
|
||||
}
|
||||
|
||||
// extractText performs OCR on the image (simplified implementation)
|
||||
func (s *ComputerVisionService) extractText(imageData []byte) (string, error) {
|
||||
// This is a simplified OCR implementation
|
||||
// In a real implementation, you would use:
|
||||
// - Tesseract OCR
|
||||
// - Google Cloud Vision API
|
||||
// - Azure Computer Vision
|
||||
// - AWS Textract
|
||||
|
||||
// For demo purposes, we'll extract text from common patterns
|
||||
// This is just a placeholder implementation
|
||||
|
||||
// Try to detect common text patterns in the image
|
||||
// In reality, this would require actual OCR processing
|
||||
|
||||
// Simulate OCR by returning sample text based on image analysis
|
||||
text := `
|
||||
This is sample OCR text extracted from the image.
|
||||
In a real implementation, this would contain the actual
|
||||
text content found in the image using OCR technology.
|
||||
|
||||
Common use cases:
|
||||
- Document scanning
|
||||
- Receipt processing
|
||||
- Business card reading
|
||||
- Screenshot text extraction
|
||||
`
|
||||
|
||||
return strings.TrimSpace(text), nil
|
||||
}
|
||||
|
||||
// detectObjects performs object detection on the image
|
||||
func (s *ComputerVisionService) detectObjects(imageData []byte) []ObjectDetection {
|
||||
// This is a simplified object detection implementation
|
||||
// In a real implementation, you would use:
|
||||
// - YOLO (You Only Look Once)
|
||||
// - TensorFlow Object Detection API
|
||||
// - OpenCV DNN
|
||||
// - Cloud vision services
|
||||
|
||||
// Simulate object detection with common objects
|
||||
objects := []ObjectDetection{
|
||||
{
|
||||
Name: "document",
|
||||
Confidence: 0.95,
|
||||
BoundingBox: BoundingBox{X: 10, Y: 10, Width: 300, Height: 400},
|
||||
},
|
||||
{
|
||||
Name: "text",
|
||||
Confidence: 0.88,
|
||||
BoundingBox: BoundingBox{X: 20, Y: 30, Width: 280, Height: 200},
|
||||
},
|
||||
{
|
||||
Name: "logo",
|
||||
Confidence: 0.72,
|
||||
BoundingBox: BoundingBox{X: 250, Y: 20, Width: 50, Height: 50},
|
||||
},
|
||||
}
|
||||
|
||||
return objects
|
||||
}
|
||||
|
||||
// detectFaces performs face detection on the image
|
||||
func (s *ComputerVisionService) detectFaces(imageData []byte) []FaceDetection {
|
||||
// This is a simplified face detection implementation
|
||||
// In a real implementation, you would use:
|
||||
// - OpenCV Face Detection
|
||||
// - Dlib
|
||||
// - FaceNet
|
||||
// - Cloud face detection services
|
||||
|
||||
// Simulate face detection
|
||||
faces := []FaceDetection{
|
||||
{
|
||||
Confidence: 0.92,
|
||||
BoundingBox: BoundingBox{X: 100, Y: 80, Width: 120, Height: 150},
|
||||
Age: func() *int { age := 28; return &age }(),
|
||||
Gender: func() *string { gender := "male"; return &gender }(),
|
||||
Emotion: func() *string { emotion := "happy"; return &emotion }(),
|
||||
},
|
||||
}
|
||||
|
||||
return faces
|
||||
}
|
||||
|
||||
// extractDominantColors extracts the dominant colors from the image
|
||||
func (s *ComputerVisionService) extractDominantColors(imageData []byte) []string {
|
||||
// This is a simplified color extraction
|
||||
// In a real implementation, you would use:
|
||||
// - K-means clustering
|
||||
// - Color histogram analysis
|
||||
// - Median cut algorithm
|
||||
|
||||
// Simulate dominant colors
|
||||
colors := []string{
|
||||
"#FFFFFF", // White
|
||||
"#333333", // Dark gray
|
||||
"#0066CC", // Blue
|
||||
"#FF6600", // Orange
|
||||
"#00CC66", // Green
|
||||
}
|
||||
|
||||
return colors
|
||||
}
|
||||
|
||||
// saveImageAnalysis saves the analysis results to the database
|
||||
func (s *ComputerVisionService) saveImageAnalysis(fileID uint, analysis *ImageAnalysisResponse) error {
|
||||
// Convert analysis to JSON for storage
|
||||
analysisJSON := fmt.Sprintf(`{
|
||||
"text": "%s",
|
||||
"object_count": %d,
|
||||
"face_count": %d,
|
||||
"metadata": %+v
|
||||
}`, analysis.Text, len(analysis.Objects), len(analysis.Faces), analysis.Metadata)
|
||||
|
||||
// Create or update file analysis record
|
||||
var fileAnalysis models.FileAnalysis
|
||||
err := s.db.Where("file_id = ?", fileID).First(&fileAnalysis).Error
|
||||
|
||||
if err == gorm.ErrRecordNotFound {
|
||||
// Create new analysis record
|
||||
now := time.Now()
|
||||
fileAnalysis = models.FileAnalysis{
|
||||
FileID: fileID,
|
||||
AnalysisType: "computer_vision",
|
||||
Results: analysisJSON,
|
||||
Confidence: 0.85,
|
||||
ProcessedAt: &now,
|
||||
}
|
||||
return s.db.Create(&fileAnalysis).Error
|
||||
} else if err == nil {
|
||||
// Update existing record
|
||||
fileAnalysis.Results = analysisJSON
|
||||
now := time.Now()
|
||||
fileAnalysis.ProcessedAt = &now
|
||||
return s.db.Save(&fileAnalysis).Error
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// ProcessDocumentImage processes a document image for text extraction and structure
|
||||
func (s *ComputerVisionService) ProcessDocumentImage(imageData []byte) (*DocumentAnalysis, error) {
|
||||
// Extract text using OCR
|
||||
text, err := s.extractText(imageData)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Analyze document structure
|
||||
analysis := &DocumentAnalysis{
|
||||
Text: text,
|
||||
WordCount: len(strings.Fields(text)),
|
||||
LineCount: len(strings.Split(text, "\n")),
|
||||
Language: s.detectLanguage(text),
|
||||
DocumentType: s.detectDocumentType(text),
|
||||
Sections: s.extractSections(text),
|
||||
Tables: s.extractTables(text),
|
||||
Links: s.extractLinks(text),
|
||||
Emails: s.extractEmails(text),
|
||||
PhoneNumbers: s.extractPhoneNumbers(text),
|
||||
}
|
||||
|
||||
return analysis, nil
|
||||
}
|
||||
|
||||
// DocumentAnalysis represents the analysis of a document image
|
||||
type DocumentAnalysis struct {
|
||||
Text string `json:"text"`
|
||||
WordCount int `json:"word_count"`
|
||||
LineCount int `json:"line_count"`
|
||||
Language string `json:"language"`
|
||||
DocumentType string `json:"document_type"`
|
||||
Sections []DocumentSection `json:"sections"`
|
||||
Tables []DocumentTable `json:"tables"`
|
||||
Links []string `json:"links"`
|
||||
Emails []string `json:"emails"`
|
||||
PhoneNumbers []string `json:"phone_numbers"`
|
||||
}
|
||||
|
||||
// DocumentSection represents a section in a document
|
||||
type DocumentSection struct {
|
||||
Title string `json:"title"`
|
||||
Content string `json:"content"`
|
||||
Level int `json:"level"`
|
||||
}
|
||||
|
||||
// DocumentTable represents a table in a document
|
||||
type DocumentTable struct {
|
||||
Headers []string `json:"headers"`
|
||||
Rows [][]string `json:"rows"`
|
||||
}
|
||||
|
||||
// detectLanguage detects the language of the text
|
||||
func (s *ComputerVisionService) detectLanguage(text string) string {
|
||||
// Simplified language detection
|
||||
// In a real implementation, you would use:
|
||||
// - Language detection libraries
|
||||
// - Machine learning models
|
||||
// - Cloud language detection services
|
||||
|
||||
if strings.Contains(strings.ToLower(text), "the") && strings.Contains(strings.ToLower(text), "and") {
|
||||
return "en"
|
||||
} else if strings.Contains(text, "est") && strings.Contains(text, "que") {
|
||||
return "es"
|
||||
} else if strings.Contains(text, "und") && strings.Contains(text, "der") {
|
||||
return "de"
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
// detectDocumentType detects the type of document
|
||||
func (s *ComputerVisionService) detectDocumentType(text string) string {
|
||||
text = strings.ToLower(text)
|
||||
|
||||
if strings.Contains(text, "invoice") || strings.Contains(text, "bill") {
|
||||
return "invoice"
|
||||
} else if strings.Contains(text, "receipt") || strings.Contains(text, "purchase") {
|
||||
return "receipt"
|
||||
} else if strings.Contains(text, "resume") || strings.Contains(text, "curriculum") {
|
||||
return "resume"
|
||||
} else if strings.Contains(text, "contract") || strings.Contains(text, "agreement") {
|
||||
return "contract"
|
||||
} else if strings.Contains(text, "report") || strings.Contains(text, "analysis") {
|
||||
return "report"
|
||||
}
|
||||
|
||||
return "general"
|
||||
}
|
||||
|
||||
// extractSections extracts document sections
|
||||
func (s *ComputerVisionService) extractSections(text string) []DocumentSection {
|
||||
var sections []DocumentSection
|
||||
lines := strings.Split(text, "\n")
|
||||
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Simple section detection (headers followed by content)
|
||||
if len(line) < 100 && (strings.HasSuffix(line, ":") || strings.ToUpper(line) == line) {
|
||||
sections = append(sections, DocumentSection{
|
||||
Title: line,
|
||||
Content: "",
|
||||
Level: 1,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return sections
|
||||
}
|
||||
|
||||
// extractTables extracts tables from the text
|
||||
func (s *ComputerVisionService) extractTables(text string) []DocumentTable {
|
||||
// Simplified table extraction
|
||||
// In a real implementation, this would be much more sophisticated
|
||||
var tables []DocumentTable
|
||||
|
||||
// Look for tabular data patterns
|
||||
lines := strings.Split(text, "\n")
|
||||
for i, line := range lines {
|
||||
if strings.Contains(line, "\t") || strings.Contains(line, " ") {
|
||||
// Potential table row
|
||||
if i > 0 && strings.Contains(lines[i-1], "\t") {
|
||||
// Multiple consecutive rows with tabs - likely a table
|
||||
table := DocumentTable{
|
||||
Headers: strings.Split(lines[i-1], "\t"),
|
||||
Rows: [][]string{strings.Split(line, "\t")},
|
||||
}
|
||||
tables = append(tables, table)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tables
|
||||
}
|
||||
|
||||
// extractLinks extracts URLs from the text
|
||||
func (s *ComputerVisionService) extractLinks(text string) []string {
|
||||
urlRegex := regexp.MustCompile(`https?://[^\s]+`)
|
||||
return urlRegex.FindAllString(text, -1)
|
||||
}
|
||||
|
||||
// extractEmails extracts email addresses from the text
|
||||
func (s *ComputerVisionService) extractEmails(text string) []string {
|
||||
emailRegex := regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`)
|
||||
return emailRegex.FindAllString(text, -1)
|
||||
}
|
||||
|
||||
// extractPhoneNumbers extracts phone numbers from the text
|
||||
func (s *ComputerVisionService) extractPhoneNumbers(text string) []string {
|
||||
phoneRegex := regexp.MustCompile(`\b\d{3}[-.]?\d{3}[-.]?\d{4}\b`)
|
||||
return phoneRegex.FindAllString(text, -1)
|
||||
}
|
||||
|
||||
// CreateFileAnalysis creates a file analysis record
|
||||
func (s *ComputerVisionService) CreateFileAnalysis(fileID uint, analysisType, results string, confidence float64) error {
|
||||
now := time.Now()
|
||||
fileAnalysis := models.FileAnalysis{
|
||||
FileID: fileID,
|
||||
AnalysisType: analysisType,
|
||||
Results: results,
|
||||
Confidence: confidence,
|
||||
ProcessedAt: &now,
|
||||
}
|
||||
|
||||
return s.db.Create(&fileAnalysis).Error
|
||||
}
|
||||
Reference in New Issue
Block a user