|
|
import os |
|
|
import subprocess |
|
|
import sys |
|
|
import re |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
import gradio as gr |
|
|
import requests |
|
|
import json |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
|
|
|
# Import pytesseract, installing the package with pip into the running
# interpreter on first use if it is missing.
try:
    import pytesseract
except ImportError:
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pytesseract'])
    import pytesseract

# Tell pytesseract which tesseract executable to run:
#   1. the conventional Linux location, if it exists;
#   2. otherwise whatever `which tesseract` reports;
#   3. otherwise the bare command name, resolved through PATH at call time.
try:
    if os.path.exists('/usr/bin/tesseract'):
        pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
    else:
        tesseract_path = subprocess.check_output(['which', 'tesseract']).decode().strip()
        if tesseract_path:
            pytesseract.pytesseract.tesseract_cmd = tesseract_path
except (subprocess.SubprocessError, OSError, UnicodeDecodeError):
    # `which` is missing, exited non-zero, or printed undecodable output.
    # Catch only those failures so Ctrl-C / SystemExit are not swallowed.
    pytesseract.pytesseract.tesseract_cmd = 'tesseract'
|
|
|
|
|
|
|
|
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# The Mistral API key is read from the environment instead of being embedded in
# the source. Set MISTRAL_API_KEY in the shell or in the .env file. When it is
# missing the value is an empty string, the API call is rejected, and the app
# falls back to the simulated analysis.
# NOTE(review): any key that was previously committed to this file should be
# considered leaked and revoked.
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY", "")
|
|
|
|
|
|
|
|
_MISTRAL_CHAT_URL = "https://api.mistral.ai/v1/chat/completions"

# Upper bound (seconds) on the HTTP call so a stalled connection cannot block
# the UI callback forever; requests waits indefinitely when no timeout is set.
_MISTRAL_TIMEOUT_SECONDS = 60

_ANALYSIS_DISCLAIMER = """
## Disclaimer
This analysis is provided for informational purposes only and should not replace professional medical advice.
Always consult with a healthcare provider regarding dietary restrictions, allergies, or health conditions.
"""


def _build_analysis_prompt(ingredients_text, health_conditions=None):
    """Return the chat prompt, tailored to health conditions when any are given."""
    if health_conditions and health_conditions.strip():
        return f"""
Analyze the following food ingredients for a person with these health conditions: {health_conditions}
Ingredients: {ingredients_text}
For each ingredient:
1. Provide its potential health benefits
2. Identify any potential risks
3. Note if it may affect the specified health conditions
Then provide an overall assessment of the product's suitability for someone with the specified health conditions.
Format your response in markdown with clear headings and sections.
"""

    return f"""
Analyze the following food ingredients:
Ingredients: {ingredients_text}
For each ingredient:
1. Provide its potential health benefits
2. Identify any potential risks or common allergens associated with it
Then provide an overall assessment of the product's general health profile.
Format your response in markdown with clear headings and sections.
"""


def analyze_ingredients_with_mistral(ingredients_list, health_conditions=None):
    """
    Use Mistral AI to analyze ingredients and provide health insights.

    Args:
        ingredients_list: Ingredient names (strings). An empty or missing list
            short-circuits with an explanatory message.
        health_conditions: Optional free-text description of health concerns;
            blank or None requests a general analysis.

    Returns:
        Markdown text. On success, the model's answer followed by a disclaimer.
        On any failure (non-200 response, network error, timeout, unexpected
        response shape), the output of ``dummy_analyze`` followed by a
        parenthesised note describing the failure.
    """
    if not ingredients_list:
        return "No ingredients detected or provided."

    prompt = _build_analysis_prompt(", ".join(ingredients_list), health_conditions)

    headers = {
        "Authorization": f"Bearer {MISTRAL_API_KEY}",
        "Content-Type": "application/json",
    }
    data = {
        "model": "mistral-small",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
    }

    try:
        response = requests.post(
            _MISTRAL_CHAT_URL,
            headers=headers,
            json=data,
            timeout=_MISTRAL_TIMEOUT_SECONDS,
        )
        if response.status_code == 200:
            analysis = response.json()['choices'][0]['message']['content']
            return analysis + _ANALYSIS_DISCLAIMER
        failure = f"Mistral API Error - {response.status_code} - {response.text}"
    except Exception as e:
        # Deliberate best-effort boundary: whatever goes wrong while talking to
        # the API, the user still gets the fallback report instead of a crash.
        failure = str(e)

    return dummy_analyze(ingredients_list, health_conditions) + f"\n\n(Using fallback analysis: {failure})"
|
|
|
|
|
|
|
|
|
|
|
def dummy_analyze(ingredients_list, health_conditions=None):
    """
    Build a placeholder markdown report used when the Mistral API call fails.

    Args:
        ingredients_list: Ingredient names (strings); each is title-cased for
            display.
        health_conditions: Optional free-text health concerns echoed into the
            report. A falsy value selects the "general analysis" wording.

    Returns:
        A markdown string with the detected ingredients, an explanation that
        the analysis is simulated, the health-considerations note and a
        disclaimer.
    """
    report = f"""
# Ingredient Analysis Report
## Detected Ingredients
{", ".join([i.title() for i in ingredients_list])}
## Overview
This is a simulated analysis since the Mistral API call failed. In the actual application,
the ingredients would be analyzed by Mistral for their health implications.
## Health Considerations
"""

    if health_conditions:
        report += f"""
The analysis would specifically consider these health concerns: {health_conditions}
"""
    else:
        report += """
No specific health concerns were provided, so a general analysis would be performed.
"""

    report += """
## Disclaimer
This analysis is provided for informational purposes only and should not replace professional medical advice.
Always consult with a healthcare provider regarding dietary restrictions, allergies, or health conditions.
"""

    return report
|
|
|
|
|
|
|
|
def _count_alphanumeric(text):
    """Return how many characters of *text* are letters or digits."""
    return sum(c.isalnum() for c in text)


def extract_text_from_image(image):
    """
    Run OCR on an image using several preprocessing strategies and keep the best.

    Four variants are passed to Tesseract (colour-inverted image, inverted
    adaptive threshold, bright/low-saturation colour mask, and the untouched
    image in sparse-text mode). The result containing the most alphanumeric
    characters wins. If even that has fewer than 20 such characters, one more
    pass with extra Tesseract options is tried.

    Args:
        image: A PIL image (the Gradio components use ``type="pil"``), or None.

    Returns:
        The cleaned, stripped text on success; otherwise a human-readable
        message: "No image captured...", "Tesseract OCR is not installed...",
        "No text could be extracted..." or "Error extracting text: <reason>".
        This function does not raise.
    """
    if image is None:
        return "No image captured. Please try again."

    try:
        # Fail early with a clear message when the tesseract binary is unusable.
        try:
            subprocess.run([pytesseract.pytesseract.tesseract_cmd, "--version"],
                           check=True, capture_output=True, text=True)
        except (subprocess.SubprocessError, OSError):
            return "Tesseract OCR is not installed or not properly configured. Please check installation."

        # Imported lazily: cv2 is not imported at module level in this file.
        import cv2
        from PIL import Image, ImageOps

        # Normalise to 3-channel RGB. ImageOps.invert and the RGB->BGR
        # conversion below reject images with an alpha channel or a palette
        # (common for uploaded PNGs), which previously aborted the whole OCR.
        image = image.convert("RGB")

        # --psm 6: treat the image as a single uniform block of text.
        block_config = r'--oem 3 --psm 6 -l eng --dpi 300'

        # Variant 1: colour-inverted image (helps with light text on dark packaging).
        inverted_text = pytesseract.image_to_string(ImageOps.invert(image), config=block_config)

        # Variant 2: grayscale -> edge-preserving denoise -> adaptive threshold -> invert.
        img_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
        gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
        filtered = cv2.bilateralFilter(gray, 11, 17, 17)
        thresh = cv2.adaptiveThreshold(filtered, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY, 11, 2)
        inverted_thresh = cv2.bitwise_not(thresh)
        cv_text = pytesseract.image_to_string(Image.fromarray(inverted_thresh), config=block_config)

        # Variant 3: mask of low-saturation, high-value ("white-ish") pixels,
        # tidied with close/open morphology and a light dilation.
        hsv = cv2.cvtColor(img_cv, cv2.COLOR_BGR2HSV)
        lower_white = np.array([0, 0, 150])
        upper_white = np.array([180, 30, 255])
        mask = cv2.inRange(hsv, lower_white, upper_white)
        kernel = np.ones((2, 2), np.uint8)
        mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
        mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
        mask = cv2.dilate(mask, kernel, iterations=1)
        color_text = pytesseract.image_to_string(Image.fromarray(mask), config=block_config)

        # Variant 4: the unprocessed image in sparse-text mode (--psm 11).
        direct_text = pytesseract.image_to_string(
            image,
            config=r'--oem 3 --psm 11 -l eng --dpi 300'
        )

        # Keep whichever variant recovered the most letters/digits.
        candidates = [inverted_text, cv_text, color_text, direct_text]
        best_text = max(candidates, key=_count_alphanumeric)

        # Very little text recovered: try one more pass with extra options.
        if _count_alphanumeric(best_text) < 20:
            neg_text = pytesseract.image_to_string(
                image,
                config=r'--oem 3 --psm 6 -c textord_heavy_nr=1 -c textord_debug_printable=0 -l eng --dpi 300'
            )
            if _count_alphanumeric(neg_text) > _count_alphanumeric(best_text):
                best_text = neg_text

        # Drop characters that are neither word characters, whitespace nor
        # common label punctuation, then collapse every run of blank lines.
        best_text = re.sub(r'[^\w\s,;:%.()\n\'-]', '', best_text)
        best_text = re.sub(r'\n{2,}', '\n', best_text)

        # If this looks like an ingredient list, re-insert spaces that OCR
        # tends to drop at lower->upper case and digit->letter boundaries.
        lowered = best_text.lower()
        if "ingredient" in lowered or any(x in lowered for x in ["sugar", "cocoa", "milk", "contain"]):
            best_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', best_text)
            best_text = re.sub(r'(\d+)([a-zA-Z])', r'\1 \2', best_text)

        if not best_text.strip():
            return "No text could be extracted. Ensure image is clear and readable."

        return best_text.strip()
    except Exception as e:
        # Boundary for the UI callback: report the failure as text, never raise.
        return f"Error extracting text: {str(e)}"
|
|
|
|
|
|
|
|
# Digits that OCR commonly produces in place of a look-alike letter.
_OCR_DIGIT_TO_LETTER = {'0': 'o', '1': 'i', '5': 's', '8': 'b'}


def parse_ingredients(text):
    """
    Split raw ingredient text (typed or OCR output) into a clean list.

    Steps: lowercase, repair a clipped "ngredients" header, strip a leading
    "ingredients:" header, remove stray symbols, fix digit-for-letter OCR
    confusions, then split on commas, semicolons and newlines.

    Digit fixes are applied only to a digit that sits directly between two
    letters (e.g. "c0coa" -> "cocoa"). Genuine numbers such as "b12" or "e150"
    are left alone, and real letters are never rewritten, so "milk" stays
    "milk".

    Args:
        text: Raw ingredient text; None or empty yields an empty list.

    Returns:
        A list of lowercase, stripped ingredient strings, each longer than one
        character, in their original order.
    """
    if not text:
        return []

    text = text.lower()

    # Repair the clipped header first so that it can be stripped below.
    text = re.sub(r'\bngredients\b', 'ingredients', text)
    text = re.sub(r'^\s*ingredients:?\s*', '', text)

    # Remove symbols that are usually OCR noise.
    text = re.sub(r'[|\\/@#$%^&*()_+=]', '', text)

    # Replace a look-alike digit only when it is embedded between letters.
    text = re.sub(
        r'(?<=[a-z])[0158](?=[a-z])',
        lambda match: _OCR_DIGIT_TO_LETTER[match.group()],
        text,
    )

    pieces = (piece.strip() for piece in re.split(r'[,;\n]', text))
    return [piece for piece in pieces if len(piece) > 1]
|
|
|
|
|
|
|
|
|
|
|
# Prefixes of every non-result message extract_text_from_image can return.
# Matching on prefixes (rather than searching for "Error" anywhere) keeps real
# label text that happens to contain such a word from being rejected, and also
# catches the "Tesseract not installed" and "no image" messages.
_OCR_FAILURE_PREFIXES = (
    "Error extracting text:",
    "No text could be extracted",
    "Tesseract OCR is not installed",
    "No image captured",
)


def process_input(input_method, text_input, camera_input, upload_input, health_conditions):
    """
    Route the selected input to OCR/parsing and return the analysis markdown.

    Args:
        input_method: "Camera", "Image Upload" or "Manual Entry".
        text_input: Typed ingredient list (used for "Manual Entry").
        camera_input: Captured image or None (used for "Camera").
        upload_input: Uploaded image or None (used for "Image Upload").
        health_conditions: Optional free-text health concerns, passed through
            to the analysis.

    Returns:
        The analysis text, or a short message explaining what input is missing
        or why text extraction failed.
    """
    if input_method == "Manual Entry":
        if not (text_input and text_input.strip()):
            return "No ingredients entered. Please try again."
        return analyze_ingredients_with_mistral(parse_ingredients(text_input), health_conditions)

    # Both image-based methods share one pipeline; only the source image and
    # the "nothing provided" message differ.
    if input_method == "Camera":
        image, missing_message = camera_input, "No camera image captured. Please try again."
    elif input_method == "Image Upload":
        image, missing_message = upload_input, "No image uploaded. Please try again."
    else:
        return "Please provide input using one of the available methods."

    if image is None:
        return missing_message

    extracted_text = extract_text_from_image(image)
    if extracted_text.startswith(_OCR_FAILURE_PREFIXES):
        return extracted_text + "\n\nPlease try using the 'Manual Entry' option instead."

    return analyze_ingredients_with_mistral(parse_ingredients(extracted_text), health_conditions)
|
|
|
|
|
|
|
|
# Gradio UI: inputs on the left (method selector, one input widget per method,
# health concerns, action button), results on the right, help text below.
with gr.Blocks(title="AI Ingredient Scanner") as app:
    gr.Markdown("# AI Ingredient Scanner")
    gr.Markdown("Scan product ingredients and analyze them for health benefits, risks, and potential allergens.")

    with gr.Row():
        with gr.Column():
            input_method = gr.Radio(
                choices=["Camera", "Image Upload", "Manual Entry"],
                label="Input Method",
                value="Camera",
            )

            # Only the widget for the selected method is visible; "Camera" is
            # the initial selection.
            camera_input = gr.Image(label="Capture ingredients with camera", type="pil", visible=True)
            upload_input = gr.Image(label="Upload image of ingredients label", type="pil", visible=False)
            text_input = gr.Textbox(
                label="Enter ingredients list (comma separated)",
                placeholder="milk, sugar, flour, eggs, vanilla extract",
                lines=3,
                visible=False,
            )

            health_conditions = gr.Textbox(
                label="Enter your health concerns (optional)",
                placeholder="diabetes, high blood pressure, peanut allergy, etc.",
                lines=2,
                info="The AI will automatically analyze ingredients for these conditions",
            )

            analyze_button = gr.Button("Analyze Ingredients")

        with gr.Column():
            output = gr.Markdown(label="Analysis Results")
            extracted_text_output = gr.Textbox(label="Extracted Text (for verification)", lines=3)

    def update_visible_inputs(choice):
        """Return visibility updates so only the chosen method's widget shows."""
        widget_methods = (
            (upload_input, "Image Upload"),
            (camera_input, "Camera"),
            (text_input, "Manual Entry"),
        )
        return {
            widget: gr.update(visible=(choice == method))
            for widget, method in widget_methods
        }

    input_method.change(update_visible_inputs, input_method, [upload_input, camera_input, text_input])

    def show_extracted_text(input_method, text_input, camera_input, upload_input):
        """Return the raw text that the analysis will be based on."""
        if input_method == "Manual Entry":
            return text_input

        if input_method == "Camera":
            chosen_image = camera_input
        elif input_method == "Image Upload":
            chosen_image = upload_input
        else:
            chosen_image = None

        if chosen_image is None:
            return "No input detected"
        return extract_text_from_image(chosen_image)

    # Two independent handlers on the same button: one fills the analysis
    # panel, the other shows the text that was read from the input.
    analyze_button.click(
        fn=process_input,
        inputs=[input_method, text_input, camera_input, upload_input, health_conditions],
        outputs=output,
    )
    analyze_button.click(
        fn=show_extracted_text,
        inputs=[input_method, text_input, camera_input, upload_input],
        outputs=extracted_text_output,
    )

    gr.Markdown("### How to use")
    gr.Markdown("""
1. Choose your input method (Camera, Image Upload, or Manual Entry)
2. Take a photo of the ingredients label or enter ingredients manually
3. Optionally enter your health concerns
4. Click "Analyze Ingredients" to get your personalized analysis
The AI will automatically analyze the ingredients, their health implications, and their potential impact on your specific health concerns.
""")

    gr.Markdown("### Examples of what you can ask")
    gr.Markdown("""
The system can handle a wide range of health concerns, such as:
- General health goals: "trying to reduce sugar intake" or "watching sodium levels"
- Medical conditions: "diabetes" or "hypertension"
- Allergies: "peanut allergy" or "shellfish allergy"
- Dietary restrictions: "vegetarian" or "gluten-free diet"
- Multiple conditions: "diabetes, high cholesterol, and lactose intolerance"
The AI will tailor its analysis to your specific needs.
""")

    gr.Markdown("### Tips for best results")
    gr.Markdown("""
- Hold the camera steady and ensure good lighting
- Focus directly on the ingredients list
- Make sure the text is clear and readable
- Be specific about your health concerns for more targeted analysis
""")

    gr.Markdown("### Disclaimer")
    gr.Markdown("""
This tool is for informational purposes only and should not replace professional medical advice.
Always consult with a healthcare provider regarding dietary restrictions, allergies, or health conditions.
""")


# Start the web server only when executed as a script.
if __name__ == "__main__":
    app.launch()