Spaces:

atifsial123
/

Engineer

Build error

App Files Files Community

atifsial123 committed on Sep 2, 2024

Commit

c12ca9b

verified ·

1 Parent(s): a78f83f

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -39

app.py CHANGED Viewed

@@ -12,25 +12,17 @@ install("torch")
 install("pandas")
 install("scikit-learn")
 install("gradio")
 import os
 import pandas as pd
 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
-# Load the pre-trained model and tokenizer
-def load_model_and_tokenizer():
-    try:
-        model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
-        tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)
-        return model, tokenizer
-    except Exception as e:
-        print(f"Error loading model or tokenizer: {e}")
-        return None, None
-# Function to load the dataset
 def load_dataset():
     file_path = "Valid-part-2.xlsx"
     if not os.path.exists(file_path):
         raise FileNotFoundError(f"Dataset not found. Please ensure that '{file_path}' exists.")
@@ -42,44 +34,60 @@ def load_dataset():
         print(f"Error loading dataset: {e}")
         return None
-# Function to search by name and return the PEC number
-def search_by_name(name, df):
-    if df is None:
-        return "Error: Dataset not loaded."
-    try:
-        name_matches = df[df['name'].str.contains(name, case=False, na=False)]
-        if not name_matches.empty:
-            return f"Your PEC number: {name_matches['PEC number'].values[0]}"
-        else:
-            return "No matches found for your name."
-    except Exception as e:
-        return f"Error during search: {e}"
-# Gradio interface
 def build_interface():
     df = load_dataset()  # Load your dataset
     if df is None:
         return None
     iface = gr.Interface(
-        fn=lambda name: search_by_name(name, df),
-        inputs=gr.Textbox(label="Please write your Name"),
-        outputs=gr.Textbox(label="Your PEC number"),
-        title="PEC Number Lookup",
-        description="Enter your name to find your PEC number."
     )
     return iface
-# Main function to run the Gradio app
 if __name__ == "__main__":
-    model, tokenizer = load_model_and_tokenizer()
-    if model is None or tokenizer is None:
-        print("Failed to load model or tokenizer. Exiting.")
     else:
-        iface = build_interface()
-        if iface is not None:
-            iface.launch()
-        else:
-            print("Failed to build interface due to dataset issues.")

 install("pandas")
 install("scikit-learn")
 install("gradio")
 import os
 import pandas as pd
 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
+import torch
+from sklearn.model_selection import train_test_split
+# Load your dataset
 def load_dataset():
     file_path = "Valid-part-2.xlsx"
+    print(f"Current working directory: {os.getcwd()}")
     if not os.path.exists(file_path):
         raise FileNotFoundError(f"Dataset not found. Please ensure that '{file_path}' exists.")
         print(f"Error loading dataset: {e}")
         return None
+# Preprocess the data
+def preprocess_data(df):
+    """Return the dataset unchanged; placeholder hook for future preprocessing."""
+    # Add your preprocessing steps here
+    # For example: cleaning, tokenization, etc.
+    return df
+# Train your model
+def train_model(df):
+    # Split the dataset into training and testing sets
+    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
+    # Load your pre-trained model and tokenizer from Hugging Face
+    tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
+    model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
+    # Add your training code here
+    # This may involve tokenizing the data and feeding it into the model
+    return model
+# Define the Gradio interface function
+def predict(input_text):
+    # Load the model and tokenizer
+    tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
+    model = AutoModel.from_pretrained("Alibaba-NLP/gte-multilingual-base")
+    # Tokenize input and make predictions
+    inputs = tokenizer(input_text, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+    # Process the outputs as needed (e.g., extracting relevant information)
+    return outputs.last_hidden_state
+# Build the Gradio interface
 def build_interface():
     df = load_dataset()  # Load your dataset
     if df is None:
         return None
+    df = preprocess_data(df)  # Preprocess the dataset
+    model = train_model(df)  # Train your model
     iface = gr.Interface(
+        fn=predict,
+        inputs=gr.inputs.Textbox(lines=2, placeholder="Enter text here..."),
+        outputs="text"
     )
     return iface
+# Run the Gradio interface
 if __name__ == "__main__":
+    # build_interface() returns None when the dataset could not be loaded,
+    # in which case there is nothing to launch.
+    iface = build_interface()
+    if iface:
+        iface.launch()
     else:
+        print("Failed to build the Gradio interface. Please check the dataset and model.")