Spaces:

Canstralian
/

bert_password_sniffer

Paused

App Files Files Community

Canstralian committed on Nov 23, 2024

Commit

41f9a4d

verified ·

1 Parent(s): 9b9c9ac

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -56

app.py CHANGED Viewed

@@ -1,73 +1,95 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-# Create the Hugging Face InferenceClient that detect_passwords() queries.
-# "username/password-detection-model" is a placeholder model id; it must be
-# replaced with a real fine-tuned password-detection model.
-client = InferenceClient("username/password-detection-model")  # Replace with your trained model
-def detect_passwords(text, threshold=0.9):
-    """
-    Detects potential passwords in text using a model from Hugging Face.
-    :param text: Input text containing potential passwords.
-    :param threshold: Confidence score above which a pattern is flagged.
-    :return: Flagged patterns and their confidence scores.
-    """
-    # Using a model inference to classify potential passwords
-    response = client.query({"inputs": text})  # Query the model for classification
-    predictions = response.get("predictions", [])
-    flagged_items = []
-    for pred in predictions:
-        token, confidence_score = pred["token"], pred["score"]
-        if confidence_score > threshold:
-            flagged_items.append((token, confidence_score))
-    if not flagged_items:
-        return "No passwords detected."
-    else:
-        return f"Potential passwords detected: {flagged_items}"
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
     """
-    Run password detection on the user's message and return the result text.
-
-    :param message: Latest user message; this is the text that gets scanned.
-    :param history: Earlier (user, assistant) turns.
-    :param system_message: System prompt placed at the start of ``messages``.
-    :param max_tokens: Not used by this function.
-    :param temperature: Not used by this function.
-    :param top_p: Not used by this function.
-    :return: The string returned by ``detect_passwords(message)``.
     """
-    # Rebuild the conversation as role/content dicts, skipping empty turns.
-    # NOTE: ``messages`` is not read again below; only ``message`` is scanned.
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    # Use Hugging Face model to detect passwords in the user's message
     detected_passwords = detect_passwords(message)
-    response = detected_passwords
-    return response  # Output the result
-# Gradio chat UI: `respond` handles each message. The additional inputs are
-# listed in the same order as respond's extra parameters
-# (system_message, max_tokens, temperature, top_p).
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
         gr.Textbox(value="You are a password detection chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
     ],
 )
 if __name__ == "__main__":

 import gradio as gr
+from transformers import Trainer, TrainingArguments, BertForSequenceClassification, BertTokenizer
+from datasets import load_dataset
+from huggingface_hub import login
 from huggingface_hub import InferenceClient
+import torch
+# Authenticate with Hugging Face.
+# NOTE(review): login() is called with no arguments at import time; it
+# presumably needs a token to be supplied somehow -- confirm this works in a
+# non-interactive deployment.
+login()
+# Load the training dataset by name via datasets.load_dataset.
+# NOTE: "imdb" is only a stand-in text-classification dataset; swap in a
+# dataset that is actually labelled for password detection.
+dataset = load_dataset("imdb")  # Replace with your own dataset, e.g., Kaggle dataset
+# Load the pretrained BERT tokenizer and a two-label sequence classifier
+model_name = "bert-base-uncased"
+tokenizer = BertTokenizer.from_pretrained(model_name)
+model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
+# Preprocess the Dataset
+def preprocess_function(examples):
+    return tokenizer(examples['text'], padding="max_length", truncation=True)
+# NOTE: everything in this section is module-level code, so it runs (including
+# the full training loop) every time this file is executed or imported.
+# Tokenize every split of the dataset in batches
+tokenized_datasets = dataset.map(preprocess_function, batched=True)
+# Pick the dataset's existing "train" and "test" splits for training and evaluation
+train_dataset = tokenized_datasets["train"]
+eval_dataset = tokenized_datasets["test"]
+# Define Training Arguments
+training_args = TrainingArguments(
+    output_dir="./results",          # output directory
+    num_train_epochs=3,              # number of training epochs
+    per_device_train_batch_size=8,   # batch size for training
+    per_device_eval_batch_size=16,   # batch size for evaluation
+    warmup_steps=500,                # number of warmup steps for learning rate scheduler
+    weight_decay=0.01,               # strength of weight decay
+    logging_dir="./logs",            # directory for storing logs
+    logging_steps=10,
+    # NOTE(review): newer transformers releases name this argument
+    # `eval_strategy` -- confirm against the installed version.
+    evaluation_strategy="epoch",     # evaluate each epoch
+    save_strategy="epoch",           # save model each epoch
+)
+# Initialize Trainer
+trainer = Trainer(
+    model=model,                     # the instantiated 🤗 Transformers model to be trained
+    args=training_args,              # training arguments, defined above
+    train_dataset=train_dataset,     # training dataset
+    eval_dataset=eval_dataset,       # evaluation dataset
+)
+# Train the Model
+trainer.train()
+# Save the fine-tuned model and the tokenizer to separate local directories
+model.save_pretrained("./password_sniffer_model")
+tokenizer.save_pretrained("./password_sniffer_tokenizer")
+# Reload the fine-tuned model and tokenizer from the directories written above;
+# these rebind the module-level `model` and `tokenizer` used for inference
+model = BertForSequenceClassification.from_pretrained("./password_sniffer_model")
+tokenizer = BertTokenizer.from_pretrained("./password_sniffer_tokenizer")
+# Setup Hugging Face Inference Client
+# NOTE(review): `client` is not referenced by the visible inference code, which
+# uses the local `model` and `tokenizer` -- confirm whether it is still needed.
+client = InferenceClient("password_sniffer_model")
+def detect_passwords(text):
     """
+    Detect potential passwords using the trained BERT model.
     """
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
+    outputs = model(**inputs)
+    predictions = torch.softmax(outputs.logits, dim=-1)
+    predicted_class = torch.argmax(predictions, dim=-1).item()
+    if predicted_class == 1:  # Assuming '1' represents potential password
+        return "Potential password detected."
+    else:
+        return "No password detected."
+# Gradio Interface
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    """
+    Run password detection on ``message`` and return the result string.
+
+    Only ``message`` is used; ``history``, ``system_message``, ``max_tokens``,
+    ``temperature`` and ``top_p`` are accepted but ignored.
+    """
     detected_passwords = detect_passwords(message)
+    return detected_passwords
+demo = gr.Interface(
+    fn=respond,
+    inputs=[
         gr.Textbox(value="You are a password detection chatbot.", label="System message"),
+        gr.Textbox(value="Hello, your password might be 12345!", label="User input"),
     ],
+    outputs="text",
 )
 if __name__ == "__main__":