import pandas as pd
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load the tokenizer and classifier once at startup.
# NOTE: textattack/roberta-base-imdb was fine-tuned on IMDB movie-review
# sentiment (two labels), so mapping label 1 to "Job Related" below assumes a
# checkpoint fine-tuned for job-related classification; swap in such a model
# for real use.
tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-imdb")
model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-imdb")
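
# Streamlit re-executes this script on every interaction, so the two lines
# above reload the model each time. A minimal sketch of caching the load with
# st.cache_resource (an optional refactor; the names mirror the globals above):
#
#   @st.cache_resource
#   def load_model():
#       tok = AutoTokenizer.from_pretrained("textattack/roberta-base-imdb")
#       mdl = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-imdb")
#       return tok, mdl
#
#   tokenizer, model = load_model()
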
def analyze_text(text):
    # RoBERTa's tokenizer is case-sensitive, so the text is passed through
    # without lowercasing; truncation keeps inputs within the model's
    # 512-token limit.
    encoded_text = tokenizer(text, truncation=True, padding=True, return_tensors='pt')

    # Run inference with gradient tracking disabled.
    with torch.no_grad():
        output = model(**encoded_text)
    prediction = output.logits.argmax(-1).item()

    return "Job Related" if prediction == 1 else "Not Job Related"

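# Classifying a CSV row-by-row is slow. A minimal sketch of an optional batched
# variant (a hypothetical helper, not used below): tokenize all messages
# together with padding and score them in a single forward pass.
def analyze_batch(texts):
    encoded = tokenizer(list(texts), truncation=True, padding=True, return_tensors='pt')
    with torch.no_grad():
        logits = model(**encoded).logits
    return ["Job Related" if p == 1 else "Not Job Related"
            for p in logits.argmax(-1).tolist()]
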
def count_job_related_messages(data):
    # Count from the precomputed "Job Related" column instead of re-running
    # the model over every message a second time.
    counts = data["Job Related"].value_counts()
    job_related_count = int(counts.get("Job Related", 0))
    not_job_related_count = int(counts.get("Not Job Related", 0))
    return job_related_count, not_job_related_count

st.title("Job Related Message Analyzer")

uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
user_input = st.text_input("Enter text")

if uploaded_file:
    data = pd.read_csv(uploaded_file)
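
    # Defensive check (an added assumption: the upload must contain a
    # "message" column, since everything below reads data["message"]).
    if "message" not in data.columns:
        st.error("The CSV file must contain a 'message' column.")
        st.stop()
    data["message"] = data["message"].astype(str)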

    # Classify every message and attach the labels as a new column.
    results = [analyze_text(message) for message in data["message"]]
    data["Job Related"] = results

    job_related_count, not_job_related_count = count_job_related_messages(data)

    # Show the labeled table plus the summary counts.
    st.dataframe(data)
    st.write(f"Job Related Messages: {job_related_count}")
    st.write(f"Not Job Related Messages: {not_job_related_count}")
elif user_input:
    result = analyze_text(user_input)
    st.write(f"Message Classification: {result}")
else:
    st.write("Please upload a CSV file or enter text to analyze.")