Our Sentiment Analysis model is trained on the Sentiment140 dataset. This dataset is a collection of 1.6 million tweets labeled with sentiment polarity (positive or negative), and it is commonly used for sentiment analysis and natural language processing tasks.
We trained a logistic regression sentiment classifier using the Term Frequency-Inverse Document Frequency (TF-IDF) approach:
TF-IDF is a numerical representation of text that turns it into a format suitable for machine learning. It measures how important a word is within a document relative to the entire corpus, and it combines two components (a small worked example follows this list):
Term Frequency (TF): The number of times a word appears in a document.
Inverse Document Frequency (IDF): A measure that reduces the weight of commonly occurring words and increases the weight of rare words.
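As a minimal illustration of these two terms, the toy sketch below computes a raw-count TF and a logarithmic IDF by hand; the exact weighting and smoothing used when training the model may differ:

import math

# Toy corpus of three short "documents"
docs = [
    "great great day",
    "terrible day",
    "great service",
]

def tf(term: str, doc: str) -> int:
    # Term Frequency: how many times the term appears in the document
    return doc.split().count(term)

def idf(term: str, corpus: list[str]) -> float:
    # Inverse Document Frequency: down-weights terms that occur in many documents
    df = sum(1 for d in corpus if term in d.split())
    return math.log(len(corpus) / df)

# "great" appears twice in the first document but in two of the three documents,
# so its TF-IDF weight there is 2 * log(3/2) ≈ 0.81
print(tf("great", docs[0]) * idf("great", docs))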
Here is sample code for inferring sentiment from text using the trained model:
import re
import json
import numpy as np
import torch

def preprocess_text(text: str) -> str:
    # Remove links, special characters, and digits
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)  # Remove URLs
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove non-alphabetic characters
    text = text.lower()   # Convert to lowercase
    text = text.strip()   # Remove leading/trailing whitespace
    return text

def txt2tfidf(s: str, vocab: dict[str, int], idf: list[float]) -> torch.Tensor:
    # Build an L2-normalized TF-IDF vector for a single piece of text
    s_clean = preprocess_text(s)
    all_words = s_clean.split()
    relevant_words = set(all_words).intersection(vocab.keys())
    res = np.zeros(len(vocab))
    for w in relevant_words:
        w_idx = vocab[w]            # index of the word in the vocabulary
        w_cnt = all_words.count(w)  # term frequency (raw count)
        w_idf = idf[w_idx]          # inverse document frequency weight
        res[w_idx] = w_cnt * w_idf
    norm = np.linalg.norm(res)
    if norm > 0:  # avoid dividing by zero when no vocabulary word appears in the text
        res = res / norm
    return torch.tensor(res)

# Load the TF-IDF vocabulary and IDF weights
with open('sentiment_tfidf.json', 'r') as f:
    tfidf_data = json.load(f)

# Load the coefficient matrix and intercept vector of the trained logistic regression model
W = np.load('coef.npy')       # shape (1, 5000)
b = np.load('intercept.npy')  # shape (1,)

txt = 'this is the best day of my life'

# Transform the text to its numerical (TF-IDF) representation
pt = txt2tfidf(txt, tfidf_data['vocabulary'], tfidf_data['idf'])  # shape (5000,)

# Calculate the logistic regression logit (model parameters are converted to torch tensors)
res = pt @ torch.from_numpy(W).T + torch.from_numpy(b)

# The predicted sentiment is the sign of the logit
sentiment = "POSITIVE" if res.item() > 0 else "NEGATIVE"
print(f"Predicted sentiment: {sentiment}")
Achieving Full Privacy with LatticaAI
from lattica_query.auth import get_demo_token
from lattica_query.lattica_query_client import QueryClient
model_id = "sentimentAnalysis"
my_token = get_demo_token(model_id)
client = QueryClient(my_token)
context, secret_key, client_blocks, = client.generate_key()
# `pt` and `res` are torch.Tensor, same as in the plain example above
res = client.run_query(context, secret_key, pt, client_blocks)
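As noted in the comment above, `res` matches the plaintext result, so (assuming it is returned as a one-element torch.Tensor, as in the plain example) the label is recovered with the same sign check:

sentiment = "POSITIVE" if res.item() > 0 else "NEGATIVE"
print(f"Predicted sentiment: {sentiment}")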
import { getDemoToken, LatticaQueryClient } from '@Lattica-ai/lattica-query-client';
const modelId = "sentimentAnalysis";
const token = await getDemoToken(modelId);
const client = new LatticaQueryClient(token);
await client.init();
// pt is the result of tfidf encoding, as in the python example above
const result = await client.runQuery(pt);
First, install our client package.
See our documentation for a detailed explanation of each step in this flow.
To use the sentiment analysis model, use the sentimentAnalysis model ID.