I am trying to train the Sentence Transformer model cross-encoder/ms-marco-MiniLM-L-12-v2, but training only uses one GPU even though my machine has two. I tried DataParallel and DistributedDataParallel, but neither worked.
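The DataParallel attempt looked roughly like this (only a sketch, not my exact code):

import torch
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('cross-encoder/ms-marco-MiniLM-L-12-v2')
# Wrap the model so both GPUs are visible to PyTorch
if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)
model = model.to('cuda')
# fit() is then only reachable via model.module.fit(...), and training still runs on a single GPU

My full training script (model name, max sequence length, training examples, and output path are placeholders):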
from sentence_transformers import SentenceTransformer, losses
from torch.utils.data import DataLoader
# Replace 'model_name' and 'max_seq_length' with your actual model name and max sequence length
model_name = 'your_model_name'
max_seq_length = your_max_seq_length
# Load SentenceTransformer model
model = SentenceTransformer(model_name)
model.max_seq_length = max_seq_length
# Replace 'train_examples' with your actual training examples
train_examples = your_train_examples
# Create DataLoader for training
train_dataloader = DataLoader(train_examples, batch_size=16, drop_last=True, shuffle=True)
# Define the loss function
train_loss = losses.MarginMSELoss(model)
# Tune the model
model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=500, warmup_steps=int(len(train_dataloader) * 0.1))
# Replace 'output_path' with the desired path for saving the trained model
output_path = 'your_output_path'
# Save the model after training
model.save(output_path)
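From what I have read, the newer SentenceTransformerTrainer API (sentence-transformers v3+) is supposed to handle multiple GPUs when the script is launched with torchrun or accelerate. Is switching to something like the sketch below (dataset contents and arguments are just placeholders) the intended way, or can model.fit itself be made to use both GPUs?

# Sketch assuming sentence-transformers v3+; not tested on my setup
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import MarginMSELoss

model = SentenceTransformer('your_model_name')

# Placeholder data: MarginMSELoss expects (query, positive, negative) texts
# plus a float 'label' column holding the margin score
train_dataset = Dataset.from_dict({
    'query': ['example query'],
    'positive': ['relevant passage'],
    'negative': ['irrelevant passage'],
    'label': [0.5],
})

args = SentenceTransformerTrainingArguments(
    output_dir='your_output_path',
    num_train_epochs=1,
    per_device_train_batch_size=16,
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=MarginMSELoss(model),
)
trainer.train()

# Launch so that both GPUs are used, e.g.:
#   torchrun --nproc_per_node=2 train_script.py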