Generating images with PyTorch/Stable Diffusion on an NVIDIA GTX 1650 Ti produces black images

I am using Ubuntu 22.04.4 LTS with the 6.5.0-35-generic kernel. I have nvidia-driver-555 installed (Driver Version: 555.58.02, CUDA Version: 12.5), along with the CUDA toolkit and NVIDIA Container Runtime Hook version 1.17.1.
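
A quick way to confirm PyTorch sees the card at all; this is not part of my original script, just a sanity check added here for context (the capability value is what I'd expect for a Turing card):

import torch

# Diagnostic only: confirm the CUDA device is visible to PyTorch
print(torch.cuda.is_available())            # True if the driver/toolkit are set up
print(torch.cuda.get_device_name(0))        # e.g. "NVIDIA GeForce GTX 1650 Ti"
print(torch.cuda.get_device_capability(0))  # GTX 1650 Ti is Turing, i.e. (7, 5)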

This is the Python code I'm running in a conda environment:

from diffusers import StableDiffusionPipeline
import torch

# Load the pre-trained Stable Diffusion model from Hugging Face
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)

# Move the model to GPU if available
pipe = pipe.to("cuda")

# Define your text prompt
prompt = "A scenic landscape with high mountains, a flowing river in the valley, and a colorful sunset sky with warm shades of orange and purple."

# Generate the image from the text prompt
image = pipe(prompt).images[0]

# Display the generated image
image.show()

# Optionally, save the image
image.save("generated_landscape.png")

I always get a black image as output. To narrow the problem down, I ran this debug script on the CPU:

from diffusers import StableDiffusionPipeline
import torch
import numpy as np
from PIL import Image

# Load the pre-trained Stable Diffusion model from Hugging Face
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)

# Disable the safety checker (optional)
pipe.safety_checker = None

# Move the model to CPU (instead of GPU)
pipe = pipe.to("cpu")

# Define a very simple prompt to test the image generation
prompt = "a blue square"  # Simple prompt to check if the image generation works

# Generate the image from the text prompt
output = pipe(prompt)

# Check the raw output (before image conversion)
print(f"Raw output: {output}")

# Check if any images were returned
if hasattr(output, 'images') and len(output.images) > 0:
    image = output.images[0]
    image_array = np.array(image)

    # Check raw image values
    print(f"Raw image array shape: {image_array.shape}")
    print(f"Min value in raw image: {np.min(image_array)}, Max value in raw image: {np.max(image_array)}")

    # If image is completely black
    if np.all(image_array == 0):
        print("The generated image is completely black.")

    # The array from the PIL image is already uint8 in [0, 255], so clip
    # without rescaling (multiplying by 255 again would blow out the values)
    image_array = np.clip(image_array, 0, 255).astype(np.uint8)

    # Convert back to a PIL image
    image = Image.fromarray(image_array)

    # Display and save the image
    image.show()
    image.save("generated_blue_square.png")
else:
    print("Error: No images returned by the model.")

and I get the following warning:

/home/user/anaconda3/envs/myenv/lib/python3.9/site-packages/diffusers/image_processor.py:147: RuntimeWarning: invalid value encountered in cast
  images = (images * 255).round().astype("uint8")
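
If I understand that warning correctly, it is raised while casting the float pixel values to uint8, which would mean the raw output already contains NaNs. A minimal way to check that assumption (untested sketch, reusing the pipe and prompt from the script above; output_type="np" is a standard diffusers parameter that returns float arrays in [0, 1] instead of PIL images):

import numpy as np

# Request raw float arrays so NaNs can be detected before the uint8 cast
output = pipe(prompt, output_type="np")
arr = output.images[0]
print("contains NaN:", np.isnan(arr).any())
print("value range:", np.nanmin(arr), np.nanmax(arr))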

On a later try, I was able to produce an image of a blue rectangle on a black gradient using the code below (it was supposed to be a blue square on a white background):

from diffusers import StableDiffusionPipeline
import torch
import numpy as np
from PIL import Image

# Load the pre-trained Stable Diffusion model from Hugging Face
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")

# Disable the safety checker (optional)
pipe.safety_checker = None

# Move the model to the CPU (if you are testing on CPU)
pipe = pipe.to("cpu")

# Define a more refined and specific prompt
prompt = "A perfect blue square centered on a white background."

# Generate the image from the text prompt
output = pipe(prompt)

# Check if any images were returned
if hasattr(output, 'images') and len(output.images) > 0:
    image = output.images[0]
    image.show()

    # Optionally, save the image
    image.save("generated_perfect_blue_square.png")
else:
    print("Error: No images returned by the model.")
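
To make runs like this comparable to each other, I assume pinning the seed and sampler settings would help; a sketch using standard diffusers parameters (num_inference_steps and guidance_scale shown at their defaults, untested by me):

import torch

# Fix the RNG so repeated runs of the same prompt are directly comparable
generator = torch.Generator("cpu").manual_seed(42)
output = pipe(
    prompt,
    num_inference_steps=50,  # pipeline default, spelled out for clarity
    guidance_scale=7.5,      # pipeline default guidance strength
    generator=generator,
)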

After roughly 300 failed attempts, this is my latest try, and it still generates a black image:

from diffusers import StableDiffusionPipeline
import torch

# Clear GPU cache
torch.cuda.empty_cache()

# Load the Stable Diffusion model with mixed precision
pipe = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1-base", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")  # Move the model to the GPU

# Enable attention slicing for lower memory usage
pipe.enable_attention_slicing()

# Backend tweaks for memory and speed
torch.backends.cuda.matmul.allow_tf32 = True  # Allow TF32 to reduce memory in tensor ops
torch.backends.cudnn.benchmark = True  # Optimize performance for consistent input sizes

# Define the prompt and image parameters
prompt = "A beautiful painting of a serene mountain landscape during sunrise."

try:
    # Generate the image with specified size (lower dimensions if necessary)
    output = pipe(prompt=prompt, height=512, width=512)
    if not output.images or len(output.images) == 0:
        raise ValueError("No images were generated by the pipeline.")

    # Retrieve the generated image
    image = output.images[0]

    # Display the image for verification
    image.show()

    # Save the image
    output_path = "mountain_landscape.png"
    image.save(output_path)
    print(f"Image successfully saved to {output_path}.")

except torch.cuda.OutOfMemoryError:
    print("CUDA out of memory. Try reducing the image size or batch size.")
except Exception as e:
    raise RuntimeError(f"Image generation failed: {e}")
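
One thing I have not managed to rule out yet is whether half precision itself is the problem. The sketch below is untested: it loads the model in full float32 and uses enable_sequential_cpu_offload() (a diffusers method that requires the accelerate package) because I assume plain fp32 will not fit in the 1650 Ti's 4 GB of VRAM:

from diffusers import StableDiffusionPipeline
import torch

# Load in the default float32 precision to rule out fp16-specific issues
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")

# Stream weights between CPU and GPU on demand instead of pipe.to("cuda");
# the full fp32 weights would not fit in 4 GB otherwise
pipe.enable_sequential_cpu_offload()

# Fixed seed so the result is comparable to the fp16 runs
generator = torch.Generator("cpu").manual_seed(42)
image = pipe("a blue square", generator=generator).images[0]
image.save("fp32_test.png")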