Getting NaN loss on EMA test samples while using open_clip models for prior training #325
Unanswered

RicBriChri asked this question in Q&A
I'm getting NaN loss on the EMA test samples while using both pretrained and my own open_clip models, and I'm not sure what mistake I'm making. The training losses converge well, but during the testing phase the loss is NaN. I'm using the following config file:
```json
{
  "prior": {
    "clip": {
      "make": "open_clip",
      "model": "ViT-B-32",
      "force_quick_gelu": true
    },
    "net": {
      "dim": 512,
      "depth": 12,
      "num_timesteps": 1000,
      "max_text_len": 77,
      "num_time_embeds": 1,
      "num_image_embeds": 1,
      "num_text_embeds": 1,
      "dim_head": 64,
      "heads": 12,
      "ff_mult": 4,
      "norm_out": true,
      "attn_dropout": 0.05,
      "ff_dropout": 0.05,
      "final_proj": true,
      "normformer": true,
      "rotary_emb": true
    },
    "image_embed_dim": 512,
    "image_size": 256,
    "image_channels": 3,
    "timesteps": 1000,
    "sample_timesteps": 64,
    "cond_drop_prob": 0.1,
    "loss_type": "l2",
    "predict_x_start": true,
    "beta_schedule": "cosine",
    "condition_on_text_encodings": true
  },
  "data": {
    "batch_size": 128,
    "num_data_points": 30000,
    "eval_every_seconds": 1600,
    "image_url": " ",
    "meta_url": " ",
    "splits": {
      "train": 0.9,
      "val": 0.05,
      "test": 0.05
    }
  },
  "train": {
    "epochs": 1,
    "lr": 1.1e-4,
    "wd": 6.02e-2,
    "max_grad_norm": 0.5,
    "use_ema": true,
    "ema_beta": 0.9999,
    "ema_update_after_step": 50,
    "warmup_steps": 50,
    "amp": false,
    "save_every_seconds": 3600,
    "eval_timesteps": [64],
    "random_seed": 84513
  }
}
```
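As a first sanity check, something like the sketch below can rule out non-finite values in the open_clip embeddings themselves (a minimal sketch, assuming a recent open_clip; the pretrained tag is only a placeholder for whichever checkpoint is actually loaded, and on older open_clip versions the tokenizer call is `open_clip.tokenize` instead):

```python
import torch
import open_clip

# Minimal sketch: verify the open_clip text embeddings are finite.
# "laion2b_s34b_b79k" is a placeholder pretrained tag -- substitute the
# checkpoint actually used for prior training.
model, _, preprocess = open_clip.create_model_and_transforms(
    "ViT-B-32", pretrained="laion2b_s34b_b79k", force_quick_gelu=True
)
tokenizer = open_clip.get_tokenizer("ViT-B-32")
model.eval()

tokens = tokenizer(["a photo of a dog", "a photo of a cat"])
with torch.no_grad():
    text_embeds = model.encode_text(tokens)

# NaNs here would mean the problem is upstream of the prior / EMA logic.
print("NaN in text embeds:", torch.isnan(text_embeds).any().item())
print("inf in text embeds:", torch.isinf(text_embeds).any().item())
```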
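If the embeddings come back clean, a forward hook on the EMA copy of the prior can pinpoint which submodule first produces a non-finite output during the test pass. This is plain, model-agnostic PyTorch; `ema_prior` below is a placeholder for whatever module the eval loop calls:

```python
import torch
import torch.nn as nn

def add_nan_hooks(model: nn.Module):
    """Flag the first submodule whose forward output contains NaN/inf."""
    def make_hook(name):
        def hook(module, inputs, output):
            if isinstance(output, torch.Tensor) and not torch.isfinite(output).all():
                print(f"non-finite output from {name} ({module.__class__.__name__})")
        return hook

    for name, module in model.named_modules():
        module.register_forward_hook(make_hook(name))

# Usage (ema_prior is a placeholder for the EMA model the eval loop runs):
# add_nan_hooks(ema_prior)
# then run one test batch and read off the first module reported
```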