
Commit 5a76add

QiuYuFong and qiuyufeng authored
Remove the use of pickle to prevent security issues (#1108)
Co-authored-by: qiuyufeng <qiuyufeng3@huawei.com>
1 parent c5dccc5 commit 5a76add

File tree

24 files changed: +243 −127 lines changed

examples/animatediff/tests/test_mm.py

Lines changed: 2 additions & 2 deletions

@@ -20,7 +20,7 @@ def load_pt_rt_states(mm_idx, folder):
     with open(args_fp, "r") as fp:
         args = json.load(fp)

-    inps = np.load(inps_fp, allow_pickle=True)
+    inps = np.load(inps_fp)
     inps_dict = {}
     for name in inps:
         if inps[name].ndim > 0:
@@ -29,7 +29,7 @@ def load_pt_rt_states(mm_idx, folder):
         else:
             inps_dict[name] = None

-    output = np.load(outs_fp, allow_pickle=True)["output"]
+    output = np.load(outs_fp)["output"]

     return args, inps_dict, output
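
The `allow_pickle=True` argument can be dropped because these archives contain plain ndarrays. `np.load` defaults to `allow_pickle=False`, which loads ordinary arrays fine and rejects pickled object arrays. A minimal sketch (file names are hypothetical):

```python
import numpy as np

# Plain ndarrays (no dtype=object) round-trip without pickle.
np.savez("inps.npz", x=np.arange(3), scale=np.float32(0.5))

inps = np.load("inps.npz")  # allow_pickle defaults to False
print({name: inps[name].ndim for name in inps})  # {'x': 1, 'scale': 0}

# An archive holding dtype=object data would raise instead:
# ValueError: Object arrays cannot be loaded when allow_pickle=False
```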

examples/hunyuan_dit/IndexKits/index_kits/dataset/config_parse.py

Lines changed: 1 addition & 4 deletions

@@ -1,7 +1,6 @@
 # Adapted from https://github.com/Tencent-Hunyuan/HunyuanDiT to work with MindSpore.
 import json
 import pathlib
-import pickle
 from collections import defaultdict
 from glob import glob
 from pathlib import Path
@@ -108,9 +107,7 @@ def load_md5s(paths: List[pathlib.Path], merge=True):
         if not path.exists():
             raise ValueError(f"Path not found: {path}")
         if path.suffix == ".pkl":
-            with path.open("rb") as f:
-                md5s = pickle.load(f)
-            assert isinstance(md5s, (set, dict)), f"Invalid type: {type(md5s)}"
+            raise ValueError("Loading pickle file is unsafe, please use another file type.")
         elif path.suffix == ".json":
             with path.open() as f:
                 md5s = json.load(f)
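
With the `.pkl` branch now raising, any existing md5 pickle needs a one-time migration to JSON. A one-off sketch, assuming `md5s.pkl` is a hypothetical path to a file you created yourself and the conversion runs in an isolated environment; note that JSON has no set type, so a pickled `set` is stored as a sorted list and comes back from `json.load` as a `list`:

```python
import json
import pickle

# Trusted legacy file only; pickle.load can execute arbitrary code otherwise.
with open("md5s.pkl", "rb") as f:
    md5s = pickle.load(f)

assert isinstance(md5s, (set, dict)), f"Invalid type: {type(md5s)}"

with open("md5s.json", "w") as f:
    # Sets are not JSON-serializable, so store them as a sorted list.
    json.dump(sorted(md5s) if isinstance(md5s, set) else md5s, f)
```
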
Lines changed: 11 additions & 14 deletions

@@ -1,21 +1,18 @@
-import pickle
-
 import numpy as np


-def load_pkl(file_path):
-    with open(file_path, "rb") as f:
-        data = pickle.load(f)
-    return data
+def load_np(file_path: str):
+    with np.load(file_path) as data:
+        return [data[key] for key in data.files]


 def compare_arrays(arr1, arr2, rtol=1e-05, atol=1e-08):
     return np.allclose(arr1, arr2, rtol=rtol, atol=atol)


-def compare_pkl_files(file_path1, file_path2):
-    data1 = load_pkl(file_path1)
-    data2 = load_pkl(file_path2)
+def compare_npz_files(file_path1, file_path2):
+    data1 = load_np(file_path1)
+    data2 = load_np(file_path2)

     if len(data1) != len(data2):
         print(
@@ -33,11 +30,11 @@ compare_pkl_files(file_path1, file_path2)
 # Example usage
 max_batches_to_save = 5
 for i_batch in range(max_batches_to_save):
-    file_path1 = f"./rank0_batch{i_batch}.pkl"
-    file_path2 = f"./rank1_batch{i_batch}.pkl"
+    file_path1 = f"./rank0_batch{i_batch}.npz"
+    file_path2 = f"./rank1_batch{i_batch}.npz"
     print(f"Comparing rank0 and rank1 from batch {i_batch}")
-    compare_pkl_files(file_path1, file_path2)
+    compare_npz_files(file_path1, file_path2)

     if i_batch > 0:
-        print(f"Comparing batch{i_batch} and batch{i_batch-1} from rank0")
-        compare_pkl_files(f"./rank0_batch{i_batch}.pkl", f"./rank0_batch{i_batch-1}.pkl")
+        print(f"Comparing batch{i_batch} and batch{i_batch - 1} from rank0")
+        compare_npz_files(f"./rank0_batch{i_batch}.npz", f"./rank0_batch{i_batch - 1}.npz")
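
For reference, `compare_arrays` delegates to `np.allclose`, which checks elementwise that `|arr1 - arr2| <= atol + rtol * |arr2|`; the tolerance scales with the second argument, so the check is not symmetric. A quick illustration:

```python
import numpy as np

a = np.array([1.0, 1.00001])
b = np.array([1.0, 1.0])

# |1.00001 - 1.0| = 1e-5 <= 1e-8 + 1e-5 * 1.0, so this passes.
print(np.allclose(a, b, rtol=1e-05, atol=1e-08))  # True
print(np.allclose(a, b, rtol=1e-06, atol=1e-08))  # False: 1e-5 > 1.01e-6
```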

examples/hunyuanvideo/tests/parallel/test_sequence_parallel_data.py

Lines changed: 3 additions & 4 deletions

@@ -1,10 +1,10 @@
 import logging
 import os
-import pickle
 import sys
 from pathlib import Path
 from typing import Dict, Tuple, Union

+import numpy as np
 from jsonargparse import ActionConfigFile, ArgumentParser
 from jsonargparse.typing import path_type

@@ -123,11 +123,10 @@ def main(args):
             item = item.float().asnumpy()
         data.append(item)

-        save_path = f"{save_path_prefix}{batch_count}.pkl"
+        save_path = f"{save_path_prefix}{batch_count}.npz"
         save_path = str(Path(save_path).absolute())

-        with open(save_path, "wb") as f:
-            pickle.dump(data, f)
+        np.savez(save_path, *data)

         batch_count += 1
         print(f"The batch {batch_count} data has been saved to {save_path}")

examples/instantmesh/README.md

Lines changed: 31 additions & 0 deletions

@@ -118,6 +118,37 @@ One needs to patch `mindcv.models.vgg` in L62 to enable conv kernel bias to alig
 + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, pad_mode="pad", padding=1, has_bias=True)
 ```

+### ⚠️ Warning:
+If the dataset has pickle files, pay attention to the following security risks.
+
+- Loading pickle files can lead to:
+  - Remote Code Execution (RCE)
+  - Sensitive data leakage
+  - System compromise
+- By using pickle files, you acknowledge these risks and agree to:
+  - Convert pickle files only in isolated environments (e.g., a sandbox or container)
+  - Never load `.pkl` files from untrusted sources
+
+For more information, review the [documentation](https://docs.python.org/3/library/pickle.html) for the ``pickle`` module.
+
+Taking the following training dataset as an example:
+```shell
+unzip training_examples.zip && tree training_examples
+```
+```text
+training_examples
+├─input
+│  └──uid_0
+│     ├─000.png
+│     └──meta.pkl
+├─target
+│  └──uid_0
+│     └──000.png
+└──uid_set.pkl
+```
+The data type stored in ``meta.pkl`` is `List[np.ndarray]`, and the data type stored in ``uid_set.pkl`` is `List[str]`.
+We recommend converting the ``meta.pkl`` file to ``meta.npz`` format and the ``uid_set.pkl`` file to ``uid_set.json`` format yourself before training.
+
 ### Data Curation
 Following the original paper, we used Blender to render multiview frames for a 3D object in `.obj` for training. Typically for overfitting, three 3D objects from the objaverse dataset are used. We rendered 5 arbitrary views for each object with the corresponding camera parameters extracted.
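
A one-off conversion sketch matching the layout above (the paths follow the example tree; per the warning, run it only on files you created yourself, in an isolated environment):

```python
import json
import pickle

import numpy as np

# meta.pkl holds List[np.ndarray]; save it positionally as arr_0, arr_1, ...
with open("training_examples/input/uid_0/meta.pkl", "rb") as f:
    meta = pickle.load(f)
np.savez("training_examples/input/uid_0/meta.npz", *meta)

# uid_set.pkl holds List[str]; a plain list maps directly to JSON.
with open("training_examples/uid_set.pkl", "rb") as f:
    uids = pickle.load(f)
with open("training_examples/uid_set.json", "w") as f:
    json.dump(uids, f)
```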

examples/instantmesh/configs/instant-nerf-large-train.yaml

Lines changed: 1 addition & 1 deletion

@@ -32,7 +32,7 @@ data:
   target: data.objaverse.ObjaverseDataset
   params:
     root_dir: YOUR_PATH_DATA # for overfitting exp
-    meta_fname: uid_set.pkl
+    meta_fname: uid_set.json
     input_image_dir: input
     target_image_dir: input
     input_view_num: 3

examples/instantmesh/data/objaverse.py

Lines changed: 14 additions & 13 deletions

@@ -2,7 +2,6 @@
 import json
 import math
 import os
-import pickle
 import sys
 from pathlib import Path

@@ -24,9 +23,14 @@
 from mindspore.dataset.vision import Inter, Resize, ToPIL


-def read_pickle(pkl_path):
-    with open(pkl_path, "rb") as f:
-        return pickle.load(f)
+def read_np(np_path):
+    with np.load(np_path) as data:
+        return [data[key] for key in data.files]
+
+
+def read_json(json_path):
+    with open(json_path, "r") as f:
+        return json.load(f)


 def read_txt2list(txt_path):
@@ -38,11 +42,6 @@ def read_txt2list(txt_path):
     return list_entry


-def read_json(json_path):
-    with open(json_path) as f:
-        return json.load(f)
-
-
 def random_crop_return_params(imgs, height, width):
     """imgs: (b h w c)"""
     assert imgs.shape[1] >= height
@@ -63,7 +62,7 @@ class ObjaverseDataset:
     def __init__(
         self,
         root_dir="training_examples/",
-        meta_fname="uid_set.pkl",
+        meta_fname="uid_set.json",
         input_image_dir="input",
         target_image_dir="input",
         input_view_num=6,
@@ -95,9 +94,11 @@ def __init__(
         ]

         if meta_fname == "uid_set.pkl":
-            self.paths = read_pickle(os.path.join(root_dir, meta_fname))[-3:]
+            raise TypeError("Loading pickle file is unsafe, please use another file type.")
+        elif meta_fname == "uid_set.json":
+            self.paths = read_json(os.path.join(root_dir, meta_fname))[-3:]
             # [:1] # only takes the first scene for debugging
-            print("dataset read pickle")
+            print("dataset read json")
         elif meta_fname.split(".")[-1] == "txt":
             self.paths = read_txt2list(os.path.join(root_dir, meta_fname))
             print("reading the fixed pose target list as the dataset")
@@ -236,7 +237,7 @@ def __getitem__(self, index):
         alpha_list = []
         pose_list = []

-        K, azimuths, elevations, distances, cam_poses = read_pickle(os.path.join(input_image_path, "meta.pkl"))
+        K, azimuths, elevations, distances, cam_poses = read_np(os.path.join(input_image_path, "meta.npz"))
         input_cameras = cam_poses
         for idx in input_indices:
             image, alpha = self.load_im(os.path.join(input_image_path, "%03d.png" % idx), bg_white)
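
`read_np` returns the arrays in `data.files` order, so a replacement `meta.npz` must be written positionally in the same order the old `meta.pkl` tuple used. A writer sketch with dummy values (the shapes are illustrative assumptions, not taken from the repo):

```python
import numpy as np

# Dummy values for illustration; real data comes from the Blender renders.
K = np.eye(3, dtype=np.float32)                    # camera intrinsics
azimuths = np.zeros(5, dtype=np.float32)
elevations = np.zeros(5, dtype=np.float32)
distances = np.ones(5, dtype=np.float32)
cam_poses = np.zeros((5, 4, 4), dtype=np.float32)

# Positional savez stores arr_0 ... arr_4 in this order, which is the order
# the K, azimuths, elevations, distances, cam_poses unpacking relies on.
np.savez("meta.npz", K, azimuths, elevations, distances, cam_poses)
```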

examples/opensora_pku/opensora/dataset/t2v_datasets.py

Lines changed: 1 addition & 3 deletions

@@ -6,7 +6,6 @@
 import logging
 import math
 import os
-import pickle
 import random
 import time
 from collections import Counter
@@ -250,8 +249,7 @@ def define_frame_index(self, data):
             with open(anno, "r") as f:
                 sub_list = json.load(f)
         elif anno.endswith(".pkl"):
-            with open(anno, "rb") as f:
-                sub_list = pickle.load(f)
+            raise TypeError("Loading pickle file is unsafe, please use another file type.")
         for index, i in enumerate(tqdm(sub_list)):
             cnt += 1
             path = os.path.join(sub_root, i["path"])

examples/sv3d/configs/sv3d_u_train.yaml

Lines changed: 1 addition & 1 deletion

@@ -27,7 +27,7 @@ train:
   class_path: data.mulvideo_dataset.MulviewVideoDataset
   init_args:
     root_dir: itmh_training_data_9 # for overfitting exp
-    metadata: uid_set.pkl
+    metadata: uid_set.json
     image_dir: target # for overfitting w/o pose
     frames: 5 # to accommodate training with obj-rendering dataset

examples/sv3d/data/mulvideo_dataset.py

Lines changed: 5 additions & 5 deletions

@@ -1,6 +1,6 @@
+import json
 import logging
 import os
-import pickle
 import sys
 from pathlib import Path
 from typing import Any, List, Tuple
@@ -16,9 +16,9 @@
 _logger = logging.getLogger("")


-def read_pickle(pkl_path):
-    with open(pkl_path, "rb") as f:
-        return pickle.load(f)
+def read_json(json_path):
+    with open(json_path, "r") as f:
+        return json.load(f)


 def load_im(path, color):
@@ -39,7 +39,7 @@ def __init__(
         frames: int,
     ):
         self.root_dir = Path(root_dir)
-        self.paths = read_pickle(os.path.join(root_dir, metadata))
+        self.paths = read_json(os.path.join(root_dir, metadata))
         self.image_dir = image_dir
         self._frames = frames
         self._bg_white = [1.0, 1.0, 1.0]
