BryanW committed on
Commit
a60ff7d
·
verified ·
1 Parent(s): 9f88436

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Prism/LLaDA/LLaDA_Baseline/dllm_eval/decontamination/archiver.py +174 -0
  2. Prism/LLaDA/LLaDA_Baseline/dllm_eval/decontamination/decontaminate.py +166 -0
  3. Prism/LLaDA/LLaDA_Prism/.venv/bin/Activate.ps1 +247 -0
  4. Prism/LLaDA/LLaDA_Prism/.venv/bin/accelerate-launch +8 -0
  5. Prism/LLaDA/LLaDA_Prism/.venv/bin/activate.csh +27 -0
  6. Prism/LLaDA/LLaDA_Prism/.venv/bin/activate.fish +69 -0
  7. Prism/LLaDA/LLaDA_Prism/.venv/bin/f2py +8 -0
  8. Prism/LLaDA/LLaDA_Prism/.venv/bin/get_objgraph +54 -0
  9. Prism/LLaDA/LLaDA_Prism/.venv/bin/hf +8 -0
  10. Prism/LLaDA/LLaDA_Prism/.venv/bin/httpx +8 -0
  11. Prism/LLaDA/LLaDA_Prism/.venv/bin/markdown-it +8 -0
  12. Prism/LLaDA/LLaDA_Prism/.venv/bin/pip +8 -0
  13. Prism/LLaDA/LLaDA_Prism/.venv/bin/torchfrtrace +8 -0
  14. Prism/LLaDA/LLaDA_Prism/.venv/bin/torchrun +8 -0
  15. Prism/LLaDA/LLaDA_Prism/.venv/bin/typer +8 -0
  16. Prism/LLaDA/LLaDA_Prism/.venv/bin/undill +22 -0
  17. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs-25.1.0.dist-info/INSTALLER +1 -0
  18. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs-25.1.0.dist-info/METADATA +232 -0
  19. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs-25.1.0.dist-info/RECORD +55 -0
  20. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs-25.1.0.dist-info/WHEEL +4 -0
  21. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/__init__.py +7 -0
  22. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/ansi.py +102 -0
  23. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/ansitowin32.py +277 -0
  24. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/initialise.py +121 -0
  25. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/win32.py +180 -0
  26. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/winterm.py +195 -0
  27. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/ccuda.pxd +15 -0
  28. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/ccudart.cpython-312-x86_64-linux-gnu.so +0 -0
  29. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/cnvrtc.pxd +15 -0
  30. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/cnvrtc.pyx +7 -0
  31. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/cuda.cpp +0 -0
  32. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/cuda.cpython-312-x86_64-linux-gnu.so +0 -0
  33. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/cudart.pyx +22 -0
  34. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/nvrtc.pyx +22 -0
  35. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/arrow_dataset.py +0 -0
  36. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/arrow_reader.py +663 -0
  37. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/builder.bak.py +0 -0
  38. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/builder.py +0 -0
  39. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/config.py +272 -0
  40. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/dataset_dict.py +0 -0
  41. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/distributed.py +39 -0
  42. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/exceptions.py +196 -0
  43. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/inspect.py +582 -0
  44. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/keyhash.py +104 -0
  45. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/load.py +0 -0
  46. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/naming.py +84 -0
  47. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/streaming.py +142 -0
  48. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore-1.0.7.dist-info/INSTALLER +1 -0
  49. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore-1.0.7.dist-info/METADATA +616 -0
  50. Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore-1.0.7.dist-info/RECORD +68 -0
Prism/LLaDA/LLaDA_Baseline/dllm_eval/decontamination/archiver.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import io
3
+ import json
4
+ import mmap
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import jsonlines
10
+ import tqdm
11
+ import zstandard
12
+
13
+
14
def json_serial(obj: Any) -> str:
    """JSON serializer for objects not serializable by default json code.

    Emits the ISO-8601 representation of ``datetime.datetime``,
    ``datetime.date`` and ``datetime.time`` values.  Intended for use as
    the ``default=`` hook of :func:`json.dumps` (see ``Archive.add_data``).

    Raises:
        TypeError: for any type with no serialization defined here, matching
            the contract json.dumps expects from a ``default`` hook.
    """
    # datetime/time are generalized alongside the original datetime-only
    # check; datetime is a date subclass, but list all three for clarity.
    if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
        return obj.isoformat()
    raise TypeError(f"Type {type(obj)} not serializable")
20
+
21
+
22
# Modified version of lm_dataformat Archive for single file.
class Archive:
    """Write-only, zstd-compressed JSON-lines archive backed by one file.

    Each record is one JSON object per line: ``{"text": ..., "meta": ...}``.
    Call :meth:`commit` exactly once when done; it finishes the zstd frame
    and closes the file.
    """

    def __init__(self, file_path: str, compression_level: int = 3) -> None:
        self.file_path = file_path
        # Create missing parent directories on demand so callers may pass
        # any output path.
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        self.fh = open(self.file_path, "wb")
        self.cctx = zstandard.ZstdCompressor(level=compression_level)
        self.compressor = self.cctx.stream_writer(self.fh)

    def add_data(self, data, meta=None) -> None:
        """Append one record; *meta* defaults to an empty dict."""
        record = {"text": data, "meta": {} if meta is None else meta}
        # json_serial handles datetime values inside the record.
        encoded = json.dumps(record, default=json_serial).encode("UTF-8")
        self.compressor.write(encoded + b"\n")

    def commit(self) -> None:
        """Finish the zstd frame, flush, and close the underlying file."""
        self.compressor.flush(zstandard.FLUSH_FRAME)
        self.fh.flush()
        self.fh.close()
47
+
48
+
49
# Modified version of lm_dataformat Reader with self.fh set, allowing peeking for tqdm.
class Reader:
    """Stream records back out of a zstd-compressed JSON-lines archive."""

    def __init__(self) -> None:
        pass

    def read(
        self,
        file,
        get_meta: bool = False,
        autojoin_paragraphs: bool = True,
        para_joiner: str = "\n\n",
    ):
        """Yield each record's text (or ``(text, meta)`` when *get_meta*).

        List-valued texts are joined with *para_joiner* when
        *autojoin_paragraphs* is set.  Bare-string records are accepted for
        legacy compatibility; they carry no metadata, so *get_meta* must be
        False for such files.
        """
        with open(file, "rb") as fh:
            # Expose the raw handle so callers can peek at the position
            # (e.g. to drive a progress bar).
            self.fh = fh
            decompressor = zstandard.ZstdDecompressor()
            stream = io.BufferedReader(decompressor.stream_reader(fh))
            for record in jsonlines.Reader(stream):
                if isinstance(record, str):
                    # naive jsonl where each object is just the string
                    # itself, with no meta. For legacy compatibility.
                    assert not get_meta
                    yield record
                    continue

                text = record["text"]
                if autojoin_paragraphs and isinstance(text, list):
                    text = para_joiner.join(text)

                if get_meta:
                    yield text, record.get("meta", {})
                else:
                    yield text
82
+
83
+
84
class TextArchive:
    """Append newline-terminated UTF-8 text lines to a binary file."""

    def __init__(self, file_path, mode: str = "rb+") -> None:
        self.file_path = file_path
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        # The default "rb+" mode requires the file to exist, so create an
        # empty one first when necessary.
        if not os.path.exists(file_path):
            Path(file_path).touch()

        self.fh = open(self.file_path, mode)

    def add_data(self, data) -> None:
        """Write *data* as a single UTF-8 encoded line."""
        encoded = data.encode("UTF-8")
        self.fh.write(encoded + b"\n")

    def commit(self) -> None:
        """Flush any buffered writes and close the file."""
        self.fh.flush()
        self.fh.close()
102
+
103
+
104
class TextReader:
    """Line-oriented readers over a plain-text file.

    The fast paths mmap the file and yield lines with the trailing
    newline stripped.  ``read_slow`` is a buffered-I/O fallback.
    """

    def __init__(self, file_path) -> None:
        self.file_path = file_path

    # Optimized mmap read with infrequent tqdm updates to maintain speed
    # Tested up to 250MB/s.
    def read_tqdm(self, update_frequency: int = 10000):
        """Yield lines while updating a byte-based tqdm bar.

        The bar advances only once per *update_frequency* lines to keep
        overhead low; bytes from a trailing partial batch are not
        reflected in the bar (cosmetic only).
        """
        current_file_position = 0
        line_counter = 0
        with (
            open(self.file_path, "r", encoding="utf-8") as fh,
            tqdm.tqdm(
                total=os.path.getsize(self.file_path),
                dynamic_ncols=True,
                unit="byte",
                unit_scale=1,
            ) as progress,
        ):
            with mmap.mmap(fh.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj:
                for line in iter(mmap_obj.readline, b""):
                    line = line.decode("utf-8")
                    line_counter += 1
                    if line_counter == update_frequency:
                        new_file_pos = mmap_obj.tell()
                        bytes_read = new_file_pos - current_file_position
                        current_file_position = new_file_pos
                        progress.update(bytes_read)
                        line_counter = 0
                    yield line[:-1]

    def read_and_tell(self):
        """Yield ``(line, raw_bytes_read)`` pairs.

        ``raw_bytes_read`` is how far the mmap cursor advanced for that
        line (including its newline), letting callers track raw progress.
        """
        current_file_position = 0
        with open(self.file_path, "r", encoding="utf8") as fh:
            with mmap.mmap(fh.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj:
                for line in iter(mmap_obj.readline, b""):
                    line = line.decode("utf-8")
                    new_file_pos = mmap_obj.tell()
                    raw_bytes_read = new_file_pos - current_file_position
                    current_file_position = new_file_pos
                    yield line[:-1], raw_bytes_read

    def read(self):
        """Yield lines via mmap with no progress reporting."""
        with open(self.file_path, "r", encoding="utf8") as fh:
            with mmap.mmap(fh.fileno(), length=0, access=mmap.ACCESS_READ) as mmap_obj:
                for line in iter(mmap_obj.readline, b""):
                    line = line.decode("utf-8")
                    yield line[:-1]

    def read_slow(self):
        """Plain buffered-I/O fallback (no mmap)."""
        with open(self.file_path, "r", encoding="utf8") as fh:
            # BUGFIX: the original tested ``line == -1`` — a C idiom that is
            # dead code in Python, where readline() signals EOF with "".
            # The empty string is the sole termination condition.
            for line in iter(fh.readline, ""):
                yield line[:-1]
160
+
161
+
162
# Optimized for speed. Decompresses the archive in shell before
# using the mmap'd TextReader.
class ZStdTextReader:
    """Decompress a ``.zst`` file with the external ``zstd`` tool, then
    stream its lines through the mmap-based :class:`TextReader`.

    The temporary decompressed copy is removed after iteration finishes.
    """

    def __init__(self, file) -> None:
        self.file = file

    def read_tqdm(self):
        """Yield decompressed lines with a tqdm progress bar."""
        import subprocess  # local import: only this helper shells out

        decompressed_file = self.file[:-4]  # strip the ".zst" suffix
        print("Decompressing file, please wait...")
        # BUGFIX: the original used os.system(f"zstd -d {self.file}"),
        # which breaks (or allows shell injection) for paths containing
        # spaces or metacharacters, and silently ignored failures.  An
        # argument list avoids the shell entirely, and check=True raises
        # immediately instead of later failing on a missing file.
        subprocess.run(["zstd", "-d", self.file], check=True)  # linux decompress is faster
        reader = TextReader(decompressed_file)
        yield from reader.read_tqdm()
        os.remove(decompressed_file)
Prism/LLaDA/LLaDA_Baseline/dllm_eval/decontamination/decontaminate.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import collections
2
+ import glob
3
+ import json
4
+ import os
5
+ import pickle
6
+ import random
7
+ import time
8
+
9
+ from .archiver import ZStdTextReader
10
+ from .janitor import Janitor, word_ngrams
11
+
12
+
13
# Was used for testing the evaluator decoupled from the full logic below
def get_train_overlap_stub(docs: dict, ngrams_path: str, ngrams_n_size: str):
    """Pretend a fixed 10% of *docs* overlap the training set.

    Returns a random sample of document indices.  *ngrams_path* and
    *ngrams_n_size* are accepted only to mirror the real implementation's
    signature; they are unused.
    """
    simulated_overlap_fraction = 0.1
    n_contaminated = int(len(docs) * simulated_overlap_fraction)
    return random.sample(range(len(docs)), n_contaminated)
18
+
19
+
20
# Returns a dictionary containing all overlapping documents in each
# task. In the standard use case, an overlap occurs when any of the 13-grams
# found in the task document exist in the training set documents.
#
# To generate 13-grams for the pile see scripts/clean_training_data. The final output of these
# scripts are an info.json file containing the n_gram_size (13) and a bunch of "ngrams_{x}.bkt.txt.sorted.zst"
# files. These should exist in the "ngrams_path" provided to this function.


# Algorithm:
# 1. Build lookups for each dataset {ngram: list(document_ids)}
# 2. Merge into an overall lookup {ngram: [(task_name, task_set, doc_ids),]}
# 3. Full scan the 13-grams from the training set against the merged lookup,
#    saving matches in the "duplicates" dictionary {(task_name, task_set): set(doc_ids)}
# 4. Strip the task_set from the dictionary keys and return
#
# We cache the task+set lookups as well as the overlaps.
def get_train_overlap(docs_by_task_set: dict, ngrams_path: str, limit: int) -> dict:
    """Map each task_name to the set of its doc ids that overlap the training set.

    Side effects: creates ``data/{task_name}/`` directories and writes/reads
    ``.lookup`` and ``.overlaps`` pickle caches under them (relative to the
    current working directory).

    Args:
        docs_by_task_set: {(task_name, task_set): iterable of document strings}.
        ngrams_path: directory holding info.json and *.sorted.zst ngram files.
        limit: only used to key the on-disk caches.
    """
    # return get_train_overlap_stub(docs, ngrams_path, ngrams_n_size)

    info_dict_path = os.path.join(ngrams_path, "info.json")
    info_dict = json.load(open(info_dict_path, "r", encoding="utf-8"))
    ngrams_n_size = info_dict["ngram_size"]

    janitor = Janitor()

    # Build lookup for each dataset first in case we use different task combinations later
    print("Building Lookups...")
    start = time.perf_counter()

    def get_overlaps_dump_path(task_name, task_set, ngrams_n_size, limit) -> str:
        # Cache file for the finished overlap sets of one (task, set) combo.
        return f"data/{task_name}/{task_set}_{ngrams_n_size}grams_limit{limit}.overlaps"

    lookups = {}
    duplicates = {}  # {(task_name, task_set): set(doc_ids)}
    sets_to_decontaminate = len(docs_by_task_set.keys())

    for (task_name, task_set), docs in docs_by_task_set.items():
        if not os.path.exists(f"data/{task_name}"):
            os.mkdir(f"data/{task_name}")

        # Check if we've decontaminated this combination before
        overlaps_dump_path = get_overlaps_dump_path(
            task_name, task_set, ngrams_n_size, limit
        )
        if os.path.exists(overlaps_dump_path):
            # Cached result: skip both lookup building and the scan below.
            duplicates[(task_name, task_set)] = pickle.load(
                open(overlaps_dump_path, "rb")
            )
            sets_to_decontaminate -= 1
            continue
        else:
            duplicates[(task_name, task_set)] = set()

        # Build/load the task lookup {ngram: set(documents)}.
        task_set_lookup_path = (
            f"data/{task_name}/{task_set}_{ngrams_n_size}grams_limit{limit}.lookup"
        )
        if os.path.exists(task_set_lookup_path):
            print(f"{task_set_lookup_path} available, loading...")
            lookups[(task_name, task_set)] = pickle.load(
                open(task_set_lookup_path, "rb")
            )
        else:
            print(f"{task_set_lookup_path} not available, building...")
            lookup = collections.defaultdict(set)

            for doc_id, document in enumerate(docs):
                # Normalize before hashing so the comparison is insensitive
                # to the same transformations applied to the training set.
                ngrams = word_ngrams(janitor.normalize_string(document), ngrams_n_size)
                for ngram in ngrams:
                    lookup[ngram].add(doc_id)

            pickle.dump(lookup, open(task_set_lookup_path, "wb"))
            lookups[(task_name, task_set)] = lookup

    elapsed = time.perf_counter() - start
    print(f"Building lookups took {elapsed:0.5f} seconds.")

    matched_ngrams = []

    if sets_to_decontaminate > 0:
        print("Merging lookups...")
        start = time.perf_counter()
        merged_lookup = collections.defaultdict(list)
        for (task_name, task_set), lookup in lookups.items():
            for ngram, doc_ids in lookup.items():
                merged_lookup[ngram].append((task_name, task_set, doc_ids))

        elapsed = time.perf_counter() - start
        print(f"Merging lookups took {elapsed:0.5f} seconds.")

        print(f"{ngrams_n_size} grams files found in {ngrams_path}:")
        files = glob.glob(os.path.join(ngrams_path, "*.sorted.zst"))
        print(files)

        for file in files:
            start = time.perf_counter()
            print(f"Scanning {file}")
            reader = ZStdTextReader(file)
            total_ngrams = 0
            unique_ngrams = 0
            matching_unique = 0
            non_matching_unique = 0

            current_ngram = ""
            for line in reader.read_tqdm():  # Scan training set ngrams file
                total_ngrams += 1
                # Each line is "<ngram> <document_id>"; the file is sorted,
                # so equal ngrams are adjacent (enables the dedup below).
                [ngram, document_id] = line.rsplit(" ", 1)
                if (
                    ngram != current_ngram
                ):  # Only need to match the ngram once in training set
                    unique_ngrams += 1
                    current_ngram = ngram
                    if ngram in merged_lookup:
                        matched_ngrams.append(ngram)  # For logging
                        matching_unique += 1
                        for task_name, task_set, doc_ids in merged_lookup[ngram]:
                            task_doc_set = duplicates[(task_name, task_set)]
                            for doc_id in doc_ids:  # Record contamination across all relevant task/set combos
                                task_doc_set.add(doc_id)
                        del merged_lookup[ngram]  # No point matching again
                    else:
                        non_matching_unique += 1

            print(f"Total Ngrams: {total_ngrams}")
            print(f"Unique Ngrams: {unique_ngrams}")
            print(f"Unique Matching: {matching_unique}")
            print(f"Unique Non Matching: {non_matching_unique}")
            print("Matched ngrams:")
            for ngram in matched_ngrams:
                print(ngram)

            elapsed = time.perf_counter() - start
            print(f"Read took {elapsed:0.5f} seconds.")
            print(f"Speed: {(os.path.getsize(file) / 1000000.0) / elapsed}MB/second")

        print(duplicates)

        # Dump overlaps separately
        for (task_name, task_set), doc_ids in duplicates.items():
            overlaps_dump_path = get_overlaps_dump_path(
                task_name, task_set, ngrams_n_size, limit
            )
            pickle.dump(doc_ids, open(overlaps_dump_path, "wb"))

    # Strip task set and return
    return {task_name: doc_ids for (task_name, task_set), doc_ids in duplicates.items()}
Prism/LLaDA/LLaDA_Prism/.venv/bin/Activate.ps1 ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <#
2
+ .Synopsis
3
+ Activate a Python virtual environment for the current PowerShell session.
4
+
5
+ .Description
6
+ Pushes the python executable for a virtual environment to the front of the
7
+ $Env:PATH environment variable and sets the prompt to signify that you are
8
+ in a Python virtual environment. Makes use of the command line switches as
9
+ well as the `pyvenv.cfg` file values present in the virtual environment.
10
+
11
+ .Parameter VenvDir
12
+ Path to the directory that contains the virtual environment to activate. The
13
+ default value for this is the parent of the directory that the Activate.ps1
14
+ script is located within.
15
+
16
+ .Parameter Prompt
17
+ The prompt prefix to display when this virtual environment is activated. By
18
+ default, this prompt is the name of the virtual environment folder (VenvDir)
19
+ surrounded by parentheses and followed by a single space (ie. '(.venv) ').
20
+
21
+ .Example
22
+ Activate.ps1
23
+ Activates the Python virtual environment that contains the Activate.ps1 script.
24
+
25
+ .Example
26
+ Activate.ps1 -Verbose
27
+ Activates the Python virtual environment that contains the Activate.ps1 script,
28
+ and shows extra information about the activation as it executes.
29
+
30
+ .Example
31
+ Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
32
+ Activates the Python virtual environment located in the specified location.
33
+
34
+ .Example
35
+ Activate.ps1 -Prompt "MyPython"
36
+ Activates the Python virtual environment that contains the Activate.ps1 script,
37
+ and prefixes the current prompt with the specified string (surrounded in
38
+ parentheses) while the virtual environment is active.
39
+
40
+ .Notes
41
+ On Windows, it may be required to enable this Activate.ps1 script by setting the
42
+ execution policy for the user. You can do this by issuing the following PowerShell
43
+ command:
44
+
45
+ PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
46
+
47
+ For more information on Execution Policies:
48
+ https://go.microsoft.com/fwlink/?LinkID=135170
49
+
50
+ #>
51
# Script parameters: both are optional; defaults are derived from the
# script location and pyvenv.cfg further below.
Param(
    [Parameter(Mandatory = $false)]
    [String]
    $VenvDir,
    [Parameter(Mandatory = $false)]
    [String]
    $Prompt
)

<# Function declarations --------------------------------------------------- #>

<#
.Synopsis
Remove all shell session elements added by the Activate script, including the
addition of the virtual environment's Python executable from the beginning of
the PATH variable.

.Parameter NonDestructive
If present, do not remove this function from the global namespace for the
session.

#>
function global:deactivate ([switch]$NonDestructive) {
    # Revert to original values

    # The prior prompt:
    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
    }

    # The prior PYTHONHOME:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
    }

    # The prior PATH:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
    }

    # Just remove the VIRTUAL_ENV altogether:
    if (Test-Path -Path Env:VIRTUAL_ENV) {
        Remove-Item -Path env:VIRTUAL_ENV
    }

    # Just remove VIRTUAL_ENV_PROMPT altogether.
    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
    }

    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }

    # Leave deactivate function in the global namespace if requested:
    # (the script calls `deactivate -nondestructive` itself during activation)
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
}
+ }
114
+
115
+ <#
116
+ .Description
117
+ Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
118
+ given folder, and returns them in a map.
119
+
120
+ For each line in the pyvenv.cfg file, if that line can be parsed into exactly
121
+ two strings separated by `=` (with any amount of whitespace surrounding the =)
122
+ then it is considered a `key = value` line. The left hand string is the key,
123
+ the right hand is the value.
124
+
125
+ If the value starts with a `'` or a `"` then the first and last character is
126
+ stripped from the value before being captured.
127
+
128
+ .Parameter ConfigDir
129
+ Path to the directory that contains the `pyvenv.cfg` file.
130
+ #>
131
+ function Get-PyVenvConfig(
132
+ [String]
133
+ $ConfigDir
134
+ ) {
135
+ Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
136
+
137
+ # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
138
+ $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
139
+
140
+ # An empty map will be returned if no config file is found.
141
+ $pyvenvConfig = @{ }
142
+
143
+ if ($pyvenvConfigPath) {
144
+
145
+ Write-Verbose "File exists, parse `key = value` lines"
146
+ $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
147
+
148
+ $pyvenvConfigContent | ForEach-Object {
149
+ $keyval = $PSItem -split "\s*=\s*", 2
150
+ if ($keyval[0] -and $keyval[1]) {
151
+ $val = $keyval[1]
152
+
153
+ # Remove extraneous quotations around a string value.
154
+ if ("'""".Contains($val.Substring(0, 1))) {
155
+ $val = $val.Substring(1, $val.Length - 2)
156
+ }
157
+
158
+ $pyvenvConfig[$keyval[0]] = $val
159
+ Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
160
+ }
161
+ }
162
+ }
163
+ return $pyvenvConfig
164
+ }
165
+
166
+
167
<# Begin Activate script --------------------------------------------------- #>

# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath

Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"

# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
}

# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir

# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
}

Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"

# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive

# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir

if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {

    Write-Verbose "Setting prompt to '$Prompt'"

    # Set the prompt to include the env name
    # Make sure _OLD_VIRTUAL_PROMPT is global
    function global:_OLD_VIRTUAL_PROMPT { "" }
    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt

    function global:prompt {
        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
        _OLD_VIRTUAL_PROMPT
    }
    $env:VIRTUAL_ENV_PROMPT = $Prompt
}

# Clear PYTHONHOME
# (saved to _OLD_VIRTUAL_PYTHONHOME so deactivate can restore it)
if (Test-Path -Path Env:PYTHONHOME) {
    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
    Remove-Item -Path Env:PYTHONHOME
}

# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
Prism/LLaDA/LLaDA_Prism/.venv/bin/accelerate-launch ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
#!/gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv/bin/python
# -*- coding: utf-8 -*-
# Auto-generated pip console-script shim: dispatches to accelerate's launch CLI.
import re
import sys
from accelerate.commands.launch import main
if __name__ == '__main__':
    # Strip setuptools wrapper suffixes from argv[0] so the CLI reports a
    # clean program name.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
Prism/LLaDA/LLaDA_Prism/.venv/bin/activate.csh ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# This file must be used with "source bin/activate.csh" *from csh*.
# You cannot run it directly.

# Created by Davide Di Blasi <davidedb@gmail.com>.
# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>

# Restores PATH/prompt saved below; "deactivate nondestructive" keeps the alias.
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'

# Unset irrelevant variables.
deactivate nondestructive

setenv VIRTUAL_ENV /gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv

# Save the old PATH, then put the venv's bin directory first.
set _OLD_VIRTUAL_PATH="$PATH"
setenv PATH "$VIRTUAL_ENV/"bin":$PATH"


set _OLD_VIRTUAL_PROMPT="$prompt"

if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
    set prompt = '(.venv) '"$prompt"
    setenv VIRTUAL_ENV_PROMPT '(.venv) '
endif

alias pydoc python -m pydoc

rehash
Prism/LLaDA/LLaDA_Prism/.venv/bin/activate.fish ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
# (https://fishshell.com/). You cannot run it directly.

function deactivate -d "Exit virtual environment and return to normal shell environment"
    # reset old environment variables
    if test -n "$_OLD_VIRTUAL_PATH"
        set -gx PATH $_OLD_VIRTUAL_PATH
        set -e _OLD_VIRTUAL_PATH
    end
    if test -n "$_OLD_VIRTUAL_PYTHONHOME"
        set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
        set -e _OLD_VIRTUAL_PYTHONHOME
    end

    if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
        set -e _OLD_FISH_PROMPT_OVERRIDE
        # prevents error when using nested fish instances (Issue #93858)
        if functions -q _old_fish_prompt
            functions -e fish_prompt
            functions -c _old_fish_prompt fish_prompt
            functions -e _old_fish_prompt
        end
    end

    set -e VIRTUAL_ENV
    set -e VIRTUAL_ENV_PROMPT
    if test "$argv[1]" != "nondestructive"
        # Self-destruct!
        functions -e deactivate
    end
end

# Unset irrelevant variables.
deactivate nondestructive

set -gx VIRTUAL_ENV /gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv

# Save the old PATH, then put the venv's bin directory first.
set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/"bin $PATH

# Unset PYTHONHOME if set.
if set -q PYTHONHOME
    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
    set -e PYTHONHOME
end

if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
    # fish uses a function instead of an env var to generate the prompt.

    # Save the current fish_prompt function as the function _old_fish_prompt.
    functions -c fish_prompt _old_fish_prompt

    # With the original prompt function renamed, we can override with our own.
    function fish_prompt
        # Save the return status of the last command.
        set -l old_status $status

        # Output the venv prompt; color taken from the blue of the Python logo.
        printf "%s%s%s" (set_color 4B8BBE) '(.venv) ' (set_color normal)

        # Restore the return status of the previous command.
        echo "exit $old_status" | .
        # Output the original/"old" prompt.
        _old_fish_prompt
    end

    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
    set -gx VIRTUAL_ENV_PROMPT '(.venv) '
end
Prism/LLaDA/LLaDA_Prism/.venv/bin/f2py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
#!/gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv/bin/python
# -*- coding: utf-8 -*-
# Auto-generated pip console-script shim: dispatches to NumPy's f2py CLI.
import re
import sys
from numpy.f2py.f2py2e import main
if __name__ == '__main__':
    # Strip setuptools wrapper suffixes from argv[0] so the CLI reports a
    # clean program name.
    sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
Prism/LLaDA/LLaDA_Prism/.venv/bin/get_objgraph ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv/bin/python
2
+ #
3
+ # Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
4
+ # Copyright (c) 2008-2016 California Institute of Technology.
5
+ # Copyright (c) 2016-2024 The Uncertainty Quantification Foundation.
6
+ # License: 3-clause BSD. The full license text is available at:
7
+ # - https://github.com/uqfoundation/dill/blob/master/LICENSE
8
+ """
9
+ display the reference paths for objects in ``dill.types`` or a .pkl file
10
+
11
+ Notes:
12
+ the generated image is useful in showing the pointer references in
13
+ objects that are or can be pickled. Any object in ``dill.objects``
14
+ listed in ``dill.load_types(picklable=True, unpicklable=True)`` works.
15
+
16
+ Examples::
17
+
18
+ $ get_objgraph ArrayType
19
+ Image generated as ArrayType.png
20
+ """
21
+
22
+ import dill as pickle
23
+ #pickle.debug.trace(True)
24
+ #import pickle
25
+
26
+ # get all objects for testing
27
+ from dill import load_types
28
+ load_types(pickleable=True,unpickleable=True)
29
+ from dill import objects
30
+
31
+ if __name__ == "__main__":
32
+ import sys
33
+ if len(sys.argv) != 2:
34
+ print ("Please provide exactly one file or type name (e.g. 'IntType')")
35
+ msg = "\n"
36
+ for objtype in list(objects.keys())[:40]:
37
+ msg += objtype + ', '
38
+ print (msg + "...")
39
+ else:
40
+ objtype = str(sys.argv[-1])
41
+ try:
42
+ obj = objects[objtype]
43
+ except KeyError:
44
+ obj = pickle.load(open(objtype,'rb'))
45
+ import os
46
+ objtype = os.path.splitext(objtype)[0]
47
+ try:
48
+ import objgraph
49
+ objgraph.show_refs(obj, filename=objtype+'.png')
50
+ except ImportError:
51
+ print ("Please install 'objgraph' to view object graphs")
52
+
53
+
54
+ # EOF
Prism/LLaDA/LLaDA_Prism/.venv/bin/hf ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #!/gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv/bin/python
2
+ # -*- coding: utf-8 -*-
3
+ import re
4
+ import sys
5
+ from huggingface_hub.cli.hf import main
6
+ if __name__ == '__main__':
7
+ sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
8
+ sys.exit(main())
Prism/LLaDA/LLaDA_Prism/.venv/bin/httpx ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #!/gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv/bin/python
2
+ # -*- coding: utf-8 -*-
3
+ import re
4
+ import sys
5
+ from httpx import main
6
+ if __name__ == '__main__':
7
+ sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
8
+ sys.exit(main())
Prism/LLaDA/LLaDA_Prism/.venv/bin/markdown-it ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #!/gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv/bin/python
2
+ # -*- coding: utf-8 -*-
3
+ import re
4
+ import sys
5
+ from markdown_it.cli.parse import main
6
+ if __name__ == '__main__':
7
+ sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
8
+ sys.exit(main())
Prism/LLaDA/LLaDA_Prism/.venv/bin/pip ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #!/gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv/bin/python
2
+ # -*- coding: utf-8 -*-
3
+ import re
4
+ import sys
5
+ from pip._internal.cli.main import main
6
+ if __name__ == '__main__':
7
+ sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
8
+ sys.exit(main())
Prism/LLaDA/LLaDA_Prism/.venv/bin/torchfrtrace ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #!/gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv/bin/python
2
+ # -*- coding: utf-8 -*-
3
+ import re
4
+ import sys
5
+ from torch.distributed.flight_recorder.fr_trace import main
6
+ if __name__ == '__main__':
7
+ sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
8
+ sys.exit(main())
Prism/LLaDA/LLaDA_Prism/.venv/bin/torchrun ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #!/gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv/bin/python
2
+ # -*- coding: utf-8 -*-
3
+ import re
4
+ import sys
5
+ from torch.distributed.run import main
6
+ if __name__ == '__main__':
7
+ sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
8
+ sys.exit(main())
Prism/LLaDA/LLaDA_Prism/.venv/bin/typer ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #!/gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv/bin/python
2
+ # -*- coding: utf-8 -*-
3
+ import re
4
+ import sys
5
+ from typer.cli import main
6
+ if __name__ == '__main__':
7
+ sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
8
+ sys.exit(main())
Prism/LLaDA/LLaDA_Prism/.venv/bin/undill ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/gfs/space/private/fengzl/world_model/Prism/LLaDA/LLaDA_Prism/.venv/bin/python
2
+ #
3
+ # Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
4
+ # Copyright (c) 2008-2016 California Institute of Technology.
5
+ # Copyright (c) 2016-2024 The Uncertainty Quantification Foundation.
6
+ # License: 3-clause BSD. The full license text is available at:
7
+ # - https://github.com/uqfoundation/dill/blob/master/LICENSE
8
+ """
9
+ unpickle the contents of a pickled object file
10
+
11
+ Examples::
12
+
13
+ $ undill hello.pkl
14
+ ['hello', 'world']
15
+ """
16
+
17
+ if __name__ == '__main__':
18
+ import sys
19
+ import dill
20
+ for file in sys.argv[1:]:
21
+ print (dill.load(open(file,'rb')))
22
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs-25.1.0.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs-25.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: attrs
3
+ Version: 25.1.0
4
+ Summary: Classes Without Boilerplate
5
+ Project-URL: Documentation, https://www.attrs.org/
6
+ Project-URL: Changelog, https://www.attrs.org/en/stable/changelog.html
7
+ Project-URL: GitHub, https://github.com/python-attrs/attrs
8
+ Project-URL: Funding, https://github.com/sponsors/hynek
9
+ Project-URL: Tidelift, https://tidelift.com/subscription/pkg/pypi-attrs?utm_source=pypi-attrs&utm_medium=pypi
10
+ Author-email: Hynek Schlawack <hs@ox.cx>
11
+ License-Expression: MIT
12
+ License-File: LICENSE
13
+ Keywords: attribute,boilerplate,class
14
+ Classifier: Development Status :: 5 - Production/Stable
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Programming Language :: Python :: Implementation :: CPython
22
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.8
25
+ Provides-Extra: benchmark
26
+ Requires-Dist: cloudpickle; (platform_python_implementation == 'CPython') and extra == 'benchmark'
27
+ Requires-Dist: hypothesis; extra == 'benchmark'
28
+ Requires-Dist: mypy>=1.11.1; (platform_python_implementation == 'CPython' and python_version >= '3.10') and extra == 'benchmark'
29
+ Requires-Dist: pympler; extra == 'benchmark'
30
+ Requires-Dist: pytest-codspeed; extra == 'benchmark'
31
+ Requires-Dist: pytest-mypy-plugins; (platform_python_implementation == 'CPython' and python_version >= '3.10') and extra == 'benchmark'
32
+ Requires-Dist: pytest-xdist[psutil]; extra == 'benchmark'
33
+ Requires-Dist: pytest>=4.3.0; extra == 'benchmark'
34
+ Provides-Extra: cov
35
+ Requires-Dist: cloudpickle; (platform_python_implementation == 'CPython') and extra == 'cov'
36
+ Requires-Dist: coverage[toml]>=5.3; extra == 'cov'
37
+ Requires-Dist: hypothesis; extra == 'cov'
38
+ Requires-Dist: mypy>=1.11.1; (platform_python_implementation == 'CPython' and python_version >= '3.10') and extra == 'cov'
39
+ Requires-Dist: pympler; extra == 'cov'
40
+ Requires-Dist: pytest-mypy-plugins; (platform_python_implementation == 'CPython' and python_version >= '3.10') and extra == 'cov'
41
+ Requires-Dist: pytest-xdist[psutil]; extra == 'cov'
42
+ Requires-Dist: pytest>=4.3.0; extra == 'cov'
43
+ Provides-Extra: dev
44
+ Requires-Dist: cloudpickle; (platform_python_implementation == 'CPython') and extra == 'dev'
45
+ Requires-Dist: hypothesis; extra == 'dev'
46
+ Requires-Dist: mypy>=1.11.1; (platform_python_implementation == 'CPython' and python_version >= '3.10') and extra == 'dev'
47
+ Requires-Dist: pre-commit-uv; extra == 'dev'
48
+ Requires-Dist: pympler; extra == 'dev'
49
+ Requires-Dist: pytest-mypy-plugins; (platform_python_implementation == 'CPython' and python_version >= '3.10') and extra == 'dev'
50
+ Requires-Dist: pytest-xdist[psutil]; extra == 'dev'
51
+ Requires-Dist: pytest>=4.3.0; extra == 'dev'
52
+ Provides-Extra: docs
53
+ Requires-Dist: cogapp; extra == 'docs'
54
+ Requires-Dist: furo; extra == 'docs'
55
+ Requires-Dist: myst-parser; extra == 'docs'
56
+ Requires-Dist: sphinx; extra == 'docs'
57
+ Requires-Dist: sphinx-notfound-page; extra == 'docs'
58
+ Requires-Dist: sphinxcontrib-towncrier; extra == 'docs'
59
+ Requires-Dist: towncrier<24.7; extra == 'docs'
60
+ Provides-Extra: tests
61
+ Requires-Dist: cloudpickle; (platform_python_implementation == 'CPython') and extra == 'tests'
62
+ Requires-Dist: hypothesis; extra == 'tests'
63
+ Requires-Dist: mypy>=1.11.1; (platform_python_implementation == 'CPython' and python_version >= '3.10') and extra == 'tests'
64
+ Requires-Dist: pympler; extra == 'tests'
65
+ Requires-Dist: pytest-mypy-plugins; (platform_python_implementation == 'CPython' and python_version >= '3.10') and extra == 'tests'
66
+ Requires-Dist: pytest-xdist[psutil]; extra == 'tests'
67
+ Requires-Dist: pytest>=4.3.0; extra == 'tests'
68
+ Provides-Extra: tests-mypy
69
+ Requires-Dist: mypy>=1.11.1; (platform_python_implementation == 'CPython' and python_version >= '3.10') and extra == 'tests-mypy'
70
+ Requires-Dist: pytest-mypy-plugins; (platform_python_implementation == 'CPython' and python_version >= '3.10') and extra == 'tests-mypy'
71
+ Description-Content-Type: text/markdown
72
+
73
+ <p align="center">
74
+ <a href="https://www.attrs.org/">
75
+ <img src="https://raw.githubusercontent.com/python-attrs/attrs/main/docs/_static/attrs_logo.svg" width="35%" alt="attrs" />
76
+ </a>
77
+ </p>
78
+
79
+
80
+ *attrs* is the Python package that will bring back the **joy** of **writing classes** by relieving you from the drudgery of implementing object protocols (aka [dunder methods](https://www.attrs.org/en/latest/glossary.html#term-dunder-methods)).
81
+ [Trusted by NASA](https://docs.github.com/en/account-and-profile/setting-up-and-managing-your-github-profile/customizing-your-profile/personalizing-your-profile#list-of-qualifying-repositories-for-mars-2020-helicopter-contributor-achievement) for Mars missions since 2020!
82
+
83
+ Its main goal is to help you to write **concise** and **correct** software without slowing down your code.
84
+
85
+
86
+ ## Sponsors
87
+
88
+ *attrs* would not be possible without our [amazing sponsors](https://github.com/sponsors/hynek).
89
+ Especially those generously supporting us at the *The Organization* tier and higher:
90
+
91
+ <!-- sponsor-break-begin -->
92
+
93
+ <p align="center">
94
+
95
+ <!-- [[[cog
96
+ import pathlib, tomllib
97
+
98
+ for sponsor in tomllib.loads(pathlib.Path("pyproject.toml").read_text())["tool"]["sponcon"]["sponsors"]:
99
+ print(f'<a href="{sponsor["url"]}"><img title="{sponsor["title"]}" src="https://www.attrs.org/en/25.1.0/_static/sponsors/{sponsor["img"]}" width="190" /></a>')
100
+ ]]] -->
101
+ <a href="https://www.variomedia.de/"><img title="Variomedia AG" src="https://www.attrs.org/en/25.1.0/_static/sponsors/Variomedia.svg" width="190" /></a>
102
+ <a href="https://tidelift.com/?utm_source=lifter&utm_medium=referral&utm_campaign=hynek"><img title="Tidelift" src="https://www.attrs.org/en/25.1.0/_static/sponsors/Tidelift.svg" width="190" /></a>
103
+ <a href="https://klaviyo.com/"><img title="Klaviyo" src="https://www.attrs.org/en/25.1.0/_static/sponsors/Klaviyo.svg" width="190" /></a>
104
+ <a href="https://www.emsys-renewables.com/"><img title="emsys renewables" src="https://www.attrs.org/en/25.1.0/_static/sponsors/emsys-renewables.svg" width="190" /></a>
105
+ <a href="https://filepreviews.io/"><img title="FilePreviews" src="https://www.attrs.org/en/25.1.0/_static/sponsors/FilePreviews.svg" width="190" /></a>
106
+ <a href="https://privacy-solutions.org/"><img title="Privacy Solutions" src="https://www.attrs.org/en/25.1.0/_static/sponsors/Privacy-Solutions.svg" width="190" /></a>
107
+ <a href="https://polar.sh/"><img title="Polar" src="https://www.attrs.org/en/25.1.0/_static/sponsors/Polar.svg" width="190" /></a>
108
+ <!-- [[[end]]] -->
109
+
110
+ </p>
111
+
112
+ <!-- sponsor-break-end -->
113
+
114
+ <p align="center">
115
+ <strong>Please consider <a href="https://github.com/sponsors/hynek">joining them</a> to help make <em>attrs</em>’s maintenance more sustainable!</strong>
116
+ </p>
117
+
118
+ <!-- teaser-end -->
119
+
120
+ ## Example
121
+
122
+ *attrs* gives you a class decorator and a way to declaratively define the attributes on that class:
123
+
124
+ <!-- code-begin -->
125
+
126
+ ```pycon
127
+ >>> from attrs import asdict, define, make_class, Factory
128
+
129
+ >>> @define
130
+ ... class SomeClass:
131
+ ... a_number: int = 42
132
+ ... list_of_numbers: list[int] = Factory(list)
133
+ ...
134
+ ... def hard_math(self, another_number):
135
+ ... return self.a_number + sum(self.list_of_numbers) * another_number
136
+
137
+
138
+ >>> sc = SomeClass(1, [1, 2, 3])
139
+ >>> sc
140
+ SomeClass(a_number=1, list_of_numbers=[1, 2, 3])
141
+
142
+ >>> sc.hard_math(3)
143
+ 19
144
+ >>> sc == SomeClass(1, [1, 2, 3])
145
+ True
146
+ >>> sc != SomeClass(2, [3, 2, 1])
147
+ True
148
+
149
+ >>> asdict(sc)
150
+ {'a_number': 1, 'list_of_numbers': [1, 2, 3]}
151
+
152
+ >>> SomeClass()
153
+ SomeClass(a_number=42, list_of_numbers=[])
154
+
155
+ >>> C = make_class("C", ["a", "b"])
156
+ >>> C("foo", "bar")
157
+ C(a='foo', b='bar')
158
+ ```
159
+
160
+ After *declaring* your attributes, *attrs* gives you:
161
+
162
+ - a concise and explicit overview of the class's attributes,
163
+ - a nice human-readable `__repr__`,
164
+ - equality-checking methods,
165
+ - an initializer,
166
+ - and much more,
167
+
168
+ *without* writing dull boilerplate code again and again and *without* runtime performance penalties.
169
+
170
+ ---
171
+
172
+ This example uses *attrs*'s modern APIs that have been introduced in version 20.1.0, and the *attrs* package import name that has been added in version 21.3.0.
173
+ The classic APIs (`@attr.s`, `attr.ib`, plus their serious-business aliases) and the `attr` package import name will remain **indefinitely**.
174
+
175
+ Check out [*On The Core API Names*](https://www.attrs.org/en/latest/names.html) for an in-depth explanation!
176
+
177
+
178
+ ### Hate Type Annotations!?
179
+
180
+ No problem!
181
+ Types are entirely **optional** with *attrs*.
182
+ Simply assign `attrs.field()` to the attributes instead of annotating them with types:
183
+
184
+ ```python
185
+ from attrs import define, field
186
+
187
+ @define
188
+ class SomeClass:
189
+ a_number = field(default=42)
190
+ list_of_numbers = field(factory=list)
191
+ ```
192
+
193
+
194
+ ## Data Classes
195
+
196
+ On the tin, *attrs* might remind you of `dataclasses` (and indeed, `dataclasses` [are a descendant](https://hynek.me/articles/import-attrs/) of *attrs*).
197
+ In practice it does a lot more and is more flexible.
198
+ For instance, it allows you to define [special handling of NumPy arrays for equality checks](https://www.attrs.org/en/stable/comparison.html#customization), allows more ways to [plug into the initialization process](https://www.attrs.org/en/stable/init.html#hooking-yourself-into-initialization), has a replacement for `__init_subclass__`, and allows for stepping through the generated methods using a debugger.
199
+
200
+ For more details, please refer to our [comparison page](https://www.attrs.org/en/stable/why.html#data-classes), but generally speaking, we are more likely to commit crimes against nature to make things work that one would expect to work, but that are quite complicated in practice.
201
+
202
+
203
+ ## Project Information
204
+
205
+ - [**Changelog**](https://www.attrs.org/en/stable/changelog.html)
206
+ - [**Documentation**](https://www.attrs.org/)
207
+ - [**PyPI**](https://pypi.org/project/attrs/)
208
+ - [**Source Code**](https://github.com/python-attrs/attrs)
209
+ - [**Contributing**](https://github.com/python-attrs/attrs/blob/main/.github/CONTRIBUTING.md)
210
+ - [**Third-party Extensions**](https://github.com/python-attrs/attrs/wiki/Extensions-to-attrs)
211
+ - **Get Help**: use the `python-attrs` tag on [Stack Overflow](https://stackoverflow.com/questions/tagged/python-attrs)
212
+
213
+
214
+ ### *attrs* for Enterprise
215
+
216
+ Available as part of the [Tidelift Subscription](https://tidelift.com/?utm_source=lifter&utm_medium=referral&utm_campaign=hynek).
217
+
218
+ The maintainers of *attrs* and thousands of other packages are working with Tidelift to deliver commercial support and maintenance for the open source packages you use to build your applications.
219
+ Save time, reduce risk, and improve code health, while paying the maintainers of the exact packages you use.
220
+
221
+ ## Release Information
222
+
223
+ ### Changes
224
+
225
+ - This release only ensures correct PyPI licensing metadata.
226
+ [#1386](https://github.com/python-attrs/attrs/issues/1386)
227
+
228
+
229
+
230
+ ---
231
+
232
+ [Full changelog →](https://www.attrs.org/en/stable/changelog.html)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs-25.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ attr/__init__.py,sha256=fOYIvt1eGSqQre4uCS3sJWKZ0mwAuC8UD6qba5OS9_U,2057
2
+ attr/__init__.pyi,sha256=QIXnnHPoucmDWkbpNsWTP-cgJ1bn8le7DjyRa_wYdew,11281
3
+ attr/__pycache__/__init__.cpython-312.pyc,,
4
+ attr/__pycache__/_cmp.cpython-312.pyc,,
5
+ attr/__pycache__/_compat.cpython-312.pyc,,
6
+ attr/__pycache__/_config.cpython-312.pyc,,
7
+ attr/__pycache__/_funcs.cpython-312.pyc,,
8
+ attr/__pycache__/_make.cpython-312.pyc,,
9
+ attr/__pycache__/_next_gen.cpython-312.pyc,,
10
+ attr/__pycache__/_version_info.cpython-312.pyc,,
11
+ attr/__pycache__/converters.cpython-312.pyc,,
12
+ attr/__pycache__/exceptions.cpython-312.pyc,,
13
+ attr/__pycache__/filters.cpython-312.pyc,,
14
+ attr/__pycache__/setters.cpython-312.pyc,,
15
+ attr/__pycache__/validators.cpython-312.pyc,,
16
+ attr/_cmp.py,sha256=3umHiBtgsEYtvNP_8XrQwTCdFoZIX4DEur76N-2a3X8,4123
17
+ attr/_cmp.pyi,sha256=U-_RU_UZOyPUEQzXE6RMYQQcjkZRY25wTH99sN0s7MM,368
18
+ attr/_compat.py,sha256=4hlXbWhdDjQCDK6FKF1EgnZ3POiHgtpp54qE0nxaGHg,2704
19
+ attr/_config.py,sha256=dGq3xR6fgZEF6UBt_L0T-eUHIB4i43kRmH0P28sJVw8,843
20
+ attr/_funcs.py,sha256=5-tUKJtp3h5El55EcDl6GWXFp68fT8D8U7uCRN6497I,15854
21
+ attr/_make.py,sha256=XS_pYn_-KNo69Tb8-_y3YUcB3Xus00MwAShh2WulkjQ,94157
22
+ attr/_next_gen.py,sha256=7FRkbtl_N017SuBhf_Vw3mw2c2pGZhtCGOzadgz7tp4,24395
23
+ attr/_typing_compat.pyi,sha256=XDP54TUn-ZKhD62TOQebmzrwFyomhUCoGRpclb6alRA,469
24
+ attr/_version_info.py,sha256=exSqb3b5E-fMSsgZAlEw9XcLpEgobPORCZpcaEglAM4,2121
25
+ attr/_version_info.pyi,sha256=x_M3L3WuB7r_ULXAWjx959udKQ4HLB8l-hsc1FDGNvk,209
26
+ attr/converters.py,sha256=GlDeOzPeTFgeBBLbj9G57Ez5lAk68uhSALRYJ_exe84,3861
27
+ attr/converters.pyi,sha256=orU2bff-VjQa2kMDyvnMQV73oJT2WRyQuw4ZR1ym1bE,643
28
+ attr/exceptions.py,sha256=HRFq4iybmv7-DcZwyjl6M1euM2YeJVK_hFxuaBGAngI,1977
29
+ attr/exceptions.pyi,sha256=zZq8bCUnKAy9mDtBEw42ZhPhAUIHoTKedDQInJD883M,539
30
+ attr/filters.py,sha256=ZBiKWLp3R0LfCZsq7X11pn9WX8NslS2wXM4jsnLOGc8,1795
31
+ attr/filters.pyi,sha256=3J5BG-dTxltBk1_-RuNRUHrv2qu1v8v4aDNAQ7_mifA,208
32
+ attr/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ attr/setters.py,sha256=5-dcT63GQK35ONEzSgfXCkbB7pPkaR-qv15mm4PVSzQ,1617
34
+ attr/setters.pyi,sha256=NnVkaFU1BB4JB8E4JuXyrzTUgvtMpj8p3wBdJY7uix4,584
35
+ attr/validators.py,sha256=WaB1HLAHHqRHWsrv_K9H-sJ7ESil3H3Cmv2d8TtVZx4,20046
36
+ attr/validators.pyi,sha256=s2WhKPqskxbsckJfKk8zOuuB088GfgpyxcCYSNFLqNU,2603
37
+ attrs-25.1.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
38
+ attrs-25.1.0.dist-info/METADATA,sha256=bZidcSPgoF4BvFNQYyqph4NeHVg9r55WXiwAEtbvRnc,10999
39
+ attrs-25.1.0.dist-info/RECORD,,
40
+ attrs-25.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
41
+ attrs-25.1.0.dist-info/licenses/LICENSE,sha256=iCEVyV38KvHutnFPjsbVy8q_Znyv-HKfQkINpj9xTp8,1109
42
+ attrs/__init__.py,sha256=qeQJZ4O08yczSn840v9bYOaZyRE81WsVi-QCrY3krCU,1107
43
+ attrs/__init__.pyi,sha256=nZmInocjM7tHV4AQw0vxO_fo6oJjL_PonlV9zKKW8DY,7931
44
+ attrs/__pycache__/__init__.cpython-312.pyc,,
45
+ attrs/__pycache__/converters.cpython-312.pyc,,
46
+ attrs/__pycache__/exceptions.cpython-312.pyc,,
47
+ attrs/__pycache__/filters.cpython-312.pyc,,
48
+ attrs/__pycache__/setters.cpython-312.pyc,,
49
+ attrs/__pycache__/validators.cpython-312.pyc,,
50
+ attrs/converters.py,sha256=8kQljrVwfSTRu8INwEk8SI0eGrzmWftsT7rM0EqyohM,76
51
+ attrs/exceptions.py,sha256=ACCCmg19-vDFaDPY9vFl199SPXCQMN_bENs4DALjzms,76
52
+ attrs/filters.py,sha256=VOUMZug9uEU6dUuA0dF1jInUK0PL3fLgP0VBS5d-CDE,73
53
+ attrs/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
+ attrs/setters.py,sha256=eL1YidYQV3T2h9_SYIZSZR1FAcHGb1TuCTy0E0Lv2SU,73
55
+ attrs/validators.py,sha256=xcy6wD5TtTkdCG1f4XWbocPSO0faBjk5IfVJfP6SUj0,76
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs-25.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
2
+ from .initialise import init, deinit, reinit, colorama_text, just_fix_windows_console
3
+ from .ansi import Fore, Back, Style, Cursor
4
+ from .ansitowin32 import AnsiToWin32
5
+
6
+ __version__ = '0.4.6'
7
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/ansi.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
2
+ '''
3
+ This module generates ANSI character codes to printing colors to terminals.
4
+ See: http://en.wikipedia.org/wiki/ANSI_escape_code
5
+ '''
6
+
7
+ CSI = '\033['
8
+ OSC = '\033]'
9
+ BEL = '\a'
10
+
11
+
12
+ def code_to_chars(code):
13
+ return CSI + str(code) + 'm'
14
+
15
+ def set_title(title):
16
+ return OSC + '2;' + title + BEL
17
+
18
+ def clear_screen(mode=2):
19
+ return CSI + str(mode) + 'J'
20
+
21
+ def clear_line(mode=2):
22
+ return CSI + str(mode) + 'K'
23
+
24
+
25
+ class AnsiCodes(object):
26
+ def __init__(self):
27
+ # the subclasses declare class attributes which are numbers.
28
+ # Upon instantiation we define instance attributes, which are the same
29
+ # as the class attributes but wrapped with the ANSI escape sequence
30
+ for name in dir(self):
31
+ if not name.startswith('_'):
32
+ value = getattr(self, name)
33
+ setattr(self, name, code_to_chars(value))
34
+
35
+
36
+ class AnsiCursor(object):
37
+ def UP(self, n=1):
38
+ return CSI + str(n) + 'A'
39
+ def DOWN(self, n=1):
40
+ return CSI + str(n) + 'B'
41
+ def FORWARD(self, n=1):
42
+ return CSI + str(n) + 'C'
43
+ def BACK(self, n=1):
44
+ return CSI + str(n) + 'D'
45
+ def POS(self, x=1, y=1):
46
+ return CSI + str(y) + ';' + str(x) + 'H'
47
+
48
+
49
+ class AnsiFore(AnsiCodes):
50
+ BLACK = 30
51
+ RED = 31
52
+ GREEN = 32
53
+ YELLOW = 33
54
+ BLUE = 34
55
+ MAGENTA = 35
56
+ CYAN = 36
57
+ WHITE = 37
58
+ RESET = 39
59
+
60
+ # These are fairly well supported, but not part of the standard.
61
+ LIGHTBLACK_EX = 90
62
+ LIGHTRED_EX = 91
63
+ LIGHTGREEN_EX = 92
64
+ LIGHTYELLOW_EX = 93
65
+ LIGHTBLUE_EX = 94
66
+ LIGHTMAGENTA_EX = 95
67
+ LIGHTCYAN_EX = 96
68
+ LIGHTWHITE_EX = 97
69
+
70
+
71
+ class AnsiBack(AnsiCodes):
72
+ BLACK = 40
73
+ RED = 41
74
+ GREEN = 42
75
+ YELLOW = 43
76
+ BLUE = 44
77
+ MAGENTA = 45
78
+ CYAN = 46
79
+ WHITE = 47
80
+ RESET = 49
81
+
82
+ # These are fairly well supported, but not part of the standard.
83
+ LIGHTBLACK_EX = 100
84
+ LIGHTRED_EX = 101
85
+ LIGHTGREEN_EX = 102
86
+ LIGHTYELLOW_EX = 103
87
+ LIGHTBLUE_EX = 104
88
+ LIGHTMAGENTA_EX = 105
89
+ LIGHTCYAN_EX = 106
90
+ LIGHTWHITE_EX = 107
91
+
92
+
93
+ class AnsiStyle(AnsiCodes):
94
+ BRIGHT = 1
95
+ DIM = 2
96
+ NORMAL = 22
97
+ RESET_ALL = 0
98
+
99
+ Fore = AnsiFore()
100
+ Back = AnsiBack()
101
+ Style = AnsiStyle()
102
+ Cursor = AnsiCursor()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/ansitowin32.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
2
+ import re
3
+ import sys
4
+ import os
5
+
6
+ from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style, BEL
7
+ from .winterm import enable_vt_processing, WinTerm, WinColor, WinStyle
8
+ from .win32 import windll, winapi_test
9
+
10
+
11
+ winterm = None
12
+ if windll is not None:
13
+ winterm = WinTerm()
14
+
15
+
16
+ class StreamWrapper(object):
17
+ '''
18
+ Wraps a stream (such as stdout), acting as a transparent proxy for all
19
+ attribute access apart from method 'write()', which is delegated to our
20
+ Converter instance.
21
+ '''
22
+ def __init__(self, wrapped, converter):
23
+ # double-underscore everything to prevent clashes with names of
24
+ # attributes on the wrapped stream object.
25
+ self.__wrapped = wrapped
26
+ self.__convertor = converter
27
+
28
+ def __getattr__(self, name):
29
+ return getattr(self.__wrapped, name)
30
+
31
+ def __enter__(self, *args, **kwargs):
32
+ # special method lookup bypasses __getattr__/__getattribute__, see
33
+ # https://stackoverflow.com/questions/12632894/why-doesnt-getattr-work-with-exit
34
+ # thus, contextlib magic methods are not proxied via __getattr__
35
+ return self.__wrapped.__enter__(*args, **kwargs)
36
+
37
+ def __exit__(self, *args, **kwargs):
38
+ return self.__wrapped.__exit__(*args, **kwargs)
39
+
40
+ def __setstate__(self, state):
41
+ self.__dict__ = state
42
+
43
+ def __getstate__(self):
44
+ return self.__dict__
45
+
46
+ def write(self, text):
47
+ self.__convertor.write(text)
48
+
49
+ def isatty(self):
50
+ stream = self.__wrapped
51
+ if 'PYCHARM_HOSTED' in os.environ:
52
+ if stream is not None and (stream is sys.__stdout__ or stream is sys.__stderr__):
53
+ return True
54
+ try:
55
+ stream_isatty = stream.isatty
56
+ except AttributeError:
57
+ return False
58
+ else:
59
+ return stream_isatty()
60
+
61
+ @property
62
+ def closed(self):
63
+ stream = self.__wrapped
64
+ try:
65
+ return stream.closed
66
+ # AttributeError in the case that the stream doesn't support being closed
67
+ # ValueError for the case that the stream has already been detached when atexit runs
68
+ except (AttributeError, ValueError):
69
+ return True
70
+
71
+
72
+ class AnsiToWin32(object):
73
+ '''
74
+ Implements a 'write()' method which, on Windows, will strip ANSI character
75
+ sequences from the text, and if outputting to a tty, will convert them into
76
+ win32 function calls.
77
+ '''
78
+ ANSI_CSI_RE = re.compile('\001?\033\\[((?:\\d|;)*)([a-zA-Z])\002?') # Control Sequence Introducer
79
+ ANSI_OSC_RE = re.compile('\001?\033\\]([^\a]*)(\a)\002?') # Operating System Command
80
+
81
+ def __init__(self, wrapped, convert=None, strip=None, autoreset=False):
82
+ # The wrapped stream (normally sys.stdout or sys.stderr)
83
+ self.wrapped = wrapped
84
+
85
+ # should we reset colors to defaults after every .write()
86
+ self.autoreset = autoreset
87
+
88
+ # create the proxy wrapping our output stream
89
+ self.stream = StreamWrapper(wrapped, self)
90
+
91
+ on_windows = os.name == 'nt'
92
+ # We test if the WinAPI works, because even if we are on Windows
93
+ # we may be using a terminal that doesn't support the WinAPI
94
+ # (e.g. Cygwin Terminal). In this case it's up to the terminal
95
+ # to support the ANSI codes.
96
+ conversion_supported = on_windows and winapi_test()
97
+ try:
98
+ fd = wrapped.fileno()
99
+ except Exception:
100
+ fd = -1
101
+ system_has_native_ansi = not on_windows or enable_vt_processing(fd)
102
+ have_tty = not self.stream.closed and self.stream.isatty()
103
+ need_conversion = conversion_supported and not system_has_native_ansi
104
+
105
+ # should we strip ANSI sequences from our output?
106
+ if strip is None:
107
+ strip = need_conversion or not have_tty
108
+ self.strip = strip
109
+
110
+ # should we should convert ANSI sequences into win32 calls?
111
+ if convert is None:
112
+ convert = need_conversion and have_tty
113
+ self.convert = convert
114
+
115
+ # dict of ansi codes to win32 functions and parameters
116
+ self.win32_calls = self.get_win32_calls()
117
+
118
+ # are we wrapping stderr?
119
+ self.on_stderr = self.wrapped is sys.stderr
120
+
121
+ def should_wrap(self):
122
+ '''
123
+ True if this class is actually needed. If false, then the output
124
+ stream will not be affected, nor will win32 calls be issued, so
125
+ wrapping stdout is not actually required. This will generally be
126
+ False on non-Windows platforms, unless optional functionality like
127
+ autoreset has been requested using kwargs to init()
128
+ '''
129
+ return self.convert or self.strip or self.autoreset
130
+
131
    def get_win32_calls(self):
        """Build the dispatch table from ANSI SGR codes to win32 calls.

        Each entry maps an ANSI code to a tuple of (bound WinTerm method,
        *positional args). Only populated when conversion is enabled and a
        winterm object exists; otherwise an empty dict, so call_win32
        silently ignores every code.
        """
        if self.convert and winterm:
            return {
                AnsiStyle.RESET_ALL: (winterm.reset_all, ),
                AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT),
                # DIM has no win32 equivalent; mapped to NORMAL like NORMAL.
                AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL),
                AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL),
                AnsiFore.BLACK: (winterm.fore, WinColor.BLACK),
                AnsiFore.RED: (winterm.fore, WinColor.RED),
                AnsiFore.GREEN: (winterm.fore, WinColor.GREEN),
                AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW),
                AnsiFore.BLUE: (winterm.fore, WinColor.BLUE),
                AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA),
                AnsiFore.CYAN: (winterm.fore, WinColor.CYAN),
                AnsiFore.WHITE: (winterm.fore, WinColor.GREY),
                AnsiFore.RESET: (winterm.fore, ),
                # LIGHT*_EX colors pass True for the 'light' flag, which
                # WinTerm emulates with the BRIGHT attribute bit.
                AnsiFore.LIGHTBLACK_EX: (winterm.fore, WinColor.BLACK, True),
                AnsiFore.LIGHTRED_EX: (winterm.fore, WinColor.RED, True),
                AnsiFore.LIGHTGREEN_EX: (winterm.fore, WinColor.GREEN, True),
                AnsiFore.LIGHTYELLOW_EX: (winterm.fore, WinColor.YELLOW, True),
                AnsiFore.LIGHTBLUE_EX: (winterm.fore, WinColor.BLUE, True),
                AnsiFore.LIGHTMAGENTA_EX: (winterm.fore, WinColor.MAGENTA, True),
                AnsiFore.LIGHTCYAN_EX: (winterm.fore, WinColor.CYAN, True),
                AnsiFore.LIGHTWHITE_EX: (winterm.fore, WinColor.GREY, True),
                AnsiBack.BLACK: (winterm.back, WinColor.BLACK),
                AnsiBack.RED: (winterm.back, WinColor.RED),
                AnsiBack.GREEN: (winterm.back, WinColor.GREEN),
                AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW),
                AnsiBack.BLUE: (winterm.back, WinColor.BLUE),
                AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA),
                AnsiBack.CYAN: (winterm.back, WinColor.CYAN),
                AnsiBack.WHITE: (winterm.back, WinColor.GREY),
                AnsiBack.RESET: (winterm.back, ),
                AnsiBack.LIGHTBLACK_EX: (winterm.back, WinColor.BLACK, True),
                AnsiBack.LIGHTRED_EX: (winterm.back, WinColor.RED, True),
                AnsiBack.LIGHTGREEN_EX: (winterm.back, WinColor.GREEN, True),
                AnsiBack.LIGHTYELLOW_EX: (winterm.back, WinColor.YELLOW, True),
                AnsiBack.LIGHTBLUE_EX: (winterm.back, WinColor.BLUE, True),
                AnsiBack.LIGHTMAGENTA_EX: (winterm.back, WinColor.MAGENTA, True),
                AnsiBack.LIGHTCYAN_EX: (winterm.back, WinColor.CYAN, True),
                AnsiBack.LIGHTWHITE_EX: (winterm.back, WinColor.GREY, True),
            }
        return dict()
174
+
175
+ def write(self, text):
176
+ if self.strip or self.convert:
177
+ self.write_and_convert(text)
178
+ else:
179
+ self.wrapped.write(text)
180
+ self.wrapped.flush()
181
+ if self.autoreset:
182
+ self.reset_all()
183
+
184
+
185
    def reset_all(self):
        """Restore default console colors/attributes.

        In conversion mode this is done through the win32 API (SGR 0);
        otherwise the ANSI reset sequence is written out, unless stripping
        is active or the stream has already been closed.
        """
        if self.convert:
            self.call_win32('m', (0,))
        elif not self.strip and not self.stream.closed:
            # NOTE(review): checks self.stream (the proxy) for closed-ness but
            # writes via self.wrapped -- presumably both refer to the same
            # underlying stream; confirm against __init__ (not in view).
            self.wrapped.write(Style.RESET_ALL)
190
+
191
+
192
    def write_and_convert(self, text):
        '''
        Write the given text to our wrapped stream, stripping any ANSI
        sequences from the text, and optionally converting them into win32
        calls.
        '''
        cursor = 0
        # OSC sequences (e.g. set-title) are handled and removed first.
        text = self.convert_osc(text)
        for match in self.ANSI_CSI_RE.finditer(text):
            start, end = match.span()
            # Emit the plain text preceding this escape sequence...
            self.write_plain_text(text, cursor, start)
            # ...then act on (or silently drop) the sequence itself.
            self.convert_ansi(*match.groups())
            cursor = end
        # Flush the trailing plain text after the last escape sequence.
        self.write_plain_text(text, cursor, len(text))
206
+
207
+
208
+ def write_plain_text(self, text, start, end):
209
+ if start < end:
210
+ self.wrapped.write(text[start:end])
211
+ self.wrapped.flush()
212
+
213
+
214
+ def convert_ansi(self, paramstring, command):
215
+ if self.convert:
216
+ params = self.extract_params(command, paramstring)
217
+ self.call_win32(command, params)
218
+
219
+
220
+ def extract_params(self, command, paramstring):
221
+ if command in 'Hf':
222
+ params = tuple(int(p) if len(p) != 0 else 1 for p in paramstring.split(';'))
223
+ while len(params) < 2:
224
+ # defaults:
225
+ params = params + (1,)
226
+ else:
227
+ params = tuple(int(p) for p in paramstring.split(';') if len(p) != 0)
228
+ if len(params) == 0:
229
+ # defaults:
230
+ if command in 'JKm':
231
+ params = (0,)
232
+ elif command in 'ABCD':
233
+ params = (1,)
234
+
235
+ return params
236
+
237
+
238
    def call_win32(self, command, params):
        """Dispatch one parsed CSI command to the appropriate WinTerm method.

        'm' (SGR) maps each parameter independently through win32_calls;
        unknown SGR codes are ignored. J/K erase, H/f set the cursor
        absolutely, A/B/C/D move it relatively.
        """
        if command == 'm':
            for param in params:
                if param in self.win32_calls:
                    func_args = self.win32_calls[param]
                    func = func_args[0]
                    args = func_args[1:]
                    kwargs = dict(on_stderr=self.on_stderr)
                    func(*args, **kwargs)
        elif command in 'J':
            winterm.erase_screen(params[0], on_stderr=self.on_stderr)
        elif command in 'K':
            winterm.erase_line(params[0], on_stderr=self.on_stderr)
        elif command in 'Hf':     # cursor position - absolute
            winterm.set_cursor_position(params, on_stderr=self.on_stderr)
        elif command in 'ABCD':   # cursor position - relative
            n = params[0]
            # A - up, B - down, C - forward, D - back
            x, y = {'A': (0, -n), 'B': (0, n), 'C': (n, 0), 'D': (-n, 0)}[command]
            winterm.cursor_adjust(x, y, on_stderr=self.on_stderr)
258
+
259
+
260
+ def convert_osc(self, text):
261
+ for match in self.ANSI_OSC_RE.finditer(text):
262
+ start, end = match.span()
263
+ text = text[:start] + text[end:]
264
+ paramstring, command = match.groups()
265
+ if command == BEL:
266
+ if paramstring.count(";") == 1:
267
+ params = paramstring.split(";")
268
+ # 0 - change title and icon (we will only change title)
269
+ # 1 - change icon (we don't support this)
270
+ # 2 - change title
271
+ if params[0] in '02':
272
+ winterm.set_title(params[1])
273
+ return text
274
+
275
+
276
    def flush(self):
        """Flush the wrapped stream (file-like interface pass-through)."""
        self.wrapped.flush()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/initialise.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
2
+ import atexit
3
+ import contextlib
4
+ import sys
5
+
6
+ from .ansitowin32 import AnsiToWin32
7
+
8
+
9
def _wipe_internal_state_for_tests():
    """Reset all module-level state to its pristine, pre-init() values.

    Used by the test-suite, and also executed once at import time (see the
    call at the bottom of this module) so every global has a defined value.
    """
    global orig_stdout, orig_stderr
    orig_stdout = None
    orig_stderr = None

    global wrapped_stdout, wrapped_stderr
    wrapped_stdout = None
    wrapped_stderr = None

    global atexit_done
    atexit_done = False

    global fixed_windows_console
    fixed_windows_console = False

    try:
        # no-op if it wasn't registered
        atexit.unregister(reset_all)
    except AttributeError:
        # python 2: no atexit.unregister. Oh well, we did our best.
        pass
30
+
31
+
32
def reset_all():
    """Emit a full attribute reset on the original stdout (atexit hook)."""
    if AnsiToWin32 is not None:    # Issue #74: objects might become None at exit
        AnsiToWin32(orig_stdout).reset_all()
35
+
36
+
37
def init(autoreset=False, convert=None, strip=None, wrap=True):
    """Install ANSI-aware wrappers around sys.stdout and sys.stderr.

    autoreset: emit a color/style reset after every write.
    convert / strip: tri-state flags (None = auto-detect) forwarded to
        AnsiToWin32 via wrap_stream().
    wrap: when False, no wrapping occurs; combining wrap=False with any
        other truthy option raises ValueError.
    """
    if not wrap and any([autoreset, convert, strip]):
        raise ValueError('wrap=False conflicts with any other arg=True')

    global wrapped_stdout, wrapped_stderr
    global orig_stdout, orig_stderr

    # Remember the current streams so deinit() can restore them later.
    orig_stdout = sys.stdout
    orig_stderr = sys.stderr

    # sys.std* can be None (e.g. under pythonw.exe); leave those alone.
    if sys.stdout is None:
        wrapped_stdout = None
    else:
        sys.stdout = wrapped_stdout = \
            wrap_stream(orig_stdout, convert, strip, autoreset, wrap)
    if sys.stderr is None:
        wrapped_stderr = None
    else:
        sys.stderr = wrapped_stderr = \
            wrap_stream(orig_stderr, convert, strip, autoreset, wrap)

    # Ensure colors are reset at interpreter exit; register only once even
    # if init() is called repeatedly.
    global atexit_done
    if not atexit_done:
        atexit.register(reset_all)
        atexit_done = True
63
+
64
+
65
def deinit():
    """Restore sys.stdout/sys.stderr to the objects captured by init()."""
    for name, original in (("stdout", orig_stdout), ("stderr", orig_stderr)):
        if original is not None:
            setattr(sys, name, original)
70
+
71
+
72
def just_fix_windows_console():
    """Enable ANSI support on Windows consoles without touching other platforms.

    Idempotent, and a no-op when init() has already wrapped the streams.
    Only replaces sys.stdout/sys.stderr when old-style conversion is needed.
    """
    global fixed_windows_console

    if sys.platform != "win32":
        return
    if fixed_windows_console:
        return
    if wrapped_stdout is not None or wrapped_stderr is not None:
        # Someone already ran init() and it did stuff, so we won't second-guess them
        return

    # On newer versions of Windows, AnsiToWin32.__init__ will implicitly enable the
    # native ANSI support in the console as a side-effect. We only need to actually
    # replace sys.stdout/stderr if we're in the old-style conversion mode.
    new_stdout = AnsiToWin32(sys.stdout, convert=None, strip=None, autoreset=False)
    if new_stdout.convert:
        sys.stdout = new_stdout
    new_stderr = AnsiToWin32(sys.stderr, convert=None, strip=None, autoreset=False)
    if new_stderr.convert:
        sys.stderr = new_stderr

    fixed_windows_console = True
94
+
95
@contextlib.contextmanager
def colorama_text(*args, **kwargs):
    """Context manager: init() on entry, deinit() on exit.

    All arguments are forwarded to init(); deinit() runs even if the body
    raises.
    """
    init(*args, **kwargs)
    try:
        yield
    finally:
        deinit()
102
+
103
+
104
def reinit():
    """Re-apply the wrappers from a previous init() after a deinit()."""
    for name, wrapper in (("stdout", wrapped_stdout), ("stderr", wrapped_stderr)):
        if wrapper is not None:
            setattr(sys, name, wrapper)
109
+
110
+
111
def wrap_stream(stream, convert, strip, autoreset, wrap):
    """Return *stream*, or a wrapping proxy when wrapping is both requested
    and useful (per AnsiToWin32.should_wrap)."""
    if not wrap:
        return stream
    wrapper = AnsiToWin32(
        stream, convert=convert, strip=strip, autoreset=autoreset)
    return wrapper.stream if wrapper.should_wrap() else stream
118
+
119
+
120
# Use this for initial setup as well, to reduce code duplication: running
# the wipe at import time gives every module-level global a defined value.
_wipe_internal_state_for_tests()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/win32.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.

# Low-level ctypes bindings to the Windows console API. On non-Windows
# platforms (or when ctypes lacks WinDLL) the module degrades to stubs:
# windll is None and winapi_test()/SetConsoleTextAttribute() are no-ops.

# from winbase.h
STDOUT = -11
STDERR = -12

ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004

try:
    import ctypes
    from ctypes import LibraryLoader
    windll = LibraryLoader(ctypes.WinDLL)
    from ctypes import wintypes
except (AttributeError, ImportError):
    # Not on Windows: expose harmless stand-ins so callers can still import.
    windll = None
    SetConsoleTextAttribute = lambda *_: None
    winapi_test = lambda *_: None
else:
    from ctypes import byref, Structure, c_char, POINTER

    COORD = wintypes._COORD

    class CONSOLE_SCREEN_BUFFER_INFO(Structure):
        """struct in wincon.h."""
        _fields_ = [
            ("dwSize", COORD),
            ("dwCursorPosition", COORD),
            ("wAttributes", wintypes.WORD),
            ("srWindow", wintypes.SMALL_RECT),
            ("dwMaximumWindowSize", COORD),
        ]
        def __str__(self):
            return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % (
                self.dwSize.Y, self.dwSize.X
                , self.dwCursorPosition.Y, self.dwCursorPosition.X
                , self.wAttributes
                , self.srWindow.Top, self.srWindow.Left, self.srWindow.Bottom, self.srWindow.Right
                , self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X
            )

    # Declare argtypes/restype for each kernel32 entry point so ctypes
    # marshals arguments correctly (must happen before the wrappers below
    # are ever called).
    _GetStdHandle = windll.kernel32.GetStdHandle
    _GetStdHandle.argtypes = [
        wintypes.DWORD,
    ]
    _GetStdHandle.restype = wintypes.HANDLE

    _GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo
    _GetConsoleScreenBufferInfo.argtypes = [
        wintypes.HANDLE,
        POINTER(CONSOLE_SCREEN_BUFFER_INFO),
    ]
    _GetConsoleScreenBufferInfo.restype = wintypes.BOOL

    _SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute
    _SetConsoleTextAttribute.argtypes = [
        wintypes.HANDLE,
        wintypes.WORD,
    ]
    _SetConsoleTextAttribute.restype = wintypes.BOOL

    _SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition
    _SetConsoleCursorPosition.argtypes = [
        wintypes.HANDLE,
        COORD,
    ]
    _SetConsoleCursorPosition.restype = wintypes.BOOL

    _FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA
    _FillConsoleOutputCharacterA.argtypes = [
        wintypes.HANDLE,
        c_char,
        wintypes.DWORD,
        COORD,
        POINTER(wintypes.DWORD),
    ]
    _FillConsoleOutputCharacterA.restype = wintypes.BOOL

    _FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute
    _FillConsoleOutputAttribute.argtypes = [
        wintypes.HANDLE,
        wintypes.WORD,
        wintypes.DWORD,
        COORD,
        POINTER(wintypes.DWORD),
    ]
    _FillConsoleOutputAttribute.restype = wintypes.BOOL

    _SetConsoleTitleW = windll.kernel32.SetConsoleTitleW
    _SetConsoleTitleW.argtypes = [
        wintypes.LPCWSTR
    ]
    _SetConsoleTitleW.restype = wintypes.BOOL

    _GetConsoleMode = windll.kernel32.GetConsoleMode
    _GetConsoleMode.argtypes = [
        wintypes.HANDLE,
        POINTER(wintypes.DWORD)
    ]
    _GetConsoleMode.restype = wintypes.BOOL

    _SetConsoleMode = windll.kernel32.SetConsoleMode
    _SetConsoleMode.argtypes = [
        wintypes.HANDLE,
        wintypes.DWORD
    ]
    _SetConsoleMode.restype = wintypes.BOOL

    def _winapi_test(handle):
        # A handle is console-like iff GetConsoleScreenBufferInfo succeeds.
        csbi = CONSOLE_SCREEN_BUFFER_INFO()
        success = _GetConsoleScreenBufferInfo(
            handle, byref(csbi))
        return bool(success)

    def winapi_test():
        # True when either stdout or stderr is attached to a real console.
        return any(_winapi_test(h) for h in
                   (_GetStdHandle(STDOUT), _GetStdHandle(STDERR)))

    def GetConsoleScreenBufferInfo(stream_id=STDOUT):
        # NOTE(review): the API's success flag is ignored; on failure the
        # returned struct is zero-filled.
        handle = _GetStdHandle(stream_id)
        csbi = CONSOLE_SCREEN_BUFFER_INFO()
        success = _GetConsoleScreenBufferInfo(
            handle, byref(csbi))
        return csbi

    def SetConsoleTextAttribute(stream_id, attrs):
        handle = _GetStdHandle(stream_id)
        return _SetConsoleTextAttribute(handle, attrs)

    def SetConsoleCursorPosition(stream_id, position, adjust=True):
        position = COORD(*position)
        # If the position is out of range, do nothing.
        if position.Y <= 0 or position.X <= 0:
            return
        # Adjust for Windows' SetConsoleCursorPosition:
        #    1. being 0-based, while ANSI is 1-based.
        #    2. expecting (x,y), while ANSI uses (y,x).
        adjusted_position = COORD(position.Y - 1, position.X - 1)
        if adjust:
            # Adjust for viewport's scroll position
            sr = GetConsoleScreenBufferInfo(STDOUT).srWindow
            adjusted_position.Y += sr.Top
            adjusted_position.X += sr.Left
        # Resume normal processing
        handle = _GetStdHandle(stream_id)
        return _SetConsoleCursorPosition(handle, adjusted_position)

    def FillConsoleOutputCharacter(stream_id, char, length, start):
        handle = _GetStdHandle(stream_id)
        char = c_char(char.encode())
        length = wintypes.DWORD(length)
        num_written = wintypes.DWORD(0)
        # Note that this is hard-coded for ANSI (vs wide) bytes.
        success = _FillConsoleOutputCharacterA(
            handle, char, length, start, byref(num_written))
        return num_written.value

    def FillConsoleOutputAttribute(stream_id, attr, length, start):
        ''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )'''
        handle = _GetStdHandle(stream_id)
        attribute = wintypes.WORD(attr)
        length = wintypes.DWORD(length)
        num_written = wintypes.DWORD(0)
        # Note that this is hard-coded for ANSI (vs wide) bytes.
        return _FillConsoleOutputAttribute(
            handle, attribute, length, start, byref(num_written))

    def SetConsoleTitle(title):
        return _SetConsoleTitleW(title)

    def GetConsoleMode(handle):
        # Raises WinError (unlike the wrappers above) so callers can detect
        # non-console handles.
        mode = wintypes.DWORD()
        success = _GetConsoleMode(handle, byref(mode))
        if not success:
            raise ctypes.WinError()
        return mode.value

    def SetConsoleMode(handle, mode):
        success = _SetConsoleMode(handle, mode)
        if not success:
            raise ctypes.WinError()
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/colorama/winterm.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
try:
    from msvcrt import get_osfhandle
except ImportError:
    # Non-Windows fallback: keep the name importable; callers treat the
    # raised OSError as "no console handle available".
    def get_osfhandle(_):
        raise OSError("This isn't windows!")
7
+
8
+
9
+ from . import win32
10
+
11
+ # from wincon.h
12
+ class WinColor(object):
13
+ BLACK = 0
14
+ BLUE = 1
15
+ GREEN = 2
16
+ CYAN = 3
17
+ RED = 4
18
+ MAGENTA = 5
19
+ YELLOW = 6
20
+ GREY = 7
21
+
22
# from wincon.h
class WinStyle(object):
    """Intensity bits OR-ed into a console character-attribute word."""
    NORMAL              = 0x00 # dim text, dim background
    BRIGHT              = 0x08 # bright text, dim background
    BRIGHT_BACKGROUND   = 0x80 # dim text, bright background
27
+
28
class WinTerm(object):
    """Stateful terminal-attribute emulation on top of the win32 console API.

    Tracks foreground color, background color, style, and the emulated
    LIGHT_EX brightness bits, and pushes the combined attribute word to the
    console whenever one of them changes.
    """

    def __init__(self):
        # Capture the console's startup attributes so reset_all() can
        # restore them later.
        self._default = win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes
        self.set_attrs(self._default)
        self._default_fore = self._fore
        self._default_back = self._back
        self._default_style = self._style
        # In order to emulate LIGHT_EX in windows, we borrow the BRIGHT style.
        # So that LIGHT_EX colors and BRIGHT style do not clobber each other,
        # we track them separately, since LIGHT_EX is overwritten by Fore/Back
        # and BRIGHT is overwritten by Style codes.
        self._light = 0

    def get_attrs(self):
        # Pack fore (bits 0-2), back (bits 4-6) and style/light intensity
        # bits into a single console attribute word.
        return self._fore + self._back * 16 + (self._style | self._light)

    def set_attrs(self, value):
        # Unpack a console attribute word into fore/back/style components.
        self._fore = value & 7
        self._back = (value >> 4) & 7
        self._style = value & (WinStyle.BRIGHT | WinStyle.BRIGHT_BACKGROUND)

    def reset_all(self, on_stderr=None):
        # NOTE(review): on_stderr is accepted for dispatch-table uniformity
        # but not forwarded to set_console here.
        self.set_attrs(self._default)
        self.set_console(attrs=self._default)
        self._light = 0

    def fore(self, fore=None, light=False, on_stderr=False):
        """Set foreground color (None restores the default)."""
        if fore is None:
            fore = self._default_fore
        self._fore = fore
        # Emulate LIGHT_EX with BRIGHT Style
        if light:
            self._light |= WinStyle.BRIGHT
        else:
            self._light &= ~WinStyle.BRIGHT
        self.set_console(on_stderr=on_stderr)

    def back(self, back=None, light=False, on_stderr=False):
        """Set background color (None restores the default)."""
        if back is None:
            back = self._default_back
        self._back = back
        # Emulate LIGHT_EX with BRIGHT_BACKGROUND Style
        if light:
            self._light |= WinStyle.BRIGHT_BACKGROUND
        else:
            self._light &= ~WinStyle.BRIGHT_BACKGROUND
        self.set_console(on_stderr=on_stderr)

    def style(self, style=None, on_stderr=False):
        """Set the style bits (None restores the default)."""
        if style is None:
            style = self._default_style
        self._style = style
        self.set_console(on_stderr=on_stderr)

    def set_console(self, attrs=None, on_stderr=False):
        # Push the current (or given) attribute word to the chosen console.
        if attrs is None:
            attrs = self.get_attrs()
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        win32.SetConsoleTextAttribute(handle, attrs)

    def get_position(self, handle):
        position = win32.GetConsoleScreenBufferInfo(handle).dwCursorPosition
        # Because Windows coordinates are 0-based,
        # and win32.SetConsoleCursorPosition expects 1-based.
        position.X += 1
        position.Y += 1
        return position

    def set_cursor_position(self, position=None, on_stderr=False):
        if position is None:
            # I'm not currently tracking the position, so there is no default.
            # position = self.get_position()
            return
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        win32.SetConsoleCursorPosition(handle, position)

    def cursor_adjust(self, x, y, on_stderr=False):
        """Move the cursor relatively by (x, y) from its current position."""
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        position = self.get_position(handle)
        adjusted_position = (position.Y + y, position.X + x)
        win32.SetConsoleCursorPosition(handle, adjusted_position, adjust=False)

    def erase_screen(self, mode=0, on_stderr=False):
        # 0 should clear from the cursor to the end of the screen.
        # 1 should clear from the cursor to the beginning of the screen.
        # 2 should clear the entire screen, and move cursor to (1,1)
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        csbi = win32.GetConsoleScreenBufferInfo(handle)
        # get the number of character cells in the current buffer
        cells_in_screen = csbi.dwSize.X * csbi.dwSize.Y
        # get number of character cells before current cursor position
        cells_before_cursor = csbi.dwSize.X * csbi.dwCursorPosition.Y + csbi.dwCursorPosition.X
        if mode == 0:
            from_coord = csbi.dwCursorPosition
            cells_to_erase = cells_in_screen - cells_before_cursor
        elif mode == 1:
            from_coord = win32.COORD(0, 0)
            cells_to_erase = cells_before_cursor
        elif mode == 2:
            from_coord = win32.COORD(0, 0)
            cells_to_erase = cells_in_screen
        else:
            # invalid mode
            return
        # fill the affected region with blanks
        win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
        # now set the buffer's attributes accordingly
        win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)
        if mode == 2:
            # put the cursor where needed
            win32.SetConsoleCursorPosition(handle, (1, 1))

    def erase_line(self, mode=0, on_stderr=False):
        # 0 should clear from the cursor to the end of the line.
        # 1 should clear from the cursor to the beginning of the line.
        # 2 should clear the entire line.
        handle = win32.STDOUT
        if on_stderr:
            handle = win32.STDERR
        csbi = win32.GetConsoleScreenBufferInfo(handle)
        if mode == 0:
            from_coord = csbi.dwCursorPosition
            cells_to_erase = csbi.dwSize.X - csbi.dwCursorPosition.X
        elif mode == 1:
            from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
            cells_to_erase = csbi.dwCursorPosition.X
        elif mode == 2:
            from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
            cells_to_erase = csbi.dwSize.X
        else:
            # invalid mode
            return
        # fill the affected part of the line with blanks
        win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
        # now set the buffer's attributes accordingly
        win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)

    def set_title(self, title):
        win32.SetConsoleTitle(title)
176
+
177
+
178
def enable_vt_processing(fd):
    """Try to enable native ANSI (VT) processing on the console behind *fd*.

    Returns True only when the flag is verified to be set after the call;
    False on non-Windows platforms, on API failure, or when the console
    refused the flag. Fix: the original fell off the end returning an
    implicit None in that last case -- the boolean is now explicit
    (backward compatible, since None and False are both falsy).
    """
    if win32.windll is None or not win32.winapi_test():
        return False

    try:
        handle = get_osfhandle(fd)
        mode = win32.GetConsoleMode(handle)
        win32.SetConsoleMode(
            handle,
            mode | win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING,
        )

        # Re-read to confirm the console actually accepted the flag.
        mode = win32.GetConsoleMode(handle)
        if mode & win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING:
            return True
    # Can get TypeError in testsuite where 'fd' is a Mock()
    except (OSError, TypeError):
        return False
    return False
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/ccuda.pxd ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# Deprecation shim: re-export the new cuda.bindings.cydriver declarations so
# existing `cimport cuda.ccuda` code keeps compiling.
from cuda.bindings.cydriver cimport *

# Emit a compile-time deprecation message via the C preprocessor
# (#pragma message on MSVC, #warning elsewhere).
cdef extern from *:
    """
    #ifdef _MSC_VER
    #pragma message ( "The cuda.ccuda module is deprecated and will be removed in a future release, " \
    "please switch to use the cuda.bindings.cydriver module instead." )
    #else
    #warning The cuda.ccuda module is deprecated and will be removed in a future release, \
    please switch to use the cuda.bindings.cydriver module instead.
    #endif
    """
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/ccudart.cpython-312-x86_64-linux-gnu.so ADDED
Binary file (23.4 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/cnvrtc.pxd ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# Deprecation shim: re-export the new cuda.bindings.cynvrtc declarations so
# existing `cimport cuda.cnvrtc` code keeps compiling.
from cuda.bindings.cynvrtc cimport *

# Emit a compile-time deprecation message via the C preprocessor
# (#pragma message on MSVC, #warning elsewhere).
cdef extern from *:
    """
    #ifdef _MSC_VER
    #pragma message ( "The cuda.cnvrtc module is deprecated and will be removed in a future release, " \
    "please switch to use the cuda.bindings.cynvrtc module instead." )
    #else
    #warning The cuda.cnvrtc module is deprecated and will be removed in a future release, \
    please switch to use the cuda.bindings.cynvrtc module instead.
    #endif
    """
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/cnvrtc.pyx ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# Deprecation shim: forward the Cython C-API capsule table of the new
# cuda.bindings.cynvrtc module so extensions linked against cuda.cnvrtc
# still resolve their symbols.
from cuda.bindings.cynvrtc cimport *
from cuda.bindings import cynvrtc
__pyx_capi__ = cynvrtc.__pyx_capi__
del cynvrtc
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/cuda.cpp ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/cuda.cpython-312-x86_64-linux-gnu.so ADDED
Binary file (23.5 kB). View file
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/cudart.pyx ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import warnings as _warnings

# Deprecation shim: re-export the new cuda.bindings.runtime API under the
# old cuda.cudart name.
from cuda.bindings.runtime import *


# Compile-time deprecation message via the C preprocessor
# (#pragma message on MSVC, #warning elsewhere).
cdef extern from *:
    """
    #ifdef _MSC_VER
    #pragma message ( "The cuda.cudart module is deprecated and will be removed in a future release, " \
    "please switch to use the cuda.bindings.runtime module instead." )
    #else
    #warning The cuda.cudart module is deprecated and will be removed in a future release, \
    please switch to use the cuda.bindings.runtime module instead.
    #endif
    """


# Runtime deprecation warning, raised once at import of this module.
_warnings.warn("The cuda.cudart module is deprecated and will be removed in a future release, "
               "please switch to use the cuda.bindings.runtime module instead.", FutureWarning, stacklevel=2)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/cuda/nvrtc.pyx ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

import warnings as _warnings

# Deprecation shim: re-export the new cuda.bindings.nvrtc API under the
# old cuda.nvrtc name.
from cuda.bindings.nvrtc import *


# Compile-time deprecation message via the C preprocessor
# (#pragma message on MSVC, #warning elsewhere).
cdef extern from *:
    """
    #ifdef _MSC_VER
    #pragma message ( "The cuda.nvrtc module is deprecated and will be removed in a future release, " \
    "please switch to use the cuda.bindings.nvrtc module instead." )
    #else
    #warning The cuda.nvrtc module is deprecated and will be removed in a future release, \
    please switch to use the cuda.bindings.nvrtc module instead.
    #endif
    """


# Runtime deprecation warning, raised once at import of this module.
_warnings.warn("The cuda.nvrtc module is deprecated and will be removed in a future release, "
               "please switch to use the cuda.bindings.nvrtc module instead.", FutureWarning, stacklevel=2)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/arrow_dataset.py ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/arrow_reader.py ADDED
@@ -0,0 +1,663 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # Lint as: python3
16
+ """Arrow ArrowReader."""
17
+
18
+ import copy
19
+ import math
20
+ import os
21
+ import re
22
+ import shutil
23
+ from dataclasses import dataclass
24
+ from functools import partial
25
+ from pathlib import Path
26
+ from typing import TYPE_CHECKING, List, Optional, Union
27
+
28
+ import pyarrow as pa
29
+ import pyarrow.parquet as pq
30
+ from tqdm.contrib.concurrent import thread_map
31
+
32
+ from .download.download_config import DownloadConfig
33
+ from .naming import _split_re, filenames_for_dataset_split
34
+ from .table import InMemoryTable, MemoryMappedTable, Table, concat_tables
35
+ from .utils import logging
36
+ from .utils import tqdm as hf_tqdm
37
+ from .utils.deprecation_utils import deprecated
38
+ from .utils.file_utils import cached_path
39
+
40
+
41
+ if TYPE_CHECKING:
42
+ from .info import DatasetInfo # noqa: F401
43
+ from .splits import Split, SplitInfo # noqa: F401
44
+
45
+
46
+ logger = logging.get_logger(__name__)
47
+
48
+ HF_GCP_BASE_URL = "https://storage.googleapis.com/huggingface-nlp/cache/datasets"
49
+
50
# Matches one split sub-spec such as "train[10%:20%](pct1_dropremainder)".
# Named groups: split, from/from_pct, to/to_pct, rounding. The split-name
# pattern is spliced in from naming._split_re with its own ^/$ removed.
_SUB_SPEC_RE = re.compile(
    rf"""
^
 (?P<split>{_split_re[1:-1]})
 (\[
    ((?P<from>-?\d+)
     (?P<from_pct>%)?)?
    :
    ((?P<to>-?\d+)
     (?P<to_pct>%)?)?
 \])?(\((?P<rounding>[^\)]*)\))?
$
""",  # remove ^ and $
    re.X,
)

# Splits "train+test"-style additions on '+' with optional whitespace.
_ADDITION_SEP_RE = re.compile(r"\s*\+\s*")
67
+
68
+
69
class DatasetNotOnHfGcsError(ConnectionError):
    """Raised when the dataset can't be fetched from the HF Google Cloud Storage mirror."""

    pass
73
+
74
+
75
class MissingFilesOnHfGcsError(ConnectionError):
    """Raised when some dataset files are missing on the HF Google Cloud Storage mirror."""

    pass
79
+
80
+
81
@dataclass(frozen=True)
class FileInstructions:
    """The file instructions associated with a split ReadInstruction.

    Attributes:
        num_examples: `int`, the total number of examples.
        file_instructions: List[dict(filename, skip, take)], the files information.
            The filenames contain the relative path, not absolute.
            skip/take indicate which examples to read in the file: `ds.slice(skip, take)`.
    """

    num_examples: int
    file_instructions: List[dict]
94
+
95
+
96
def make_file_instructions(
    name: str,
    split_infos: List["SplitInfo"],
    instruction: Union[str, "ReadInstruction"],
    filetype_suffix: Optional[str] = None,
    prefix_path: Optional[str] = None,
) -> FileInstructions:
    """Returns instructions of the split dict.

    Args:
        name (`str`): Name of the dataset.
        split_infos (`list` of `[SplitInfo]`): Dataset splits information.
        instruction ([`ReadInstruction`] or `str`): Reading instruction for a dataset.
        filetype_suffix (`str`, *optional*): Suffix of dataset files, e.g. 'arrow' or 'parquet'.
        prefix_path (`str`, *optional*): Prefix of dataset files, e.g. directory name.

    Returns:
        [`FileInstructions`]

    Raises:
        TypeError: if `name` is not a str.
        ValueError: if `name` is empty.
    """
    if not isinstance(name, str):
        raise TypeError(f"Expected str 'name', but got: {type(name).__name__}")
    elif not name:
        raise ValueError("Expected non-empty str 'name'")
    # Per-split lookup tables used to resolve the instruction below.
    name2len = {info.name: info.num_examples for info in split_infos}
    name2shard_lengths = {info.name: info.shard_lengths for info in split_infos}
    name2filenames = {
        info.name: filenames_for_dataset_split(
            path=prefix_path,
            dataset_name=name,
            split=info.name,
            filetype_suffix=filetype_suffix,
            shard_lengths=name2shard_lengths[info.name],
        )
        for info in split_infos
    }
    if not isinstance(instruction, ReadInstruction):
        instruction = ReadInstruction.from_spec(instruction)
    # Create the absolute instruction (per split): resolves %, negatives and None bounds.
    absolute_instructions = instruction.to_absolute(name2len)

    # For each split, return the files instruction (skip/take)
    file_instructions = []
    num_examples = 0
    for abs_instr in absolute_instructions:
        split_length = name2len[abs_instr.splitname]
        filenames = name2filenames[abs_instr.splitname]
        shard_lengths = name2shard_lengths[abs_instr.splitname]
        from_ = 0 if abs_instr.from_ is None else abs_instr.from_
        to = split_length if abs_instr.to is None else abs_instr.to
        if shard_lengths is None:  # not sharded
            # NOTE(review): same skip/take is emitted for each filename here;
            # presumably there is a single file when shard_lengths is None — confirm.
            for filename in filenames:
                take = to - from_
                if take == 0:
                    continue
                num_examples += take
                file_instructions.append({"filename": filename, "skip": from_, "take": take})
        else:  # sharded
            index_start = 0  # Beginning (included) of moving window.
            index_end = 0  # End (excluded) of moving window.
            for filename, shard_length in zip(filenames, shard_lengths):
                index_end += shard_length
                if from_ < index_end and to > index_start:  # There is something to take.
                    skip = from_ - index_start if from_ > index_start else 0
                    # take == -1 means "everything after skip in this shard".
                    take = to - index_start - skip if to < index_end else -1
                    if take == 0:
                        # NOTE(review): this `continue` also bypasses the
                        # `index_start += shard_length` update below — verify
                        # this is intended for the from_ == to edge case.
                        continue
                    file_instructions.append({"filename": filename, "skip": skip, "take": take})
                    num_examples += shard_length - skip if take == -1 else take
                index_start += shard_length
    return FileInstructions(
        num_examples=num_examples,
        file_instructions=file_instructions,
    )
169
+
170
+
171
class BaseReader:
    """
    Build a Dataset object out of Instruction instance(s).

    Subclasses set `_filetype_suffix` and implement `_get_table_from_filename`.
    """

    def __init__(self, path: str, info: Optional["DatasetInfo"]):
        """Initializes ArrowReader.

        Args:
            path (str): path where tfrecords are stored.
            info (DatasetInfo): info about the dataset.
        """
        self._path: str = path
        self._info: Optional["DatasetInfo"] = info
        # Set by subclasses, e.g. "arrow" or "parquet".
        self._filetype_suffix: Optional[str] = None

    def _get_table_from_filename(self, filename_skip_take, in_memory=False) -> Table:
        """Returns a Dataset instance from given (filename, skip, take)."""
        raise NotImplementedError

    def _read_files(self, files, in_memory=False) -> Table:
        """Returns Dataset for given file instructions.

        Args:
            files: List[dict(filename, skip, take)], the files information.
                The filenames contain the absolute path, not relative.
                skip/take indicates which example read in the file: `ds.slice(skip, take)`
            in_memory (bool, default False): Whether to copy the data in-memory.

        Raises:
            ValueError: if `files` is empty or malformed, or if all shards are
                empty and no features are available to build an empty table.
        """
        if len(files) == 0 or not all(isinstance(f, dict) for f in files):
            raise ValueError("please provide valid file informations")
        # Work on a copy: filenames are turned into absolute paths in place below.
        files = copy.deepcopy(files)
        for f in files:
            f["filename"] = os.path.join(self._path, f["filename"])

        pa_tables = thread_map(
            partial(self._get_table_from_filename, in_memory=in_memory),
            files,
            tqdm_class=hf_tqdm,
            desc="Loading dataset shards",
            # set `disable=None` rather than `disable=False` by default to disable progress bar when no TTY attached
            disable=len(files) <= 16 or None,
        )
        # Drop empty shards before concatenation.
        pa_tables = [t for t in pa_tables if len(t) > 0]
        if not pa_tables and (self._info is None or self._info.features is None):
            raise ValueError(
                "Tried to read an empty table. Please specify at least info.features to create an empty table with the right type."
            )
        # Fall back to an empty table with the right schema when every shard is empty.
        pa_tables = pa_tables or [InMemoryTable.from_batches([], schema=pa.schema(self._info.features.type))]
        pa_table = concat_tables(pa_tables) if len(pa_tables) != 1 else pa_tables[0]
        return pa_table

    def get_file_instructions(self, name, instruction, split_infos):
        """Return list of dict {'filename': str, 'skip': int, 'take': int}"""
        file_instructions = make_file_instructions(
            name, split_infos, instruction, filetype_suffix=self._filetype_suffix, prefix_path=self._path
        )
        files = file_instructions.file_instructions
        return files

    def read(
        self,
        name,
        instructions,
        split_infos,
        in_memory=False,
    ):
        """Returns Dataset instance(s).

        Args:
            name (str): name of the dataset.
            instructions (ReadInstruction): instructions to read.
                Instruction can be string and will then be passed to the Instruction
                constructor as it.
            split_infos (list of SplitInfo proto): the available splits for dataset.
            in_memory (bool, default False): Whether to copy the data in-memory.

        Returns:
            kwargs to build a single Dataset instance.

        Raises:
            ValueError: if the instruction selects no data at all.
        """

        files = self.get_file_instructions(name, instructions, split_infos)
        if not files:
            msg = f'Instruction "{instructions}" corresponds to no data!'
            raise ValueError(msg)
        return self.read_files(files=files, original_instructions=instructions, in_memory=in_memory)

    def read_files(
        self,
        files: List[dict],
        original_instructions: Union[None, "ReadInstruction", "Split"] = None,
        in_memory=False,
    ):
        """Returns single Dataset instance for the set of file instructions.

        Args:
            files: List[dict(filename, skip, take)], the files information.
                The filenames contains the relative path, not absolute.
                skip/take indicates which example read in the file: `ds.skip().take()`
            original_instructions: store the original instructions used to build the dataset split in the dataset.
            in_memory (bool, default False): Whether to copy the data in-memory.

        Returns:
            kwargs to build a Dataset instance.
        """
        # Prepend path to filename
        pa_table = self._read_files(files, in_memory=in_memory)
        # If original_instructions is not None, convert it to a human-readable NamedSplit
        if original_instructions is not None:
            from .splits import Split  # noqa

            split = Split(str(original_instructions))
        else:
            split = None
        dataset_kwargs = {"arrow_table": pa_table, "info": self._info, "split": split}
        return dataset_kwargs

    @deprecated()
    def download_from_hf_gcs(self, download_config: DownloadConfig, relative_data_dir):
        """
        Download the dataset files from the Hf GCS

        Args:
            dl_cache_dir: `str`, the local cache directory used to download files
            relative_data_dir: `str`, the relative directory of the remote files from
                the `datasets` directory on GCS.

        Raises:
            DatasetNotOnHfGcsError: if dataset_info.json is not found remotely.
            MissingFilesOnHfGcsError: if a data file is not found remotely.
        """
        remote_cache_dir = HF_GCP_BASE_URL + "/" + relative_data_dir.replace(os.sep, "/")
        try:
            # Fetch dataset_info.json first; its absence means the dataset is not mirrored.
            remote_dataset_info = os.path.join(remote_cache_dir, "dataset_info.json")
            downloaded_dataset_info = cached_path(
                remote_dataset_info.replace(os.sep, "/"), download_config=download_config
            )
            shutil.move(downloaded_dataset_info, os.path.join(self._path, "dataset_info.json"))
            if self._info is not None:
                self._info.update(self._info.from_directory(self._path))
        except FileNotFoundError as err:
            raise DatasetNotOnHfGcsError(err) from None
        try:
            # Then download every data file referenced by every split.
            for split in self._info.splits:
                file_instructions = self.get_file_instructions(
                    name=self._info.builder_name,
                    instruction=split,
                    split_infos=self._info.splits.values(),
                )
                for file_instruction in file_instructions:
                    file_to_download = str(Path(file_instruction["filename"]).relative_to(self._path))
                    remote_prepared_filename = os.path.join(remote_cache_dir, file_to_download)
                    downloaded_prepared_filename = cached_path(
                        remote_prepared_filename.replace(os.sep, "/"), download_config=download_config
                    )
                    shutil.move(downloaded_prepared_filename, file_instruction["filename"])
        except FileNotFoundError as err:
            raise MissingFilesOnHfGcsError(err) from None
326
+
327
+
328
class ArrowReader(BaseReader):
    """
    Build a Dataset object out of Instruction instance(s).
    This Reader uses either memory mapping or file descriptors (in-memory) on arrow files.
    """

    def __init__(self, path: str, info: Optional["DatasetInfo"]):
        """Initializes ArrowReader.

        Args:
            path (str): path where Arrow files are stored.
            info (DatasetInfo): info about the dataset.
        """
        super().__init__(path, info)
        self._filetype_suffix = "arrow"

    def _get_table_from_filename(self, filename_skip_take, in_memory=False) -> Table:
        """Returns a Dataset instance from given (filename, skip, take)."""
        filename = filename_skip_take["filename"]
        skip = filename_skip_take.get("skip")
        take = filename_skip_take.get("take")
        table = ArrowReader.read_table(filename, in_memory=in_memory)
        if take == -1:
            # A take of -1 means "everything after `skip`".
            take = len(table) - skip
        # here we don't want to slice an empty table, or it may segfault
        has_bounds = skip is not None and take is not None
        if has_bounds and (skip != 0 or take != len(table)):
            table = table.slice(skip, take)
        return table

    @staticmethod
    def read_table(filename, in_memory=False) -> Table:
        """
        Read table from file.

        Args:
            filename (str): File name of the table.
            in_memory (bool, default=False): Whether to copy the data in-memory.

        Returns:
            pyarrow.Table
        """
        if in_memory:
            return InMemoryTable.from_file(filename)
        return MemoryMappedTable.from_file(filename)
373
+
374
+
375
class ParquetReader(BaseReader):
    """
    Build a Dataset object out of Instruction instance(s).
    This Reader uses memory mapping on parquet files.
    """

    def __init__(self, path: str, info: Optional["DatasetInfo"]):
        """Initializes ParquetReader.

        Args:
            path (str): path where tfrecords are stored.
            info (DatasetInfo): info about the dataset.
        """
        super().__init__(path, info)
        self._filetype_suffix = "parquet"

    def _get_table_from_filename(self, filename_skip_take, **kwargs):
        """Returns a Dataset instance from given (filename, skip, take)."""
        filename = filename_skip_take["filename"]
        skip = filename_skip_take.get("skip")
        take = filename_skip_take.get("take")
        # Parquet read_table always loads data in memory, independently of memory_map
        pa_table = pq.read_table(filename, memory_map=True)
        # here we don't want to slice an empty table, or it may segfault
        has_bounds = skip is not None and take is not None
        if has_bounds and (skip != 0 or take != len(pa_table)):
            pa_table = pa_table.slice(skip, take)
        return pa_table
404
+
405
+
406
@dataclass(frozen=True)
class _AbsoluteInstruction:
    """A machine friendly slice: defined absolute positive boundaries."""

    splitname: str  # name of the split to read from
    from_: int  # uint (starting index).
    to: int  # uint (ending index).
413
+
414
+
415
+ @dataclass(frozen=True)
416
+ class _RelativeInstruction:
417
+ """Represents a single parsed slicing instruction, can use % and negatives."""
418
+
419
+ splitname: str
420
+ from_: Optional[int] = None # int (starting index) or None if no lower boundary.
421
+ to: Optional[int] = None # int (ending index) or None if no upper boundary.
422
+ unit: Optional[str] = None
423
+ rounding: Optional[str] = None
424
+
425
+ def __post_init__(self):
426
+ if self.unit is not None and self.unit not in ["%", "abs"]:
427
+ raise ValueError("unit must be either % or abs")
428
+ if self.rounding is not None and self.rounding not in ["closest", "pct1_dropremainder"]:
429
+ raise ValueError("rounding must be either closest or pct1_dropremainder")
430
+ if self.unit != "%" and self.rounding is not None:
431
+ raise ValueError("It is forbidden to specify rounding if not using percent slicing.")
432
+ if self.unit == "%" and self.from_ is not None and abs(self.from_) > 100:
433
+ raise ValueError("Percent slice boundaries must be > -100 and < 100.")
434
+ if self.unit == "%" and self.to is not None and abs(self.to) > 100:
435
+ raise ValueError("Percent slice boundaries must be > -100 and < 100.")
436
+ # Update via __dict__ due to instance being "frozen"
437
+ self.__dict__["rounding"] = "closest" if self.rounding is None and self.unit == "%" else self.rounding
438
+
439
+
440
def _str_to_read_instruction(spec):
    """Parse one (non-additive) spec string into a `ReadInstruction`."""
    match = _SUB_SPEC_RE.match(spec)
    if match is None:
        raise ValueError(f"Unrecognized instruction format: {spec}")
    groups = match.groupdict()
    # A "%" on either boundary makes the whole slice percent-based.
    unit = "%" if (groups["from_pct"] or groups["to_pct"]) else "abs"
    return ReadInstruction(
        split_name=groups["split"],
        rounding=groups["rounding"],
        from_=int(groups["from"]) if groups["from"] else None,
        to=int(groups["to"]) if groups["to"] else None,
        unit=unit,
    )
453
+
454
+
455
+ def _pct_to_abs_pct1(boundary, num_examples):
456
+ # Using math.trunc here, since -99.5% should give -99%, not -100%.
457
+ if num_examples < 100:
458
+ msg = (
459
+ 'Using "pct1_dropremainder" rounding on a split with less than 100 '
460
+ "elements is forbidden: it always results in an empty dataset."
461
+ )
462
+ raise ValueError(msg)
463
+ return boundary * math.trunc(num_examples / 100.0)
464
+
465
+
466
+ def _pct_to_abs_closest(boundary, num_examples):
467
+ return int(round(boundary * num_examples / 100.0))
468
+
469
+
470
def _rel_to_abs_instr(rel_instr, name2len):
    """Returns _AbsoluteInstruction instance for given RelativeInstruction.

    Args:
        rel_instr: RelativeInstruction instance.
        name2len: dict {split_name: num_examples}.

    Returns:
        _AbsoluteInstruction: boundaries resolved to non-negative absolute
        indices, clamped to [0, num_examples].

    Raises:
        ValueError: if the split name is unknown.
    """
    pct_to_abs = _pct_to_abs_closest if rel_instr.rounding == "closest" else _pct_to_abs_pct1
    split = rel_instr.splitname
    if split not in name2len:
        raise ValueError(f'Unknown split "{split}". Should be one of {list(name2len)}.')
    num_examples = name2len[split]
    from_ = rel_instr.from_
    to = rel_instr.to
    if rel_instr.unit == "%":
        # Percent boundaries are converted to absolute indices first;
        # None defaults to the full range.
        from_ = 0 if from_ is None else pct_to_abs(from_, num_examples)
        to = num_examples if to is None else pct_to_abs(to, num_examples)
    else:
        from_ = 0 if from_ is None else from_
        to = num_examples if to is None else to
    # Negative boundaries count from the end, floored at 0.
    if from_ < 0:
        from_ = max(num_examples + from_, 0)
    if to < 0:
        to = max(num_examples + to, 0)
    # Clamp to the split length so downstream take/skip math stays in range.
    from_ = min(from_, num_examples)
    to = min(to, num_examples)
    return _AbsoluteInstruction(split, from_, to)
497
+
498
+
499
class ReadInstruction:
    """Reading instruction for a dataset.

    Examples::

        # The following lines are equivalent:
        ds = datasets.load_dataset('mnist', split='test[:33%]')
        ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction.from_spec('test[:33%]'))
        ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction('test', to=33, unit='%'))
        ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction(
            'test', from_=0, to=33, unit='%'))

        # The following lines are equivalent:
        ds = datasets.load_dataset('mnist', split='test[:33%]+train[1:-1]')
        ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction.from_spec(
            'test[:33%]+train[1:-1]'))
        ds = datasets.load_dataset('mnist', split=(
            datasets.ReadInstruction('test', to=33, unit='%') +
            datasets.ReadInstruction('train', from_=1, to=-1, unit='abs')))

        # The following lines are equivalent:
        ds = datasets.load_dataset('mnist', split='test[:33%](pct1_dropremainder)')
        ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction.from_spec(
            'test[:33%](pct1_dropremainder)'))
        ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction(
            'test', from_=0, to=33, unit='%', rounding="pct1_dropremainder"))

        # 10-fold validation:
        tests = datasets.load_dataset(
            'mnist',
            [datasets.ReadInstruction('train', from_=k, to=k+10, unit='%')
             for k in range(0, 100, 10)])
        trains = datasets.load_dataset(
            'mnist',
            [datasets.ReadInstruction('train', to=k, unit='%') + datasets.ReadInstruction('train', from_=k+10, unit='%')
             for k in range(0, 100, 10)])

    """

    def _init(self, relative_instructions):
        # Private initializer.
        self._relative_instructions = relative_instructions

    @classmethod
    def _read_instruction_from_relative_instructions(cls, relative_instructions):
        """Returns ReadInstruction obj initialized with relative_instructions."""
        # Use __new__ to bypass __init__ used by public API and not convenient here.
        result = cls.__new__(cls)
        result._init(relative_instructions)  # pylint: disable=protected-access
        return result

    def __init__(self, split_name, rounding=None, from_=None, to=None, unit=None):
        """Initialize ReadInstruction.

        Args:
            split_name (str): name of the split to read. Eg: 'train'.
            rounding (str, optional): The rounding behaviour to use when percent slicing is
                used. Ignored when slicing with absolute indices.
                Possible values:
                 - 'closest' (default): The specified percentages are rounded to the
                   closest value. Use this if you want specified percents to be as
                   much exact as possible.
                 - 'pct1_dropremainder': the specified percentages are treated as
                   multiple of 1%. Use this option if you want consistency. Eg:
                     len(5%) == 5 * len(1%).
                   Using this option, one might not be able to use the full set of
                   examples, if the number of those is not a multiple of 100.
            from_ (int):
            to (int): alternative way of specifying slicing boundaries. If any of
                {from_, to, unit} argument is used, slicing cannot be specified as
                string.
            unit (str): optional, one of:
                '%': to set the slicing unit as percents of the split size.
                'abs': to set the slicing unit as absolute numbers.
        """
        # This constructor is not always called. See factory method
        # `_read_instruction_from_relative_instructions`. Common init instructions
        # MUST be placed in the _init method.
        self._init([_RelativeInstruction(split_name, from_, to, unit, rounding)])

    @classmethod
    def from_spec(cls, spec):
        """Creates a `ReadInstruction` instance out of a string spec.

        Args:
            spec (`str`):
                Split(s) + optional slice(s) to read + optional rounding
                if percents are used as the slicing unit. A slice can be specified,
                using absolute numbers (`int`) or percentages (`int`).

        Examples:

            ```
            test: test split.
            test + validation: test split + validation split.
            test[10:]: test split, minus its first 10 records.
            test[:10%]: first 10% records of test split.
            test[:20%](pct1_dropremainder): first 10% records, rounded with the pct1_dropremainder rounding.
            test[:-5%]+train[40%:60%]: first 95% of test + middle 20% of train.
            ```

        Returns:
            ReadInstruction instance.

        Raises:
            ValueError: if no instruction can be built from `spec`.
        """
        spec = str(spec)  # Need to convert to str in case of NamedSplit instance.
        subs = _ADDITION_SEP_RE.split(spec)
        if not subs:
            raise ValueError(f"No instructions could be built out of {spec}")
        # Parse the first sub-spec, then fold the remaining ones in with `+`.
        instruction = _str_to_read_instruction(subs[0])
        return sum((_str_to_read_instruction(sub) for sub in subs[1:]), instruction)

    def to_spec(self):
        """Return the string spec equivalent to this instruction (inverse of `from_spec`)."""
        rel_instr_specs = []
        for rel_instr in self._relative_instructions:
            rel_instr_spec = rel_instr.splitname
            if rel_instr.from_ is not None or rel_instr.to is not None:
                from_ = rel_instr.from_
                to = rel_instr.to
                unit = rel_instr.unit
                rounding = rel_instr.rounding
                unit = unit if unit == "%" else ""
                from_ = str(from_) + unit if from_ is not None else ""
                to = str(to) + unit if to is not None else ""
                slice_str = f"[{from_}:{to}]"
                # "closest" is the default percent rounding, so it is omitted from the spec.
                rounding_str = (
                    f"({rounding})" if unit == "%" and rounding is not None and rounding != "closest" else ""
                )
                rel_instr_spec += slice_str + rounding_str
            rel_instr_specs.append(rel_instr_spec)
        return "+".join(rel_instr_specs)

    def __add__(self, other):
        """Returns a new ReadInstruction obj, result of appending other to self."""
        if not isinstance(other, ReadInstruction):
            msg = "ReadInstruction can only be added to another ReadInstruction obj."
            raise TypeError(msg)
        self_ris = self._relative_instructions
        other_ris = other._relative_instructions  # pylint: disable=protected-access
        # Summing percent instructions with different roundings would be ambiguous.
        if (
            self_ris[0].unit != "abs"
            and other_ris[0].unit != "abs"
            and self._relative_instructions[0].rounding != other_ris[0].rounding
        ):
            raise ValueError("It is forbidden to sum ReadInstruction instances with different rounding values.")
        return self._read_instruction_from_relative_instructions(self_ris + other_ris)

    def __str__(self):
        return self.to_spec()

    def __repr__(self):
        return f"ReadInstruction({self._relative_instructions})"

    def to_absolute(self, name2len):
        """Translate instruction into a list of absolute instructions.

        Those absolute instructions are then to be added together.

        Args:
            name2len (`dict`):
                Associating split names to number of examples.

        Returns:
            list of _AbsoluteInstruction instances (corresponds to the + in spec).
        """
        return [_rel_to_abs_instr(rel_instr, name2len) for rel_instr in self._relative_instructions]
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/builder.bak.py ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/builder.py ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/config.py ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import importlib
import importlib.metadata
import logging
import os
import platform
from pathlib import Path
from typing import Optional

from packaging import version


logger = logging.getLogger(__name__.split(".", 1)[0])  # to avoid circular import from .utils.logging

# Datasets
S3_DATASETS_BUCKET_PREFIX = "https://s3.amazonaws.com/datasets.huggingface.co/datasets/datasets"
CLOUDFRONT_DATASETS_DISTRIB_PREFIX = "https://cdn-datasets.huggingface.co/datasets/datasets"
REPO_DATASETS_URL = "https://raw.githubusercontent.com/huggingface/datasets/{revision}/datasets/{path}/{name}"

# Metrics
S3_METRICS_BUCKET_PREFIX = "https://s3.amazonaws.com/datasets.huggingface.co/datasets/metrics"
CLOUDFRONT_METRICS_DISTRIB_PREFIX = "https://cdn-datasets.huggingface.co/datasets/metric"
REPO_METRICS_URL = "https://raw.githubusercontent.com/huggingface/datasets/{revision}/metrics/{path}/{name}"

# Hub
HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co")
HUB_DATASETS_URL = HF_ENDPOINT + "/datasets/{repo_id}/resolve/{revision}/{path}"
HUB_DATASETS_HFFS_URL = "hf://datasets/{repo_id}@{revision}/{path}"
HUB_DEFAULT_VERSION = "main"

PY_VERSION = version.parse(platform.python_version())

# General environment variables accepted values for booleans
ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
ENV_VARS_FALSE_VALUES = {"0", "OFF", "NO", "FALSE"}
ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
ENV_VARS_FALSE_AND_AUTO_VALUES = ENV_VARS_FALSE_VALUES.union({"AUTO"})


# Imports
# NOTE: these packages are hard requirements; a missing one raises PackageNotFoundError at import time.
DILL_VERSION = version.parse(importlib.metadata.version("dill"))
FSSPEC_VERSION = version.parse(importlib.metadata.version("fsspec"))
PANDAS_VERSION = version.parse(importlib.metadata.version("pandas"))
PYARROW_VERSION = version.parse(importlib.metadata.version("pyarrow"))
HF_HUB_VERSION = version.parse(importlib.metadata.version("huggingface_hub"))

USE_TF = os.environ.get("USE_TF", "AUTO").upper()
USE_TORCH = os.environ.get("USE_TORCH", "AUTO").upper()
USE_JAX = os.environ.get("USE_JAX", "AUTO").upper()

# Version constants hold the string "N/A" until the package is actually found.
TORCH_VERSION = "N/A"
TORCH_AVAILABLE = False

if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VALUES:
    TORCH_AVAILABLE = importlib.util.find_spec("torch") is not None
    if TORCH_AVAILABLE:
        try:
            TORCH_VERSION = version.parse(importlib.metadata.version("torch"))
            logger.info(f"PyTorch version {TORCH_VERSION} available.")
        except importlib.metadata.PackageNotFoundError:
            pass
else:
    logger.info("Disabling PyTorch because USE_TF is set")

POLARS_VERSION = "N/A"
POLARS_AVAILABLE = importlib.util.find_spec("polars") is not None

if POLARS_AVAILABLE:
    try:
        POLARS_VERSION = version.parse(importlib.metadata.version("polars"))
        logger.info(f"Polars version {POLARS_VERSION} available.")
    except importlib.metadata.PackageNotFoundError:
        pass

TF_VERSION = "N/A"
TF_AVAILABLE = False

if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES:
    TF_AVAILABLE = importlib.util.find_spec("tensorflow") is not None
    if TF_AVAILABLE:
        # For the metadata, we have to look for both tensorflow and tensorflow-cpu
        for package in [
            "tensorflow",
            "tensorflow-cpu",
            "tensorflow-gpu",
            "tf-nightly",
            "tf-nightly-cpu",
            "tf-nightly-gpu",
            "intel-tensorflow",
            "tensorflow-rocm",
            "tensorflow-macos",
        ]:
            try:
                TF_VERSION = version.parse(importlib.metadata.version(package))
            except importlib.metadata.PackageNotFoundError:
                continue
            else:
                break
        else:
            # for/else: no distribution provided metadata, so TF is not usable.
            TF_AVAILABLE = False
    if TF_AVAILABLE:
        if TF_VERSION.major < 2:
            logger.info(f"TensorFlow found but with version {TF_VERSION}. `datasets` requires version 2 minimum.")
            TF_AVAILABLE = False
        else:
            logger.info(f"TensorFlow version {TF_VERSION} available.")
else:
    logger.info("Disabling Tensorflow because USE_TORCH is set")


JAX_VERSION = "N/A"
JAX_AVAILABLE = False

if USE_JAX in ENV_VARS_TRUE_AND_AUTO_VALUES:
    # Both jax and jaxlib must be installed for JAX support.
    JAX_AVAILABLE = importlib.util.find_spec("jax") is not None and importlib.util.find_spec("jaxlib") is not None
    if JAX_AVAILABLE:
        try:
            JAX_VERSION = version.parse(importlib.metadata.version("jax"))
            logger.info(f"JAX version {JAX_VERSION} available.")
        except importlib.metadata.PackageNotFoundError:
            pass
else:
    logger.info("Disabling JAX because USE_JAX is set to False")


USE_BEAM = os.environ.get("USE_BEAM", "AUTO").upper()
BEAM_VERSION = "N/A"
BEAM_AVAILABLE = False
if USE_BEAM in ENV_VARS_TRUE_AND_AUTO_VALUES:
    try:
        BEAM_VERSION = version.parse(importlib.metadata.version("apache_beam"))
        BEAM_AVAILABLE = True
        logger.info(f"Apache Beam version {BEAM_VERSION} available.")
    except importlib.metadata.PackageNotFoundError:
        pass
else:
    logger.info("Disabling Apache Beam because USE_BEAM is set to False")


# Optional tools for data loading
SQLALCHEMY_AVAILABLE = importlib.util.find_spec("sqlalchemy") is not None

# Optional tools for feature decoding
PIL_AVAILABLE = importlib.util.find_spec("PIL") is not None
IS_OPUS_SUPPORTED = importlib.util.find_spec("soundfile") is not None and version.parse(
    importlib.import_module("soundfile").__libsndfile_version__
) >= version.parse("1.0.31")
IS_MP3_SUPPORTED = importlib.util.find_spec("soundfile") is not None and version.parse(
    importlib.import_module("soundfile").__libsndfile_version__
) >= version.parse("1.1.0")

# Optional compression tools
RARFILE_AVAILABLE = importlib.util.find_spec("rarfile") is not None
ZSTANDARD_AVAILABLE = importlib.util.find_spec("zstandard") is not None
LZ4_AVAILABLE = importlib.util.find_spec("lz4") is not None
PY7ZR_AVAILABLE = importlib.util.find_spec("py7zr") is not None

# Cache location
DEFAULT_XDG_CACHE_HOME = "~/.cache"
XDG_CACHE_HOME = os.getenv("XDG_CACHE_HOME", DEFAULT_XDG_CACHE_HOME)
DEFAULT_HF_CACHE_HOME = os.path.join(XDG_CACHE_HOME, "huggingface")
HF_CACHE_HOME = os.path.expanduser(os.getenv("HF_HOME", DEFAULT_HF_CACHE_HOME))

DEFAULT_HF_DATASETS_CACHE = os.path.join(HF_CACHE_HOME, "datasets")
HF_DATASETS_CACHE = Path(os.getenv("HF_DATASETS_CACHE", DEFAULT_HF_DATASETS_CACHE))

DEFAULT_HF_METRICS_CACHE = os.path.join(HF_CACHE_HOME, "metrics")
HF_METRICS_CACHE = Path(os.getenv("HF_METRICS_CACHE", DEFAULT_HF_METRICS_CACHE))

DEFAULT_HF_MODULES_CACHE = os.path.join(HF_CACHE_HOME, "modules")
HF_MODULES_CACHE = Path(os.getenv("HF_MODULES_CACHE", DEFAULT_HF_MODULES_CACHE))

DOWNLOADED_DATASETS_DIR = "downloads"
DEFAULT_DOWNLOADED_DATASETS_PATH = os.path.join(HF_DATASETS_CACHE, DOWNLOADED_DATASETS_DIR)
DOWNLOADED_DATASETS_PATH = Path(os.getenv("HF_DATASETS_DOWNLOADED_DATASETS_PATH", DEFAULT_DOWNLOADED_DATASETS_PATH))

EXTRACTED_DATASETS_DIR = "extracted"
DEFAULT_EXTRACTED_DATASETS_PATH = os.path.join(DEFAULT_DOWNLOADED_DATASETS_PATH, EXTRACTED_DATASETS_DIR)
EXTRACTED_DATASETS_PATH = Path(os.getenv("HF_DATASETS_EXTRACTED_DATASETS_PATH", DEFAULT_EXTRACTED_DATASETS_PATH))

# Download count for the website
HF_UPDATE_DOWNLOAD_COUNTS = (
    os.environ.get("HF_UPDATE_DOWNLOAD_COUNTS", "AUTO").upper() in ENV_VARS_TRUE_AND_AUTO_VALUES
)

# For downloads and to check remote files metadata
HF_DATASETS_MULTITHREADING_MAX_WORKERS = 16

# Remote dataset scripts support
__HF_DATASETS_TRUST_REMOTE_CODE = os.environ.get("HF_DATASETS_TRUST_REMOTE_CODE", "ask")
# Tri-state: True (trust), False (refuse), None (ask the user interactively).
HF_DATASETS_TRUST_REMOTE_CODE: Optional[bool] = (
    True
    if __HF_DATASETS_TRUST_REMOTE_CODE.upper() in ENV_VARS_TRUE_VALUES
    else False
    if __HF_DATASETS_TRUST_REMOTE_CODE.upper() in ENV_VARS_FALSE_VALUES
    else None
)
TIME_OUT_REMOTE_CODE = 15

# Dataset viewer API
USE_PARQUET_EXPORT = True

# Batch size constants. For more info, see:
# https://github.com/apache/arrow/blob/master/docs/source/cpp/arrays.rst#size-limitations-and-recommendations)
DEFAULT_MAX_BATCH_SIZE = 1000

# Size of the preloaded record batch in `Dataset.__iter__`
ARROW_READER_BATCH_SIZE_IN_DATASET_ITER = 10

# Max shard size in bytes (e.g. to shard parquet datasets in push_to_hub or download_and_prepare)
MAX_SHARD_SIZE = "500MB"

# Parquet configuration
PARQUET_ROW_GROUP_SIZE_FOR_AUDIO_DATASETS = 100
PARQUET_ROW_GROUP_SIZE_FOR_IMAGE_DATASETS = 100
PARQUET_ROW_GROUP_SIZE_FOR_BINARY_DATASETS = 100

# Offline mode
HF_DATASETS_OFFLINE = os.environ.get("HF_DATASETS_OFFLINE", "AUTO").upper() in ENV_VARS_TRUE_VALUES

# Here, `True` will disable progress bars globally without possibility of enabling it
# programmatically. `False` will enable them without possibility of disabling them.
# If environment variable is not set (None), then the user is free to enable/disable
# them programmatically.
# TL;DR: env variable has priority over code
__HF_DATASETS_DISABLE_PROGRESS_BARS = os.environ.get("HF_DATASETS_DISABLE_PROGRESS_BARS")
HF_DATASETS_DISABLE_PROGRESS_BARS: Optional[bool] = (
    __HF_DATASETS_DISABLE_PROGRESS_BARS.upper() in ENV_VARS_TRUE_VALUES
    if __HF_DATASETS_DISABLE_PROGRESS_BARS is not None
    else None
)

# In-memory
DEFAULT_IN_MEMORY_MAX_SIZE = 0  # Disabled
IN_MEMORY_MAX_SIZE = float(os.environ.get("HF_DATASETS_IN_MEMORY_MAX_SIZE", DEFAULT_IN_MEMORY_MAX_SIZE))

# File names
DATASET_ARROW_FILENAME = "dataset.arrow"
DATASET_INDICES_FILENAME = "indices.arrow"
DATASET_STATE_JSON_FILENAME = "state.json"
DATASET_INFO_FILENAME = "dataset_info.json"
DATASETDICT_INFOS_FILENAME = "dataset_infos.json"
LICENSE_FILENAME = "LICENSE"
METRIC_INFO_FILENAME = "metric_info.json"
DATASETDICT_JSON_FILENAME = "dataset_dict.json"
METADATA_CONFIGS_FIELD = "configs"
REPOCARD_FILENAME = "README.md"
REPOYAML_FILENAME = ".huggingface.yaml"

MODULE_NAME_FOR_DYNAMIC_MODULES = "datasets_modules"

MAX_DATASET_CONFIG_ID_READABLE_LENGTH = 255

# Temporary cache directory prefix
TEMP_CACHE_DIR_PREFIX = "hf_datasets-"

# Streaming
STREAMING_READ_MAX_RETRIES = 20
STREAMING_READ_RETRY_INTERVAL = 5

# Datasets without script
DATA_FILES_MAX_NUMBER_FOR_MODULE_INFERENCE = 200
GLOBBED_DATA_FILES_MAX_NUMBER_FOR_MODULE_INFERENCE = 10
ARCHIVED_DATA_FILES_MAX_NUMBER_FOR_MODULE_INFERENCE = 200

# Progress bars
PBAR_REFRESH_TIME_INTERVAL = 0.05  # 20 progress updates per sec

# Maximum number of uploaded files per commit
UPLOADS_MAX_NUMBER_PER_COMMIT = 50

# Backward compatibility
MAX_TABLE_NBYTES_FOR_PICKLING = 4 << 30
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/dataset_dict.py ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/distributed.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import TypeVar
2
+
3
+ from .arrow_dataset import Dataset, _split_by_node_map_style_dataset
4
+ from .iterable_dataset import IterableDataset, _split_by_node_iterable_dataset
5
+
6
+
7
DatasetType = TypeVar("DatasetType", Dataset, IterableDataset)


def split_dataset_by_node(dataset: DatasetType, rank: int, world_size: int) -> DatasetType:
    """
    Split a dataset for the node at rank `rank` in a pool of nodes of size `world_size`.

    For map-style datasets, each node is assigned a chunk of data (rank 0 gets the
    first chunk); chunks are made of contiguous data on disk when possible to
    maximize data loading throughput.

    For iterable datasets, if the number of shards is a factor of `world_size`
    (i.e. `dataset.n_shards % world_size == 0`), the shards are evenly assigned
    across the nodes, which is the most optimized case. Otherwise, each node keeps
    1 example out of `world_size`, skipping the other examples.

    Args:
        dataset ([`Dataset`] or [`IterableDataset`]):
            The dataset to split by node.
        rank (`int`):
            Rank of the current node.
        world_size (`int`):
            Total number of nodes.

    Returns:
        [`Dataset`] or [`IterableDataset`]: The dataset to be used on the node at rank `rank`.
    """
    # Dispatch on the dataset flavor: map-style and iterable datasets use
    # different splitting helpers.
    splitter = (
        _split_by_node_map_style_dataset
        if isinstance(dataset, Dataset)
        else _split_by_node_iterable_dataset
    )
    return splitter(dataset, rank=rank, world_size=world_size)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/exceptions.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright 2023 The HuggingFace Authors.
3
+ from typing import Any, Dict, List, Optional, Union
4
+
5
+ from huggingface_hub import HfFileSystem
6
+
7
+ from . import config
8
+ from .table import CastError
9
+ from .utils.deprecation_utils import deprecated
10
+ from .utils.track import TrackedIterable, tracked_list, tracked_str
11
+
12
+
13
class DatasetsError(Exception):
    """Base class for all exceptions raised by this library."""


class DefunctDatasetError(DatasetsError):
    """Raised for a dataset that has been defunct."""


class FileNotFoundDatasetsError(DatasetsError, FileNotFoundError):
    """`FileNotFoundError` raised by this library."""


class DataFilesNotFoundError(FileNotFoundDatasetsError):
    """Raised when no (supported) data files are found."""


class DatasetNotFoundError(FileNotFoundDatasetsError):
    """Raised when a dataset is not found.

    This happens when trying to access:
    - a missing dataset, or
    - a private/gated dataset while the user is not authenticated.
    """


class DatasetBuildError(DatasetsError):
    """Base class for errors raised while building a dataset."""


class ManualDownloadError(DatasetBuildError):
    """Error related to a dataset's manual download."""


class FileFormatError(DatasetBuildError):
    """Error related to the format of a data file."""


class DatasetGenerationError(DatasetBuildError):
    """Error raised while generating a dataset."""
52
+
53
+
54
class DatasetGenerationCastError(DatasetGenerationError):
    """Raised when dataset generation fails because data files have mismatched columns."""

    @classmethod
    def from_cast_error(
        cls,
        cast_error: CastError,
        builder_name: str,
        gen_kwargs: Dict[str, Any],
        token: Optional[Union[bool, str]],
    ) -> "DatasetGenerationCastError":
        """Build a user-facing error from a low-level `CastError`, pointing at the
        file(s) that were being processed when the cast failed."""
        explanation_message = (
            f"\n\nAll the data files must have the same columns, but at some point {cast_error.details()}"
        )
        formatted_tracked_gen_kwargs: List[str] = []
        for gen_kwarg in gen_kwargs.values():
            # Only tracked values carry provenance info; skip plain kwargs.
            if not isinstance(gen_kwarg, (tracked_str, tracked_list, TrackedIterable)):
                continue
            # Drill through nested tracked containers to the innermost item last touched.
            while isinstance(gen_kwarg, (tracked_list, TrackedIterable)) and gen_kwarg.last_item is not None:
                gen_kwarg = gen_kwarg.last_item
            if isinstance(gen_kwarg, tracked_str):
                gen_kwarg = gen_kwarg.get_origin()
            if isinstance(gen_kwarg, str) and gen_kwarg.startswith("hf://"):
                # Resolve hf:// URLs to a canonical form for readability.
                resolved_path = HfFileSystem(endpoint=config.HF_ENDPOINT, token=token).resolve_path(gen_kwarg)
                gen_kwarg = "hf://" + resolved_path.unresolve()
                # Move the embedded "@<revision>" out of the URL into a readable suffix.
                if "@" + resolved_path.revision in gen_kwarg:
                    gen_kwarg = (
                        gen_kwarg.replace("@" + resolved_path.revision, "", 1)
                        + f" (at revision {resolved_path.revision})"
                    )
            formatted_tracked_gen_kwargs.append(str(gen_kwarg))
        if formatted_tracked_gen_kwargs:
            explanation_message += f"\n\nThis happened while the {builder_name} dataset builder was generating data using\n\n{', '.join(formatted_tracked_gen_kwargs)}"
        help_message = "\n\nPlease either edit the data files to have matching columns, or separate them into different configurations (see docs at https://hf.co/docs/hub/datasets-manual-configuration#multiple-configurations)"
        return cls("An error occurred while generating the dataset" + explanation_message + help_message)
87
+
88
+
89
+ @deprecated("Use 'ChecksumVerificationError' instead.")
90
+ class ChecksumVerificationException(Exception):
91
+ """Exceptions during checksums verifications of downloaded files.
92
+
93
+ <Deprecated version="2.20.0">
94
+
95
+ Use `ChecksumVerificationError` instead.
96
+
97
+ </Deprecated>
98
+ """
99
+
100
+
101
+ class ChecksumVerificationError(DatasetsError, ChecksumVerificationException):
102
+ """Error raised during checksums verifications of downloaded files."""
103
+
104
+ def __init__(self, *args, **kwargs):
105
+ DatasetsError.__init__(self, *args, **kwargs)
106
+
107
+
108
+ @deprecated("Use 'UnexpectedDownloadedFileError' instead.")
109
+ class UnexpectedDownloadedFile(ChecksumVerificationException):
110
+ """Some downloaded files were not expected.
111
+
112
+ <Deprecated version="2.20.0">
113
+
114
+ Use `UnexpectedDownloadedFileError` instead.
115
+
116
+ </Deprecated>
117
+ """
118
+
119
+
120
+ class UnexpectedDownloadedFileError(ChecksumVerificationError, UnexpectedDownloadedFile):
121
+ """Some downloaded files were not expected."""
122
+
123
+
124
+ @deprecated("Use 'ExpectedMoreDownloadedFilesError' instead.")
125
+ class ExpectedMoreDownloadedFiles(ChecksumVerificationException):
126
+ """Some files were supposed to be downloaded but were not.
127
+
128
+ <Deprecated version="2.20.0">
129
+
130
+ Use `ExpectedMoreDownloadedFilesError` instead.
131
+
132
+ </Deprecated>
133
+ """
134
+
135
+
136
+ class ExpectedMoreDownloadedFilesError(ChecksumVerificationError, ExpectedMoreDownloadedFiles):
137
+ """Some files were supposed to be downloaded but were not."""
138
+
139
+
140
+ class NonMatchingChecksumError(ChecksumVerificationError):
141
+ """The downloaded file checksum don't match the expected checksum."""
142
+
143
+
144
+ @deprecated("Use 'SplitsVerificationError' instead.")
145
+ class SplitsVerificationException(Exception):
146
+ """Exceptions during splits verifications.
147
+
148
+ <Deprecated version="2.20.0">
149
+
150
+ Use `SplitsVerificationError` instead.
151
+
152
+ </Deprecated>
153
+ """
154
+
155
+
156
+ class SplitsVerificationError(DatasetsError, SplitsVerificationException):
157
+ """Error raised during splits verifications."""
158
+
159
+ def __init__(self, *args, **kwargs):
160
+ DatasetsError.__init__(self, *args, **kwargs)
161
+
162
+
163
+ @deprecated("Use 'UnexpectedSplitsError' instead.")
164
+ class UnexpectedSplits(SplitsVerificationException):
165
+ """The expected splits of the downloaded file is missing.
166
+
167
+ <Deprecated version="2.20.0">
168
+
169
+ Use `UnexpectedSplitsError` instead.
170
+
171
+ </Deprecated>
172
+ """
173
+
174
+
175
+ class UnexpectedSplitsError(SplitsVerificationError, UnexpectedSplits):
176
+ """The expected splits of the downloaded file is missing."""
177
+
178
+
179
+ @deprecated("Use 'ExpectedMoreSplitsError' instead.")
180
+ class ExpectedMoreSplits(SplitsVerificationException):
181
+ """Some recorded splits are missing.
182
+
183
+ <Deprecated version="2.20.0">
184
+
185
+ Use `ExpectedMoreSplitsError` instead.
186
+
187
+ </Deprecated>
188
+ """
189
+
190
+
191
+ class ExpectedMoreSplitsError(SplitsVerificationError, ExpectedMoreSplits):
192
+ """Some recorded splits are missing."""
193
+
194
+
195
+ class NonMatchingSplitsSizesError(SplitsVerificationError):
196
+ """The splits sizes don't match the expected splits sizes."""
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/inspect.py ADDED
@@ -0,0 +1,582 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 The HuggingFace Datasets Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # Lint as: python3
16
+ """List and inspect datasets."""
17
+
18
+ import inspect
19
+ import os
20
+ import shutil
21
+ import warnings
22
+ from pathlib import Path, PurePath
23
+ from typing import Dict, List, Mapping, Optional, Sequence, Union
24
+
25
+ import huggingface_hub
26
+
27
+ from . import config
28
+ from .download.download_config import DownloadConfig
29
+ from .download.download_manager import DownloadMode
30
+ from .download.streaming_download_manager import StreamingDownloadManager
31
+ from .info import DatasetInfo
32
+ from .load import (
33
+ dataset_module_factory,
34
+ get_dataset_builder_class,
35
+ import_main_class,
36
+ load_dataset_builder,
37
+ metric_module_factory,
38
+ )
39
+ from .utils.deprecation_utils import deprecated
40
+ from .utils.file_utils import relative_to_absolute_path
41
+ from .utils.logging import get_logger
42
+ from .utils.version import Version
43
+
44
+
45
+ logger = get_logger(__name__)
46
+
47
+
48
class SplitsNotFoundError(ValueError):
    """Raised when the split names of a dataset cannot be determined."""
50
+
51
+
52
+ @deprecated("Use 'huggingface_hub.list_datasets' instead.")
53
+ def list_datasets(with_community_datasets=True, with_details=False):
54
+ """List all the datasets scripts available on the Hugging Face Hub.
55
+
56
+ Args:
57
+ with_community_datasets (`bool`, *optional*, defaults to `True`):
58
+ Include the community provided datasets.
59
+ with_details (`bool`, *optional*, defaults to `False`):
60
+ Return the full details on the datasets instead of only the short name.
61
+
62
+ Example:
63
+
64
+ ```py
65
+ >>> from datasets import list_datasets
66
+ >>> list_datasets()
67
+ ['acronym_identification',
68
+ 'ade_corpus_v2',
69
+ 'adversarial_qa',
70
+ 'aeslc',
71
+ 'afrikaans_ner_corpus',
72
+ 'ag_news',
73
+ ...
74
+ ]
75
+ ```
76
+ """
77
+ datasets = huggingface_hub.list_datasets(full=with_details)
78
+ if not with_community_datasets:
79
+ datasets = [dataset for dataset in datasets if "/" not in dataset.id]
80
+ if not with_details:
81
+ datasets = [dataset.id for dataset in datasets]
82
+ return list(datasets)
83
+
84
+
85
@deprecated(
    "Use 'evaluate.list_evaluation_modules' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate"
)
def list_metrics(with_community_metrics=True, with_details=False):
    """List all the metrics script available on the Hugging Face Hub.

    <Deprecated version="2.5.0">

    Use `evaluate.list_evaluation_modules` instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        with_community_metrics (:obj:`bool`, optional, default ``True``): Include the community provided metrics.
        with_details (:obj:`bool`, optional, default ``False``): Return the full details on the metrics instead of only the short name.

    Example:

    ```py
    >>> from datasets import list_metrics
    >>> list_metrics()
    ['accuracy',
     'bertscore',
     'bleu',
     'bleurt',
     'cer',
     'chrf',
     ...
    ]
    ```
    """
    metrics = huggingface_hub.list_metrics()
    if not with_community_metrics:
        # Canonical metrics have no "<namespace>/" prefix in their id.
        metrics = [metric for metric in metrics if "/" not in metric.id]
    if not with_details:
        metrics = [metric.id for metric in metrics]
    # Normalize to a plain list in every branch, for consistency with
    # `list_datasets` (previously the unfiltered hub result was returned as-is).
    return list(metrics)
122
+
123
+
124
+ @deprecated("Clone the dataset repository from the Hugging Face Hub instead.")
125
+ def inspect_dataset(path: str, local_path: str, download_config: Optional[DownloadConfig] = None, **download_kwargs):
126
+ """
127
+ Allow inspection/modification of a dataset script by copying on local drive at local_path.
128
+
129
+ Args:
130
+ path (`str`): Path to the dataset processing script with the dataset builder. Can be either:
131
+
132
+ - a local path to processing script or the directory containing the script (if the script has the same name
133
+ as the directory),
134
+ e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`.
135
+ - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`list_datasets`])
136
+ e.g. `'squad'`, `'glue'` or `'openai/webtext'`.
137
+ local_path (`str`):
138
+ Path to the local folder to copy the dataset script to.
139
+ download_config ([`DownloadConfig`], *optional*):
140
+ Specific download configuration parameters.
141
+ **download_kwargs (additional keyword arguments):
142
+ Optional arguments for [`DownloadConfig`] which will override
143
+ the attributes of `download_config` if supplied.
144
+ """
145
+ if download_config is None:
146
+ download_config = DownloadConfig(**download_kwargs)
147
+ if os.path.isfile(path):
148
+ path = str(Path(path).parent)
149
+ if os.path.isdir(path):
150
+ shutil.copytree(path, local_path, dirs_exist_ok=True)
151
+ else:
152
+ huggingface_hub.HfApi(endpoint=config.HF_ENDPOINT, token=download_config.token).snapshot_download(
153
+ repo_id=path, repo_type="dataset", local_dir=local_path, force_download=download_config.force_download
154
+ )
155
+ print(
156
+ f"The dataset {path} can be inspected at {local_path}. "
157
+ f'You can modify this loading script if it has one and use it with `datasets.load_dataset("{PurePath(local_path).as_posix()}")`.'
158
+ )
159
+
160
+
161
@deprecated(
    "Use 'evaluate.inspect_evaluation_module' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate"
)
def inspect_metric(path: str, local_path: str, download_config: Optional[DownloadConfig] = None, **download_kwargs):
    r"""
    Allow inspection/modification of a metric script by copying it on local drive at local_path.

    <Deprecated version="2.5.0">

    Use `evaluate.inspect_evaluation_module` instead, from the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate

    </Deprecated>

    Args:
        path (``str``): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. ``'./dataset/squad'`` or ``'./dataset/squad/squad.py'``
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with ``datasets.list_datasets()``)
                e.g. ``'squad'``, ``'glue'`` or ``'openai/webtext'``
        local_path (``str``): path to the local folder to copy the dataset script to.
        download_config (Optional ``datasets.DownloadConfig``): specific download configuration parameters.
        **download_kwargs (additional keyword arguments): optional attributes for DownloadConfig() which will override the attributes in download_config if supplied.
    """
    # Resolve the metric module, then import its main class to locate the module's
    # source directory on disk.
    metric_module = metric_module_factory(path, download_config=download_config, **download_kwargs)
    metric_cls = import_main_class(metric_module.module_path, dataset=False)
    module_source_path = inspect.getsourcefile(metric_cls)
    module_source_dirpath = os.path.dirname(module_source_path)
    # Recursively copy the module directory tree into `local_path`.
    for dirpath, dirnames, filenames in os.walk(module_source_dirpath):
        dst_dirpath = os.path.join(local_path, os.path.relpath(dirpath, module_source_dirpath))
        os.makedirs(dst_dirpath, exist_ok=True)
        # skipping hidden directories; prune the search
        # (in-place slice assignment makes os.walk skip these subtrees entirely)
        dirnames[:] = [dirname for dirname in dirnames if not dirname.startswith((".", "__"))]
        for filename in filenames:
            shutil.copy2(os.path.join(dirpath, filename), os.path.join(dst_dirpath, filename))
        # Mirror the source directory's metadata (permissions, timestamps) onto the copy.
        shutil.copystat(dirpath, dst_dirpath)
    local_path = relative_to_absolute_path(local_path)
    print(
        f"The processing scripts for metric {path} can be inspected at {local_path}. "
        f"The main class is in {module_source_dirpath}. "
        f'You can modify this processing scripts and use it with `datasets.load_metric("{PurePath(local_path).as_posix()}")`.'
    )
203
+
204
+
205
def get_dataset_infos(
    path: str,
    data_files: Optional[Union[Dict, List, str]] = None,
    download_config: Optional[DownloadConfig] = None,
    download_mode: Optional[Union[DownloadMode, str]] = None,
    revision: Optional[Union[str, Version]] = None,
    token: Optional[Union[bool, str]] = None,
    use_auth_token="deprecated",
    **config_kwargs,
):
    """Get the meta information about a dataset, returned as a dict mapping config name to DatasetInfoDict.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
              e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
              e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default it is set to the local version of the lib, with a fallback to the main branch.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (`str` or `bool`, *optional*):
            Deprecated in 2.14.0 in favor of `token`; will be removed in 3.0.0.
        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_infos
    >>> get_dataset_infos('rotten_tomatoes')
    {'default': DatasetInfo(...), ...}
    ```
    """
    # Backward-compat shim: map the deprecated `use_auth_token` onto `token`.
    if use_auth_token != "deprecated":
        warnings.warn(
            "'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n"
            "You can remove this warning by passing 'token=<use_auth_token>' instead.",
            FutureWarning,
        )
        token = use_auth_token

    config_names = get_dataset_config_names(
        path=path,
        revision=revision,
        download_config=download_config,
        download_mode=download_mode,
        data_files=data_files,
        token=token,
    )
    # Fetch the per-config DatasetInfo for every available configuration.
    infos = {}
    for config_name in config_names:
        infos[config_name] = get_dataset_config_info(
            path=path,
            config_name=config_name,
            data_files=data_files,
            download_config=download_config,
            download_mode=download_mode,
            revision=revision,
            token=token,
            **config_kwargs,
        )
    return infos
289
+
290
+
291
def get_dataset_config_names(
    path: str,
    revision: Optional[Union[str, Version]] = None,
    download_config: Optional[DownloadConfig] = None,
    download_mode: Optional[Union[DownloadMode, str]] = None,
    dynamic_modules_path: Optional[str] = None,
    data_files: Optional[Union[Dict, List, str]] = None,
    **download_kwargs,
):
    """Get the list of available config names for a particular dataset.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
              e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
              e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default it is set to the local version of the lib, with a fallback to the main branch.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        dynamic_modules_path (`str`, defaults to `~/.cache/huggingface/modules/datasets_modules`):
            Optional path to the directory in which the dynamic modules are saved. It must have been initialized with `init_dynamic_modules`.
            By default the datasets and metrics are stored inside the `datasets_modules` module.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Example:

    ```py
    >>> from datasets import get_dataset_config_names
    >>> get_dataset_config_names("glue")
    ['cola', 'sst2', 'mrpc', 'qqp', 'stsb', 'mnli', 'mnli_mismatched', 'mnli_matched', 'qnli', 'rte', 'wnli', 'ax']
    ```
    """
    dataset_module = dataset_module_factory(
        path,
        revision=revision,
        download_config=download_config,
        download_mode=download_mode,
        dynamic_modules_path=dynamic_modules_path,
        data_files=data_files,
        **download_kwargs,
    )
    builder_cls = get_dataset_builder_class(dataset_module, dataset_name=os.path.basename(path))
    config_names = list(builder_cls.builder_configs.keys())
    if config_names:
        return config_names
    # No declared configs: fall back to the single config name from the builder
    # kwargs, then the builder's default, then the literal "default".
    fallback = builder_cls.DEFAULT_CONFIG_NAME or "default"
    return [dataset_module.builder_kwargs.get("config_name", fallback)]
360
+
361
+
362
def get_dataset_default_config_name(
    path: str,
    revision: Optional[Union[str, Version]] = None,
    download_config: Optional[DownloadConfig] = None,
    download_mode: Optional[Union[DownloadMode, str]] = None,
    dynamic_modules_path: Optional[str] = None,
    data_files: Optional[Union[Dict, List, str]] = None,
    **download_kwargs,
) -> Optional[str]:
    """Get the default config name for a particular dataset.

    Can return None only if the dataset has multiple configurations and no default configuration.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
              e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
              e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        revision (`Union[str, datasets.Version]`, *optional*):
            If specified, the dataset module will be loaded from the datasets repository at this version.
            By default it is set to the local version of the lib, with a fallback to the main branch.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        dynamic_modules_path (`str`, defaults to `~/.cache/huggingface/modules/datasets_modules`):
            Optional path to the directory in which the dynamic modules are saved. It must have been initialized with `init_dynamic_modules`.
            By default the datasets and metrics are stored inside the `datasets_modules` module.
        data_files (`Union[Dict, List, str]`, *optional*):
            Defining the data_files of the dataset configuration.
        **download_kwargs (additional keyword arguments):
            Optional attributes for [`DownloadConfig`] which will override the attributes in `download_config` if supplied,
            for example `token`.

    Returns:
        Optional[str]: the default config name if there is one

    Example:

    ```py
    >>> from datasets import get_dataset_default_config_name
    >>> get_dataset_default_config_name("openbookqa")
    'main'
    ```
    """
    dataset_module = dataset_module_factory(
        path,
        revision=revision,
        download_config=download_config,
        download_mode=download_mode,
        dynamic_modules_path=dynamic_modules_path,
        data_files=data_files,
        **download_kwargs,
    )
    builder_cls = get_dataset_builder_class(dataset_module, dataset_name=os.path.basename(path))
    # An explicitly declared default wins over any inference from the config list.
    if builder_cls.DEFAULT_CONFIG_NAME:
        return builder_cls.DEFAULT_CONFIG_NAME
    config_names = list(builder_cls.builder_configs)
    if not config_names:
        return "default"
    # A single declared config is the implicit default; multiple configs
    # without a declared default means there is none.
    return config_names[0] if len(config_names) == 1 else None
427
+
428
+
429
def get_dataset_config_info(
    path: str,
    config_name: Optional[str] = None,
    data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]] = None,
    download_config: Optional[DownloadConfig] = None,
    download_mode: Optional[Union[DownloadMode, str]] = None,
    revision: Optional[Union[str, Version]] = None,
    token: Optional[Union[bool, str]] = None,
    use_auth_token="deprecated",
    **config_kwargs,
) -> DatasetInfo:
    """Get the meta information (DatasetInfo) about a dataset for a particular config

    Args:
        path (``str``): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                e.g. ``'./dataset/squad'`` or ``'./dataset/squad/squad.py'``
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with ``datasets.list_datasets()``)
                e.g. ``'squad'``, ``'glue'`` or ``'openai/webtext'``
        config_name (:obj:`str`, optional): Defining the name of the dataset configuration.
        data_files (:obj:`str` or :obj:`Sequence` or :obj:`Mapping`, optional): Path(s) to source data file(s).
        download_config (:class:`~download.DownloadConfig`, optional): Specific download configuration parameters.
        download_mode (:class:`DownloadMode` or :obj:`str`, default ``REUSE_DATASET_IF_EXISTS``): Download/generate mode.
        revision (:class:`~utils.Version` or :obj:`str`, optional): Version of the dataset script to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.

            <Deprecated version="2.14.0">

            `use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.

            </Deprecated>

        **config_kwargs (additional keyword arguments): optional attributes for builder class which will override the attributes if supplied.

    Raises:
        SplitsNotFoundError: if the split names cannot be derived from the dataset config.
    """
    # Backward-compat shim: map the deprecated `use_auth_token` onto `token`.
    if use_auth_token != "deprecated":
        warnings.warn(
            "'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n"
            "You can remove this warning by passing 'token=<use_auth_token>' instead.",
            FutureWarning,
        )
        token = use_auth_token

    builder = load_dataset_builder(
        path,
        name=config_name,
        data_files=data_files,
        download_config=download_config,
        download_mode=download_mode,
        revision=revision,
        token=token,
        **config_kwargs,
    )
    info = builder.info
    if info.splits is None:
        # Split info is not pre-recorded: derive split names by running the
        # builder's split generators with a streaming download manager.
        # NOTE(review): presumably this avoids downloading the full data -- confirm.
        download_config = download_config.copy() if download_config else DownloadConfig()
        if token is not None:
            download_config.token = token
        # Fail early with a clear error if the dataset requires a manual download.
        builder._check_manual_download(
            StreamingDownloadManager(base_path=builder.base_path, download_config=download_config)
        )
        try:
            info.splits = {
                split_generator.name: {"name": split_generator.name, "dataset_name": path}
                for split_generator in builder._split_generators(
                    StreamingDownloadManager(base_path=builder.base_path, download_config=download_config)
                )
            }
        except Exception as err:
            # Wrap any generator failure in a dedicated error so callers can catch it.
            raise SplitsNotFoundError("The split names could not be parsed from the dataset config.") from err
    return info
506
+
507
+
508
def get_dataset_split_names(
    path: str,
    config_name: Optional[str] = None,
    data_files: Optional[Union[str, Sequence[str], Mapping[str, Union[str, Sequence[str]]]]] = None,
    download_config: Optional[DownloadConfig] = None,
    download_mode: Optional[Union[DownloadMode, str]] = None,
    revision: Optional[Union[str, Version]] = None,
    token: Optional[Union[bool, str]] = None,
    use_auth_token="deprecated",
    **config_kwargs,
):
    """Return the names of the splits available for a given dataset config.

    Args:
        path (`str`): path to the dataset processing script with the dataset builder. Can be either:

            - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
              e.g. `'./dataset/squad'` or `'./dataset/squad/squad.py'`
            - a dataset identifier on the Hugging Face Hub (list all available datasets and ids with [`datasets.list_datasets`])
              e.g. `'squad'`, `'glue'` or `'openai/webtext'`
        config_name (`str`, *optional*):
            Defining the name of the dataset configuration.
        data_files (`str` or `Sequence` or `Mapping`, *optional*):
            Path(s) to source data file(s).
        download_config ([`DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`] or `str`, defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        revision ([`Version`] or `str`, *optional*):
            Version of the dataset script to load.
            As datasets have their own git repository on the Datasets Hub, the default version "main" corresponds to their "main" branch.
            You can specify a different version than the default "main" by using a commit SHA or a git tag of the dataset repository.
        token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.
        use_auth_token (`str` or `bool`, *optional*):
            Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If `True`, or not specified, will get token from `"~/.huggingface"`.

            <Deprecated version="2.14.0">

            `use_auth_token` was deprecated in favor of `token` in version 2.14.0 and will be removed in 3.0.0.

            </Deprecated>

        **config_kwargs (additional keyword arguments):
            Optional attributes for builder class which will override the attributes if supplied.

    Example:

    ```py
    >>> from datasets import get_dataset_split_names
    >>> get_dataset_split_names('rotten_tomatoes')
    ['train', 'validation', 'test']
    ```
    """
    # `"deprecated"` acts as the sentinel: any other value means the caller
    # explicitly passed the legacy parameter, so warn and forward it to `token`.
    if use_auth_token != "deprecated":
        warnings.warn(
            "'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.\n"
            "You can remove this warning by passing 'token=<use_auth_token>' instead.",
            FutureWarning,
        )
        token = use_auth_token

    # All the heavy lifting (resolving the builder and its split metadata) is
    # delegated to get_dataset_config_info; we only surface the split names.
    dataset_info = get_dataset_config_info(
        path,
        config_name=config_name,
        data_files=data_files,
        download_config=download_config,
        download_mode=download_mode,
        revision=revision,
        token=token,
        **config_kwargs,
    )
    return list(dataset_info.splits.keys())
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/keyhash.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # Lint as: python3
16
+
17
+ """
18
+ Hashing function for dataset keys using `hashlib.md5`
19
+
20
+ Requirements for the hash function:
21
+
22
+ - Provides a uniformly distributed hash from random space
23
+ - Adequately fast speed
24
+ - Working with multiple input types (in this case, `str`, `int` or `bytes`)
25
+ - Should be platform independent (generates same hash on different OS and systems)
26
+
27
+ The hashing function provides a unique 128-bit integer hash of the key provided.
28
+
29
+ The split name is being used here as the hash salt to avoid having same hashes
30
+ in different splits due to same keys
31
+ """
32
+
33
+ from typing import Union
34
+
35
+ from huggingface_hub.utils import insecure_hashlib
36
+
37
+
38
+ def _as_bytes(hash_data: Union[str, int, bytes]) -> bytes:
39
+ """
40
+ Returns the input hash_data in its bytes form
41
+
42
+ Args:
43
+ hash_data: the hash salt/key to be converted to bytes
44
+ """
45
+ if isinstance(hash_data, bytes):
46
+ # Data already in bytes, returns as it as
47
+ return hash_data
48
+ elif isinstance(hash_data, str):
49
+ # We keep the data as it as for it ot be later encoded to UTF-8
50
+ # However replace `\\` with `/` for Windows compatibility
51
+ hash_data = hash_data.replace("\\", "/")
52
+ elif isinstance(hash_data, int):
53
+ hash_data = str(hash_data)
54
+ else:
55
+ # If data is not of the required type, raise error
56
+ raise InvalidKeyError(hash_data)
57
+
58
+ return hash_data.encode("utf-8")
59
+
60
+
61
class InvalidKeyError(Exception):
    """Raised when a dataset key has an unsupported type (must be str, int or bytes)."""

    def __init__(self, hash_data):
        # The three message parts are kept as attributes so callers can
        # inspect them individually.
        self.prefix = "\nFAILURE TO GENERATE DATASET: Invalid key type detected"
        self.err_msg = f"\nFound Key {hash_data} of type {type(hash_data)}"
        self.suffix = "\nKeys should be either str, int or bytes type"
        super().__init__(self.prefix + self.err_msg + self.suffix)
69
+
70
+
71
class DuplicatedKeysError(Exception):
    """Raised when several generated examples share the same key.

    Args:
        key: the key that appeared more than once.
        duplicate_key_indices: positions (e.g. example indices) at which the key was generated.
        fix_msg: optional hint appended to the message telling the user how to fix the issue.
    """

    def __init__(self, key, duplicate_key_indices, fix_msg=""):
        self.key = key
        self.duplicate_key_indices = duplicate_key_indices
        self.fix_msg = fix_msg
        self.prefix = "Found multiple examples generated with the same key"
        # Fix: coerce indices to str before joining — the original passed them
        # straight to ', '.join(), which raises TypeError for int indices,
        # masking the actual duplicate-key diagnostic.
        indices = [str(index) for index in duplicate_key_indices]
        if len(indices) <= 20:
            self.err_msg = f"\nThe examples at index {', '.join(indices)} have the key {key}"
        else:
            # For long lists only show the first 20 indices and a count of the rest.
            self.err_msg = f"\nThe examples at index {', '.join(indices[:20])}... ({len(indices) - 20} more) have the key {key}"
        self.suffix = "\n" + fix_msg if fix_msg else ""
        super().__init__(f"{self.prefix}{self.err_msg}{self.suffix}")
85
+
86
+
87
class KeyHasher:
    """Provides deterministic 128-bit md5 hashes of dataset keys, salted per split."""

    def __init__(self, hash_salt: str):
        # Hash the salt once up front; each per-key hash then resumes from a
        # copy of this pre-salted state instead of re-hashing the salt.
        self._split_md5 = insecure_hashlib.md5(_as_bytes(hash_salt))

    def hash(self, key: Union[str, int, bytes]) -> int:
        """Return the 128-bit integer hash of `key`, salted with the split name.

        Args:
            key: the input key to be hashed (should be str, int or bytes)

        Returns: 128-bit int hash key"""
        hasher = self._split_md5.copy()
        hasher.update(_as_bytes(key))
        # hexdigest -> int yields a stable, platform-independent 128-bit integer.
        return int(hasher.hexdigest(), 16)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/load.py ADDED
The diff for this file is too large to render. See raw diff
 
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/naming.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # Lint as: python3
16
+ """Utilities for file names."""
17
+
18
+ import itertools
19
+ import os
20
+ import re
21
+
22
+
23
# CamelCase -> snake_case boundaries:
# an acronym followed by a capitalized word, e.g. "HTTPServer" -> "HTTP_Server"
_uppercase_uppercase_re = re.compile(r"([A-Z]+)([A-Z][a-z])")
# a lowercase letter or digit followed by an uppercase letter, e.g. "myName" -> "my_Name"
_lowercase_uppercase_re = re.compile(r"([a-z\d])([A-Z])")

# snake_case -> CamelCase tokenization: a single underscore (not adjacent to
# another) is a word separator; runs of 2+ underscores are captured as their
# own tokens so they survive the conversion.
_single_underscore_re = re.compile(r"(?<!_)_(?!_)")
_multiple_underscores_re = re.compile(r"(_{2,})")

# Valid split names: dot-separated word characters, e.g. "train" or "test.closed"
_split_re = r"^\w+(\.\w+)*$"

# Characters rejected in Windows paths (used for path validation).
INVALID_WINDOWS_CHARACTERS_IN_PATH = r"<>:/\|?*"
32
+
33
+
34
def camelcase_to_snakecase(name):
    """Convert camel-case string to snake-case."""
    # Insert underscores at acronym/word boundaries first, then at
    # lowercase->uppercase boundaries, and finally lowercase everything.
    with_acronym_boundaries = _uppercase_uppercase_re.sub(r"\1_\2", name)
    with_word_boundaries = _lowercase_uppercase_re.sub(r"\1_\2", with_acronym_boundaries)
    return with_word_boundaries.lower()
39
+
40
+
41
def snakecase_to_camelcase(name):
    """Convert snake-case string to camel-case string."""
    # Split on single underscores (word separators); runs of 2+ underscores
    # become standalone tokens via the capturing group and are preserved.
    words = _single_underscore_re.split(name)
    tokens = itertools.chain.from_iterable(_multiple_underscores_re.split(word) for word in words)
    # Capitalize every non-empty token and glue the result back together.
    return "".join(token.capitalize() for token in tokens if token != "")
46
+
47
+
48
def filename_prefix_for_name(name):
    """Return the snake-case file prefix for a dataset name.

    Raises:
        ValueError: if `name` contains path separators (i.e. is a path, not a bare name).
    """
    # A bare dataset name must equal its own basename; anything else is a path.
    base = os.path.basename(name)
    if base != name:
        raise ValueError(f"Should be a dataset name, not a path: {name}")
    return camelcase_to_snakecase(name)
52
+
53
+
54
def filename_prefix_for_split(name, split):
    """Return the file prefix `<snake_case_name>-<split>` for a dataset split.

    Args:
        name: dataset name (must be a bare name, not a path).
        split: split name; must match `_split_re` (dot-separated word characters).

    Raises:
        ValueError: if `name` is a path or `split` is not a valid split name.
    """
    if os.path.basename(name) != name:
        raise ValueError(f"Should be a dataset name, not a path: {name}")
    if not re.match(_split_re, split):
        # Fix: the original message had a stray extra quote after the pattern
        # ("'{_split_re}''"), producing a confusing doubled quote in the error.
        raise ValueError(f"Split name should match '{_split_re}' but got '{split}'.")
    return f"{filename_prefix_for_name(name)}-{split}"
60
+
61
+
62
def filepattern_for_dataset_split(dataset_name, split, data_dir, filetype_suffix=None):
    """Return a glob pattern matching every shard file of a dataset split."""
    prefix = filename_prefix_for_split(dataset_name, split)
    if filetype_suffix:
        prefix = f"{prefix}.{filetype_suffix}"
    # Trailing "*" matches the per-shard suffixes appended to the prefix.
    return os.path.join(data_dir, prefix) + "*"
68
+
69
+
70
def filenames_for_dataset_split(path, dataset_name, split, filetype_suffix=None, shard_lengths=None):
    """Return the list of shard filenames for a dataset split.

    With `shard_lengths`, one filename per shard is produced, shaped
    `<prefix>-XXXXX-of-NNNNN[.suffix]`; otherwise a single `<prefix>[.suffix]`.
    """
    suffix = f".{filetype_suffix}" if filetype_suffix else ""
    prefix = os.path.join(path, filename_prefix_for_split(dataset_name, split))

    if not shard_lengths:
        # Unsharded split: a single file.
        return [prefix + suffix]

    num_shards = len(shard_lengths)
    # Zero-padded "-XXXXX-of-NNNNN" shard numbering.
    return [f"{prefix}-{shard_id:05d}-of-{num_shards:05d}{suffix}" for shard_id in range(num_shards)]
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/streaming.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+ import inspect
3
+ from functools import wraps
4
+ from typing import TYPE_CHECKING, Optional
5
+
6
+ from .download.download_config import DownloadConfig
7
+ from .utils.file_utils import (
8
+ xbasename,
9
+ xdirname,
10
+ xet_parse,
11
+ xexists,
12
+ xgetsize,
13
+ xglob,
14
+ xgzip_open,
15
+ xisdir,
16
+ xisfile,
17
+ xjoin,
18
+ xlistdir,
19
+ xnumpy_load,
20
+ xopen,
21
+ xpandas_read_csv,
22
+ xpandas_read_excel,
23
+ xPath,
24
+ xpyarrow_parquet_read_table,
25
+ xrelpath,
26
+ xsio_loadmat,
27
+ xsplit,
28
+ xsplitext,
29
+ xwalk,
30
+ xxml_dom_minidom_parse,
31
+ )
32
+ from .utils.logging import get_logger
33
+ from .utils.patching import patch_submodule
34
+ from .utils.py_utils import get_imports, lock_importable_file
35
+
36
+
37
+ logger = get_logger(__name__)
38
+
39
+
40
+ if TYPE_CHECKING:
41
+ from .builder import DatasetBuilder
42
+
43
+
44
def extend_module_for_streaming(module_path, download_config: Optional[DownloadConfig] = None):
    """Extend the module to support streaming.

    We patch some functions in the module to use `fsspec` to support data streaming:
    - We use `fsspec.open` to open and read remote files. We patch the module function:
      - `open`
    - We use the "::" hop separator to join paths and navigate remote compressed/archive files. We patch the module
      functions:
      - `os.path.join`
      - `pathlib.Path.joinpath` and `pathlib.Path.__truediv__` (called when using the "/" operator)

    The patched functions are replaced with custom functions defined to work with the
    :class:`~download.streaming_download_manager.StreamingDownloadManager`.

    Args:
        module_path: Path (dotted module name) of the module to be extended.
        download_config: carries auth (token) and storage_options so the patched
            functions can reach remote files on different platforms/auth types.
    """

    module = importlib.import_module(module_path)

    # TODO(QL): always update the module to add subsequent new authentication without removing old ones
    if hasattr(module, "_patched_for_streaming") and module._patched_for_streaming:
        # Already patched: only refresh the auth/storage settings in place.
        # NOTE(review): assumes download_config is not None on re-patch — confirm callers always pass one.
        if isinstance(module._patched_for_streaming, DownloadConfig):
            module._patched_for_streaming.token = download_config.token
            module._patched_for_streaming.storage_options = download_config.storage_options
        return

    # Wrap a patched function so every call implicitly carries this download_config.
    def wrap_auth(function):
        @wraps(function)
        def wrapper(*args, **kwargs):
            return function(*args, download_config=download_config, **kwargs)

        wrapper._decorator_name_ = "wrap_auth"
        return wrapper

    # open files in a streaming fashion
    patch_submodule(module, "open", wrap_auth(xopen)).start()
    patch_submodule(module, "os.listdir", wrap_auth(xlistdir)).start()
    patch_submodule(module, "os.walk", wrap_auth(xwalk)).start()
    patch_submodule(module, "glob.glob", wrap_auth(xglob)).start()
    # allow to navigate in remote zip files
    patch_submodule(module, "os.path.join", xjoin).start()
    patch_submodule(module, "os.path.dirname", xdirname).start()
    patch_submodule(module, "os.path.basename", xbasename).start()
    patch_submodule(module, "os.path.relpath", xrelpath).start()
    patch_submodule(module, "os.path.split", xsplit).start()
    patch_submodule(module, "os.path.splitext", xsplitext).start()
    # allow checks on paths
    patch_submodule(module, "os.path.exists", wrap_auth(xexists)).start()
    patch_submodule(module, "os.path.isdir", wrap_auth(xisdir)).start()
    patch_submodule(module, "os.path.isfile", wrap_auth(xisfile)).start()
    patch_submodule(module, "os.path.getsize", wrap_auth(xgetsize)).start()
    patch_submodule(module, "pathlib.Path", xPath).start()
    # file readers
    patch_submodule(module, "gzip.open", wrap_auth(xgzip_open)).start()
    patch_submodule(module, "numpy.load", wrap_auth(xnumpy_load)).start()
    patch_submodule(module, "pandas.read_csv", wrap_auth(xpandas_read_csv), attrs=["__version__"]).start()
    patch_submodule(module, "pandas.read_excel", wrap_auth(xpandas_read_excel), attrs=["__version__"]).start()
    patch_submodule(module, "scipy.io.loadmat", wrap_auth(xsio_loadmat), attrs=["__version__"]).start()
    patch_submodule(module, "xml.etree.ElementTree.parse", wrap_auth(xet_parse)).start()
    patch_submodule(module, "xml.dom.minidom.parse", wrap_auth(xxml_dom_minidom_parse)).start()
    # pyarrow: do not patch pyarrow attribute in packaged modules
    if not module.__name__.startswith("datasets.packaged_modules."):
        patch_submodule(module, "pyarrow.parquet.read_table", wrap_auth(xpyarrow_parquet_read_table)).start()
    # Marker doubles as the stored config, so a later call can refresh auth (see top of function).
    module._patched_for_streaming = download_config
110
+
111
+
112
def extend_dataset_builder_for_streaming(builder: "DatasetBuilder"):
    """Extend the dataset builder module and the modules imported by it to support streaming.

    Patches the builder's own module, any "internal" imports of a script-based
    builder, and the modules of parent builder classes, using the builder's
    auth token and storage options.

    Args:
        builder (:class:`DatasetBuilder`): Dataset builder instance.
    """
    # this extends the open and os.path.join functions for data streaming
    download_config = DownloadConfig(storage_options=builder.storage_options, token=builder.token)
    extend_module_for_streaming(builder.__module__, download_config=download_config)
    # if needed, we also have to extend additional internal imports (like wmt14 -> wmt_utils)
    if not builder.__module__.startswith("datasets."):  # check that it's not a packaged builder like csv
        importable_file = inspect.getfile(builder.__class__)
        # Lock so concurrent processes don't race while inspecting/patching the script's imports.
        with lock_importable_file(importable_file):
            for imports in get_imports(importable_file):
                # `imports` tuples start with a kind tag; only "internal" ones live next to the script.
                if imports[0] == "internal":
                    internal_import_name = imports[1]
                    # Resolve the sibling module name relative to the builder's package.
                    internal_module_name = ".".join(builder.__module__.split(".")[:-1] + [internal_import_name])
                    extend_module_for_streaming(internal_module_name, download_config=download_config)

    # builders can inherit from other builders that might use streaming functionality
    # (for example, ImageFolder and AudioFolder inherit from FolderBuilder which implements examples generation)
    # but these parents builders are not patched automatically as they are not instantiated, so we patch them here
    from .builder import DatasetBuilder

    parent_builder_modules = [
        cls.__module__
        for cls in type(builder).__mro__[1:]  # make sure it's not the same module we've already patched
        if issubclass(cls, DatasetBuilder) and cls.__module__ != DatasetBuilder.__module__
    ]  # check it's not a standard builder from datasets.builder
    for module in parent_builder_modules:
        extend_module_for_streaming(module, download_config=download_config)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore-1.0.7.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore-1.0.7.dist-info/METADATA ADDED
@@ -0,0 +1,616 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.3
2
+ Name: httpcore
3
+ Version: 1.0.7
4
+ Summary: A minimal low-level HTTP client.
5
+ Project-URL: Documentation, https://www.encode.io/httpcore
6
+ Project-URL: Homepage, https://www.encode.io/httpcore/
7
+ Project-URL: Source, https://github.com/encode/httpcore
8
+ Author-email: Tom Christie <tom@tomchristie.com>
9
+ License: BSD-3-Clause
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Environment :: Web Environment
12
+ Classifier: Framework :: AsyncIO
13
+ Classifier: Framework :: Trio
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: BSD License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3 :: Only
19
+ Classifier: Programming Language :: Python :: 3.8
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Topic :: Internet :: WWW/HTTP
25
+ Requires-Python: >=3.8
26
+ Requires-Dist: certifi
27
+ Requires-Dist: h11<0.15,>=0.13
28
+ Provides-Extra: asyncio
29
+ Requires-Dist: anyio<5.0,>=4.0; extra == 'asyncio'
30
+ Provides-Extra: http2
31
+ Requires-Dist: h2<5,>=3; extra == 'http2'
32
+ Provides-Extra: socks
33
+ Requires-Dist: socksio==1.*; extra == 'socks'
34
+ Provides-Extra: trio
35
+ Requires-Dist: trio<1.0,>=0.22.0; extra == 'trio'
36
+ Description-Content-Type: text/markdown
37
+
38
+ # HTTP Core
39
+
40
+ [![Test Suite](https://github.com/encode/httpcore/workflows/Test%20Suite/badge.svg)](https://github.com/encode/httpcore/actions)
41
+ [![Package version](https://badge.fury.io/py/httpcore.svg)](https://pypi.org/project/httpcore/)
42
+
43
+ > *Do one thing, and do it well.*
44
+
45
+ The HTTP Core package provides a minimal low-level HTTP client, which does
46
+ one thing only. Sending HTTP requests.
47
+
48
+ It does not provide any high level model abstractions over the API,
49
+ does not handle redirects, multipart uploads, building authentication headers,
50
+ transparent HTTP caching, URL parsing, session cookie handling,
51
+ content or charset decoding, handling JSON, environment based configuration
52
+ defaults, or any of that Jazz.
53
+
54
+ Some things HTTP Core does do:
55
+
56
+ * Sending HTTP requests.
57
+ * Thread-safe / task-safe connection pooling.
58
+ * HTTP(S) proxy & SOCKS proxy support.
59
+ * Supports HTTP/1.1 and HTTP/2.
60
+ * Provides both sync and async interfaces.
61
+ * Async backend support for `asyncio` and `trio`.
62
+
63
+ ## Requirements
64
+
65
+ Python 3.8+
66
+
67
+ ## Installation
68
+
69
+ For HTTP/1.1 only support, install with:
70
+
71
+ ```shell
72
+ $ pip install httpcore
73
+ ```
74
+
75
+ There are also a number of optional extras available...
76
+
77
+ ```shell
78
+ $ pip install httpcore['asyncio,trio,http2,socks']
79
+ ```
80
+
81
+ ## Sending requests
82
+
83
+ Send an HTTP request:
84
+
85
+ ```python
86
+ import httpcore
87
+
88
+ response = httpcore.request("GET", "https://www.example.com/")
89
+
90
+ print(response)
91
+ # <Response [200]>
92
+ print(response.status)
93
+ # 200
94
+ print(response.headers)
95
+ # [(b'Accept-Ranges', b'bytes'), (b'Age', b'557328'), (b'Cache-Control', b'max-age=604800'), ...]
96
+ print(response.content)
97
+ # b'<!doctype html>\n<html>\n<head>\n<title>Example Domain</title>\n\n<meta charset="utf-8"/>\n ...'
98
+ ```
99
+
100
+ The top-level `httpcore.request()` function is provided for convenience. In practice whenever you're working with `httpcore` you'll want to use the connection pooling functionality that it provides.
101
+
102
+ ```python
103
+ import httpcore
104
+
105
+ http = httpcore.ConnectionPool()
106
+ response = http.request("GET", "https://www.example.com/")
107
+ ```
108
+
109
+ Once you're ready to get going, [head over to the documentation](https://www.encode.io/httpcore/).
110
+
111
+ ## Motivation
112
+
113
+ You *probably* don't want to be using HTTP Core directly. It might make sense if
114
+ you're writing something like a proxy service in Python, and you just want
115
+ something at the lowest possible level, but more typically you'll want to use
116
+ a higher level client library, such as `httpx`.
117
+
118
+ The motivation for `httpcore` is:
119
+
120
+ * To provide a reusable low-level client library, that other packages can then build on top of.
121
+ * To provide a *really clear interface split* between the networking code and client logic,
122
+ so that each is easier to understand and reason about in isolation.
123
+
124
+ ## Dependencies
125
+
126
+ The `httpcore` package has the following dependencies...
127
+
128
+ * `h11`
129
+ * `certifi`
130
+
131
+ And the following optional extras...
132
+
133
+ * `anyio` - Required by `pip install httpcore['asyncio']`.
134
+ * `trio` - Required by `pip install httpcore['trio']`.
135
+ * `h2` - Required by `pip install httpcore['http2']`.
136
+ * `socksio` - Required by `pip install httpcore['socks']`.
137
+
138
+ ## Versioning
139
+
140
+ We use [SEMVER for our versioning policy](https://semver.org/).
141
+
142
+ For changes between package versions please see our [project changelog](CHANGELOG.md).
143
+
144
+ We recommend pinning your requirements either the most current major version, or a more specific version range:
145
+
146
+ ```python
147
+ pip install 'httpcore==1.*'
148
+ ```
149
+ # Changelog
150
+
151
+ All notable changes to this project will be documented in this file.
152
+
153
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
154
+
155
+ ## Version 1.0.7 (November 15th, 2024)
156
+
157
+ - Support `proxy=…` configuration on `ConnectionPool()`. (#974)
158
+
159
+ ## Version 1.0.6 (October 1st, 2024)
160
+
161
+ - Relax `trio` dependency pinning. (#956)
162
+ - Handle `trio` raising `NotImplementedError` on unsupported platforms. (#955)
163
+ - Handle mapping `ssl.SSLError` to `httpcore.ConnectError`. (#918)
164
+
165
+ ## 1.0.5 (March 27th, 2024)
166
+
167
+ - Handle `EndOfStream` exception for anyio backend. (#899)
168
+ - Allow trio `0.25.*` series in package dependancies. (#903)
169
+
170
+ ## 1.0.4 (February 21st, 2024)
171
+
172
+ - Add `target` request extension. (#888)
173
+ - Fix support for connection `Upgrade` and `CONNECT` when some data in the stream has been read. (#882)
174
+
175
+ ## 1.0.3 (February 13th, 2024)
176
+
177
+ - Fix support for async cancellations. (#880)
178
+ - Fix trace extension when used with socks proxy. (#849)
179
+ - Fix SSL context for connections using the "wss" scheme (#869)
180
+
181
+ ## 1.0.2 (November 10th, 2023)
182
+
183
+ - Fix `float("inf")` timeouts in `Event.wait` function. (#846)
184
+
185
+ ## 1.0.1 (November 3rd, 2023)
186
+
187
+ - Fix pool timeout to account for the total time spent retrying. (#823)
188
+ - Raise a neater RuntimeError when the correct async deps are not installed. (#826)
189
+ - Add support for synchronous TLS-in-TLS streams. (#840)
190
+
191
+ ## 1.0.0 (October 6th, 2023)
192
+
193
+ From version 1.0 our async support is now optional, as the package has minimal dependencies by default.
194
+
195
+ For async support use either `pip install 'httpcore[asyncio]'` or `pip install 'httpcore[trio]'`.
196
+
197
+ The project versioning policy is now explicitly governed by SEMVER. See https://semver.org/.
198
+
199
+ - Async support becomes fully optional. (#809)
200
+ - Add support for Python 3.12. (#807)
201
+
202
+ ## 0.18.0 (September 8th, 2023)
203
+
204
+ - Add support for HTTPS proxies. (#745, #786)
205
+ - Drop Python 3.7 support. (#727)
206
+ - Handle `sni_hostname` extension with SOCKS proxy. (#774)
207
+ - Handle HTTP/1.1 half-closed connections gracefully. (#641)
208
+ - Change the type of `Extensions` from `Mapping[Str, Any]` to `MutableMapping[Str, Any]`. (#762)
209
+
210
+ ## 0.17.3 (July 5th, 2023)
211
+
212
+ - Support async cancellations, ensuring that the connection pool is left in a clean state when cancellations occur. (#726)
213
+ - The networking backend interface has [been added to the public API](https://www.encode.io/httpcore/network-backends). Some classes which were previously private implementation detail are now part of the top-level public API. (#699)
214
+ - Graceful handling of HTTP/2 GoAway frames, with requests being transparently retried on a new connection. (#730)
215
+ - Add exceptions when a synchronous `trace callback` is passed to an asynchronous request or an asynchronous `trace callback` is passed to a synchronous request. (#717)
216
+ - Drop Python 3.7 support. (#727)
217
+
218
+ ## 0.17.2 (May 23rd, 2023)
219
+
220
+ - Add `socket_options` argument to `ConnectionPool` and `HTTPProxy` classes. (#668)
221
+ - Improve logging with per-module logger names. (#690)
222
+ - Add `sni_hostname` request extension. (#696)
223
+ - Resolve race condition during import of `anyio` package. (#692)
224
+ - Enable TCP_NODELAY for all synchronous sockets. (#651)
225
+
226
+ ## 0.17.1 (May 17th, 2023)
227
+
228
+ - If 'retries' is set, then allow retries if an SSL handshake error occurs. (#669)
229
+ - Improve correctness of tracebacks on network exceptions, by raising properly chained exceptions. (#678)
230
+ - Prevent connection-hanging behaviour when HTTP/2 connections are closed by a server-sent 'GoAway' frame. (#679)
231
+ - Fix edge-case exception when removing requests from the connection pool. (#680)
232
+ - Fix pool timeout edge-case. (#688)
233
+
234
+ ## 0.17.0 (March 16th, 2023)
235
+
236
+ - Add DEBUG level logging. (#648)
237
+ - Respect HTTP/2 max concurrent streams when settings updates are sent by server. (#652)
238
+ - Increase the allowable HTTP header size to 100kB. (#647)
239
+ - Add `retries` option to SOCKS proxy classes. (#643)
240
+
241
+ ## 0.16.3 (December 20th, 2022)
242
+
243
+ - Allow `ws` and `wss` schemes. Allows us to properly support websocket upgrade connections. (#625)
244
+ - Forwarding HTTP proxies use a connection-per-remote-host. Required by some proxy implementations. (#637)
245
+ - Don't raise `RuntimeError` when closing a connection pool with active connections. Removes some error cases when cancellations are used. (#631)
246
+ - Lazy import `anyio`, so that it's no longer a hard dependency, and isn't imported if unused. (#639)
247
+
248
+ ## 0.16.2 (November 25th, 2022)
249
+
250
+ - Revert 'Fix async cancellation behaviour', which introduced race conditions. (#627)
251
+ - Raise `RuntimeError` if attempting to use UNIX domain sockets on Windows. (#619)
252
+
253
+ ## 0.16.1 (November 17th, 2022)
254
+
255
+ - Fix HTTP/1.1 interim informational responses, such as "100 Continue". (#605)
256
+
257
+ ## 0.16.0 (October 11th, 2022)
258
+
259
+ - Support HTTP/1.1 informational responses. (#581)
260
+ - Fix async cancellation behaviour. (#580)
261
+ - Support `h11` 0.14. (#579)
262
+
263
+ ## 0.15.0 (May 17th, 2022)
264
+
265
+ - Drop Python 3.6 support (#535)
266
+ - Ensure HTTP proxy CONNECT requests include `timeout` configuration. (#506)
267
+ - Switch to explicit `typing.Optional` for type hints. (#513)
268
+ - For `trio` map OSError exceptions to `ConnectError`. (#543)
269
+
270
+ ## 0.14.7 (February 4th, 2022)
271
+
272
+ - Requests which raise a PoolTimeout need to be removed from the pool queue. (#502)
273
+ - Fix AttributeError that happened when Socks5Connection were terminated. (#501)
274
+
275
+ ## 0.14.6 (February 1st, 2022)
276
+
277
+ - Fix SOCKS support for `http://` URLs. (#492)
278
+ - Resolve race condition around exceptions during streaming a response. (#491)
279
+
280
+ ## 0.14.5 (January 18th, 2022)
281
+
282
+ - SOCKS proxy support. (#478)
283
+ - Add proxy_auth argument to HTTPProxy. (#481)
284
+ - Improve error message on 'RemoteProtocolError' exception when server disconnects without sending a response. (#479)
285
+
286
+ ## 0.14.4 (January 5th, 2022)
287
+
288
+ - Support HTTP/2 on HTTPS tunnelling proxies. (#468)
289
+ - Fix proxy headers missing on HTTP forwarding. (#456)
290
+ - Only instantiate SSL context if required. (#457)
291
+ - More robust HTTP/2 handling. (#253, #439, #440, #441)
292
+
293
+ ## 0.14.3 (November 17th, 2021)
294
+
295
+ - Fix race condition when removing closed connections from the pool. (#437)
296
+
297
+ ## 0.14.2 (November 16th, 2021)
298
+
299
+ - Failed connections no longer remain in the pool. (Pull #433)
300
+
301
+ ## 0.14.1 (November 12th, 2021)
302
+
303
+ - `max_connections` becomes optional. (Pull #429)
304
+ - `certifi` is now included in the install dependencies. (Pull #428)
305
+ - `h2` is now strictly optional. (Pull #428)
306
+
307
+ ## 0.14.0 (November 11th, 2021)
308
+
309
+ The 0.14 release is a complete reworking of `httpcore`, comprehensively addressing some underlying issues in the connection pooling, as well as substantially redesigning the API to be more user friendly.
310
+
311
+ Some of the lower-level API design also makes the components more easily testable in isolation, and the package now has 100% test coverage.
312
+
313
+ See [discussion #419](https://github.com/encode/httpcore/discussions/419) for a little more background.
314
+
315
+ There's some other neat bits in there too, such as the "trace" extension, which gives a hook into inspecting the internal events that occur during the request/response cycle. This extension is needed for the HTTPX cli, in order to...
316
+
317
+ * Log the point at which the connection is established, and the IP/port on which it is made.
318
+ * Determine if the outgoing request should log as HTTP/1.1 or HTTP/2, rather than having to assume it's HTTP/2 if the --http2 flag was passed. (Which may not actually be true.)
319
+ * Log SSL version info / certificate info.
320
+
321
+ Note that `curio` support is not currently available in 0.14.0. If you're using `httpcore` with `curio` please get in touch, so we can assess if we ought to prioritize it as a feature or not.
322
+
323
+ ## 0.13.7 (September 13th, 2021)
324
+
325
+ - Fix broken error messaging when URL scheme is missing, or a non HTTP(S) scheme is used. (Pull #403)
326
+
327
+ ## 0.13.6 (June 15th, 2021)
328
+
329
+ ### Fixed
330
+
331
+ - Close sockets when read or write timeouts occur. (Pull #365)
332
+
333
+ ## 0.13.5 (June 14th, 2021)
334
+
335
+ ### Fixed
336
+
337
+ - Resolved niggles with AnyIO EOF behaviours. (Pull #358, #362)
338
+
339
+ ## 0.13.4 (June 9th, 2021)
340
+
341
+ ### Added
342
+
343
+ - Improved error messaging when URL scheme is missing, or a non HTTP(S) scheme is used. (Pull #354)
344
+
345
+ ### Fixed
346
+
347
+ - Switched to `anyio` as the default backend implementation when running with `asyncio`. Resolves some awkward [TLS timeout issues](https://github.com/encode/httpx/discussions/1511).
348
+
349
+ ## 0.13.3 (May 6th, 2021)
350
+
351
+ ### Added
352
+
353
+ - Support HTTP/2 prior knowledge, using `httpcore.SyncConnectionPool(http1=False)`. (Pull #333)
354
+
355
+ ### Fixed
356
+
357
+ - Handle cases where environment does not provide `select.poll` support. (Pull #331)
358
+
359
+ ## 0.13.2 (April 29th, 2021)
360
+
361
+ ### Added
362
+
363
+ - Improve error message for specific case of `RemoteProtocolError` where server disconnects without sending a response. (Pull #313)
364
+
365
+ ## 0.13.1 (April 28th, 2021)
366
+
367
+ ### Fixed
368
+
369
+ - More resilient testing for closed connections. (Pull #311)
370
+ - Don't raise exceptions on ungraceful connection closes. (Pull #310)
371
+
372
+ ## 0.13.0 (April 21st, 2021)
373
+
374
+ The 0.13 release updates the core API in order to match the HTTPX Transport API,
375
+ introduced in HTTPX 0.18 onwards.
376
+
377
+ An example of making requests with the new interface is:
378
+
379
+ ```python
380
+ with httpcore.SyncConnectionPool() as http:
381
+ status_code, headers, stream, extensions = http.handle_request(
382
+ method=b'GET',
383
+ url=(b'https', b'example.org', 443, b'/'),
384
+         headers=[(b'host', b'example.org'), (b'user-agent', b'httpcore')],
385
+ stream=httpcore.ByteStream(b''),
386
+ extensions={}
387
+ )
388
+ body = stream.read()
389
+ print(status_code, body)
390
+ ```
391
+
392
+ ### Changed
393
+
394
+ - The `.request()` method is now `handle_request()`. (Pull #296)
395
+ - The `.arequest()` method is now `.handle_async_request()`. (Pull #296)
396
+ - The `headers` argument is no longer optional. (Pull #296)
397
+ - The `stream` argument is no longer optional. (Pull #296)
398
+ - The `ext` argument is now named `extensions`, and is no longer optional. (Pull #296)
399
+ - The `"reason"` extension keyword is now named `"reason_phrase"`. (Pull #296)
400
+ - The `"reason_phrase"` and `"http_version"` extensions now use byte strings for their values. (Pull #296)
401
+ - The `httpcore.PlainByteStream()` class becomes `httpcore.ByteStream()`. (Pull #296)
402
+
403
+ ### Added
404
+
405
+ - Streams now support a `.read()` interface. (Pull #296)
406
+
407
+ ### Fixed
408
+
409
+ - Task cancellation no longer leaks connections from the connection pool. (Pull #305)
410
+
411
+ ## 0.12.3 (December 7th, 2020)
412
+
413
+ ### Fixed
414
+
415
+ - Abort SSL connections on close rather than waiting for remote EOF when using `asyncio`. (Pull #167)
416
+ - Fix exception raised in case of connect timeouts when using the `anyio` backend. (Pull #236)
417
+ - Fix `Host` header precedence for `:authority` in HTTP/2. (Pull #241, #243)
418
+ - Handle extra edge case when detecting for socket readability when using `asyncio`. (Pull #242, #244)
419
+ - Fix `asyncio` SSL warning when using proxy tunneling. (Pull #249)
420
+
421
+ ## 0.12.2 (November 20th, 2020)
422
+
423
+ ### Fixed
424
+
425
+ - Properly wrap connect errors on the asyncio backend. (Pull #235)
426
+ - Fix `ImportError` occurring on Python 3.9 when using the HTTP/1.1 sync client in a multithreaded context. (Pull #237)
427
+
428
+ ## 0.12.1 (November 7th, 2020)
429
+
430
+ ### Added
431
+
432
+ - Add connect retries. (Pull #221)
433
+
434
+ ### Fixed
435
+
436
+ - Tweak detection of dropped connections, resolving an issue with open files limits on Linux. (Pull #185)
437
+ - Avoid leaking connections when establishing an HTTP tunnel to a proxy has failed. (Pull #223)
438
+ - Properly wrap OS errors when using `trio`. (Pull #225)
439
+
440
+ ## 0.12.0 (October 6th, 2020)
441
+
442
+ ### Changed
443
+
444
+ - HTTP header casing is now preserved, rather than always sent in lowercase. (#216 and python-hyper/h11#104)
445
+
446
+ ### Added
447
+
448
+ - Add Python 3.9 to officially supported versions.
449
+
450
+ ### Fixed
451
+
452
+ - Gracefully handle a stdlib asyncio bug when a connection is closed while it is in a paused-for-reading state. (#201)
453
+
454
+ ## 0.11.1 (September 28th, 2020)
455
+
456
+ ### Fixed
457
+
458
+ - Add await to async semaphore release() coroutine (#197)
459
+ - Drop incorrect curio classifier (#192)
460
+
461
+ ## 0.11.0 (September 22nd, 2020)
462
+
463
+ The Transport API with 0.11.0 has a couple of significant changes.
464
+
465
+ Firstly we've changed the request interface in order to allow extensions, which will later enable us to support features
466
+ such as trailing headers, HTTP/2 server push, and CONNECT/Upgrade connections.
467
+
468
+ The interface changes from:
469
+
470
+ ```python
471
+ def request(method, url, headers, stream, timeout):
472
+ return (http_version, status_code, reason, headers, stream)
473
+ ```
474
+
475
+ To instead include an optional dictionary of extensions on the request and response:
476
+
477
+ ```python
478
+ def request(method, url, headers, stream, ext):
479
+ return (status_code, headers, stream, ext)
480
+ ```
481
+
482
+ Having an open-ended extensions point will allow us to add later support for various optional features, that wouldn't otherwise be supported without these API changes.
483
+
484
+ In particular:
485
+
486
+ * Trailing headers support.
487
+ * HTTP/2 Server Push
488
+ * sendfile.
489
+ * Exposing raw connection on CONNECT, Upgrade, HTTP/2 bi-di streaming.
490
+ * Exposing debug information out of the API, including template name, template context.
491
+
492
+ Currently extensions are limited to:
493
+
494
+ * request: `timeout` - Optional. Timeout dictionary.
495
+ * response: `http_version` - Optional. Include the HTTP version used on the response.
496
+ * response: `reason` - Optional. Include the reason phrase used on the response. Only valid with HTTP/1.*.
497
+
498
+ See https://github.com/encode/httpx/issues/1274#issuecomment-694884553 for the history behind this.
499
+
500
+ Secondly, the async version of `request` is now namespaced as `arequest`.
501
+
502
+ This allows concrete transports to support both sync and async implementations on the same class.
503
+
504
+ ### Added
505
+
506
+ - Add curio support. (Pull #168)
507
+ - Add anyio support, with `backend="anyio"`. (Pull #169)
508
+
509
+ ### Changed
510
+
511
+ - Update the Transport API to use 'ext' for optional extensions. (Pull #190)
512
+ - Update the Transport API to use `.request` and `.arequest` so implementations can support both sync and async. (Pull #189)
513
+
514
+ ## 0.10.2 (August 20th, 2020)
515
+
516
+ ### Added
517
+
518
+ - Added Unix Domain Socket support. (Pull #139)
519
+
520
+ ### Fixed
521
+
522
+ - Always include the port on proxy CONNECT requests. (Pull #154)
523
+ - Fix `max_keepalive_connections` configuration. (Pull #153)
524
+ - Fixes behaviour in HTTP/1.1 where server disconnects can be used to signal the end of the response body. (Pull #164)
525
+
526
+ ## 0.10.1 (August 7th, 2020)
527
+
528
+ - Include `max_keepalive_connections` on `AsyncHTTPProxy`/`SyncHTTPProxy` classes.
529
+
530
+ ## 0.10.0 (August 7th, 2020)
531
+
532
+ The most notable change in the 0.10.0 release is that HTTP/2 support is now fully optional.
533
+
534
+ Use either `pip install httpcore` for HTTP/1.1 support only, or `pip install httpcore[http2]` for HTTP/1.1 and HTTP/2 support.
535
+
536
+ ### Added
537
+
538
+ - HTTP/2 support becomes optional. (Pull #121, #130)
539
+ - Add `local_address=...` support. (Pull #100, #134)
540
+ - Add `PlainByteStream`, `IteratorByteStream`, `AsyncIteratorByteStream`. The `AsyncByteSteam` and `SyncByteStream` classes are now pure interface classes. (#133)
541
+ - Add `LocalProtocolError`, `RemoteProtocolError` exceptions. (Pull #129)
542
+ - Add `UnsupportedProtocol` exception. (Pull #128)
543
+ - Add `.get_connection_info()` method. (Pull #102, #137)
544
+ - Add better TRACE logs. (Pull #101)
545
+
546
+ ### Changed
547
+
548
+ - `max_keepalive` is deprecated in favour of `max_keepalive_connections`. (Pull #140)
549
+
550
+ ### Fixed
551
+
552
+ - Improve handling of server disconnects. (Pull #112)
553
+
554
+ ## 0.9.1 (May 27th, 2020)
555
+
556
+ ### Fixed
557
+
558
+ - Proper host resolution for sync case, including IPv6 support. (Pull #97)
559
+ - Close outstanding connections when connection pool is closed. (Pull #98)
560
+
561
+ ## 0.9.0 (May 21st, 2020)
562
+
563
+ ### Changed
564
+
565
+ - URL port becomes an `Optional[int]` instead of `int`. (Pull #92)
566
+
567
+ ### Fixed
568
+
569
+ - Honor HTTP/2 max concurrent streams settings. (Pull #89, #90)
570
+ - Remove incorrect debug log. (Pull #83)
571
+
572
+ ## 0.8.4 (May 11th, 2020)
573
+
574
+ ### Added
575
+
576
+ - Logging via HTTPCORE_LOG_LEVEL and HTTPX_LOG_LEVEL environment variables
577
+ and TRACE level logging. (Pull #79)
578
+
579
+ ### Fixed
580
+
581
+ - Reuse of connections on HTTP/2 in close concurrency situations. (Pull #81)
582
+
583
+ ## 0.8.3 (May 6th, 2020)
584
+
585
+ ### Fixed
586
+
587
+ - Include `Host` and `Accept` headers on proxy "CONNECT" requests.
588
+ - De-duplicate any headers also contained in proxy_headers.
589
+ - HTTP/2 flag not being passed down to proxy connections.
590
+
591
+ ## 0.8.2 (May 3rd, 2020)
592
+
593
+ ### Fixed
594
+
595
+ - Fix connections using proxy forwarding requests not being added to the
596
+ connection pool properly. (Pull #70)
597
+
598
+ ## 0.8.1 (April 30th, 2020)
599
+
600
+ ### Changed
601
+
602
+ - Allow inheritance of both `httpcore.AsyncByteStream`, `httpcore.SyncByteStream` without type conflicts.
603
+
604
+ ## 0.8.0 (April 30th, 2020)
605
+
606
+ ### Fixed
607
+
608
+ - Fixed tunnel proxy support.
609
+
610
+ ### Added
611
+
612
+ - New `TimeoutException` base class.
613
+
614
+ ## 0.7.0 (March 5th, 2020)
615
+
616
+ - First integration with HTTPX.
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore-1.0.7.dist-info/RECORD ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ httpcore-1.0.7.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ httpcore-1.0.7.dist-info/METADATA,sha256=ATe1rdfnyvJCveGq1xl8q7B27Suta1I2xVcfYU-my4M,21265
3
+ httpcore-1.0.7.dist-info/RECORD,,
4
+ httpcore-1.0.7.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
5
+ httpcore-1.0.7.dist-info/licenses/LICENSE.md,sha256=_ctZFUx0y6uhahEkL3dAvqnyPW_rVUeRfYxflKgDkqU,1518
6
+ httpcore/__init__.py,sha256=LrhuDP3kqwQW-464qRK_Q7B72Zp0LklpkEqbqUHAh2E,3357
7
+ httpcore/__pycache__/__init__.cpython-312.pyc,,
8
+ httpcore/__pycache__/_api.cpython-312.pyc,,
9
+ httpcore/__pycache__/_exceptions.cpython-312.pyc,,
10
+ httpcore/__pycache__/_models.cpython-312.pyc,,
11
+ httpcore/__pycache__/_ssl.cpython-312.pyc,,
12
+ httpcore/__pycache__/_synchronization.cpython-312.pyc,,
13
+ httpcore/__pycache__/_trace.cpython-312.pyc,,
14
+ httpcore/__pycache__/_utils.cpython-312.pyc,,
15
+ httpcore/_api.py,sha256=unZmeDschBWCGCPCwkS3Wot9euK6bg_kKxLtGTxw214,3146
16
+ httpcore/_async/__init__.py,sha256=EWdl2v4thnAHzJpqjU4h2a8DUiGAvNiWrkii9pfhTf0,1221
17
+ httpcore/_async/__pycache__/__init__.cpython-312.pyc,,
18
+ httpcore/_async/__pycache__/connection.cpython-312.pyc,,
19
+ httpcore/_async/__pycache__/connection_pool.cpython-312.pyc,,
20
+ httpcore/_async/__pycache__/http11.cpython-312.pyc,,
21
+ httpcore/_async/__pycache__/http2.cpython-312.pyc,,
22
+ httpcore/_async/__pycache__/http_proxy.cpython-312.pyc,,
23
+ httpcore/_async/__pycache__/interfaces.cpython-312.pyc,,
24
+ httpcore/_async/__pycache__/socks_proxy.cpython-312.pyc,,
25
+ httpcore/_async/connection.py,sha256=6OcPXqMEfc0BU38_-iHUNDd1vKSTc2UVT09XqNb_BOk,8449
26
+ httpcore/_async/connection_pool.py,sha256=DOIQ2s2ZCf9qfwxhzMprTPLqCL8OxGXiKF6qRHxvVyY,17307
27
+ httpcore/_async/http11.py,sha256=-qM9bV7PjSQF5vxs37-eUXOIFwbIjPcZbNliuX9TtBw,13880
28
+ httpcore/_async/http2.py,sha256=2mPEUDu8jwx99MVDhDKBu1e8ajCVEkBOu1jUQLk0KR8,23648
29
+ httpcore/_async/http_proxy.py,sha256=2zVkrlv-Ds-rWGaqaXlrhEJiAQFPo23BT3Gq_sWoBXU,14701
30
+ httpcore/_async/interfaces.py,sha256=jTiaWL83pgpGC9ziv90ZfwaKNMmHwmOalzaKiuTxATo,4455
31
+ httpcore/_async/socks_proxy.py,sha256=lLKgLlggPfhFlqi0ODeBkOWvt9CghBBUyqsnsU1tx6Q,13841
32
+ httpcore/_backends/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ httpcore/_backends/__pycache__/__init__.cpython-312.pyc,,
34
+ httpcore/_backends/__pycache__/anyio.cpython-312.pyc,,
35
+ httpcore/_backends/__pycache__/auto.cpython-312.pyc,,
36
+ httpcore/_backends/__pycache__/base.cpython-312.pyc,,
37
+ httpcore/_backends/__pycache__/mock.cpython-312.pyc,,
38
+ httpcore/_backends/__pycache__/sync.cpython-312.pyc,,
39
+ httpcore/_backends/__pycache__/trio.cpython-312.pyc,,
40
+ httpcore/_backends/anyio.py,sha256=x8PgEhXRC8bVqsdzk_YJx8Y6d9Tub06CuUSwnbmtqoY,5252
41
+ httpcore/_backends/auto.py,sha256=zO136PKZmsaTDK-HRk84eA-MUg8_2wJf4NvmK432Aio,1662
42
+ httpcore/_backends/base.py,sha256=aShgRdZnMmRhFWHetjumlM73f8Kz1YOAyCUP_4kHslA,3042
43
+ httpcore/_backends/mock.py,sha256=er9T436uSe7NLrfiLa4x6Nuqg5ivQ693CxWYCWsgbH4,4077
44
+ httpcore/_backends/sync.py,sha256=bhE4d9iK9Umxdsdsgm2EfKnXaBms2WggGYU-7jmUujU,7977
45
+ httpcore/_backends/trio.py,sha256=LHu4_Mr5MswQmmT3yE4oLgf9b_JJfeVS4BjDxeJc7Ro,5996
46
+ httpcore/_exceptions.py,sha256=looCKga3_YVYu3s-d3L9RMPRJyhsY7fiuuGxvkOD0c0,1184
47
+ httpcore/_models.py,sha256=IO2CcXcdpovRcLTdGFGB6RyBZdEm2h_TOmoCc4rEKho,17623
48
+ httpcore/_ssl.py,sha256=srqmSNU4iOUvWF-SrJvb8G_YEbHFELOXQOwdDIBTS9c,187
49
+ httpcore/_sync/__init__.py,sha256=JBDIgXt5la1LCJ1sLQeKhjKFpLnpNr8Svs6z2ni3fgg,1141
50
+ httpcore/_sync/__pycache__/__init__.cpython-312.pyc,,
51
+ httpcore/_sync/__pycache__/connection.cpython-312.pyc,,
52
+ httpcore/_sync/__pycache__/connection_pool.cpython-312.pyc,,
53
+ httpcore/_sync/__pycache__/http11.cpython-312.pyc,,
54
+ httpcore/_sync/__pycache__/http2.cpython-312.pyc,,
55
+ httpcore/_sync/__pycache__/http_proxy.cpython-312.pyc,,
56
+ httpcore/_sync/__pycache__/interfaces.cpython-312.pyc,,
57
+ httpcore/_sync/__pycache__/socks_proxy.cpython-312.pyc,,
58
+ httpcore/_sync/connection.py,sha256=9exGOb3PB-Mp2T1-sckSeL2t-tJ_9-NXomV8ihmWCgU,8238
59
+ httpcore/_sync/connection_pool.py,sha256=a-T8LTsUxc7r0Ww1atfHSDoWPjQ0fA8Ul7S3-F0Mj70,16955
60
+ httpcore/_sync/http11.py,sha256=IFobD1Md5JFlJGKWnh1_Q3epikUryI8qo09v8MiJIEA,13476
61
+ httpcore/_sync/http2.py,sha256=IZOBL1nNpOKJYwTSHYWtscD3zjSg8f85-63-o5RedVc,23112
62
+ httpcore/_sync/http_proxy.py,sha256=_al_6crKuEZu2wyvu493RZImJdBJnj5oGKNjLOJL2Zo,14463
63
+ httpcore/_sync/interfaces.py,sha256=snXON42vUDHO5JBJvo8D4VWk2Wat44z2OXXHDrjbl94,4344
64
+ httpcore/_sync/socks_proxy.py,sha256=zegZW9Snqj2_992DFJa8_CppOVBkVL4AgwduRkStakQ,13614
65
+ httpcore/_synchronization.py,sha256=zSi13mAColBnknjZBknUC6hKNDQT4C6ijnezZ-r0T2s,9434
66
+ httpcore/_trace.py,sha256=ck6ZoIzYTkdNAIfq5MGeKqBXDtqjOX-qfYwmZFbrGco,3952
67
+ httpcore/_utils.py,sha256=_RLgXYOAYC350ikALV59GZ68IJrdocRZxPs9PjmzdFY,1537
68
+ httpcore/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0