whenxuan committed on
Commit
233512e
·
verified ·
1 Parent(s): 0b3bc3b

whenxuan: init

Browse files
Files changed (9) hide show
  1. .gitignore +210 -0
  2. LICENSE +201 -0
  3. README.md +2 -0
  4. config.json +24 -0
  5. configuration_symtime.py +64 -0
  6. layers.py +401 -0
  7. model.py +140 -0
  8. model.safetensors +3 -0
  9. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
208
+
209
+ # The model params
210
+ *.safetensors
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright wwhenxuan
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # SymTime-NeurIPS2025-Huggingface
2
+ The pipeline and model configuration of the SymTime model for Hugging Face
config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {
3
+ "architectures": [
4
+ "SymTimeModel"
5
+ ],
6
+ "_name_or_path": "FlowVortex/SymTime",
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_symtime.SymTimeConfig",
9
+ "AutoModel": "model.SymTimeModel"
10
+ },
11
+ "patch_size": 16,
12
+ "num_layers": 6,
13
+ "d_model": 512,
14
+ "d_ff": 2048,
15
+ "num_heads": 8,
16
+ "norm": "BatchNorm",
17
+ "dropout": 0.1,
18
+ "act": "gelu",
19
+ "pre_norm": false,
20
+ "initializer_factor": 0.05,
21
+ "model_type": "time_series_transformer",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "5.5.4"
24
+ }
configuration_symtime.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import List, Literal, Optional, Dict
3
+ from enum import Enum
4
+
5
+ from transformers.configuration_utils import PretrainedConfig
6
+
7
+
8
# NOTE: the former ``@dataclass`` decorator was removed. With no declared
# fields it silently replaced ``PretrainedConfig``'s inherited ``__repr__``
# (printing just "SymTimeConfig()") and generated a field-less ``__eq__``
# under which *every* pair of config instances compared equal.
class SymTimeConfig(PretrainedConfig):
    """
    Time series encoder configuration for SymTime Model.

    Parameters
    -----------
    patch_size
        The size of the patch to be used for the input data.
    num_layers
        The number of layers to be used for the encoder.
    d_model
        The dimension of the model.
    d_ff
        The dimension of the feedforward network.
    num_heads
        The number of heads to be used for the attention mechanism.
    norm
        The normalization to be used for the encoder; any string containing
        "batch" (case-insensitive) selects BatchNorm, otherwise LayerNorm.
    dropout
        The dropout rate to be used for the encoder.
    act
        The activation function to be used for the encoder ("relu" or "gelu").
    pre_norm
        Whether to use pre-norm for the encoder.
    initializer_factor
        Scaling factor applied when initializing the model weights.
    """

    model_type = "time_series_transformer"

    def __init__(
        self,
        patch_size: int = 16,
        num_layers: int = 6,
        d_model: int = 512,
        d_ff: int = 2048,
        num_heads: int = 8,
        norm: str = "BatchNorm",
        dropout: float = 0.1,
        act: str = "gelu",
        pre_norm: bool = False,
        initializer_factor: float = 0.05,
        **kwargs,
    ) -> None:
        # Store our own hyper-parameters before delegating the remaining
        # generic keyword arguments to PretrainedConfig.
        self.patch_size = patch_size
        self.num_layers = num_layers
        self.d_model = d_model
        self.num_heads = num_heads
        self.d_ff = d_ff
        self.norm = norm
        self.dropout = dropout
        self.act = act
        self.pre_norm = pre_norm
        self.initializer_factor = initializer_factor

        super().__init__(**kwargs)
layers.py ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, Union, Tuple, Callable
2
+ import math
3
+
4
+ import numpy as np
5
+ import torch
6
+ from torch import nn
7
+ from torch import Tensor
8
+ import torch.nn.functional as F
9
+ from einops import rearrange
10
+
11
+
12
def get_activation_fn(activation: Union[str, Callable]) -> nn.Module:
    """Resolve an activation specification into an ``nn.Module`` instance.

    A callable is invoked with no arguments and its result returned;
    the strings "relu" and "gelu" (case-insensitive) map to the matching
    torch modules. Anything else raises ``ValueError``.
    """
    if callable(activation):
        return activation()
    name = activation.lower()
    if name == "relu":
        return nn.ReLU()
    if name == "gelu":
        return nn.GELU()
    raise ValueError(
        f'{activation} is not available. You can use "relu", "gelu", or a callable'
    )
23
+
24
+
25
class Transpose(nn.Module):
    """Swap two dimensions of the input tensor as a composable module.

    Useful inside ``nn.Sequential`` (e.g. wrapping ``nn.BatchNorm1d``,
    which normalizes over dim 1). With ``contiguous=True`` the result is
    materialized contiguously in memory.
    """

    def __init__(self, *dims, contiguous=False) -> None:
        super().__init__()
        self.dims = dims
        self.contiguous = contiguous

    def forward(self, x: Tensor) -> Tensor:
        out = x.transpose(*self.dims)
        return out.contiguous() if self.contiguous else out
37
+
38
+
39
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal positional encoding added to the input embeddings.

    The standard "Attention Is All You Need" table is precomputed once:
    sine on even feature indices, cosine on odd ones, with wavelengths
    forming a geometric progression from 2*pi to 10000*2*pi.
    """

    def __init__(self, hidden_size: int, max_len: int = 5000) -> None:
        super(PositionalEmbedding, self).__init__()

        # Positions 0..max_len-1 as a (max_len, 1) column for broadcasting.
        position = torch.arange(0, max_len).float().unsqueeze(1)
        # Frequency divisors computed in log space for numerical stability.
        div_term = (
            torch.arange(0, hidden_size, 2).float() * -(math.log(10000.0) / hidden_size)
        ).exp()

        # Build the (max_len, hidden_size) sinusoid table.
        table = torch.zeros(max_len, hidden_size).float()
        table.requires_grad = False  # fixed encoding, never trained
        table[:, 0::2] = torch.sin(position * div_term)
        table[:, 1::2] = torch.cos(position * div_term)

        # Register with a leading batch axis as a buffer: it follows the
        # module across devices but is not a learnable parameter.
        self.register_buffer("pe", table.unsqueeze(0))

    def forward(self, x: Tensor) -> Tensor:
        """Add the encodings for the first ``x.size(1)`` positions to ``x``."""
        return x + self.pe[:, : x.size(1)]
77
+
78
+
79
class TSTEncoder(nn.Module):
    """Time series encoder backbone of SymTime.

    Pipeline: project each input patch to the model dimension, prepend a
    learnable [CLS] token, add fixed sinusoidal positional encodings, apply
    dropout, then run a stack of ``TSTEncoderLayer`` blocks.

    Parameters
    ----------
    patch_size
        Length of one input patch (last dimension of the input tensor).
    num_layers
        Number of stacked ``TSTEncoderLayer`` blocks.
    hidden_size
        Model (embedding) dimension.
    num_heads
        Number of attention heads per layer.
    d_k, d_v
        Per-head key/value dimensions; ``None`` lets each layer default to
        ``hidden_size // num_heads``.
    d_ff
        Hidden dimension of the position-wise feed-forward network.
    norm
        Any string containing "batch" (case-insensitive) selects BatchNorm
        in the layers; anything else selects LayerNorm.
    attn_dropout
        Dropout applied to the attention weight matrices.
    dropout
        Dropout applied to the embeddings and residual branches.
    act
        Activation of the feed-forward networks ("relu" or "gelu").
    store_attn
        If True, each layer keeps its last attention map in ``layer.attn``.
    pre_norm
        If True, normalization is applied before each sublayer.
    """

    def __init__(
        self,
        patch_size: int = 16,
        num_layers: int = 3,
        hidden_size: int = 128,
        num_heads: int = 16,
        d_k: Optional[int] = None,
        d_v: Optional[int] = None,
        d_ff: int = 256,
        norm: str = "BatchNorm",
        attn_dropout: float = 0.0,
        dropout: float = 0.0,
        act: str = "gelu",
        store_attn: bool = False,
        pre_norm: bool = False,
    ) -> None:
        super().__init__()
        # The Linear layer to project the input patches to the model dimension
        self.W_P = nn.Linear(patch_size, hidden_size)

        # Positional encoding (fixed sinusoidal table)
        self.pe = PositionalEmbedding(hidden_size=hidden_size)

        # Residual dropout applied right after the embeddings
        self.dropout = nn.Dropout(dropout)

        # Create the [CLS] token; cls_mask is all-True so the [CLS] slot is
        # always treated as a valid (attended) position when a mask is used.
        self.cls_token = nn.Parameter(torch.zeros(1, 1, hidden_size))
        self.cls_mask = nn.Parameter(torch.ones(1, 1).bool(), requires_grad=False)

        # Create the encoder layers of the model backbone
        self.layers = nn.ModuleList(
            [
                TSTEncoderLayer(
                    hidden_size=hidden_size,
                    num_heads=num_heads,
                    d_k=d_k,
                    d_v=d_v,
                    d_ff=d_ff,
                    norm=norm,
                    attn_dropout=attn_dropout,
                    dropout=dropout,
                    activation=act,
                    pre_norm=pre_norm,
                    store_attn=store_attn,
                )
                for _ in range(num_layers)
            ]
        )

        # model params init (applied recursively to every submodule)
        self.apply(self._init_weights)

    def _init_weights(self, m: nn.Module) -> None:
        """Initialize Linear layers (Xavier-uniform weight, zero bias) and
        LayerNorm layers (unit weight, zero bias); called via ``self.apply``."""
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def forward(
        self,
        x: Tensor,  # x: [batch_size, patch_num, patch_size]
        attn_mask: Optional[Tensor] = None,  # attn_mask: [batch, num_patch]
        return_cls_token: bool = True,  # whether to return the CLS token
    ) -> Tensor:
        """Encode a batch of patched series.

        :param x: [batch_size, patch_num, patch_size] input patches
        :param attn_mask: optional [batch_size, patch_num] boolean mask; the
            [CLS] slot is padded with ``True`` via ``cls_mask``, so ``True``
            is intended to mark valid (attended) positions — NOTE(review):
            confirm the downstream masking in ``_ScaledDotProductAttention``
            honors this convention
        :param return_cls_token: if False, the [CLS] position is stripped
        :return: [batch_size, patch_num + 1, hidden_size] when the [CLS]
            token is kept, else [batch_size, patch_num, hidden_size]
        """
        batch_size = x.size(0)

        # Input patching embedding
        x = self.W_P(x)  # x: [batch_size, patch_num, model_dim]

        # Add the [CLS] token at position 0
        cls_token = self.cls_token.expand(batch_size, -1, -1)
        x = torch.cat([cls_token, x], dim=1)
        # adjust the attn mask: prepend an always-valid entry for [CLS]
        if attn_mask is not None:
            attn_mask = torch.cat(
                [self.cls_mask.expand(batch_size, -1), attn_mask], dim=1
            )

        # Add the positional embedding
        x = self.pe(x)
        x = self.dropout(x)  # x: [batch_size, patch_num + 1, hidden_size]

        # Run the Transformer layer stack
        for mod in self.layers:
            x = mod(x, attn_mask=attn_mask)

        if not return_cls_token:
            # If not returning the CLS token, remove it from the output
            return x[:, 1:, :]

        return x
178
+
179
+
180
class TSTEncoderLayer(nn.Module):
    """Patch-based Transformer encoder sublayer.

    One standard Transformer block: multi-head self-attention with residual
    connection and normalization, followed by a position-wise feed-forward
    network with its own residual and normalization. ``norm`` containing
    "batch" selects BatchNorm over the feature dimension (via ``Transpose``),
    otherwise LayerNorm. ``pre_norm`` switches between pre- and post-norm
    placement.
    """

    def __init__(
        self,
        hidden_size: int,
        num_heads: int,
        d_k: Optional[int] = None,
        d_v: Optional[int] = None,
        d_ff: int = 256,
        store_attn: bool = False,
        norm: str = "BatchNorm",
        attn_dropout: float = 0.0,
        dropout: float = 0.0,
        bias: bool = True,
        activation: str = "gelu",
        pre_norm: bool = False,
    ) -> None:
        super(TSTEncoderLayer, self).__init__()

        assert (
            not hidden_size % num_heads
        ), f"hidden_size ({hidden_size}) must be divisible by num_heads ({num_heads})"
        # If not specified, split the model dimension evenly across heads
        d_k = hidden_size // num_heads if d_k is None else d_k
        d_v = hidden_size // num_heads if d_v is None else d_v

        # Create the multi-head attention
        self.self_attn = MultiHeadAttention(
            hidden_size,
            num_heads,
            d_k,
            d_v,
            attn_dropout=attn_dropout,
            proj_dropout=dropout,
        )

        # Add & Norm for the attention branch
        self.dropout_attn = nn.Dropout(dropout)
        if "batch" in norm.lower():
            # BatchNorm1d normalizes dim 1, so transpose features into place
            self.norm_attn = nn.Sequential(
                Transpose(1, 2), nn.BatchNorm1d(hidden_size), Transpose(1, 2)
            )
        else:
            self.norm_attn = nn.LayerNorm(hidden_size)

        # Position-wise Feed-Forward
        self.ff = nn.Sequential(
            nn.Linear(hidden_size, d_ff, bias=bias),
            get_activation_fn(activation),
            nn.Dropout(dropout),
            nn.Linear(d_ff, hidden_size, bias=bias),
        )

        # Add & Norm for the feed-forward branch
        self.dropout_ffn = nn.Dropout(dropout)
        if "batch" in norm.lower():
            self.norm_ffn = nn.Sequential(
                Transpose(1, 2), nn.BatchNorm1d(hidden_size), Transpose(1, 2)
            )
        else:
            self.norm_ffn = nn.LayerNorm(hidden_size)

        # use pre-norm or not; optionally cache the last attention map
        self.pre_norm = pre_norm
        self.store_attn = store_attn
        self.attn = None  # last attention weights when store_attn is True

    def forward(
        self, src: Tensor, attn_mask: Optional[Tensor] = None
    ) -> Tensor:
        """Apply the attention and feed-forward sublayers to ``src``.

        :param src: [batch_size, num_token, hidden_size]
        :param attn_mask: optional mask forwarded to the attention layer
        :return: [batch_size, num_token, hidden_size]
        """

        # Whether to use pre-norm for attention layer.
        # NOTE(review): in pre-norm mode ``src`` is overwritten with its
        # normalized value, so the residual below adds the *normalized*
        # input rather than the raw one — standard pre-norm keeps the raw
        # input on the residual path; confirm this is intentional.
        if self.pre_norm:
            src = self.norm_attn(src)

        # Multi-Head self-attention (query = key = value = src)
        src2, attn = self.self_attn(src, src, src, attn_mask=attn_mask)
        if self.store_attn:
            self.attn = attn

        # Add: residual connection with residual dropout
        src = src + self.dropout_attn(src2)
        if not self.pre_norm:
            src = self.norm_attn(src)

        # Whether to use pre-norm for ffn layer (same caveat as above)
        if self.pre_norm:
            src = self.norm_ffn(src)

        # Position-wise Feed-Forward
        src2 = self.ff(src)

        # Add: residual connection with residual dropout
        src = src + self.dropout_ffn(src2)
        if not self.pre_norm:
            src = self.norm_ffn(src)

        return src
280
+
281
+
282
class MultiHeadAttention(nn.Module):
    """Multi-head attention mechanism layer.

    Projects queries/keys/values with separate Linear layers, splits them
    into ``num_heads`` heads, applies scaled dot-product attention per head
    via ``_ScaledDotProductAttention``, then concatenates the heads and
    projects back to ``hidden_size``.
    """

    def __init__(
        self,
        hidden_size: int,
        num_heads: int,
        d_k: Optional[int] = None,
        d_v: Optional[int] = None,
        attn_dropout: float = 0.0,
        proj_dropout: float = 0.0,
        qkv_bias: bool = True,
    ) -> None:
        """Multi Head Attention Layer
        Input shape:
            Q: [batch_size (bs) x max_q_len x hidden_size]
            K, V: [batch_size (bs) x q_len x hidden_size]
            mask: [q_len x q_len]
        """
        super().__init__()
        # Default per-head dims: split the model dimension evenly
        d_k = hidden_size // num_heads if d_k is None else d_k
        d_v = hidden_size // num_heads if d_v is None else d_v

        self.num_heads, self.d_k, self.d_v = num_heads, d_k, d_v

        # Q/K/V projections (all heads fused into one Linear each)
        self.W_Q = nn.Linear(hidden_size, d_k * num_heads, bias=qkv_bias)
        self.W_K = nn.Linear(hidden_size, d_k * num_heads, bias=qkv_bias)
        self.W_V = nn.Linear(hidden_size, d_v * num_heads, bias=qkv_bias)

        # Scaled Dot-Product Attention (multiple heads)
        self.sdp_attn = _ScaledDotProductAttention(
            hidden_size, num_heads, attn_dropout=attn_dropout
        )

        # Project the concatenated heads back to hidden_size
        self.to_out = nn.Sequential(
            nn.Linear(num_heads * d_v, hidden_size), nn.Dropout(proj_dropout)
        )

    def forward(
        self,
        q: Tensor,
        k: Optional[Tensor] = None,
        v: Optional[Tensor] = None,
        attn_mask: Optional[Tensor] = None,
    ) -> Tuple[Tensor, Tensor]:
        """Compute multi-head attention; ``k``/``v`` default to ``q``
        (self-attention).

        :return: (output [bs, q_len, hidden_size], attention weights)
        """
        bs = q.size(0)
        if k is None:
            k = q
        if v is None:
            v = q

        # Linear (+ split in multiple heads)
        # q_s/v_s: [bs, num_heads, seq_len, d_k or d_v];
        # k_s is pre-transposed to [bs, num_heads, d_k, seq_len] for matmul
        q_s = self.W_Q(q).view(bs, -1, self.num_heads, self.d_k).transpose(1, 2)
        k_s = self.W_K(k).view(bs, -1, self.num_heads, self.d_k).permute(0, 2, 3, 1)
        v_s = self.W_V(v).view(bs, -1, self.num_heads, self.d_v).transpose(1, 2)

        # Apply Scaled Dot-Product Attention (multiple heads)
        output, attn_weights = self.sdp_attn(q_s, k_s, v_s, attn_mask=attn_mask)

        # back to the original input dimensions: concatenate heads
        output = (
            output.transpose(1, 2).contiguous().view(bs, -1, self.num_heads * self.d_v)
        )
        output = self.to_out(output)

        return output, attn_weights
349
+
350
+
351
+ class _ScaledDotProductAttention(nn.Module):
352
+ r"""Scaled Dot-Product Attention module (Attention is all you need by Vaswani et al., 2017) with optional residual attention from previous layer
353
+ (Realformer: Transformer likes residual attention by He et al, 2020) and locality self sttention (Vision Transformer for Small-Size Datasets
354
+ by Lee et al, 2021)"""
355
+
356
+ def __init__(
357
+ self,
358
+ hidden_size: int,
359
+ num_heads: int,
360
+ attn_dropout: float = 0.0,
361
+ res_attention: bool = False,
362
+ ):
363
+ super().__init__()
364
+ self.attn_dropout = nn.Dropout(attn_dropout)
365
+ self.res_attention = res_attention
366
+ head_dim = hidden_size // num_heads
367
+ self.scale = nn.Parameter(torch.tensor(head_dim**-0.5), requires_grad=False)
368
+
369
+ def forward(
370
+ self, q: Tensor, k: Tensor, v: Tensor, attn_mask: Optional[Tensor] = None
371
+ ) -> Union[Tuple[Tensor, Tensor, Tensor], Tuple[Tensor, Tensor]]:
372
+ """
373
+ :param q: [batch_size, num_heads, num_token, d_k]
374
+ :param k: [batch_size, num_heads, d_k, num_token]
375
+ :param v: [batch_size, num_heads, num_token, d_k]
376
+ :param attn_mask: [batch_size, num_heads, num_token]
377
+ """
378
+
379
+ # Scaled MatMul (q, k) - similarity scores for all pairs of positions in an input sequence
380
+ attn_scores = torch.matmul(q, k) * self.scale
381
+
382
+ # Attention mask (optional)
383
+ if (
384
+ attn_mask is not None
385
+ ): # attn_mask with shape [q_len x seq_len] - only used when q_len == seq_len
386
+ attn_mask = rearrange(attn_mask, "b i -> b 1 i 1") * rearrange(
387
+ attn_mask, "b i -> b 1 1 i"
388
+ )
389
+ if attn_mask.dtype == torch.bool:
390
+ attn_scores.masked_fill_(attn_mask, -np.inf)
391
+ else:
392
+ attn_scores += attn_mask
393
+
394
+ # normalize the attention weights
395
+ attn_weights = F.softmax(attn_scores, dim=-1)
396
+ attn_weights = self.attn_dropout(attn_weights)
397
+
398
+ # compute the new values given the attention weights
399
+ output = torch.matmul(attn_weights, v)
400
+
401
+ return output, attn_weights
model.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch import Tensor
6
+ from torch.nn import functional as F
7
+ from einops import rearrange, repeat
8
+ from transformers.modeling_utils import PreTrainedModel
9
+
10
+ from configuration_symtime import SymTimeConfig
11
+ from layers import MultiHeadAttention, TSTEncoder, TSTEncoderLayer
12
+
13
+
14
class SymTimeModel(PreTrainedModel):
    """Hugging Face wrapper around the SymTime time-series encoder.

    This class is a thin shell: all computation happens in the
    ``TSTEncoder`` backbone defined in ``layers.py``. The wrapper wires the
    backbone to the ``SymTimeConfig`` hyper-parameters and supplies the
    custom weight initialization that ``post_init()`` applies recursively.

    Attributes
    ----------
    config: SymTimeConfig
        Model configuration holding all backbone hyper-parameters.
    encoder: TSTEncoder
        The patch-based Transformer encoder backbone.
    """

    def __init__(self, config: SymTimeConfig):
        """Build the encoder backbone from ``config`` and initialize weights."""
        super().__init__(config)
        self.config = config
        self.encoder = TSTEncoder(
            patch_size=config.patch_size,
            num_layers=config.num_layers,
            hidden_size=config.d_model,
            num_heads=config.num_heads,
            d_ff=config.d_ff,
            norm=config.norm,
            # Attention dropout reuses config.dropout — there is no separate
            # attn_dropout field on the config.
            attn_dropout=config.dropout,
            dropout=config.dropout,
            act=config.act,
            pre_norm=config.pre_norm,
        )

        # Runs the recursive _init_weights pass below over every submodule.
        self.post_init()

    def _init_weights(self, module) -> None:
        """Initialize weights for the SymTime encoder stack.

        Called recursively by Hugging Face ``post_init()`` for every
        submodule. Initialization mirrors the backbone structure in
        ``layers.py``:

        - ``TSTEncoder.W_P``: patch projection linear layer
        - ``TSTEncoder.cls_token``: learnable CLS token
        - ``TSTEncoderLayer.self_attn``: Q/K/V and output projections
        - ``TSTEncoderLayer.ff``: feed-forward linear layers
        - ``LayerNorm`` / ``BatchNorm1d``: normalization layers
        """
        super()._init_weights(module)

        gain = self.config.initializer_factor
        model_dim = self.config.d_model
        heads = self.config.num_heads
        head_dim = model_dim // heads  # d_k == d_v per head

        def _scaled_normal(weight: Tensor, fan_in: int) -> None:
            # Fan-in scaled normal init: std = gain / sqrt(fan_in).
            nn.init.normal_(weight, mean=0.0, std=gain * (fan_in**-0.5))

        def _zero_bias(layer: nn.Linear) -> None:
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

        if isinstance(module, nn.Linear):
            _scaled_normal(module.weight, module.in_features)
            _zero_bias(module)

        elif isinstance(module, nn.LayerNorm):
            nn.init.ones_(module.weight)
            nn.init.zeros_(module.bias)

        elif isinstance(module, nn.BatchNorm1d):
            # Affine parameters may be absent (affine=False).
            if module.weight is not None:
                nn.init.ones_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

        elif isinstance(module, TSTEncoder):
            if getattr(module, "cls_token", None) is not None:
                nn.init.normal_(module.cls_token, mean=0.0, std=gain)
            patch_proj = getattr(module, "W_P", None)
            if isinstance(patch_proj, nn.Linear):
                _scaled_normal(patch_proj.weight, patch_proj.in_features)
                _zero_bias(patch_proj)

        elif isinstance(module, MultiHeadAttention):
            # Q/K/V projections share the d_model fan-in.
            for proj in (module.W_Q, module.W_K, module.W_V):
                _scaled_normal(proj.weight, model_dim)
                _zero_bias(proj)
            # Output projection fans in from the concatenated heads.
            out_proj = module.to_out[0]
            _scaled_normal(out_proj.weight, heads * head_dim)
            _zero_bias(out_proj)

        elif isinstance(module, TSTEncoderLayer):
            for sub in module.ff:
                if isinstance(sub, nn.Linear):
                    _scaled_normal(sub.weight, sub.in_features)
                    _zero_bias(sub)

    def forward(
        self, x: Tensor, return_cls_token: bool = True
    ) -> Tuple[Tensor, Tensor]:
        """Delegate the forward pass to the encoder backbone unchanged."""
        return self.encoder(x, return_cls_token=return_cls_token)
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00fd4239b3110418392f60f6b5fc93604b75bb7aad45b05be94e0397b3d81334
3
+ size 85998105
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ einops
2
+ torch
3
+ transformers