Feature Extraction
Transformers
PyTorch
roberta
code-understanding
unixcoder
text-embeddings-inference
Instructions to use Henry65/RepoSim4Py with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Henry65/RepoSim4Py with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("feature-extraction", model="Henry65/RepoSim4Py")

# Load model directly
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("Henry65/RepoSim4Py")
model = AutoModel.from_pretrained("Henry65/RepoSim4Py")
- Notebooks
- Google Colab
- Kaggle
Update RepoPipeline.py
Browse files
- RepoPipeline.py +3 -3
RepoPipeline.py
CHANGED
|
@@ -113,7 +113,7 @@ def extract_information(repos, headers=None):
|
|
| 113 |
with tarfile.open(fileobj=response.raw, mode="r|gz") as tar:
|
| 114 |
for member in tar:
|
| 115 |
# 2. Extracting codes and docs.
|
| 116 |
-
if
|
| 117 |
try:
|
| 118 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
| 119 |
# extract_code_and_docs
|
|
@@ -127,7 +127,7 @@ def extract_information(repos, headers=None):
|
|
| 127 |
except SyntaxError as e:
|
| 128 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 129 |
# 3. Extracting readme.
|
| 130 |
-
elif (
|
| 131 |
try:
|
| 132 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
| 133 |
# extract readme
|
|
@@ -141,7 +141,7 @@ def extract_information(repos, headers=None):
|
|
| 141 |
except SyntaxError as e:
|
| 142 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 143 |
# 4. Extracting requirements.
|
| 144 |
-
elif
|
| 145 |
try:
|
| 146 |
lines = tar.extractfile(member).readlines().decode("utf-8")
|
| 147 |
# extract readme
|
|
|
|
| 113 |
with tarfile.open(fileobj=response.raw, mode="r|gz") as tar:
|
| 114 |
for member in tar:
|
| 115 |
# 2. Extracting codes and docs.
|
| 116 |
+
if member.name.endswith(".py") and member.isfile():
|
| 117 |
try:
|
| 118 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
| 119 |
# extract_code_and_docs
|
|
|
|
| 127 |
except SyntaxError as e:
|
| 128 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 129 |
# 3. Extracting readme.
|
| 130 |
+
elif (member.name == "README.md" or member.name == "README.rst") and member.isfile():
|
| 131 |
try:
|
| 132 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
| 133 |
# extract readme
|
|
|
|
| 141 |
except SyntaxError as e:
|
| 142 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 143 |
# 4. Extracting requirements.
|
| 144 |
+
elif member.name == "requirements.txt" and member.isfile():
|
| 145 |
try:
|
| 146 |
lines = tar.extractfile(member).readlines().decode("utf-8")
|
| 147 |
# extract readme
|