Instructions to use openpecha/aligner with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use openpecha/aligner with Transformers:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("openpecha/aligner")
model = AutoModelForSeq2SeqLM.from_pretrained("openpecha/aligner")
- Notebooks
- Google Colab
- Kaggle
Update handler.py
Browse files
- handler.py +31 -25
handler.py
CHANGED
|
@@ -241,42 +241,48 @@ def download_file(s3_public_url: str, output_fn) -> Path:
|
|
| 241 |
|
| 242 |
def _run_align_script(bo_fn, en_fn, output_dir):
|
| 243 |
start = time.time()
|
|
|
|
|
|
|
| 244 |
cmd = [str(ALIGNER_SCRIPT_PATH), str(bo_fn), str(en_fn), str(output_dir)]
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
|
|
|
|
|
|
|
|
|
| 253 |
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
logging.error("Failed to find output file path in script output.")
|
| 257 |
return None
|
| 258 |
|
| 259 |
-
output_fn = match.group(1).strip()
|
| 260 |
-
# Normalize the file path
|
| 261 |
output_fn = Path(output_fn)
|
| 262 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
end = time.time()
|
| 264 |
total_time = round((end - start) / 60, 2)
|
| 265 |
logging.info(f"Total time taken for Aligning: {total_time} mins")
|
| 266 |
|
| 267 |
-
# Read and log the contents of the output file
|
| 268 |
-
try:
|
| 269 |
-
if output_fn.exists():
|
| 270 |
-
content = output_fn.read_text()
|
| 271 |
-
if content:
|
| 272 |
-
logging.info(f"First 100 characters of {output_fn}: {content[:100]}...")
|
| 273 |
-
else:
|
| 274 |
-
logging.error(f"The file {output_fn} is empty.")
|
| 275 |
-
else:
|
| 276 |
-
logging.error(f"The file {output_fn} does not exist.")
|
| 277 |
-
except Exception as e:
|
| 278 |
-
logging.error(f"Error reading file {output_fn}: {e}")
|
| 279 |
-
|
| 280 |
return output_fn
|
| 281 |
def align(text_pair):
|
| 282 |
|
|
|
|
| 241 |
|
| 242 |
def _run_align_script(bo_fn, en_fn, output_dir):
    """Run the external aligner script and return the path of its output file.

    The script at ``ALIGNER_SCRIPT_PATH`` is executed from
    ``ALIGNER_SCRIPT_DIR`` with ``bo_fn``, ``en_fn`` and ``output_dir`` as its
    three positional arguments. Its stdout is searched for a line of the form
    ``[OUTPUT] <path>``, which is taken as the location of the result file.

    Args:
        bo_fn: First input file handed to the script (presumably the Tibetan
            text, going by the ``bo`` prefix -- confirm against the caller).
        en_fn: Second input file handed to the script (presumably the English
            text -- confirm against the caller).
        output_dir: Directory argument handed to the script.

    Returns:
        A ``Path`` to the reported output file, or ``None`` when the script
        exits with a non-zero status or prints no ``[OUTPUT]`` line. The path
        is returned even if the file turns out to be missing or empty; those
        conditions are only logged.
    """
    start = time.time()

    # Execute the alignment script
    cmd = [str(ALIGNER_SCRIPT_PATH), str(bo_fn), str(en_fn), str(output_dir)]
    try:
        output = subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
            cwd=str(ALIGNER_SCRIPT_DIR),
        )
    except subprocess.CalledProcessError as e:
        logging.error(f"Alignment script failed with error: {e}")
        # The exception text only carries the exit status; the captured
        # stderr is where the script's own diagnostics end up.
        if e.stderr:
            logging.error(f"Alignment script stderr: {e.stderr.strip()}")
        return None

    # Extract the output file path
    match = re.search(r"\[OUTPUT\] (.*)", output.stdout)
    if match is None:
        logging.error("Failed to find output file path in script output.")
        return None

    # Correcting the duplicated path: the script may report
    # "<prefix>//<real path>", so keep only what follows the last "//" and
    # re-root it. Stripping whitespace and leading slashes keeps a clean
    # absolute path from turning into "//path" or carrying a stray "\r".
    reported = match.group(1).strip()
    output_fn = Path("/" + reported.split("//")[-1].lstrip("/"))

    # Check if the file exists and log a short preview of its content.
    # The preview is diagnostic only, so it must never abort the function:
    # read just the first 100 characters, tolerate undecodable bytes, and
    # log (rather than raise) on I/O errors.
    if output_fn.is_file():
        try:
            with output_fn.open(encoding="utf-8", errors="replace") as f:
                content = f.read(100)
        except OSError as e:
            logging.error(f"Error reading file {output_fn}: {e}")
        else:
            if content:
                logging.info(f"Contents of {output_fn}: {content}...")  # Log first 100 characters
            else:
                logging.warning(f"The file {output_fn} is empty.")
    else:
        logging.error(f"The file {output_fn} does not exist.")

    end = time.time()
    total_time = round((end - start) / 60, 2)
    logging.info(f"Total time taken for Aligning: {total_time} mins")

    return output_fn
|
| 287 |
def align(text_pair):
|
| 288 |
|