Buckets:
| { | |
| "subset": "tiny", | |
| "total_rows": 100000, | |
| "entries": [ | |
| { | |
| "config": "agnews", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/agnews/train/0.parquet" | |
| ], | |
| "config_size": 564258, | |
| "take_rows": 2942 | |
| }, | |
| { | |
| "config": "altlex", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/altlex/train/0.parquet" | |
| ], | |
| "config_size": 83053, | |
| "take_rows": 2942 | |
| }, | |
| { | |
| "config": "amazon_qa", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_qa/train/0.parquet" | |
| ], | |
| "config_size": 761984, | |
| "take_rows": 2942 | |
| }, | |
| { | |
| "config": "amazon_reviews", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/9.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/10.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/11.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/12.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/13.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/14.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/15.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/16.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/17.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/18.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/19.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/20.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/21.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/22.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/23.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/24.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/25.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/26.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/27.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/28.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/29.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/30.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/31.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/32.parquet" | |
| ], | |
| "config_size": 33666382, | |
| "take_rows": 2942 | |
| }, | |
| { | |
| "config": "arxiv_title_abstract", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/arxiv_title_abstract/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/arxiv_title_abstract/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/arxiv_title_abstract/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/arxiv_title_abstract/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/arxiv_title_abstract/train/4.parquet" | |
| ], | |
| "config_size": 1862039, | |
| "take_rows": 2942 | |
| }, | |
| { | |
| "config": "beir_dbpedia", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/beir_dbpedia/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/beir_dbpedia/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/beir_dbpedia/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/beir_dbpedia/train/3.parquet" | |
| ], | |
| "config_size": 2170696, | |
| "take_rows": 2942 | |
| }, | |
| { | |
| "config": "biorxiv_title_abstract", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/biorxiv_title_abstract/train/0.parquet" | |
| ], | |
| "config_size": 275247, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "cc_news_en", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/cc_news_en/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/cc_news_en/train/1.parquet" | |
| ], | |
| "config_size": 284667, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "cnn_dailymail", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/cnn_dailymail/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/cnn_dailymail/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/cnn_dailymail/train/2.parquet" | |
| ], | |
| "config_size": 284911, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "fw_edu", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/9.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/10.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/11.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/12.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/13.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/14.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/15.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/16.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/17.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/18.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/19.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/20.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/21.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/22.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/23.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/24.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/25.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/26.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/27.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/28.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/29.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/30.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/31.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/32.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/33.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/34.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/35.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/36.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/37.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/38.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/39.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/40.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/41.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/42.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/43.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/44.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/45.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/46.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/47.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/48.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/49.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/50.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/51.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/52.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/53.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/54.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/55.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/56.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/57.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/58.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/59.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/60.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/61.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/62.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/63.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/64.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/65.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/66.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/67.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/68.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/69.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/70.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/71.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/72.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/73.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/74.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/75.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/76.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/77.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/78.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/79.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/80.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/81.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/82.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/83.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/84.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/85.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/86.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/87.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/88.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/89.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/90.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/91.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/92.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/93.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/94.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/95.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/96.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/97.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/98.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/99.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/100.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/101.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/102.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/103.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/104.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/105.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/106.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/107.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/108.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/109.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/110.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/111.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/112.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/113.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/114.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/115.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/116.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/117.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/118.parquet" | |
| ], | |
| "config_size": 141250012, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "gooaq_qa", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/gooaq_qa/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/gooaq_qa/train/1.parquet" | |
| ], | |
| "config_size": 2503330, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "medrxiv_title_abstract", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/medrxiv_title_abstract/train/0.parquet" | |
| ], | |
| "config_size": 188665, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "msmarco", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/msmarco/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/msmarco/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/msmarco/train/2.parquet" | |
| ], | |
| "config_size": 3780906, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "mtp", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/9.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/10.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/11.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/12.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/13.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/14.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/15.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/16.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/17.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/18.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/19.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/20.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/21.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/22.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/23.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/24.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/25.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/26.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/27.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/28.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/29.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/30.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/31.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/32.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/33.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/34.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/35.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/36.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/37.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/38.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/39.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/40.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/41.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/42.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/43.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/44.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/45.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/46.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/47.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/48.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/49.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/50.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/51.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/52.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/53.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/54.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/55.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/56.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/57.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/58.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/59.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/60.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/61.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/62.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/63.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/64.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/65.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/66.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/67.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/68.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/69.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/70.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/71.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/72.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/73.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/74.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/75.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/76.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/77.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/78.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/79.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/80.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/81.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/82.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/83.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/84.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/85.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/86.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/87.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/88.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/89.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/90.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/91.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/92.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/93.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/94.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/95.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/96.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/97.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/98.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/99.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/100.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/101.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/102.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/103.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/104.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/105.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/106.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/107.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/108.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/109.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/110.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/111.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/112.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/113.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/114.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/115.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/116.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/117.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/118.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/119.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/120.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/121.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/122.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/123.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/124.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/125.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/126.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/127.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/128.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/129.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/130.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/131.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/132.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/133.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/134.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/135.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/136.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/137.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/138.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/139.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/140.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/141.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/142.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/143.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/144.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/145.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/146.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/147.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/148.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/149.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/150.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/151.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/152.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/153.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/154.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/155.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/156.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/157.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/158.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/159.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/160.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/161.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/162.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/163.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/164.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/165.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/166.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/167.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/168.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/169.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/170.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/171.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/172.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/173.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/174.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/175.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/176.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/177.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/178.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/179.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/180.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/181.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/182.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/183.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/184.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/185.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/186.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/187.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/188.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/189.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/190.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/191.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/192.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/193.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/194.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/195.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/196.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/197.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/198.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/199.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/200.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/201.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/202.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/203.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/204.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/205.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/206.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/207.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/208.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/209.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/210.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/211.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/212.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/213.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/214.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/215.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/216.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/217.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/218.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/219.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/220.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/221.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/222.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/223.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/224.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/225.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/226.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/227.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/228.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/229.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/230.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/231.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/232.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/233.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/234.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/235.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/236.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/237.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/238.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/239.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/240.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/241.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/242.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/243.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/244.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/245.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/246.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/247.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/248.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/249.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/250.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/251.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/252.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/253.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/254.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/255.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/256.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/257.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/258.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/259.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/260.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/261.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/262.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/263.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/264.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/265.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/266.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/267.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/268.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/269.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/270.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/271.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/272.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/273.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/274.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/275.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/276.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/277.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/278.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/279.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/280.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/281.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/282.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/283.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/284.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/285.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/286.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/287.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/288.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/289.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/290.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/291.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/292.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/293.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/294.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/295.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/296.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/297.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/298.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/299.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/300.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/301.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/302.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/303.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/304.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/305.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/306.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/307.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/308.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/309.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/310.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/311.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/312.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/313.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/314.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/315.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/316.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/317.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/318.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/319.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/320.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/321.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/322.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/323.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/324.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/325.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/326.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/327.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/328.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/329.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/330.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/331.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/332.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/333.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/334.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/335.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/336.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/337.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/338.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/339.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/340.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/341.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/342.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/343.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/344.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/345.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/346.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/347.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/348.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/349.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/350.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/351.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/352.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/353.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/354.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/355.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/356.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/357.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/358.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/359.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/360.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/361.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/362.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/363.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/364.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/365.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/366.parquet" | |
| ], | |
| "config_size": 190723370, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "npr", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/npr/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/npr/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/npr/train/2.parquet" | |
| ], | |
| "config_size": 430674, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "paq", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/9.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/10.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/11.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/12.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/13.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/14.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/15.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/16.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/17.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/18.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/19.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/20.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/21.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/22.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/23.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/24.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/25.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/26.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/27.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/28.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/29.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/30.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/31.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/32.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/33.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/34.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/35.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/36.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/37.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/38.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/39.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/40.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/41.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/42.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/43.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/44.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/45.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/46.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/47.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/48.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/49.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/50.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/51.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/52.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/53.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/54.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/55.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/56.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/57.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/58.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/59.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/60.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/61.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/62.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/63.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/64.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/65.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/66.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/67.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/68.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/69.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/70.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/71.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/72.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/73.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/74.parquet" | |
| ], | |
| "config_size": 48566967, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "quora", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/quora/train/0.parquet" | |
| ], | |
| "config_size": 44874, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "reddit", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/9.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/10.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/11.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/12.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/13.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/14.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/15.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/16.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/17.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/18.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/19.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/20.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/21.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/22.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/23.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/24.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/25.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/26.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/27.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/28.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/29.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/30.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/31.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/32.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/33.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/34.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/35.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/36.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/37.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/38.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/39.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/40.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/41.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/42.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/43.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/44.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/45.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/46.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/47.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/48.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/49.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/50.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/51.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/52.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/53.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/54.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/55.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/56.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/57.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/58.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/59.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/60.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/61.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/62.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/63.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/64.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/65.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/66.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/67.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/68.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/69.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/70.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/71.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/72.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/73.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/74.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/75.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/76.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/77.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/78.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/79.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/80.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/81.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/82.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/83.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/84.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/85.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/86.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/87.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/88.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/89.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/90.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/91.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/92.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/93.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/94.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/95.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/96.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/97.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/98.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/99.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/100.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/101.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/102.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/103.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/104.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/105.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/106.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/107.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/108.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/109.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/110.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/111.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/112.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/113.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/114.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/115.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/116.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/117.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/118.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/119.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/120.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/121.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/122.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/123.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/124.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/125.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/126.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/127.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/128.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/129.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/130.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/131.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/132.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/133.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/134.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/135.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/136.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/137.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/138.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/139.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/140.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/141.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/142.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/143.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/144.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/145.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/146.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/147.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/148.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/149.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/150.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/151.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/152.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/153.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/154.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/155.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/156.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/157.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/158.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/159.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/160.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/161.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/162.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/163.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/164.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/165.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/166.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/167.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/168.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/169.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/170.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/171.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/172.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/173.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/174.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/175.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/176.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/177.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/178.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/179.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/180.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/181.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/182.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/183.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/184.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/185.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/186.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/187.parquet" | |
| ], | |
| "config_size": 82911310, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "reddit_body_comment", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/9.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/10.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/11.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/12.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/13.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/14.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/15.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/16.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/17.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/18.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/19.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/20.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/21.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/22.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/23.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/24.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/25.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/26.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/27.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/28.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/29.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/30.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/31.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/32.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/33.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/34.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/35.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/36.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/37.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/38.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/39.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/40.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/41.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/42.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/43.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/44.parquet" | |
| ], | |
| "config_size": 14896056, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "s2orc_abstract_citation", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/9.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/10.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/11.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/12.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/13.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/14.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/15.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/16.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/17.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/18.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/19.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/20.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/21.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/22.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/23.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/24.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/25.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/26.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/27.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/28.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/29.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/30.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/31.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/32.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/33.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/34.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/35.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/36.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/37.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/38.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/39.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/40.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/41.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/42.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/43.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/44.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/45.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/46.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/47.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/48.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/49.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/50.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/51.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/52.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/53.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/54.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/55.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/56.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/57.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/58.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/59.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/60.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/61.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/62.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/63.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/64.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/65.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/66.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/67.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/68.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/69.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/70.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/71.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/72.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/73.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/74.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/75.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/76.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/77.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/78.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/79.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/80.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/81.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/82.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/83.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/84.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/85.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/86.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/87.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/88.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/89.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/90.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/91.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/92.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/93.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/94.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/95.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/96.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/97.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/98.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/99.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/100.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/101.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/102.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/103.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/104.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/105.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/106.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/107.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/108.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/109.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/110.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/111.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/112.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/113.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/114.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/115.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/116.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/117.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/118.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/119.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/120.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/121.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/122.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/123.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/124.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/125.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/126.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/127.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/128.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/129.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/130.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/131.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/132.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/133.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/134.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/135.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/136.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/137.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/138.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/139.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/140.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/141.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/142.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/143.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/144.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/145.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/146.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/147.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/148.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/149.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/150.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/151.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/152.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/153.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/154.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/155.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/156.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/157.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/158.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/159.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/160.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/161.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/162.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/163.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/164.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/165.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/166.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/167.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/168.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/169.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/170.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/171.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/172.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/173.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/174.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/175.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/176.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/177.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/178.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/179.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/180.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/181.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/182.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/183.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/184.parquet" | |
| ], | |
| "config_size": 25262155, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "s2orc_citation_titles", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/9.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/10.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/11.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/12.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/13.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/14.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/15.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/16.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/17.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/18.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/19.parquet" | |
| ], | |
| "config_size": 26272703, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "s2orc_title_abstract", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/9.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/10.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/11.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/12.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/13.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/14.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/15.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/16.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/17.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/18.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/19.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/20.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/21.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/22.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/23.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/24.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/25.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/26.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/27.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/28.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/29.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/30.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/31.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/32.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/33.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/34.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/35.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/36.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/37.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/38.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/39.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/40.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/41.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/42.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/43.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/44.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/45.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/46.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/47.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/48.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/49.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/50.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/51.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/52.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/53.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/54.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/55.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/56.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/57.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/58.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/59.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/60.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/61.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/62.parquet" | |
| ], | |
| "config_size": 35120706, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "stackexchange_body_body", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_body_body/train/0.parquet" | |
| ], | |
| "config_size": 55553, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "stackexchange_duplicate_questions", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_duplicate_questions/train/0.parquet" | |
| ], | |
| "config_size": 56827, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "stackexchange_qa", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/9.parquet" | |
| ], | |
| "config_size": 3586152, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "stackexchange_title_body", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/9.parquet" | |
| ], | |
| "config_size": 4565167, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "stackoverflow_title_body", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/9.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/10.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/11.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/12.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/13.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/14.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/15.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/16.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/17.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/18.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/19.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/20.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/21.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/22.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/23.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/24.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/25.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/26.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/27.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/28.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/29.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/30.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/31.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/32.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/33.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/34.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/35.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/36.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/37.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/38.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/39.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/40.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/41.parquet" | |
| ], | |
| "config_size": 12847704, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "wikianswers", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/1.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/2.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/3.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/4.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/5.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/6.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/7.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/8.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/9.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/10.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/11.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/12.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/13.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/14.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/15.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/16.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/17.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/18.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/19.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/20.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/21.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/22.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/23.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/24.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/25.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/26.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/27.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/28.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/29.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/30.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/31.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/32.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/33.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/34.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/35.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/36.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/37.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/38.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/39.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/40.parquet" | |
| ], | |
| "config_size": 9994370, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "wikihow", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikihow/train/0.parquet" | |
| ], | |
| "config_size": 127637, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "wikipedia_hlp_cm", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikipedia_hlp_cm/train/0.parquet" | |
| ], | |
| "config_size": 10000000, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "wikipedia_hlp_dl", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikipedia_hlp_dl/train/0.parquet" | |
| ], | |
| "config_size": 10000000, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "yahoo_answer", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/yahoo_answer/train/0.parquet" | |
| ], | |
| "config_size": 571439, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "yahoo_qa", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/yahoo_qa/train/0.parquet", | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/yahoo_qa/train/1.parquet" | |
| ], | |
| "config_size": 853872, | |
| "take_rows": 2941 | |
| }, | |
| { | |
| "config": "yahoo_question_body", | |
| "urls": [ | |
| "https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/yahoo_question_body/train/0.parquet" | |
| ], | |
| "config_size": 448475, | |
| "take_rows": 2941 | |
| } | |
| ] | |
| } |
Xet Storage Details
- Size:
- 159 kB
- Xet hash:
- 851e9fc0da087390415b03ece774deb327ea4bd63c1528171506ada6eb2a26ec
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.