erikkaum/training-cache / tiny /partition.json
erikkaum's picture
download
raw
159 kB
{
"subset": "tiny",
"total_rows": 100000,
"entries": [
{
"config": "agnews",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/agnews/train/0.parquet"
],
"config_size": 564258,
"take_rows": 2942
},
{
"config": "altlex",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/altlex/train/0.parquet"
],
"config_size": 83053,
"take_rows": 2942
},
{
"config": "amazon_qa",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_qa/train/0.parquet"
],
"config_size": 761984,
"take_rows": 2942
},
{
"config": "amazon_reviews",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/9.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/10.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/11.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/12.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/13.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/14.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/15.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/16.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/17.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/18.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/19.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/20.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/21.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/22.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/23.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/24.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/25.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/26.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/27.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/28.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/29.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/30.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/31.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/amazon_reviews/train/32.parquet"
],
"config_size": 33666382,
"take_rows": 2942
},
{
"config": "arxiv_title_abstract",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/arxiv_title_abstract/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/arxiv_title_abstract/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/arxiv_title_abstract/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/arxiv_title_abstract/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/arxiv_title_abstract/train/4.parquet"
],
"config_size": 1862039,
"take_rows": 2942
},
{
"config": "beir_dbpedia",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/beir_dbpedia/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/beir_dbpedia/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/beir_dbpedia/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/beir_dbpedia/train/3.parquet"
],
"config_size": 2170696,
"take_rows": 2942
},
{
"config": "biorxiv_title_abstract",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/biorxiv_title_abstract/train/0.parquet"
],
"config_size": 275247,
"take_rows": 2941
},
{
"config": "cc_news_en",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/cc_news_en/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/cc_news_en/train/1.parquet"
],
"config_size": 284667,
"take_rows": 2941
},
{
"config": "cnn_dailymail",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/cnn_dailymail/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/cnn_dailymail/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/cnn_dailymail/train/2.parquet"
],
"config_size": 284911,
"take_rows": 2941
},
{
"config": "fw_edu",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/9.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/10.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/11.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/12.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/13.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/14.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/15.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/16.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/17.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/18.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/19.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/20.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/21.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/22.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/23.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/24.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/25.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/26.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/27.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/28.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/29.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/30.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/31.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/32.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/33.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/34.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/35.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/36.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/37.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/38.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/39.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/40.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/41.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/42.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/43.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/44.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/45.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/46.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/47.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/48.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/49.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/50.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/51.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/52.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/53.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/54.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/55.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/56.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/57.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/58.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/59.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/60.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/61.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/62.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/63.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/64.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/65.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/66.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/67.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/68.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/69.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/70.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/71.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/72.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/73.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/74.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/75.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/76.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/77.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/78.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/79.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/80.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/81.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/82.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/83.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/84.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/85.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/86.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/87.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/88.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/89.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/90.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/91.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/92.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/93.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/94.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/95.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/96.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/97.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/98.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/99.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/100.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/101.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/102.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/103.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/104.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/105.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/106.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/107.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/108.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/109.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/110.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/111.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/112.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/113.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/114.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/115.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/116.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/117.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/fw_edu/train/118.parquet"
],
"config_size": 141250012,
"take_rows": 2941
},
{
"config": "gooaq_qa",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/gooaq_qa/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/gooaq_qa/train/1.parquet"
],
"config_size": 2503330,
"take_rows": 2941
},
{
"config": "medrxiv_title_abstract",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/medrxiv_title_abstract/train/0.parquet"
],
"config_size": 188665,
"take_rows": 2941
},
{
"config": "msmarco",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/msmarco/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/msmarco/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/msmarco/train/2.parquet"
],
"config_size": 3780906,
"take_rows": 2941
},
{
"config": "mtp",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/9.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/10.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/11.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/12.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/13.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/14.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/15.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/16.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/17.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/18.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/19.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/20.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/21.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/22.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/23.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/24.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/25.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/26.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/27.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/28.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/29.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/30.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/31.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/32.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/33.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/34.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/35.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/36.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/37.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/38.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/39.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/40.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/41.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/42.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/43.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/44.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/45.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/46.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/47.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/48.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/49.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/50.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/51.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/52.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/53.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/54.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/55.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/56.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/57.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/58.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/59.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/60.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/61.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/62.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/63.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/64.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/65.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/66.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/67.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/68.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/69.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/70.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/71.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/72.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/73.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/74.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/75.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/76.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/77.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/78.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/79.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/80.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/81.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/82.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/83.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/84.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/85.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/86.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/87.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/88.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/89.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/90.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/91.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/92.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/93.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/94.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/95.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/96.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/97.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/98.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/99.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/100.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/101.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/102.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/103.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/104.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/105.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/106.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/107.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/108.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/109.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/110.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/111.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/112.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/113.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/114.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/115.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/116.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/117.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/118.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/119.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/120.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/121.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/122.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/123.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/124.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/125.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/126.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/127.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/128.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/129.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/130.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/131.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/132.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/133.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/134.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/135.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/136.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/137.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/138.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/139.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/140.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/141.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/142.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/143.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/144.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/145.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/146.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/147.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/148.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/149.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/150.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/151.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/152.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/153.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/154.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/155.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/156.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/157.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/158.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/159.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/160.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/161.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/162.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/163.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/164.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/165.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/166.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/167.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/168.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/169.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/170.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/171.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/172.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/173.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/174.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/175.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/176.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/177.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/178.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/179.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/180.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/181.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/182.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/183.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/184.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/185.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/186.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/187.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/188.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/189.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/190.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/191.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/192.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/193.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/194.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/195.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/196.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/197.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/198.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/199.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/200.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/201.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/202.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/203.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/204.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/205.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/206.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/207.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/208.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/209.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/210.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/211.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/212.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/213.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/214.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/215.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/216.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/217.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/218.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/219.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/220.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/221.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/222.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/223.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/224.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/225.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/226.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/227.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/228.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/229.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/230.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/231.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/232.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/233.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/234.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/235.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/236.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/237.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/238.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/239.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/240.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/241.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/242.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/243.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/244.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/245.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/246.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/247.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/248.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/249.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/250.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/251.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/252.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/253.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/254.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/255.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/256.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/257.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/258.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/259.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/260.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/261.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/262.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/263.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/264.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/265.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/266.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/267.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/268.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/269.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/270.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/271.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/272.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/273.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/274.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/275.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/276.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/277.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/278.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/279.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/280.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/281.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/282.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/283.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/284.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/285.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/286.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/287.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/288.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/289.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/290.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/291.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/292.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/293.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/294.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/295.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/296.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/297.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/298.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/299.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/300.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/301.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/302.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/303.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/304.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/305.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/306.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/307.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/308.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/309.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/310.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/311.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/312.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/313.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/314.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/315.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/316.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/317.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/318.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/319.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/320.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/321.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/322.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/323.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/324.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/325.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/326.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/327.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/328.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/329.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/330.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/331.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/332.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/333.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/334.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/335.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/336.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/337.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/338.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/339.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/340.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/341.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/342.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/343.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/344.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/345.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/346.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/347.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/348.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/349.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/350.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/351.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/352.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/353.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/354.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/355.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/356.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/357.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/358.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/359.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/360.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/361.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/362.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/363.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/364.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/365.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/mtp/train/366.parquet"
],
"config_size": 190723370,
"take_rows": 2941
},
{
"config": "npr",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/npr/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/npr/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/npr/train/2.parquet"
],
"config_size": 430674,
"take_rows": 2941
},
{
"config": "paq",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/9.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/10.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/11.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/12.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/13.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/14.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/15.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/16.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/17.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/18.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/19.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/20.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/21.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/22.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/23.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/24.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/25.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/26.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/27.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/28.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/29.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/30.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/31.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/32.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/33.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/34.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/35.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/36.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/37.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/38.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/39.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/40.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/41.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/42.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/43.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/44.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/45.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/46.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/47.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/48.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/49.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/50.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/51.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/52.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/53.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/54.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/55.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/56.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/57.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/58.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/59.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/60.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/61.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/62.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/63.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/64.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/65.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/66.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/67.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/68.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/69.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/70.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/71.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/72.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/73.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/paq/train/74.parquet"
],
"config_size": 48566967,
"take_rows": 2941
},
{
"config": "quora",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/quora/train/0.parquet"
],
"config_size": 44874,
"take_rows": 2941
},
{
"config": "reddit",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/9.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/10.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/11.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/12.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/13.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/14.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/15.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/16.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/17.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/18.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/19.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/20.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/21.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/22.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/23.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/24.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/25.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/26.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/27.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/28.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/29.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/30.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/31.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/32.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/33.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/34.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/35.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/36.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/37.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/38.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/39.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/40.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/41.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/42.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/43.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/44.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/45.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/46.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/47.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/48.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/49.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/50.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/51.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/52.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/53.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/54.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/55.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/56.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/57.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/58.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/59.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/60.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/61.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/62.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/63.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/64.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/65.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/66.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/67.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/68.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/69.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/70.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/71.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/72.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/73.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/74.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/75.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/76.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/77.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/78.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/79.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/80.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/81.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/82.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/83.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/84.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/85.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/86.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/87.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/88.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/89.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/90.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/91.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/92.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/93.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/94.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/95.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/96.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/97.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/98.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/99.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/100.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/101.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/102.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/103.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/104.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/105.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/106.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/107.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/108.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/109.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/110.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/111.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/112.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/113.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/114.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/115.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/116.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/117.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/118.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/119.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/120.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/121.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/122.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/123.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/124.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/125.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/126.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/127.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/128.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/129.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/130.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/131.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/132.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/133.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/134.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/135.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/136.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/137.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/138.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/139.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/140.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/141.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/142.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/143.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/144.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/145.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/146.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/147.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/148.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/149.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/150.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/151.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/152.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/153.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/154.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/155.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/156.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/157.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/158.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/159.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/160.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/161.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/162.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/163.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/164.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/165.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/166.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/167.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/168.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/169.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/170.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/171.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/172.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/173.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/174.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/175.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/176.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/177.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/178.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/179.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/180.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/181.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/182.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/183.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/184.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/185.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/186.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit/train/187.parquet"
],
"config_size": 82911310,
"take_rows": 2941
},
{
"config": "reddit_body_comment",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/9.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/10.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/11.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/12.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/13.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/14.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/15.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/16.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/17.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/18.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/19.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/20.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/21.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/22.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/23.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/24.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/25.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/26.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/27.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/28.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/29.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/30.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/31.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/32.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/33.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/34.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/35.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/36.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/37.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/38.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/39.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/40.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/41.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/42.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/43.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/reddit_body_comment/train/44.parquet"
],
"config_size": 14896056,
"take_rows": 2941
},
{
"config": "s2orc_abstract_citation",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/9.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/10.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/11.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/12.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/13.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/14.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/15.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/16.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/17.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/18.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/19.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/20.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/21.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/22.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/23.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/24.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/25.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/26.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/27.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/28.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/29.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/30.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/31.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/32.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/33.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/34.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/35.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/36.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/37.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/38.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/39.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/40.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/41.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/42.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/43.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/44.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/45.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/46.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/47.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/48.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/49.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/50.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/51.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/52.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/53.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/54.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/55.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/56.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/57.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/58.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/59.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/60.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/61.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/62.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/63.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/64.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/65.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/66.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/67.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/68.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/69.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/70.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/71.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/72.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/73.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/74.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/75.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/76.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/77.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/78.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/79.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/80.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/81.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/82.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/83.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/84.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/85.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/86.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/87.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/88.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/89.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/90.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/91.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/92.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/93.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/94.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/95.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/96.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/97.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/98.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/99.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/100.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/101.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/102.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/103.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/104.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/105.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/106.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/107.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/108.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/109.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/110.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/111.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/112.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/113.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/114.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/115.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/116.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/117.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/118.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/119.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/120.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/121.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/122.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/123.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/124.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/125.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/126.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/127.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/128.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/129.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/130.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/131.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/132.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/133.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/134.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/135.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/136.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/137.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/138.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/139.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/140.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/141.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/142.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/143.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/144.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/145.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/146.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/147.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/148.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/149.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/150.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/151.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/152.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/153.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/154.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/155.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/156.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/157.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/158.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/159.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/160.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/161.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/162.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/163.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/164.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/165.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/166.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/167.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/168.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/169.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/170.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/171.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/172.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/173.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/174.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/175.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/176.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/177.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/178.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/179.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/180.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/181.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/182.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/183.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_abstract_citation/train/184.parquet"
],
"config_size": 25262155,
"take_rows": 2941
},
{
"config": "s2orc_citation_titles",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/9.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/10.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/11.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/12.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/13.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/14.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/15.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/16.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/17.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/18.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_citation_titles/train/19.parquet"
],
"config_size": 26272703,
"take_rows": 2941
},
{
"config": "s2orc_title_abstract",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/9.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/10.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/11.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/12.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/13.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/14.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/15.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/16.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/17.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/18.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/19.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/20.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/21.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/22.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/23.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/24.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/25.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/26.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/27.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/28.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/29.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/30.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/31.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/32.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/33.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/34.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/35.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/36.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/37.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/38.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/39.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/40.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/41.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/42.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/43.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/44.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/45.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/46.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/47.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/48.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/49.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/50.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/51.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/52.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/53.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/54.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/55.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/56.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/57.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/58.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/59.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/60.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/61.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/s2orc_title_abstract/train/62.parquet"
],
"config_size": 35120706,
"take_rows": 2941
},
{
"config": "stackexchange_body_body",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_body_body/train/0.parquet"
],
"config_size": 55553,
"take_rows": 2941
},
{
"config": "stackexchange_duplicate_questions",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_duplicate_questions/train/0.parquet"
],
"config_size": 56827,
"take_rows": 2941
},
{
"config": "stackexchange_qa",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_qa/train/9.parquet"
],
"config_size": 3586152,
"take_rows": 2941
},
{
"config": "stackexchange_title_body",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackexchange_title_body/train/9.parquet"
],
"config_size": 4565167,
"take_rows": 2941
},
{
"config": "stackoverflow_title_body",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/9.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/10.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/11.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/12.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/13.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/14.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/15.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/16.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/17.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/18.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/19.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/20.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/21.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/22.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/23.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/24.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/25.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/26.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/27.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/28.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/29.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/30.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/31.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/32.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/33.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/34.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/35.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/36.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/37.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/38.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/39.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/40.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/stackoverflow_title_body/train/41.parquet"
],
"config_size": 12847704,
"take_rows": 2941
},
{
"config": "wikianswers",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/1.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/2.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/3.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/4.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/5.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/6.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/7.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/8.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/9.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/10.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/11.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/12.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/13.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/14.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/15.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/16.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/17.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/18.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/19.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/20.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/21.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/22.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/23.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/24.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/25.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/26.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/27.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/28.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/29.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/30.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/31.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/32.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/33.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/34.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/35.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/36.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/37.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/38.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/39.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikianswers/train/40.parquet"
],
"config_size": 9994370,
"take_rows": 2941
},
{
"config": "wikihow",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikihow/train/0.parquet"
],
"config_size": 127637,
"take_rows": 2941
},
{
"config": "wikipedia_hlp_cm",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikipedia_hlp_cm/train/0.parquet"
],
"config_size": 10000000,
"take_rows": 2941
},
{
"config": "wikipedia_hlp_dl",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/wikipedia_hlp_dl/train/0.parquet"
],
"config_size": 10000000,
"take_rows": 2941
},
{
"config": "yahoo_answer",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/yahoo_answer/train/0.parquet"
],
"config_size": 571439,
"take_rows": 2941
},
{
"config": "yahoo_qa",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/yahoo_qa/train/0.parquet",
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/yahoo_qa/train/1.parquet"
],
"config_size": 853872,
"take_rows": 2941
},
{
"config": "yahoo_question_body",
"urls": [
"https://huggingface.co/api/datasets/lightonai/embeddings-pre-training-curated/parquet/yahoo_question_body/train/0.parquet"
],
"config_size": 448475,
"take_rows": 2941
}
]
}

Xet Storage Details

Size:
159 kB
·
Xet hash:
851e9fc0da087390415b03ece774deb327ea4bd63c1528171506ada6eb2a26ec

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.