forked from XetHub/RedPajama-Data-1T
Assembled from URLs hosted at https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T
Zach Nation
1 year ago
parent
7bf143df64
commit
e259594b2e
-
3SHA256SUMS.txt
-
4arxiv/arxiv_11c659c1-ffbf-4455-abfd-058f6bbf4bb2.jsonl
-
4arxiv/arxiv_5a02d9ee-12a0-437d-808f-d26f0eb2012b.jsonl
-
4arxiv/arxiv_5d8d402b-8277-480a-b5fa-71169726864f.jsonl
-
4arxiv/arxiv_5ee33ef7-455e-4fd5-9512-c4771dd802c1.jsonl
-
4arxiv/arxiv_610c82ed-b9ee-449c-83b0-601205f3a74a.jsonl
-
4arxiv/arxiv_629fe3ca-075f-4663-9b81-b807f3b42bf2.jsonl
-
4arxiv/arxiv_8ede1b71-6846-439a-acba-86a57cfec3d2.jsonl
-
4book/book.jsonl
-
4c4/c4-train.00001-of-01024.jsonl
-
4common_crawl/2019-30/en_middle_0000.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0048.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0070.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0092.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0000.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0007.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0036.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0039.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0052.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0066.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0095.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0098.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0104.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0125.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_head_0020.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_head_0050.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_head_0065.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_middle_0000.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_middle_0014.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_middle_0030.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_middle_0080.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_head_0002.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_head_0007.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_head_0021.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_head_0023.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_head_0033.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_head_0043.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_head_0047.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_head_0053.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_middle_0007.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_middle_0008.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_middle_0010.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_middle_0019.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_middle_0022.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_middle_0028.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2022-05/en_middle_0055.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2023-06/en_head_0004.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2023-06/en_head_0006.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2023-06/en_head_0019.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2023-06/en_head_0028.json.gz.dedup.classifier.jsonl.zst
- Some files were not shown because too many files have changed in this diff Show More
Write
Preview
Loading…
Cancel
Save
Reference in new issue