forked from XetHub/RedPajama-Data-1T
Assembled from URLs hosted at https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T
Zach Nation
1 year ago
parent
e259594b2e
commit
2d0351412d
-
4book/book.jsonl
-
4common_crawl/2019-30/en_head_0014.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_head_0023.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_head_0025.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_head_0032.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_head_0039.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0007.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0013.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0026.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0036.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0039.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0057.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0062.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0074.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2019-30/en_middle_0091.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0006.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0011.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0018.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0019.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0040.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0053.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0056.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0058.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0061.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0062.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0063.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_head_0065.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0015.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0026.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0029.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0033.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0096.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0097.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0099.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0100.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0101.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0102.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0103.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0104.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0106.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0111.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0113.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0114.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0120.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2020-05/en_middle_0126.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_head_0002.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_head_0006.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_head_0010.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_head_0014.json.gz.dedup.classifier.jsonl.zst
-
4common_crawl/2021-04/en_head_0020.json.gz.dedup.classifier.jsonl.zst
- Some files were not shown because too many files have changed in this diff Show More
Write
Preview
Loading…
Cancel
Save
Reference in new issue