1
0
Fork 0

Assembled from URLs hosted at https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T

Third batch of data

main
Zach Nation 1 year ago
parent 7bf143df64
commit e259594b2e
85 changed files (34 GiB → 89 GiB)
  1. 3
      SHA256SUMS.txt
  2. 4
      arxiv/arxiv_11c659c1-ffbf-4455-abfd-058f6bbf4bb2.jsonl
  3. 4
      arxiv/arxiv_5a02d9ee-12a0-437d-808f-d26f0eb2012b.jsonl
  4. 4
      arxiv/arxiv_5d8d402b-8277-480a-b5fa-71169726864f.jsonl
  5. 4
      arxiv/arxiv_5ee33ef7-455e-4fd5-9512-c4771dd802c1.jsonl
  6. 4
      arxiv/arxiv_610c82ed-b9ee-449c-83b0-601205f3a74a.jsonl
  7. 4
      arxiv/arxiv_629fe3ca-075f-4663-9b81-b807f3b42bf2.jsonl
  8. 4
      arxiv/arxiv_8ede1b71-6846-439a-acba-86a57cfec3d2.jsonl
  9. 4
      book/book.jsonl
  10. 4
      c4/c4-train.00001-of-01024.jsonl
  11. 4
      common_crawl/2019-30/en_middle_0000.json.gz.dedup.classifier.jsonl.zst
  12. 4
      common_crawl/2019-30/en_middle_0048.json.gz.dedup.classifier.jsonl.zst
  13. 4
      common_crawl/2019-30/en_middle_0070.json.gz.dedup.classifier.jsonl.zst
  14. 4
      common_crawl/2019-30/en_middle_0092.json.gz.dedup.classifier.jsonl.zst
  15. 4
      common_crawl/2020-05/en_head_0000.json.gz.dedup.classifier.jsonl.zst
  16. 4
      common_crawl/2020-05/en_head_0007.json.gz.dedup.classifier.jsonl.zst
  17. 4
      common_crawl/2020-05/en_head_0036.json.gz.dedup.classifier.jsonl.zst
  18. 4
      common_crawl/2020-05/en_head_0039.json.gz.dedup.classifier.jsonl.zst
  19. 4
      common_crawl/2020-05/en_middle_0052.json.gz.dedup.classifier.jsonl.zst
  20. 4
      common_crawl/2020-05/en_middle_0066.json.gz.dedup.classifier.jsonl.zst
  21. 4
      common_crawl/2020-05/en_middle_0095.json.gz.dedup.classifier.jsonl.zst
  22. 4
      common_crawl/2020-05/en_middle_0098.json.gz.dedup.classifier.jsonl.zst
  23. 4
      common_crawl/2020-05/en_middle_0104.json.gz.dedup.classifier.jsonl.zst
  24. 4
      common_crawl/2020-05/en_middle_0125.json.gz.dedup.classifier.jsonl.zst
  25. 4
      common_crawl/2021-04/en_head_0020.json.gz.dedup.classifier.jsonl.zst
  26. 4
      common_crawl/2021-04/en_head_0050.json.gz.dedup.classifier.jsonl.zst
  27. 4
      common_crawl/2021-04/en_head_0065.json.gz.dedup.classifier.jsonl.zst
  28. 4
      common_crawl/2021-04/en_middle_0000.json.gz.dedup.classifier.jsonl.zst
  29. 4
      common_crawl/2021-04/en_middle_0014.json.gz.dedup.classifier.jsonl.zst
  30. 4
      common_crawl/2021-04/en_middle_0030.json.gz.dedup.classifier.jsonl.zst
  31. 4
      common_crawl/2021-04/en_middle_0080.json.gz.dedup.classifier.jsonl.zst
  32. 4
      common_crawl/2022-05/en_head_0002.json.gz.dedup.classifier.jsonl.zst
  33. 4
      common_crawl/2022-05/en_head_0007.json.gz.dedup.classifier.jsonl.zst
  34. 4
      common_crawl/2022-05/en_head_0021.json.gz.dedup.classifier.jsonl.zst
  35. 4
      common_crawl/2022-05/en_head_0023.json.gz.dedup.classifier.jsonl.zst
  36. 4
      common_crawl/2022-05/en_head_0033.json.gz.dedup.classifier.jsonl.zst
  37. 4
      common_crawl/2022-05/en_head_0043.json.gz.dedup.classifier.jsonl.zst
  38. 4
      common_crawl/2022-05/en_head_0047.json.gz.dedup.classifier.jsonl.zst
  39. 4
      common_crawl/2022-05/en_head_0053.json.gz.dedup.classifier.jsonl.zst
  40. 4
      common_crawl/2022-05/en_middle_0007.json.gz.dedup.classifier.jsonl.zst
  41. 4
      common_crawl/2022-05/en_middle_0008.json.gz.dedup.classifier.jsonl.zst
  42. 4
      common_crawl/2022-05/en_middle_0010.json.gz.dedup.classifier.jsonl.zst
  43. 4
      common_crawl/2022-05/en_middle_0019.json.gz.dedup.classifier.jsonl.zst
  44. 4
      common_crawl/2022-05/en_middle_0022.json.gz.dedup.classifier.jsonl.zst
  45. 4
      common_crawl/2022-05/en_middle_0028.json.gz.dedup.classifier.jsonl.zst
  46. 4
      common_crawl/2022-05/en_middle_0055.json.gz.dedup.classifier.jsonl.zst
  47. 4
      common_crawl/2023-06/en_head_0004.json.gz.dedup.classifier.jsonl.zst
  48. 4
      common_crawl/2023-06/en_head_0006.json.gz.dedup.classifier.jsonl.zst
  49. 4
      common_crawl/2023-06/en_head_0019.json.gz.dedup.classifier.jsonl.zst
  50. 4
      common_crawl/2023-06/en_head_0028.json.gz.dedup.classifier.jsonl.zst
  51. Some files were not shown because too many files have changed in this diff Show More
Loading…
Cancel
Save