Files
cloud_removal/Tải_Dự_liệu_train_xóa_mây.ipynb
Victor Phan fb8a107e77 init
2026-01-24 15:48:52 +00:00

322 lines
9.7 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "d-0LFUETwSj8",
"outputId": "3b84f2ea-e6d6-4404-a0ae-68ed567ce5ef"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"✅ Đã cấu hình xong! Hãy chạy các cell bên dưới để tải từng phần.\n"
]
}
],
"source": [
"# --- CELL 1: SETUP ---\n",
"import os\n",
"import subprocess\n",
"import time\n",
"\n",
"# 1. Configuration\n",
"# The password is handed to rsync through the RSYNC_PASSWORD environment variable.\n",
"# setdefault() keeps a value that is already exported, so the credential can be\n",
"# supplied from outside without editing the notebook.\n",
"# NOTE(review): the fallback literal equals the user/module name used in base_url,\n",
"# which looks like a published dataset credential - confirm; if it is private,\n",
"# remove the literal and require the environment variable instead.\n",
"os.environ.setdefault('RSYNC_PASSWORD', 'm1554803')\n",
"# Local directory that receives every downloaded file; created if missing.\n",
"output_dir = \"./dataset/\"\n",
"os.makedirs(output_dir, exist_ok=True)\n",
"# rsync URL prefix; file names are appended to it by download_files().\n",
"base_url = \"rsync://m1554803@dataserv.ub.tum.de/m1554803/\"\n",
"\n",
"# 2. File download helper (shared by the cells below)\n",
"def download_files(file_list):\n",
"    \"\"\"Download every entry of ``file_list`` into ``output_dir`` with rsync.\n",
"\n",
"    Files are fetched one at a time from ``base_url``. A success or failure\n",
"    line is printed per file; a failed file does not stop the remaining ones.\n",
"\n",
"    Args:\n",
"        file_list: Iterable of file names, each appended to ``base_url``.\n",
"\n",
"    Returns:\n",
"        None. Progress and results are reported on stdout only.\n",
"    \"\"\"\n",
"    print(f\"📂 Lưu tại: {output_dir}\")\n",
"    print(\"-\" * 50)\n",
"    for filename in file_list:\n",
"        print(f\"\\n🚀 Đang tải: {filename}\")\n",
"\n",
"        # -P keeps partially transferred files (so a rerun can resume) and shows\n",
"        # progress; the --no-* flags turn off owner/group/permission/time\n",
"        # preservation. The command is an argument list executed without a\n",
"        # shell, so file names are never interpreted as shell syntax.\n",
"        command = [\n",
"            \"rsync\", \"-rvP\", \"--no-o\", \"--no-g\", \"--no-p\", \"--no-t\",\n",
"            f\"{base_url}{filename}\",\n",
"            output_dir,\n",
"        ]\n",
"\n",
"        try:\n",
"            # returncode is rsync's real exit status (os.system would return a\n",
"            # wait()-encoded value on Unix, e.g. 5888 instead of 23).\n",
"            exit_code = subprocess.run(command).returncode\n",
"        except FileNotFoundError:\n",
"            # rsync is not installed / not on PATH: report it like any other\n",
"            # failure, using the shell's conventional \"command not found\" code.\n",
"            exit_code = 127\n",
"\n",
"        if exit_code == 0:\n",
"            print(f\"✅ XONG: {filename}\")\n",
"        else:\n",
"            print(f\"❌ LỖI: {filename} (Code: {exit_code})\")\n",
"        # Short pause before contacting the server again for the next file.\n",
"        time.sleep(1)\n",
"\n",
"# Confirm that setup finished; the message tells the user to run the cells below\n",
"# to download each part.\n",
"print(\"✅ Đã cấu hình xong! Hãy chạy các cell bên dưới để tải từng phần.\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JIijwdmmwSho",
"outputId": "b84d0afb-66bf-4fb7-b2c6-3db3c55c49d0"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"📂 Lưu tại: ./dataset/\n",
"--------------------------------------------------\n",
"\n",
"🚀 Đang tải: checksums.sha512\n",
"receiving incremental file list\n",
"checksums.sha512\n",
" 2,230 100% 2.13MB/s 0:00:00 (xfr#1, to-chk=0/1)\n",
"\n",
"sent 43 bytes received 2,330 bytes 431.45 bytes/sec\n",
"total size is 2,230 speedup is 0.94\n",
"✅ XONG: checksums.sha512\n",
"\n",
"🚀 Đang tải: sen12ms_cr_dataLoader.py\n",
"receiving incremental file list\n",
"sen12ms_cr_dataLoader.py\n",
" 9,285 100% 8.85MB/s 0:00:00 (xfr#1, to-chk=0/1)\n",
"\n",
"sent 43 bytes received 9,394 bytes 1,715.82 bytes/sec\n",
"total size is 9,285 speedup is 0.98\n",
"✅ XONG: sen12ms_cr_dataLoader.py\n"
]
}
],
"source": [
"# --- CELL 2: SMALL FILES ---\n",
"# The checksum manifest and the data loader script, fetched before the archives.\n",
"files = [\"checksums.sha512\", \"sen12ms_cr_dataLoader.py\"]\n",
"download_files(files)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "2SezTEcFwSf7",
"outputId": "5fe6e5ed-4267-4623-8554-8f9646074639"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"📂 Lưu tại: ./dataset/\n",
"--------------------------------------------------\n",
"\n",
"🚀 Đang tải: ROIs2017_winter_s1.tar.gz\n",
"receiving incremental file list\n",
"ROIs2017_winter_s1.tar.gz\n",
" 8,294,291,725 100% 78.02MB/s 0:01:41 (xfr#1, to-chk=0/1)\n",
"\n",
"sent 728,687 bytes received 364,428 bytes 6,726.86 bytes/sec\n",
"total size is 8,294,291,725 speedup is 7,587.76\n",
"✅ XONG: ROIs2017_winter_s1.tar.gz\n",
"\n",
"🚀 Đang tải: ROIs2017_winter_s2.tar.gz\n",
"receiving incremental file list\n",
"ROIs2017_winter_s2.tar.gz\n",
" 21,959,347,301 100% 76.10MB/s 0:04:35 (xfr#1, to-chk=0/1)\n",
"\n",
"sent 1,340,419 bytes received 670,295 bytes 5,215.86 bytes/sec\n",
"total size is 21,959,347,301 speedup is 10,921.17\n",
"✅ XONG: ROIs2017_winter_s2.tar.gz\n",
"\n",
"🚀 Đang tải: ROIs2017_winter_s2_cloudy.tar.gz\n",
"receiving incremental file list\n",
"ROIs2017_winter_s2_cloudy.tar.gz\n",
" 13,391,641,622 100% 74.42MB/s 0:02:51 (xfr#1, to-chk=0/1)\n",
"\n",
"sent 925,899 bytes received 463,043 bytes 5,566.90 bytes/sec\n",
"total size is 13,391,641,622 speedup is 9,641.61\n",
"✅ XONG: ROIs2017_winter_s2_cloudy.tar.gz\n"
]
}
],
"source": [
"# --- CELL 3: WINTER ---\n",
"# Total size: ~40.6 GB\n",
"# The three winter archives share one naming pattern and differ only in the suffix.\n",
"files = [\n",
"    f\"ROIs2017_winter_{variant}.tar.gz\"\n",
"    for variant in (\"s1\", \"s2\", \"s2_cloudy\")\n",
"]\n",
"download_files(files)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7iIFPApowSbu",
"outputId": "9e4d45c7-0ee5-45f6-860e-99af4601aa22"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"📂 Lưu tại: ./dataset/\n",
"--------------------------------------------------\n",
"\n",
"🚀 Đang tải: ROIs1158_spring_s1.tar.gz\n",
"receiving incremental file list\n",
"ROIs1158_spring_s1.tar.gz\n",
" 13,229,071,106 100% 79.26MB/s 0:02:39 (xfr#1, to-chk=0/1)\n",
"\n",
"sent 920,259 bytes received 460,216 bytes 5,763.99 bytes/sec\n",
"total size is 13,229,071,106 speedup is 9,582.98\n",
"✅ XONG: ROIs1158_spring_s1.tar.gz\n",
"\n",
"🚀 Đang tải: ROIs1158_spring_s2.tar.gz\n",
"receiving incremental file list\n",
"ROIs1158_spring_s2.tar.gz\n",
" 34,953,924,853 100% 76.34MB/s 0:07:16 (xfr#1, to-chk=0/1)\n",
"\n",
"sent 2,133,595 bytes received 1,066,884 bytes 5,419.95 bytes/sec\n",
"total size is 34,953,924,853 speedup is 10,921.47\n",
"✅ XONG: ROIs1158_spring_s2.tar.gz\n",
"\n",
"🚀 Đang tải: ROIs1158_spring_s2_cloudy.tar.gz\n",
"receiving incremental file list\n",
"ROIs1158_spring_s2_cloudy.tar.gz\n",
" 21,910,216,767 100% 77.47MB/s 0:04:29 (xfr#1, to-chk=0/1)\n",
"\n",
"sent 1,337,419 bytes received 668,802 bytes 5,400.33 bytes/sec\n",
"total size is 21,910,216,767 speedup is 10,921.14\n",
"✅ XONG: ROIs1158_spring_s2_cloudy.tar.gz\n"
]
}
],
"source": [
"# --- CELL 4: SPRING ---\n",
"# Total size: ~65.3 GB\n",
"# The three spring archives share one naming pattern and differ only in the suffix.\n",
"files = [\n",
"    f\"ROIs1158_spring_{variant}.tar.gz\"\n",
"    for variant in (\"s1\", \"s2\", \"s2_cloudy\")\n",
"]\n",
"download_files(files)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "s6t_5qyUwSZ-",
"outputId": "ddc101ae-753d-44f1-c3ee-c8413c16fb99"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"📂 Lưu tại: ./dataset/\n",
"--------------------------------------------------\n",
"\n",
"🚀 Đang tải: ROIs1868_summer_s2_cloudy.tar.gz\n",
"receiving incremental file list\n",
"ROIs1868_summer_s2_cloudy.tar.gz\n",
" 24,766,970,197 100% 77.10MB/s 0:05:06 (xfr#1, to-chk=0/1)\n",
"\n",
"sent 1,511,791 bytes received 755,986 bytes 5,329.68 bytes/sec\n",
"total size is 24,766,970,197 speedup is 10,921.25\n",
"✅ XONG: ROIs1868_summer_s2_cloudy.tar.gz\n"
]
}
],
"source": [
"# --- CELL 5: SUMMER ---\n",
"# Total size: ~74.7 GB\n",
"# NOTE(review): the saved output of this cell shows only\n",
"# ROIs1868_summer_s2_cloudy.tar.gz being downloaded - confirm that the s1 and s2\n",
"# archives were fetched as well.\n",
"files = [\n",
"    f\"ROIs1868_summer_{variant}.tar.gz\"\n",
"    for variant in (\"s1\", \"s2\", \"s2_cloudy\")\n",
"]\n",
"download_files(files)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "KbccbbI9wSUd",
"outputId": "7b208f9d-8c3f-45e8-9582-88a872527cb0"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"📂 Lưu tại: ./dataset/\n",
"--------------------------------------------------\n",
"\n",
"🚀 Đang tải: ROIs1970_fall_s2_cloudy.tar.gz\n",
"receiving incremental file list\n",
"ROIs1970_fall_s2_cloudy.tar.gz\n",
" 30,149,143,027 100% 36.83MB/s 0:13:00 (xfr#1, to-chk=0/1)\n",
"\n",
"sent 1,785,615 bytes received 897,343,620 bytes 1,008,557.75 bytes/sec\n",
"total size is 30,149,143,027 speedup is 33.53\n",
"✅ XONG: ROIs1970_fall_s2_cloudy.tar.gz\n"
]
}
],
"source": [
"# --- CELL 6: FALL ---\n",
"# Total size: ~91.2 GB\n",
"# NOTE(review): the saved output of this cell shows only\n",
"# ROIs1970_fall_s2_cloudy.tar.gz being downloaded - confirm that the s1 and s2\n",
"# archives were fetched as well.\n",
"files = [\n",
"    f\"ROIs1970_fall_{variant}.tar.gz\"\n",
"    for variant in (\"s1\", \"s2\", \"s2_cloudy\")\n",
"]\n",
"download_files(files)"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}