bdill

azure_blob_iterate_to_csv_files.ps1

Aug 15th, 2025 (edited)
644
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PowerShell 2.80 KB | Source Code | 0 0
  # Desc: Connect to a blob container through a SAS URL and page through every blob it lists.
  #       A numbered CSV file is written for every $chunkSize blobs, so only one chunk of rows
  #       is held in memory at a time, even for containers with millions of blobs.
  #       CSV file contains: FileName, FilePath, FileSizeBytes
  #       The script aborts on the first failed or unparsable listing response.
  # ================================
  # Settings
  # ================================
  Clear-Host
  $containerSasUrl = "https://my-storage-acct.blob.core.windows.net/my-container?sp=rl&st=2025-08-13T19:48:50Z&se=2025-08-22T04:03:50Z&spr=https&sv=2024-11-04&sr=c&sig=***********"
  $outputDir       = "C:\logs"
  $chunkSize       = 5000

  # ================================
  # Vars
  # ================================
  $chunkIndex      = 1
  # Generic list instead of an array: "+=" on an array copies the whole array on every append.
  $rowBuffer       = New-Object 'System.Collections.Generic.List[object]'
  $marker          = ""
  $totalCount      = 0
  # ================================

  $StartDate = Get-Date
  Write-Host "Started: $(($StartDate).ToString('yyyy-MM-dd HH:mm:ss'))"

  # Ensure output directory exists
  if (-not (Test-Path $outputDir)) { New-Item -Path $outputDir -ItemType Directory | Out-Null }

  do {
      $url = "$containerSasUrl&restype=container&comp=list&maxresults=5000"
      # The continuation marker is an opaque server value; escape it so it survives as a query parameter.
      if ($marker) { $url += "&marker=$([uri]::EscapeDataString($marker))" }

      Write-Host "Getting chunk $chunkIndex with marker: $marker"
      try {
          $response = Invoke-WebRequest -Uri $url -UseBasicParsing -ErrorAction Stop
          # Decode the raw bytes as UTF-8 explicitly instead of relying on the cmdlet's charset
          # guess, so non-ASCII blob names are not garbled.
          # NOTE(review): assumes the listing is UTF-8 XML, as its XML declaration states - confirm.
          $rawContent = [System.Text.Encoding]::UTF8.GetString($response.RawContentStream.ToArray())
          $cleanXml = $rawContent -replace "^[^\<]+",""    # Strip any junk characters (e.g. a BOM) before <?xml
          [xml]$xml = $cleanXml
      }
      catch {
          # Without this, a failed request or bad XML would leave $xml empty or stale and the loop
          # would restart the listing from the first page, writing duplicate rows forever.
          # The URL is deliberately not included in the message because it carries the SAS signature.
          throw "Blob listing request failed; aborting. $($_.Exception.Message)"
      }

      foreach ($blob in $xml.EnumerationResults.Blobs.Blob) {
          # Split on the last '/' with plain string operations. Building a regex from the file
          # name breaks on names containing regex metacharacters such as ( ) [ ] + or *.
          $blobName  = [string]$blob.Name
          $lastSlash = $blobName.LastIndexOf('/')
          if ($lastSlash -ge 0) {
              $fileName = $blobName.Substring($lastSlash + 1)
              $filePath = $blobName.Substring(0, $lastSlash)
          }
          else {
              # Blob sits at the container root: it has no folder part.
              $fileName = $blobName
              $filePath = ""
          }
          $fileSize = [int64]$blob.Properties."Content-Length"

          $rowBuffer.Add([PSCustomObject]@{
              FileName      = $fileName
              FilePath      = $filePath
              FileSizeBytes = $fileSize
          })
          $totalCount++

          if ($rowBuffer.Count -ge $chunkSize) {
              $chunkFile = Join-Path $outputDir ("files_{0:D4}.csv" -f $chunkIndex)
              # Explicit UTF-8: Windows PowerShell 5.1 defaults Export-Csv to ASCII, which would
              # replace non-ASCII characters in blob names with '?'.
              $rowBuffer | Export-Csv -Path $chunkFile -NoTypeInformation -Encoding UTF8
              Write-Host "Wrote chunk $chunkIndex with $($rowBuffer.Count) rows. Total: $totalCount"
              $chunkIndex++
              $rowBuffer.Clear()
          }
      }

      # Update marker for next page. The [string] cast turns a missing element ($null) into "",
      # so the loop ends instead of restarting from the first page.
      $marker = [string]$xml.EnumerationResults.NextMarker
  } while ($marker)

  # Write any remaining rows
  if ($rowBuffer.Count -gt 0) {
      $chunkFile = Join-Path $outputDir ("files_{0:D4}.csv" -f $chunkIndex)
      $rowBuffer | Export-Csv -Path $chunkFile -NoTypeInformation -Encoding UTF8
      Write-Host "Wrote final chunk $chunkIndex with $($rowBuffer.Count) rows. Total: $totalCount"
  }

  Write-Host "Done! CSVs saved to $outputDir"

  $ts = (Get-Date) - $StartDate
  Write-Host "Finished: $((Get-Date).ToString('yyyy-MM-dd HH:mm:ss'))"
  Write-Host "Elapsesd time: $ts"
Advertisement
Add Comment
Please, Sign In to add comment