I am writing a function that loops through large directories containing only txt files. The goal is to get the basename and size of each txt file, store those values in a variable, and then run a bulk copy into SQL.
The issue I'm running into is that the directories can get quite large. Each subfolder holds at most 1,000 files, so if there are 200K files there will be 200 subfolders with 1,000 files in each, and gathering all the info we need can take a while. Is there a more efficient way to approach this?
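Per file I only need three values, which end up as one row in the table: the file's basename (used as the DocID), the export set number I pass in, and the size in bytes. Just to show the shape of one record (the values here are made up):
# Shape of one record per text file (illustrative values only, not real data)
[PSCustomObject]@{
    DocID                 = 'DOC00000123'   # file basename
    ReviewExport          = '0025'          # export set number passed to the function
    ExtractedTextFileSize = 18324           # file size in bytes
}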
See function below…
Function Get-ExtractedTextSize
{
param
(
[Parameter(Position = 1, Mandatory = $true)] [string]$ServerInstance,
[Parameter(Position = 2, Mandatory = $true)] [string]$ProjDatabase,
[Parameter(Position = 3, Mandatory = $true)] [string]$ExportPath,
[Parameter(Position = 4, Mandatory = $true)] [string]$PsScriptPath,
[Parameter(Position = 5, Mandatory = $true)] [string]$ExportSet
)
$startTime = Get-Date
$fnName = $MyInvocation.MyCommand
Write-Host "Function Location: $fnName"
# Load helper functions
. "$PsScriptPath\Functions\GetQueryOutput.ps1"
. "$PsScriptPath\Functions\Out-DataTable.ps1"
# Enumerate every text file under the export path (recursing through all subfolders)
# $count = @(Get-ChildItem "$ExportPath\TEXT" -Recurse -File).Count
$files = Get-ChildItem "$ExportPath\TEXT" -Recurse -File
Write-Host "Export Set: $ExportSet - Text file count is " $files.count
#$array = @()
#$hashValues = @{}
Write-Host "Beginning SQL inserts for Extracted Text File Size to the EXT.LegalExportDocumentMetric table..."
$i = 0
ForEach ($file in $files)
{
#Write-Host "Inserting Extracted Text Size for: $file"
$i++
# Get values required for database inserts
$docId = $file.BaseName
$extractedTextfileSize = (Get-Item $file.FullName).Length
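# Append one quoted CSV line per file: DocID, export set, file size in bytes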
$docValues += '"' + $docId + '"' + ',' + '"' + $ExportSet + '"' + ',' + '"' + $extractedTextfileSize + '"' + "`n"
}
$docValues | Out-File "E:\test\CSVTest\test.txt"
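# Create a staging table that the bulk copy will load into before the final insert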
$queryCreateTempTable = "CREATE TABLE TEMP.LegalExportDocumentMetric
(
[DocID] NVARCHAR (50) NULL,
[ReviewExport] VARCHAR (4) NULL,
[ExtractedTextFileSize] BIGINT NULL
);"
Get-QueryOutput $ServerInstance $ProjDatabase $queryCreateTempTable
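# Open a SQL connection for the bulk copy into the staging table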
$cn = new-object System.Data.SqlClient.SqlConnection("Server=$ServerInstance;Database=$ProjDatabase;Integrated Security=True");
$cn.Open()
# The file has no header row, so supply column names that match the staging table
$csvDataTable = Import-Csv -Path "E:\test\CSVTest\test.txt" -Header 'DocID','ReviewExport','ExtractedTextFileSize' | Out-DataTable
$bc = new-object ("System.Data.SqlClient.SqlBulkCopy") $cn
$bc.DestinationTableName = "TEMP.LegalExportDocumentMetric"
$bc.WriteToServer($csvDataTable)
$cn.Close()
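# Move the staged rows into the permanent EXT.LegalExportDocumentMetric table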
$queryText = "INSERT INTO EXT.LegalExportDocumentMetric (DocID, reviewExport, ExtractedTextFileSize)
SELECT * FROM Temp.LegalExportDocumentMetric"
Try
{
Get-QueryOutput $ServerInstance $ProjDatabase $queryText -ErrorAction 'Stop'
}
Catch
{
Write-Host -ForegroundColor Red "There was an issue inserting records into EXT.LegalExportDocumentMetric, please verify that the DocID does not already exist in the table..."
return "Failed", "Verify the data that is trying to be inserted."
}
Finally
{
#Drop the TEMP table
$queryDropTable = "DROP TABLE Temp.LegalExportDocumentMetric;"
Get-QueryOutput $ServerInstance $ProjDatabase $queryDropTable
}
Write-Host "Count: $i"
$endTime = Get-Date
$duration = New-TimeSpan -Start $startTime -End $endTime
Write-Host "Export Set: $ExportSet - Extracted Text Size has been recorded for $i text files...Duration: $duration"
return "Success"
}
#Sample Function Call
$Result = Get-ExtractedTextSize -ServerInstance "DBSERVERNAME" -ProjDatabase "H52062_EDD" -ExportPath "E:\EXPORT\Immaterial Item Rollup Testing Exports\MBOX_A" -PsScriptPath "\\FILESERVER\engineering_store\Development\Powershell\DEV" -ExportSet "0025"
Write-Host $Result