我有一个函数,先从某个目录获取文件列表,然后在列表中搜索匹配的文件名,但性能很糟糕。(标题:LINQ 查询的性能问题)
下面是函数:
/// <summary>
/// Lists every file under <paramref name="serverDirectory"/> (subdirectories included) and reports
/// which files match one of the given permit numbers and which permit numbers have no matching file.
/// A file matches when the leading characters of its file name (the text after the last backslash
/// in its path) equal an entry of <paramref name="permitNumbers"/>.
/// </summary>
/// <param name="permitNumbers">Permit numbers to look for.</param>
/// <param name="serverDirectory">Root directory that is searched recursively.</param>
/// <param name="type">
/// Selects the file type passed to each result and the number of leading file-name characters used
/// as the key: 5 for Well and DrillerLog, 14 for RasterLog. Any other value produces no results.
/// </param>
/// <returns>
/// One fileStatus per matching file (third constructor argument 1, fourth argument the file name),
/// followed by one fileStatus per entry of <paramref name="permitNumbers"/> that has no match
/// (third argument 0, empty file name). The list is empty when the directory does not exist, when it
/// contains no files, when <paramref name="permitNumbers"/> is empty, or when the type is not handled.
/// </returns>
public List<fileStatus> checkFilesStatus(List<string> permitNumbers, string serverDirectory, fileType type)
{
// Logging configuration is (re)applied on every call before the parameters are logged.
XmlConfigurator.Configure();
log.Debug(string.Format("Beginning checkFilesStatus with following parameters > permitNumbers: {0} > serverDirectory: {1} > type: {2}", string.Join(",", permitNumbers.ToArray()), serverDirectory, type.ToString()));
List<fileStatus> results = new List<fileStatus>();
DirectoryInfo dirInfo = new DirectoryInfo(serverDirectory);
if (dirInfo.Exists)
{
// GET LIST OF ALL FILES IN DIRECTORY
// Full paths of all files, recursing into subdirectories.
string[] files = System.IO.Directory.GetFiles(serverDirectory, "*", System.IO.SearchOption.AllDirectories);
// The whole file list is joined into one string for the debug message.
log.Debug(string.Format("List of all files in directory: {0}", string.Join(",", files)));
if (files.Length > 0 && permitNumbers.Count > 0)
{
log.Debug("Checking for matching files");
// CHECK FOR MATCHING FILES
switch (type)
{
case fileType.Well:
// Keep files whose name has at least 5 characters and whose first 5 characters appear in
// permitNumbers. The file name is re-derived with LastIndexOf/Substring in every clause, and
// permitNumbers.Contains is a linear scan of the list for each file.
// NOTE(review): assuming standard LINQ to Objects, this query is deferred - nothing runs
// here; it is re-executed each time matchingFiles is enumerated below.
var matchingFiles = (from f in files
where f.Substring(f.LastIndexOf("\\") + 1).Length > 4
where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 5))
select new fileStatus(fileType.Well, f.Substring(f.LastIndexOf("\\") + 1, 5), 1, f.Substring(f.LastIndexOf("\\") + 1)));
// Projection over matchingFiles; also deferred.
// NOTE(review): presumably PermitNumber is the second constructor argument - confirm in fileStatus.
var permitNumbersWithMatches = (from x in matchingFiles
select x.PermitNumber);
// Permit numbers with no matching file. Each Contains call enumerates permitNumbersWithMatches,
// which re-runs the matchingFiles query over all files; this repeats for every permit number
// and is the dominant cost for large inputs.
var nonMatchingFiles = (from p in permitNumbers
where !permitNumbersWithMatches.Contains(p)
select new fileStatus(fileType.Well, p, 0, string.Empty));
// AddRange is where the deferred queries are actually enumerated.
results.AddRange(matchingFiles);
results.AddRange(nonMatchingFiles);
break;
case fileType.DrillerLog:
// Same logic as the Well case (5-character key), reusing the variables declared there.
matchingFiles = (from f in files
where f.Substring(f.LastIndexOf("\\") + 1).Length > 4
where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 5))
select new fileStatus(fileType.DrillerLog, f.Substring(f.LastIndexOf("\\") + 1, 5), 1, f.Substring(f.LastIndexOf("\\") + 1)));
permitNumbersWithMatches = (from x in matchingFiles
select x.PermitNumber);
nonMatchingFiles = (from p in permitNumbers
where !permitNumbersWithMatches.Contains(p)
select new fileStatus(fileType.DrillerLog, p, 0, string.Empty));
results.AddRange(matchingFiles);
results.AddRange(nonMatchingFiles);
break;
case fileType.RasterLog:
// Same logic again, but the key is the first 14 characters of the file name.
matchingFiles = (from f in files
where f.Substring(f.LastIndexOf("\\") + 1).Length > 13
where permitNumbers.Contains(f.Substring(f.LastIndexOf("\\") + 1, 14))
select new fileStatus(fileType.RasterLog, f.Substring(f.LastIndexOf("\\") + 1, 14), 1, f.Substring(f.LastIndexOf("\\") + 1)));
permitNumbersWithMatches = (from x in matchingFiles
select x.PermitNumber);
nonMatchingFiles = (from p in permitNumbers
where !permitNumbersWithMatches.Contains(p)
select new fileStatus(fileType.RasterLog, p, 0, string.Empty));
results.AddRange(matchingFiles);
results.AddRange(nonMatchingFiles);
break;
default:
// Any other file type yields no results.
break;
}
log.Debug("Done checking for matching files");
}
}
return results;
}
一旦它到达为“matchingFiles”赋值的 LINQ 查询,程序就会挂起。“permitNumbers”是一个很大的集合(约 5000 个),“files”也是一个很大的集合。
我能做些什么来加快速度?
根据下面给出的建议,我把函数修改如下,现在性能符合预期。非常感谢! =)
/// <summary>
/// Lists every file under <paramref name="serverDirectory"/> (subdirectories included) and reports
/// which file names start with one of the given permit numbers and which permit numbers have no
/// matching file. Lookups go through hash sets, so each file name is tested in constant time.
/// </summary>
/// <param name="permitNumbers">Permit numbers to look for; duplicates are collapsed.</param>
/// <param name="serverDirectory">Root directory that is searched recursively.</param>
/// <param name="type">
/// Selects the file type passed to each result and the number of leading file-name characters used
/// as the key: 5 for Well and DrillerLog, 14 for RasterLog. Any other value produces no results.
/// </param>
/// <returns>
/// One fileStatus per distinct matching file name (third constructor argument 1), followed by one
/// fileStatus per distinct permit number without a match (third argument 0, empty file name). The
/// list is empty when the directory does not exist, when it contains no files, when
/// <paramref name="permitNumbers"/> is empty, or when the type is not handled.
/// </returns>
public List<fileStatus> checkFilesStatus(List<string> permitNumbers, string serverDirectory, fileType type)
{
    HashSet<string> numbers = new HashSet<string>(permitNumbers);
    XmlConfigurator.Configure();
    log.Debug(string.Format("Beginning checkFilesStatus with following parameters > permitNumbers: {0} > serverDirectory: {1} > type: {2}", string.Join(",", permitNumbers.ToArray()), serverDirectory, type.ToString()));
    List<fileStatus> results = new List<fileStatus>();

    DirectoryInfo dirInfo = new DirectoryInfo(serverDirectory);
    if (!dirInfo.Exists)
    {
        return results;
    }

    // GET LIST OF ALL FILES IN DIRECTORY
    string[] files = System.IO.Directory.GetFiles(serverDirectory, "*", System.IO.SearchOption.AllDirectories);
    // Only the file name matters for matching; identical names in different folders collapse to one entry.
    HashSet<string> fileNames = new HashSet<string>(files.Select(f => Path.GetFileName(f)));
    log.Debug(string.Format("List of all files in directory: {0}", string.Join(",", files)));
    if (fileNames.Count == 0 || numbers.Count == 0)
    {
        return results;
    }

    log.Debug("Checking for matching files");

    // The supported file types differ only in how many leading characters of the file name
    // form the permit number, so that is the only thing decided per type.
    int keyLength;
    switch (type)
    {
        case fileType.Well:
        case fileType.DrillerLog:
            keyLength = 5;
            break;
        case fileType.RasterLog:
            keyLength = 14;
            break;
        default:
            // Unhandled type: nothing is matched and nothing is reported.
            keyLength = 0;
            break;
    }

    if (keyLength > 0)
    {
        // CHECK FOR MATCHING FILES
        // Materialised once so each fileStatus is built a single time, instead of re-running
        // a deferred query for both the Except step and the AddRange step.
        List<fileStatus> matchingFiles = new List<fileStatus>();
        foreach (string fileName in fileNames)
        {
            if (fileName.Length < keyLength)
            {
                continue;
            }

            string key = fileName.Substring(0, keyLength);
            if (numbers.Contains(key))
            {
                matchingFiles.Add(new fileStatus(type, key, 1, fileName));
            }
        }

        // Permit numbers for which no file was found.
        IEnumerable<fileStatus> nonMatchingFiles = numbers
            .Except(matchingFiles.Select(m => m.PermitNumber))
            .Select(p => new fileStatus(type, p, 0, string.Empty));

        results.AddRange(matchingFiles);
        results.AddRange(nonMatchingFiles);
    }

    log.Debug("Done checking for matching files");
    return results;
}
“一旦它到达LINQ查询” 哪一个?你有几个。另外,5000并不是一个“非常大的集合”。 – 2014-10-03 15:06:56
你是否分析了代码?哪个linq查询很慢? – 2014-10-03 15:07:06
一执行到为“matchingFiles”赋值的那个 LINQ 查询就会这样。 – 2014-10-03 15:08:07