如何使用 C# 正确地并行运行大量计算? 编辑:
目标
目标是计算一定数量正方形的所有可能的多边形形状。由于这对于大量应用来说是非常繁重的计算,因此我想利用计算机拥有的多个内核。
问题
通过创建以下情形,我使问题更易于解释和测试:
1) for each value of 2,3,5,and 7:
2) find all multiples (up to a certain value) and add them to the same List
3) remove all duplicates from said list
在我的最终程序中,第2步要庞大得多,计算量也大得多,因此我希望按照第1步中的每个取值,把第2步拆分成若干个可以并行执行的任务。
我尝试过的
我用 C#(.NET Core)创建了一个带有 5 个按钮的 WinForms 应用程序,分别尝试了在 Stack Overflow 和互联网上其他地方找到的几种不同的并行写法:
这是代码(看起来很多,但只是同一个想法的 5 个变体)。每个变体都会统计结果数量并计时,以便检查它们是否产生相同的结果以及各自花费的时间:
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Security.Permissions;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace Parallelism
{
/// <summary>
/// Benchmark form. Each button collects the multiples <c>x * i</c>
/// (for <c>i</c> in <c>[0, Repeat)</c>) of every value in <see cref="Factors"/>,
/// removes duplicates, and appends the distinct count and the elapsed time
/// to the text of the clicked button. The buttons differ only in how the
/// work is distributed.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>Number of multiples generated per factor.</summary>
    private readonly int Repeat = 10000000;

    /// <summary>
    /// Factors whose multiples are collected. Shared by every variant so that
    /// all buttons compute the same set and their counts are comparable.
    /// </summary>
    private static readonly int[] Factors = { 2, 3, 5, 7 };

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>Baseline: plain nested loops on the calling thread.</summary>
    private void button1_Click(object sender, EventArgs e)
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        List<int> output = new List<int>();
        foreach (int x in Factors)
        {
            for (int i = 0; i < Repeat; i++)
            {
                output.Add(x * i);
            }
        }
        output = output.Distinct().ToList();
        watch.Stop();
        Report(sender, output.Count, watch);
    }

    /// <summary>
    /// One <see cref="Task.Run(Action)"/> per factor, all adding to a single
    /// shared <see cref="ConcurrentBag{T}"/>.
    /// </summary>
    private void button2_Click(object sender, EventArgs e)
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        ConcurrentBag<int> output = new ConcurrentBag<int>();
        Task[] tasks = Factors
            .Select(x => Task.Run(() => button2_Calculation(x, output)))
            .ToArray();
        // Blocks the calling thread until every worker has finished, so the
        // stopwatch covers the whole computation.
        Task.WaitAll(tasks);
        HashSet<int> output2 = new HashSet<int>(output);
        watch.Stop();
        Report(sender, output2.Count, watch);
    }

    /// <summary>
    /// Adds the first <see cref="Repeat"/> multiples of <paramref name="x"/>
    /// (starting at 0) to <paramref name="output"/>.
    /// </summary>
    /// <param name="x">Factor to multiply.</param>
    /// <param name="output">Shared collection receiving the products.</param>
    private void button2_Calculation(int x, ConcurrentBag<int> output)
    {
        for (int i = 0; i < Repeat; i++)
        {
            output.Add(x * i);
        }
    }

    /// <summary>PLINQ variant: generation and de-duplication run inside one parallel query.</summary>
    private void button3_Click(object sender, EventArgs e)
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        // The work has to live inside the query. A foreach over the result of
        // AsParallel() executes its body on the calling thread, one item at a
        // time, so it gains nothing over the serial loop.
        List<int> output = Factors
            .AsParallel()
            .SelectMany(x => Enumerable.Range(0, Repeat).Select(i => x * i))
            .Distinct()
            .ToList();
        watch.Stop();
        Report(sender, output.Count, watch);
    }

    /// <summary>
    /// One <see cref="TaskFactory.StartNew(Action)"/> task per factor, all
    /// adding to a single shared <see cref="ConcurrentBag{T}"/>.
    /// </summary>
    private void button4_Click(object sender, EventArgs e)
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        ConcurrentBag<int> output = new ConcurrentBag<int>();
        List<Task> runningTasks = new List<Task>();
        foreach (int x in Factors)
        {
            int value = x;
            runningTasks.Add(Task.Factory.StartNew(() => button2_Calculation(value, output)));
        }
        Task.WaitAll(runningTasks.ToArray());
        HashSet<int> output2 = new HashSet<int>(output);
        watch.Stop();
        Report(sender, output2.Count, watch);
    }

    /// <summary>
    /// <see cref="Parallel.ForEach{TSource}(IEnumerable{TSource}, Action{TSource})"/>
    /// over the factors, all adding to a single shared <see cref="ConcurrentBag{T}"/>.
    /// </summary>
    private void button5_Click(object sender, EventArgs e)
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        ConcurrentBag<int> output = new ConcurrentBag<int>();
        Parallel.ForEach(Factors, x => button5_Calculation(x, output));
        HashSet<int> output2 = new HashSet<int>(output);
        watch.Stop();
        Report(sender, output2.Count, watch);
    }

    /// <summary>
    /// Same computation as <see cref="button2_Calculation"/>; kept as a
    /// separate entry point for the button5 variant.
    /// </summary>
    private void button5_Calculation(int x, ConcurrentBag<int> output)
    {
        button2_Calculation(x, output);
    }

    /// <summary>
    /// Appends the distinct count and elapsed milliseconds to the text of the
    /// button that raised the event.
    /// </summary>
    /// <param name="sender">Event sender; must be a <see cref="Button"/>.</param>
    /// <param name="count">Number of distinct values produced.</param>
    /// <param name="watch">Stopped stopwatch holding the measured duration.</param>
    private static void Report(object sender, int count, System.Diagnostics.Stopwatch watch)
    {
        // Direct cast: a sender of the wrong type fails with a clear
        // InvalidCastException instead of a NullReferenceException.
        ((Button)sender).Text += $",c:{count} - {watch.ElapsedMilliseconds}ms";
    }
}
}
到目前为止的结果
到目前为止,以上所有方法的耗时都在 1s 到 1.5s 之间。实际上,有时普通的串行执行似乎还要快得多。这怎么可能?我原本以为,把任务拆分到 8 个内核(16 个逻辑内核)上运行,总体速度应该会更快才对。
非常感谢您的帮助!
未来
了解了有关如何正确实现并行性的更多信息之后,我希望也可以在另一个线程/异步上运行所有计算,以使GUI保持响应状态。
编辑:
回复 @Pac0:这是我按照您的建议所做的实现。似乎并没有太大的区别:
/// <summary>
/// Variant in which every parallel worker fills its own local
/// <see cref="HashSet{T}"/>; the per-worker sets are merged on the calling
/// thread once all workers have finished. The distinct count and elapsed
/// time are appended to the text of the clicked button.
/// </summary>
private void button6_Click(object sender, EventArgs e)
{
    var watch = System.Diagnostics.Stopwatch.StartNew();
    ConcurrentBag<HashSet<int>> bag = new ConcurrentBag<HashSet<int>>();
    Parallel.ForEach(new int[] { 2, 3, 5, 7 }, x =>
    {
        // Local to this worker, so the inner loop touches no shared state.
        HashSet<int> temp = new HashSet<int>();
        for (int i = 0; i < Repeat; i++)
        {
            temp.Add(x * i);
        }
        // The only shared-collection access: one Add per worker.
        bag.Add(temp);
    });
    HashSet<int> output2 = new HashSet<int>();
    foreach (var hash in bag)
    {
        output2.UnionWith(hash);
    }
    watch.Stop();
    // Direct cast: a sender of the wrong type fails with a clear
    // InvalidCastException instead of a NullReferenceException.
    ((Button)sender).Text += $",c:{output2.Count} - {watch.ElapsedMilliseconds}ms";
}
解决方法
正如评论中提到的那样,所有任务共用同一个集合会导致大量的锁竞争。单就计算本身而言,基于任务的方案大约快 50%(见下面的代码,其中没有处理输出的合并)。真正造成瓶颈的是对集合的管理;取决于处理方式,它甚至可能比串行执行慢 3 倍以上。
并发编程的难点始终在于平衡负载、消除瓶颈。
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
namespace ConsoleApp5
{
/// <summary>
/// Console benchmark: runs <see cref="DoWork"/> once per entry of
/// <see cref="worklist"/>, first concurrently (one task per entry) and then
/// serially, and prints the elapsed milliseconds of each run.
/// </summary>
class Program
{
    /// <summary>Number of multiples generated per work item.</summary>
    static int Repeat = 100000000;

    /// <summary>Factors to process; one work item per entry.</summary>
    static int[] worklist = new int[] { 2,3,5,7 };

    static void Main(string[] args)
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        Console.WriteLine("Hello World! Launching Threads");
        // LaunchThreads returns a Task, so Task.Run unwraps it and Wait()
        // blocks until every worker has actually finished.
        Task launcher = Task.Run(() => LaunchThreads());
        launcher.Wait();
        Console.WriteLine("Hello World! Threads Complete");
        watch.Stop();
        Console.WriteLine($"Threads took: {watch.ElapsedMilliseconds}");
        watch = System.Diagnostics.Stopwatch.StartNew();
        Console.WriteLine("Serial Execution Starting");
        foreach (int i in worklist)
        {
            DoWork(i);
        }
        watch.Stop();
        Console.WriteLine($"Serial Execution took: {watch.ElapsedMilliseconds}");
    }

    /// <summary>
    /// Starts one task per entry of <see cref="worklist"/> and completes when
    /// all of them have completed. The per-task results are not combined.
    /// </summary>
    /// <returns>A task that completes once every worker has finished.</returns>
    static async Task LaunchThreads()
    {
        var worktasks = new List<Task<List<int>>>();
        foreach (int i in worklist)
        {
            worktasks.Add(Task.Run(() => DoWork(i)));
        }
        await Task.WhenAll(worktasks);
    }

    /// <summary>
    /// Builds the list <c>x * 0, x * 1, ..., x * (Repeat - 1)</c>, logging to
    /// the console when it starts and when it finishes.
    /// </summary>
    /// <param name="x">Factor to multiply.</param>
    /// <returns>A new list with <see cref="Repeat"/> products.</returns>
    static List<int> DoWork(int x)
    {
        Console.WriteLine($"Thread Worker: {x}");
        // The final element count is known up front; presizing avoids the
        // repeated grow-and-copy cycles of an empty list.
        List<int> output = new List<int>(Repeat);
        for (int i = 0; i < Repeat; i++)
        {
            output.Add(x * i);
        }
        Console.WriteLine($"Thread Worker: {x} - Exiting");
        return output;
    }
}
}
,
我想把这段内容作为答案发布,因为一位名叫 Yugami 的用户发布了一种与我之前尝试的不同的做法;那是一个有用且很好的回答,但后来被删除了。
因此,我在自己的测试程序中重现了他们的代码:
/// <summary>
/// Variant in which every factor is processed by its own task that returns a
/// private list; the lists are merged into one <see cref="HashSet{T}"/> after
/// all tasks have completed. The handler awaits the tasks instead of blocking,
/// then appends the distinct count and elapsed time to the clicked button's text.
/// </summary>
private async void button9_Click(object sender, EventArgs e)
{
    var watch = System.Diagnostics.Stopwatch.StartNew();
    var worktasks = new List<Task<List<int>>>();
    foreach (int factor in new int[] { 2, 3, 5, 7 })
    {
        worktasks.Add(Task.Run(() => button9_Calculation(factor)));
    }
    // WhenAll hands back the results in task order, so the tasks do not have
    // to be inspected again through Task.Result.
    List<int>[] results = await Task.WhenAll(worktasks);
    HashSet<int> output = new HashSet<int>();
    foreach (List<int> multiples in results)
    {
        output.UnionWith(multiples);
    }
    watch.Stop();
    // Direct cast: a sender of the wrong type fails with a clear
    // InvalidCastException instead of a NullReferenceException.
    ((Button)sender).Text += $",c:{output.Count} - {watch.ElapsedMilliseconds}ms";
}
/// <summary>
/// Builds the list <c>x * 0, x * 1, ..., x * (Repeat - 1)</c>.
/// </summary>
/// <param name="x">Factor to multiply.</param>
/// <returns>A new list containing <c>Repeat</c> products.</returns>
private List<int> button9_Calculation(int x)
{
    // The final element count is known up front; presizing avoids the
    // repeated grow-and-copy cycles of an empty list.
    List<int> output = new List<int>(Repeat);
    for (int i = 0; i < Repeat; i++)
    {
        output.Add(x * i);
    }
    return output;
}
以下是重复 100,000,000 次时,串行方案与表现最好的两种方案的结果。在这里,我终于看到并行执行第2步带来了一些改进,但现在最大的瓶颈变成了删除重复项,也就是把所有结果合并过滤到单个 HashSet 中。
因此,我认为这解决了我必须改进步骤2的最初问题。 现在,我将继续搜索以改进步骤3;删除重复项。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。