Mutex experiments

9 changed files with 264 additions and 0 deletions

.gitmodules vendored Normal file (+3)

@@ -0,0 +1,3 @@
[submodule "can"]
	path = can
	url = https://git.shs.tools/SM/can.git

CMakeLists.txt Normal file (+48)

@@ -0,0 +1,48 @@
cmake_minimum_required(VERSION 2.8.6)
project(multithread_experiments)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
option(USE_SMSDK "Use libraries from SMSDK directory" ON)
if(USE_SMSDK)
    set(PATH_TO_SMSDK $ENV{SMSDK_DIR})
    set(SM_BIN_DIR $ENV{SM_BIN_DIR})
    set(PIP_DIR ${PATH_TO_SMSDK})
    set(SMBRICKS_INCLUDES ${PATH_TO_SMSDK}/include/SMBricks)
    set(SMBRICKS_LIB_DIR ${PATH_TO_SMSDK}/lib)
endif()
find_package(PIP REQUIRED)
find_package(QAD REQUIRED)
# CMAKE_CXX_STANDARD is only honored by CMake >= 3.1, so -std=c++11 is also
# passed explicitly in the flags for older CMake versions.
if(CMAKE_BUILD_TYPE MATCHES Debug)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g3 -fPIC -std=c++11")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g3 -fPIC")
else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC -std=c++11")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -fPIC")
endif()
include_directories(experiments ${PIP_INCLUDES} can)
add_subdirectory(can)
# Copy the CAN driver shared libraries next to the executables.
add_custom_target(copy_dependencies
    COMMAND ${CMAKE_COMMAND} -E copy ${PCAN_LIB} ${CMAKE_CURRENT_BINARY_DIR}/PCANBasic${CMAKE_SHARED_LIBRARY_SUFFIX}
    COMMAND ${CMAKE_COMMAND} -E copy ${VSCAN_LIB} ${CMAKE_CURRENT_BINARY_DIR}/vs_can_api${CMAKE_SHARED_LIBRARY_SUFFIX})
add_executable(can_send_multithread experiments/can_send_multithread.cpp)
target_link_libraries(can_send_multithread can)
add_dependencies(can_send_multithread copy_dependencies)
add_executable(can_send experiments/can_send.cpp)
target_link_libraries(can_send can)
add_dependencies(can_send copy_dependencies)
add_executable(mutex experiments/mutex.cpp)
target_link_libraries(mutex ${PIP_LIBRARY} ${PIP_CONCURRENT_LIBRARY})
add_dependencies(mutex copy_dependencies)
add_executable(mutex_multithread experiments/mutex_multithread.cpp)
target_link_libraries(mutex_multithread ${PIP_LIBRARY} ${PIP_CONCURRENT_LIBRARY})
add_dependencies(mutex_multithread copy_dependencies)

can Submodule (+1)

Submodule can added at 8e8151b9e8

experiments/can_send.cpp Normal file (+16)

@@ -0,0 +1,16 @@
#include "can_send.h"
#include <future>
#include <picout.h>
int main() {
auto time1 = std::async(std::launch::deferred, [] { return test_send(PCAN_USBBUS1); });
auto time2 = std::async(std::launch::deferred, [] { return test_send(PCAN_USBBUS2); });
time1.wait();
piCout << "measurements for PCAN_USBBUS1:" << time1.get() / 1000.f << "ms";
time2.wait();
piCout << "measurements for PCAN_USBBUS2:" << time2.get() / 1000.f << "ms";
return 0;
}

experiments/can_send.h Normal file (+23)

@@ -0,0 +1,23 @@
#ifndef MULTITHREAD_EXPERIMENTS_CAN_SEND_H
#define MULTITHREAD_EXPERIMENTS_CAN_SEND_H
#include "pcan/peakcandevice.h"
#include <chrono>
// Sends 30,000 CAN frames on the given bus and returns the elapsed time in
// milliseconds (0 if the device could not be opened).
float test_send(int device_id) {
    PeakCANDevice canDevice(PeakCANDevice::CAN_SPEED_500K, device_id);
    if (!canDevice.open()) {
        return 0.f;
    }
    auto start = std::chrono::high_resolution_clock::now();
    // Designated initializers are a compiler extension in C++11 (standardized
    // only in C++20); GCC and Clang accept them here.
    CAN_Raw msg = { .Id = 0x72, .Size = 8, .Data = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 } };
    for (int i = 0; i < 30 * 1000; ++i) {
        canDevice.send(msg);
    }
    auto end = std::chrono::high_resolution_clock::now();
    return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f;
}
#endif // MULTITHREAD_EXPERIMENTS_CAN_SEND_H

experiments/can_send_multithread.cpp Normal file (+13)

@@ -0,0 +1,13 @@
#include "can_send.h"
#include <future>
#include <picout.h>
int main() {
auto time1 = std::async(std::launch::async, [] { return test_send(PCAN_USBBUS1); });
auto time2 = std::async(std::launch::async, [] { return test_send(PCAN_USBBUS2); });
piCout << "measurements for PCAN_USBBUS1:" << time1.get() / 1000.f << "ms";
piCout << "measurements for PCAN_USBBUS2:" << time2.get() / 1000.f << "ms";
return 0;
}

experiments/mutex.cpp Normal file (+58)

@@ -0,0 +1,58 @@
#include <pimutex.h>
#include <piconditionlock.h>
#include <atomic>
#include <chrono>
#include <functional>
#include <future>
#include <mutex>
#include <thread>
#include <picout.h>
// Runs test_function a million times and returns the elapsed time in milliseconds.
std::future<float> check_performance(std::function<void()> test_function) {
    return std::async(std::launch::deferred, [=]() {
        auto start = std::chrono::high_resolution_clock::now();
        for (int i = 0; i < 1000 * 1000; ++i) {
            test_function();
        }
        auto end = std::chrono::high_resolution_clock::now();
        return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f;
    });
}
int main() {
    // Baseline: the busy loop alone (it may be partially optimized away at -O3).
    auto withoutSyncPerformance = check_performance([]() {
        int i = 0; while (i < 1000) i++;
    });
    piCout << "without synchronization:" << withoutSyncPerformance.get() << "ms";
    PIMutex piMutex;
    auto piMutexPerformance = check_performance([&piMutex]() {
        piMutex.lock();
        int i = 0; while (i < 1000) i++;
        piMutex.unlock();
    });
    piCout << "piMutex:" << piMutexPerformance.get() << "ms";
    PIConditionLock piConditionLock;
    auto piConditionLockPerformance = check_performance([&piConditionLock]() {
        piConditionLock.lock();
        int i = 0; while (i < 1000) i++;
        piConditionLock.unlock();
    });
    piCout << "piConditionLock:" << piConditionLockPerformance.get() << "ms";
    std::mutex stdMutex;
    auto stdMutexPerformance = check_performance([&stdMutex]() {
        stdMutex.lock();
        int i = 0; while (i < 1000) i++;
        stdMutex.unlock();
    });
    piCout << "stdMutex:" << stdMutexPerformance.get() << "ms";
    // std::atomic_flag must be initialized with ATOMIC_FLAG_INIT before C++20.
    std::atomic_flag stdAtomic = ATOMIC_FLAG_INIT;
    auto stdAtomicPerformance = check_performance([&stdAtomic]() {
        // Simple test-and-set spinlock around the critical section.
        while (stdAtomic.test_and_set(std::memory_order_acquire)) {
            std::this_thread::yield();
        }
        int i = 0; while (i < 1000) i++;
        stdAtomic.clear(std::memory_order_release);
    });
    piCout << "stdAtomic:" << stdAtomicPerformance.get() << "ms";
}

experiments/mutex_multithread.cpp Normal file (+71)

@@ -0,0 +1,71 @@
#include <pimutex.h>
#include <piconditionlock.h>
#include <atomic>
#include <chrono>
#include <functional>
#include <future>
#include <mutex>
#include <thread>
#include <picout.h>
#include <vector>
// Spawns 6 threads that all increment the shared counter k under the given
// synchronization primitive until it reaches one million, and returns the
// mean per-thread elapsed time in milliseconds.
float check_performance(std::function<long(long&)> test_function) {
    long k = 0;
    std::vector<std::future<float>> perThreadPerformance;
    for (int j = 0; j < 6; ++j) {
        auto future = std::async(std::launch::async, [=, &k]() {
            auto start = std::chrono::high_resolution_clock::now();
            while (test_function(k) < 1000 * 1000) { }
            auto end = std::chrono::high_resolution_clock::now();
            return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f;
        });
        perThreadPerformance.push_back(std::move(future));
    }
    float res = 0;
    for (size_t j = 0; j < perThreadPerformance.size(); ++j) {
        res += perThreadPerformance[j].get();
    }
    return res / perThreadPerformance.size();
}
int main() {
    PIMutex piMutex;
    auto piMutexPerformance = check_performance([&piMutex](long& k) {
        piMutex.lock();
        int i = 0; while (i < 1000) { i++; }
        long res = ++k;
        piMutex.unlock();
        return res;
    });
    piCout << "piMutex:" << piMutexPerformance << "ms";
    PIConditionLock piConditionLock;
    auto piConditionLockPerformance = check_performance([&piConditionLock](long& k) {
        piConditionLock.lock();
        int i = 0; while (i < 1000) { i++; }
        long res = ++k;
        piConditionLock.unlock();
        return res;
    });
    piCout << "piConditionLock:" << piConditionLockPerformance << "ms";
    std::mutex stdMutex;
    auto stdMutexPerformance = check_performance([&stdMutex](long& k) {
        stdMutex.lock();
        int i = 0; while (i < 1000) { i++; }
        long res = ++k;
        stdMutex.unlock();
        return res;
    });
    piCout << "stdMutex:" << stdMutexPerformance << "ms";
    // std::atomic_flag must be initialized with ATOMIC_FLAG_INIT before C++20.
    std::atomic_flag stdAtomic = ATOMIC_FLAG_INIT;
    auto stdAtomicPerformance = check_performance([&stdAtomic](long& k) {
        // Simple test-and-set spinlock guarding the critical section.
        while (stdAtomic.test_and_set(std::memory_order_acquire)) {
            std::this_thread::yield();
        }
        int i = 0; while (i < 1000) { i++; }
        long res = ++k;
        stdAtomic.clear(std::memory_order_release);
        return res;
    });
    piCout << "stdAtomic:" << stdAtomicPerformance << "ms";
}

readme.md Normal file (+31)

@@ -0,0 +1,31 @@
# multithread_experiments
## Results on Windows
All performance measurements were taken with `-O3` optimization.
### Single-threaded mutex experiments
`PIMutex` carries by far the largest synchronization overhead, losing to the other approaches by `~50-100x`. The remaining results are comparable; note, however, that the `stdAtomic` variant protects the code with weaker guarantees in this experiment and is not always applicable in real tasks (a minimal sketch of that spinlock pattern follows the results below).
```cmd
without synchronization: 1.487 ms
piMutex: 959.774 ms
piConditionLock: 15.873 ms
stdMutex: 13.414 ms
stdAtomic: 6.448 ms
```
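
For context, the `stdAtomic` case implements a plain test-and-set spinlock. Below is a minimal RAII-style sketch of the same pattern; the `SpinLock` class name and its yielding strategy are illustrative only and not part of this repository:

```cpp
#include <atomic>
#include <thread>

// Minimal test-and-set spinlock, mirroring the stdAtomic experiment.
class SpinLock {
public:
    void lock() {
        // Spin until test_and_set() returns false, i.e. we took the lock.
        while (flag_.test_and_set(std::memory_order_acquire)) {
            std::this_thread::yield();  // back off instead of burning the CPU
        }
    }
    void unlock() {
        flag_.clear(std::memory_order_release);
    }
private:
    std::atomic_flag flag_ = ATOMIC_FLAG_INIT;
};
```

Unlike `std::mutex`, such a lock never blocks in the kernel and provides no fairness, which explains its advantage in this low-contention benchmark and its limited applicability in real code.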
### Multi-threaded mutex experiments
As in the single-threaded test, `PIMutex` turns out to be the slowest, losing to the other synchronization mechanisms by `~18-120x`. The performance of `PIConditionLock` also degrades noticeably compared to the single-threaded test, trailing `std::mutex` by `~4x`.
```cmd
piMutex: 3237.55 ms
piConditionLock: 179.799 ms
stdMutex: 25.5425 ms
stdAtomic: 16.6967 ms
```
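
To reproduce the numbers, a conventional out-of-source CMake build should suffice. This sketch is an assumption based on `CMakeLists.txt` (which reads `SMSDK_DIR` and `SM_BIN_DIR` from the environment and passes GCC-style flags, implying a MinGW toolchain on Windows); the SDK paths below are placeholders:

```cmd
rem Placeholders: point SMSDK_DIR and SM_BIN_DIR at your SMSDK installation.
set SMSDK_DIR=C:\SMSDK
set SM_BIN_DIR=C:\SMSDK\bin
git submodule update --init
mkdir build && cd build
cmake -G "MinGW Makefiles" -DCMAKE_BUILD_TYPE=Release ..
cmake --build .
mutex.exe
mutex_multithread.exe
```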