Mutex experiments

9 changed files with 264 additions and 0 deletions

.gitmodules vendored Normal file (+3)

@@ -0,0 +1,3 @@
[submodule "can"]
	path = can
	url = https://git.shs.tools/SM/can.git

CMakeLists.txt Normal file (+48)

@@ -0,0 +1,48 @@
cmake_minimum_required(VERSION 2.8.6)
project(multithread_experiments)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
option(USE_SMSDK "Use libraries from SMSDK directory" ON)
if(USE_SMSDK)
    set(PATH_TO_SMSDK $ENV{SMSDK_DIR})
    set(SM_BIN_DIR $ENV{SM_BIN_DIR})
    set(PIP_DIR ${PATH_TO_SMSDK})
    set(SMBRICKS_INCLUDES ${PATH_TO_SMSDK}/include/SMBricks)
    set(SMBRICKS_LIB_DIR ${PATH_TO_SMSDK}/lib)
endif()
find_package(PIP REQUIRED)
find_package(QAD REQUIRED)
# CMAKE_CXX_STANDARD is only honored by CMake >= 3.1, so -std=c++11 is also
# passed explicitly in the flags for older CMake versions.
if(CMAKE_BUILD_TYPE MATCHES Debug)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g3 -fPIC -std=c++11")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g3 -fPIC")
else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC -std=c++11")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -fPIC")
endif()
include_directories(experiments ${PIP_INCLUDES} can)
add_subdirectory(can)
# Copy the CAN driver shared libraries next to the executables.
add_custom_target(copy_dependencies
    COMMAND ${CMAKE_COMMAND} -E copy ${PCAN_LIB} ${CMAKE_CURRENT_BINARY_DIR}/PCANBasic${CMAKE_SHARED_LIBRARY_SUFFIX}
    COMMAND ${CMAKE_COMMAND} -E copy ${VSCAN_LIB} ${CMAKE_CURRENT_BINARY_DIR}/vs_can_api${CMAKE_SHARED_LIBRARY_SUFFIX})
add_executable(can_send_multithread experiments/can_send_multithread.cpp)
target_link_libraries(can_send_multithread can)
add_dependencies(can_send_multithread copy_dependencies)
add_executable(can_send experiments/can_send.cpp)
target_link_libraries(can_send can)
add_dependencies(can_send copy_dependencies)
add_executable(mutex experiments/mutex.cpp)
target_link_libraries(mutex ${PIP_LIBRARY} ${PIP_CONCURRENT_LIBRARY})
add_dependencies(mutex copy_dependencies)
add_executable(mutex_multithread experiments/mutex_multithread.cpp)
target_link_libraries(mutex_multithread ${PIP_LIBRARY} ${PIP_CONCURRENT_LIBRARY})
add_dependencies(mutex_multithread copy_dependencies)

can Submodule (+1)

Submodule can added at 8e8151b9e8

experiments/can_send.cpp Normal file (+16)

@@ -0,0 +1,16 @@
#include "can_send.h"
#include <future>
#include <picout.h>
int main() {
auto time1 = std::async(std::launch::deferred, [] { return test_send(PCAN_USBBUS1); });
auto time2 = std::async(std::launch::deferred, [] { return test_send(PCAN_USBBUS2); });
time1.wait();
piCout << "measurements for PCAN_USBBUS1:" << time1.get() / 1000.f << "ms";
time2.wait();
piCout << "measurements for PCAN_USBBUS2:" << time2.get() / 1000.f << "ms";
return 0;
}

experiments/can_send.h Normal file (+23)

@@ -0,0 +1,23 @@
#ifndef MULTITHREAD_EXPERIMENTS_CAN_SEND_H
#define MULTITHREAD_EXPERIMENTS_CAN_SEND_H
#include "pcan/peakcandevice.h"
#include <chrono>
// Sends 30,000 CAN frames on the given bus and returns the elapsed time in
// milliseconds (0 if the device could not be opened).
float test_send(int device_id) {
    PeakCANDevice canDevice(PeakCANDevice::CAN_SPEED_500K, device_id);
    if (!canDevice.open()) {
        return 0.f;
    }
    auto start = std::chrono::high_resolution_clock::now();
    // Designated initializers are a compiler extension in C++11 (standardized
    // only in C++20); GCC and Clang accept them here.
    CAN_Raw msg = { .Id = 0x72, .Size = 8, .Data = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 } };
    for (int i = 0; i < 30 * 1000; ++i) {
        canDevice.send(msg);
    }
    auto end = std::chrono::high_resolution_clock::now();
    return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f;
}
#endif // MULTITHREAD_EXPERIMENTS_CAN_SEND_H

experiments/can_send_multithread.cpp Normal file (+13)

@@ -0,0 +1,13 @@
#include "can_send.h"
#include <future>
#include <picout.h>
int main() {
auto time1 = std::async(std::launch::async, [] { return test_send(PCAN_USBBUS1); });
auto time2 = std::async(std::launch::async, [] { return test_send(PCAN_USBBUS2); });
piCout << "measurements for PCAN_USBBUS1:" << time1.get() / 1000.f << "ms";
piCout << "measurements for PCAN_USBBUS2:" << time2.get() / 1000.f << "ms";
return 0;
}

experiments/mutex.cpp Normal file (+58)

@@ -0,0 +1,58 @@
#include <pimutex.h>
#include <piconditionlock.h>
#include <atomic>
#include <chrono>
#include <functional>
#include <future>
#include <mutex>
#include <thread>
#include <picout.h>
// Runs test_function a million times and returns the elapsed time in milliseconds.
std::future<float> check_performance(std::function<void()> test_function) {
    return std::async(std::launch::deferred, [=]() {
        auto start = std::chrono::high_resolution_clock::now();
        for (int i = 0; i < 1000 * 1000; ++i) {
            test_function();
        }
        auto end = std::chrono::high_resolution_clock::now();
        return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f;
    });
}
int main() {
    // Baseline: the busy loop alone (it may be partially optimized away at -O3).
    auto withoutSyncPerformance = check_performance([]() {
        int i = 0; while (i < 1000) i++;
    });
    piCout << "without synchronization:" << withoutSyncPerformance.get() << "ms";
    PIMutex piMutex;
    auto piMutexPerformance = check_performance([&piMutex]() {
        piMutex.lock();
        int i = 0; while (i < 1000) i++;
        piMutex.unlock();
    });
    piCout << "piMutex:" << piMutexPerformance.get() << "ms";
    PIConditionLock piConditionLock;
    auto piConditionLockPerformance = check_performance([&piConditionLock]() {
        piConditionLock.lock();
        int i = 0; while (i < 1000) i++;
        piConditionLock.unlock();
    });
    piCout << "piConditionLock:" << piConditionLockPerformance.get() << "ms";
    std::mutex stdMutex;
    auto stdMutexPerformance = check_performance([&stdMutex]() {
        stdMutex.lock();
        int i = 0; while (i < 1000) i++;
        stdMutex.unlock();
    });
    piCout << "stdMutex:" << stdMutexPerformance.get() << "ms";
    // std::atomic_flag must be initialized with ATOMIC_FLAG_INIT before C++20.
    std::atomic_flag stdAtomic = ATOMIC_FLAG_INIT;
    auto stdAtomicPerformance = check_performance([&stdAtomic]() {
        // Simple test-and-set spinlock around the critical section.
        while (stdAtomic.test_and_set(std::memory_order_acquire)) {
            std::this_thread::yield();
        }
        int i = 0; while (i < 1000) i++;
        stdAtomic.clear(std::memory_order_release);
    });
    piCout << "stdAtomic:" << stdAtomicPerformance.get() << "ms";
}

experiments/mutex_multithread.cpp Normal file (+71)

@@ -0,0 +1,71 @@
#include <pimutex.h>
#include <piconditionlock.h>
#include <atomic>
#include <chrono>
#include <functional>
#include <future>
#include <mutex>
#include <thread>
#include <picout.h>
#include <vector>
// Spawns 6 threads that all increment the shared counter k under the given
// synchronization primitive until it reaches one million, and returns the
// mean per-thread elapsed time in milliseconds.
float check_performance(std::function<long(long&)> test_function) {
    long k = 0;
    std::vector<std::future<float>> perThreadPerformance;
    for (int j = 0; j < 6; ++j) {
        auto future = std::async(std::launch::async, [=, &k]() {
            auto start = std::chrono::high_resolution_clock::now();
            while (test_function(k) < 1000 * 1000) { }
            auto end = std::chrono::high_resolution_clock::now();
            return std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.f;
        });
        perThreadPerformance.push_back(std::move(future));
    }
    float res = 0;
    for (size_t j = 0; j < perThreadPerformance.size(); ++j) {
        res += perThreadPerformance[j].get();
    }
    return res / perThreadPerformance.size();
}
int main() {
    PIMutex piMutex;
    auto piMutexPerformance = check_performance([&piMutex](long& k) {
        piMutex.lock();
        int i = 0; while (i < 1000) { i++; }
        long res = ++k;
        piMutex.unlock();
        return res;
    });
    piCout << "piMutex:" << piMutexPerformance << "ms";
    PIConditionLock piConditionLock;
    auto piConditionLockPerformance = check_performance([&piConditionLock](long& k) {
        piConditionLock.lock();
        int i = 0; while (i < 1000) { i++; }
        long res = ++k;
        piConditionLock.unlock();
        return res;
    });
    piCout << "piConditionLock:" << piConditionLockPerformance << "ms";
    std::mutex stdMutex;
    auto stdMutexPerformance = check_performance([&stdMutex](long& k) {
        stdMutex.lock();
        int i = 0; while (i < 1000) { i++; }
        long res = ++k;
        stdMutex.unlock();
        return res;
    });
    piCout << "stdMutex:" << stdMutexPerformance << "ms";
    // std::atomic_flag must be initialized with ATOMIC_FLAG_INIT before C++20.
    std::atomic_flag stdAtomic = ATOMIC_FLAG_INIT;
    auto stdAtomicPerformance = check_performance([&stdAtomic](long& k) {
        // Simple test-and-set spinlock guarding the critical section.
        while (stdAtomic.test_and_set(std::memory_order_acquire)) {
            std::this_thread::yield();
        }
        int i = 0; while (i < 1000) { i++; }
        long res = ++k;
        stdAtomic.clear(std::memory_order_release);
        return res;
    });
    piCout << "stdAtomic:" << stdAtomicPerformance << "ms";
}

readme.md Normal file (+31)

@@ -0,0 +1,31 @@
# multithread_experiments
## Results on Windows
All performance measurements were taken with `-O3` optimization.
### Single-threaded mutex experiments
`PIMutex` carries by far the largest synchronization overhead, losing to the other approaches by `~50-100x`. The remaining results are comparable; note, however, that the `stdAtomic` variant protects the code with weaker guarantees in this experiment and is not always applicable in real tasks (a minimal sketch of that spinlock pattern follows the results below).
```cmd
without synchronization: 1.487 ms
piMutex: 959.774 ms
piConditionLock: 15.873 ms
stdMutex: 13.414 ms
stdAtomic: 6.448 ms
```
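
For context, the `stdAtomic` case implements a plain test-and-set spinlock. Below is a minimal RAII-style sketch of the same pattern; the `SpinLock` class name and its yielding strategy are illustrative only and not part of this repository:

```cpp
#include <atomic>
#include <thread>

// Minimal test-and-set spinlock, mirroring the stdAtomic experiment.
class SpinLock {
public:
    void lock() {
        // Spin until test_and_set() returns false, i.e. we took the lock.
        while (flag_.test_and_set(std::memory_order_acquire)) {
            std::this_thread::yield();  // back off instead of burning the CPU
        }
    }
    void unlock() {
        flag_.clear(std::memory_order_release);
    }
private:
    std::atomic_flag flag_ = ATOMIC_FLAG_INIT;
};
```

Unlike `std::mutex`, such a lock never blocks in the kernel and provides no fairness, which explains its advantage in this low-contention benchmark and its limited applicability in real code.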
### Multi-threaded mutex experiments
As in the single-threaded test, `PIMutex` turns out to be the slowest, losing to the other synchronization mechanisms by `~18-120x`. The performance of `PIConditionLock` also degrades noticeably compared to the single-threaded test, trailing `std::mutex` by `~4x`.
```cmd
piMutex: 3237.55 ms
piConditionLock: 179.799 ms
stdMutex: 25.5425 ms
stdAtomic: 16.6967 ms
```
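
To reproduce the numbers, a conventional out-of-source CMake build should suffice. This sketch is an assumption based on `CMakeLists.txt` (which reads `SMSDK_DIR` and `SM_BIN_DIR` from the environment and passes GCC-style flags, implying a MinGW toolchain on Windows); the SDK paths below are placeholders:

```cmd
rem Placeholders: point SMSDK_DIR and SM_BIN_DIR at your SMSDK installation.
set SMSDK_DIR=C:\SMSDK
set SM_BIN_DIR=C:\SMSDK\bin
git submodule update --init
mkdir build && cd build
cmake -G "MinGW Makefiles" -DCMAKE_BUILD_TYPE=Release ..
cmake --build .
mutex.exe
mutex_multithread.exe
```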