'lpthread' 태그의 글 목록

lpthread

POSIX Threads - create 부터 Mutex까지 2018.12.04

POSIX Threads - create 부터 Mutex까지

2018. 12. 4. 18:08

(단일 쓰레드) 프로세스란 : 한 순간에 코드의 한 부분만 실행하는 것
multi-thread : 하나의 프로세스의 code를 수행하는 부분들이 여러개 -> 동시에 처리하므로 처리시간이 빨라짐
-> CPU들이 여러개(core)가 있는 경우에 process는 core하나만 사용하고 나머지 core는 사용하지 못하지만 쓰레드를 이용하면 core들이 각 쓰레드를 동시에 처리
Pthread가 default로 사용됨
thread를 만든 후 main쓰레드는 그대로 동작, 만들어진 쓰레드는 지정된 함수를 수행
thread들이 다른 쓰레드가 끝나기를 기다리는 경우 pthread_join을 사용
예시 : 두 쓰레드가 동시에 같은 함수를 수행하는 경우 -> for문으로 50000번씩 i를 증가시키며 출력하는 경우

원하는 결과값이 나오지 않고 조금 작은 값이 나옴
critical section이 동시에 수행되면 안됨
한 쓰레드가 수행중에 다른 쓰레드가 대기하도록 하는 mutex 사용하여 해결

pthread_create

쓰레드로 실행하는 코드 작성

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23

#include<stdio.h>
#include<stdlib.h>
#include<pthread.h>
 
/*
 * Thread start routine: writes a greeting line to stdout and then
 * terminates the calling thread with a NULL exit value.
 *
 * arg: unused.
 */
void *worker(void *arg){
        (void)arg;

        /* puts() appends the trailing newline itself. */
        puts("hello worker");
        pthread_exit(NULL);
}
 
/*
 * Starts one worker thread and does NOT join it, so "main thread exit"
 * may be printed before the worker's own output.
 *
 * Exits with EXIT_FAILURE if the thread cannot be created.
 */
int main(void)
{
        pthread_t mythread;
        int result;

        /* pthread_create() returns 0 on success, an error number otherwise. */
        result = pthread_create(&mythread, NULL, worker, NULL);
        if (result != 0) {
                /* Say why we are quitting instead of exiting silently. */
                fprintf(stderr, "pthread_create failed: error %d\n", result);
                exit(EXIT_FAILURE);
        }

        printf("main thread exit\n");

        /*
         * Leave through pthread_exit() rather than return: only the main
         * thread terminates, so the worker can still run to completion.
         */
        pthread_exit(NULL);
}
Colored by Color Scripter

cs

worker라는 함수를 선언 -> hello worker를 출력
pthread_t 타입으로 thread로 지정할 변수를 선언
pthread_create로 mythread(worker)를 실행
result값이 0이 아니면(쓰레드 생성 실패) 프로그램 종료
메인문에서는 main thread exit라는 문장을 출력
결과

worker 쓰레드의 출력도 나오지만 메인문의 출력이 먼저 나옴

pthread_join

메인문을 수행하기 전에 지정한 쓰레드를 수행할 수 있도록 join으로 기다리기

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

#include<stdio.h>
#include<stdlib.h>
#include<pthread.h>
 
/*
 * Thread start routine: writes a greeting line to stdout and then
 * terminates the calling thread with a NULL exit value.
 *
 * arg: unused.
 */
void *worker(void *arg){
        (void)arg;

        /* puts() appends the trailing newline itself. */
        puts("hello worker");
        pthread_exit(NULL);
}
 
/*
 * Starts one worker thread, waits for it with pthread_join(), and only
 * then prints "main thread exit", so the worker's output comes first.
 *
 * Exits with EXIT_FAILURE if the thread cannot be created.
 */
int main(void)
{
        pthread_t mythread;
        int result;

        result = pthread_create(&mythread, NULL, worker, NULL);
        if (result != 0) {
                /*
                 * pthread functions return the error number and do not set
                 * errno, so perror() would print an unrelated message.
                 */
                fprintf(stderr, "pthread_create failed: error %d\n", result);
                exit(EXIT_FAILURE);
        }

        /* Block until the worker has terminated. */
        pthread_join(mythread, NULL);
        printf("main thread exit\n");
        pthread_exit(NULL);
}
Colored by Color Scripter

cs

pthread_join을 메인문 출력 전에 사용하여 한 쓰레드가 끝날때 까지 기다리도록 함
결과

두 출력의 위치가 달라짐

쓰레드간의 데이터 공유

전역변수 두개를 선언 - counter, running -> 모든 쓰레드에서 접근 가능한 변수
worker함수는 counter값 ++ -> running이 0이 되면 중단
main문에서 counter값을 1초에 한번씩 총 10번 출력 후 나와서 running을 0으로 바꿔줌
worker쓰레드 종료

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

#include<stdio.h>
#include<stdlib.h>
#include<pthread.h>
#include<unistd.h>
 
/*
 * Incremented by worker() and printed once per second by main().
 * volatile makes the compiler perform every access in memory rather than
 * on a copy it keeps in a register.
 * NOTE(review): volatile gives neither atomicity nor inter-thread ordering;
 * C11 atomics are the portable tool for shared flags and counters.
 */
volatile long long counter = 0;
/* Stop flag: worker() loops while this is non-zero; main() clears it. */
volatile int running = 1;
 
/*
 * Thread start routine: keeps bumping the shared `counter` for as long as
 * the shared `running` flag is non-zero, then terminates the thread.
 *
 * arg: unused.
 */
void *worker(void *arg){
        for(;;){
                /* Re-read the flag before every increment. */
                if(!running){
                        break;
                }
                counter = counter + 1;
        }
        pthread_exit(NULL);
}
 
int main(void)
{
        pthread_t mythread;
        int result;
 
        result = pthread_create(&mythread,NULL,worker,NULL);
 
        if(result){
                perror("pthread_create");
                exit(0);
        }
 
        for(int i = 0; i<10; i++){
                printf("%lld\n",counter);
                sleep(1);
        }
 
        running = 0;
        pthread_join(mythread,NULL);
        printf("main thread exit\n");
        pthread_exit(NULL);
}
Colored by Color Scripter

cs

counter와 running을 선언 -> counter는 while문에서 늘어날 값, running은 flag로 작용할 값
위와 동일하게 쓰레드를 생성하고 worker에는 running이 0이되면 counter증가를 멈추는 함수를 넣음
메인문에서는 1초에 한번씩 counter를 총 10번 출력하는 for문을 작성
10번을 모두 출력하면 running을 0으로 바꿔줌
결과

점점 값이 늘어나다가 10번 출력한 후 종료됨

멀티프로세서(코어) 시스템에서의 유의점

gcc myprog.c -lpthread

gcc -O2 myprog.c -lpthread

이유 : CPU들에는 각각 cache가 존재하는데 cache와 메인 메모리 사이에 데이터를 주고받을 때 많은 시간이 걸리므로, optimization을 하는 경우 counter값과 running값을 캐시(정확히는 컴파일러가 값을 CPU 레지스터에 보관)로 불러와서 수행하기 때문에 쓰레드 간 데이터 공유가 되지 않음 -> 매번 메모리에서 읽고 쓰도록 지정하는 volatile로 선언해야 -O 옵션을 쓸 수 있음

쓰레드에 인자 전달

쓰레드 4개를 만듦
각각의 쓰레드에게 파라미터 값을 전달
start부터 end까지 더함
전부 더한 값을 mysum[myld]에 넣음 (myld는 쓰레드 번호)
1~1000, 1001~2000, 2001~3000, 3001~4000

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50

#include<stdio.h>
#include<stdlib.h>
#include<pthread.h>
#include<unistd.h>
 
/* Number of worker threads main() creates (also the size of mysum[]). */
#define NUM_THREADS 4
/* How many consecutive integers each worker adds up. */
#define WORK_PER_THREAD 1000
 
/* Per-thread work description that main() passes to worker(). */
struct thread_args {
    int       myld;   /* thread number; worker() uses it as the mysum[] slot */
    long long start;  /* first integer to add (inclusive) */
    long long end;    /* last integer to add (inclusive) */
};
 
/* One partial sum per thread: worker() writes slot myld, main() prints each slot after joining. */
volatile long long mysum[NUM_THREADS];
 
void *worker(void *arg){
    struct thread_args *ta;
 
    ta = (struct thread_args *)arg;
    long long sum = 0;
 
    for(int j=ta->start; j <= ta->end; j++){
        sum = sum + j;
    }
    mysum[ta->myld] = sum;
 
    pthread_exit(NULL);
}
 
int main(void)
{
    pthread_t mythread[NUM_THREADS];
    struct thread_args range[NUM_THREADS];
    int i;
 
    for(i=0; i<NUM_THREADS; i++){
        range[i].myld = i;
        range[i].start = i*WORK_PER_THREAD+1;
        range[i].end = (i+1)*WORK_PER_THREAD;
        pthread_create(&mythread[i],NULL,worker,&range[i]);
    }
 
    for(i=0; i<NUM_THREADS; i++){
        pthread_join(mythread[i],NULL);
        printf("mysum[%d]: %lld\n",i,mysum[i]);
    }
    printf("main thread exit\n");
    pthread_exit(NULL);
}
Colored by Color Scripter

cs

쓰레드를 4개 선언
구조체(myld - 위치를 저장, start - 덧셈을 시작할 숫자를 저장, end - 덧셈을 끝낼 숫자를 저장)역시 4개 선언
1~1000, 1001~2000, 2001~3000, 3001~4000을 각 myld = 0, 1, 2, 3에 저장
만들면서 동시에 쓰레드 생성 -> 4개의 쓰레드가 동시에 실행됨(시작 숫자 부터 끝 숫자까지 모두 더함)
pthread_join으로 0번 ~ 3번 쓰레드가 끝나는 것을 순서대로 기다림
여기서는 sum이라는 변수도 4개를 선언하여 사용했으므로 레지스터값이 겹치지 않고 잘 실행됨
결과

만약 sum이라는 변수를 4개 사용하지 않고 그냥 sum 하나로 모든 쓰레드에서 사용한다면?
실행할 때마다 다른 값이 나올 것이다
고친 부분 : mysum[0~3] -> mysum 하나로 통일
마지막 결과 출력도 가장 마지막 값만 출력

쓰레드간 동기화

위에서 사용한 mysum배열이 아닌 그냥 mysum이라는 변수 하나에만 더하는 경우
모든 쓰레드가 동시에 한 변수를 사용하기 때문에 race condition이 발생
mutex를 사용해보자

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

#include<stdio.h>
#include<stdlib.h>
#include<pthread.h>
#include<unistd.h>
 
/* Number of worker threads main() creates. */
#define NUM_THREADS 4
/* How many consecutive integers each worker adds up. */
#define WORK_PER_THREAD 1000
 
/* Statically initialized mutex; worker() holds it around every update of mysum. */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
 
/* Per-thread work description that main() passes to worker(). */
struct thread_args {
    int       myld;   /* thread number assigned by main() */
    long long start;  /* first integer to add (inclusive) */
    long long end;    /* last integer to add (inclusive) */
};
 
/* Single running total shared by all workers; updated under `mutex`, printed by main() after the joins. */
volatile long long mysum;
 
void *worker(void *arg){
    struct thread_args *ta;
 
    ta = (struct thread_args *)arg;
    long long sum = 0;
 
    for(int j=ta->start; j <= ta->end; j++){
        pthread_mutex_lock(&mutex);
        mysum = mysum + j;
        pthread_mutex_unlock(&mutex);
    }
 
    pthread_exit(NULL);
}
 
int main(void)
{
    pthread_t mythread[NUM_THREADS];
    struct thread_args range[NUM_THREADS];
    int i;
 
    for(i=0; i<NUM_THREADS; i++){
        range[i].myld = i;
        range[i].start = i*WORK_PER_THREAD+1;
        range[i].end = (i+1)*WORK_PER_THREAD;
        pthread_create(&mythread[i],NULL,worker,&range[i]);
    }
    for(i=0; i<NUM_THREADS; i++)
        pthread_join(mythread[i],NULL);
 
    printf("mysum: %lld\n",mysum);
    printf("main thread exit\n");
    pthread_exit(NULL);
}
Colored by Color Scripter

cs

mutex를 선언하여 pthread_mutex_lock과 unlock사용
lock과 unlock사이의 코드는 쓰레드들이 동시에 수행하지 않고 하나의 쓰레드가 끝날때까지 대기함
결과

프로그램 수행시간 확인

mutex를 사용한 경우

mutex를 사용하지 않은 경우

바른 결과를 얻기 위해 mutex를 사용하면서도 성능 감소를 막기 위해서 어떻게 해야 할까?

위의 쓰레드의 인자전달 실습에서 사용한 것처럼 4개의 sum변수를 사용하여 각자 따로 worker함수를 수행시킨 후 마지막에 각각을 더해주는 방식으로 수행하고자 한다

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57

#include<stdio.h>
#include<stdlib.h>
#include<pthread.h>
#include<unistd.h>
 
/* Number of worker threads main() creates (also the size of mysum[]). */
#define NUM_THREADS 4
/* How many consecutive integers each worker adds up. */
#define WORK_PER_THREAD 1000
 
/* Statically initialized mutex meant to guard the update of `result` (see the lock/unlock pair around it in worker()). */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
 
/* Per-thread work description that main() passes to worker(). */
struct thread_args {
    int       myld;   /* thread number; worker() uses it as the mysum[] slot */
    long long start;  /* first integer to add (inclusive) */
    long long end;    /* last integer to add (inclusive) */
};
 
/* One partial sum per thread: worker() writes slot myld, main() prints each slot after joining. */
volatile long long mysum[NUM_THREADS];
/* Grand total: every worker adds its partial sum to this one variable; main() prints it last. */
volatile long long result = 0;
 
void *worker(void *arg){
    struct thread_args *ta;
 
    ta = (struct thread_args *)arg;
    long long sum = 0;
 
    for(int j=ta->start; j <= ta->end; j++){
        sum = sum + j;
    }
    mysum[ta->myld] = sum;
 
//    pthread_mutex_lock(&mutex);
    result = result + mysum[ta->myld];
//    pthread_mutex_unlock(&mutex);
    pthread_exit(NULL);
}
 
int main(void)
{
    pthread_t mythread[NUM_THREADS];
    struct thread_args range[NUM_THREADS];
    int i;
 
    for(i=0; i<NUM_THREADS; i++){
        range[i].myld = i;
        range[i].start = i*WORK_PER_THREAD+1;
        range[i].end = (i+1)*WORK_PER_THREAD;
        pthread_create(&mythread[i],NULL,worker,&range[i]);
    }
 
    for(i=0; i<NUM_THREADS; i++){
        pthread_join(mythread[i],NULL);
        printf("mysum[%d]: %lld\n",i,mysum[i]);
    }
    printf("main thread exit\n");
    printf("final sum = %lld\n",result);
    pthread_exit(NULL);
}
Colored by Color Scripter

cs

수정된 부분

volatile long long mysum; -> volatile long long mysum[NUM_THREADS];
volatile long long result = 0; 추가 -> 마지막에 모든 합들을 더하기 위한 변수
//    pthread_mutex_lock(&mutex);

    result = result + mysum[ta->myld]; -> 주석처리된 부분을 사용하면 완전한 결과값만 나오고 사용하지 않으면 가끔 다른 결과값이 나온다

//    pthread_mutex_unlock(&mutex);
printf("final sum = %lld\n",result); -> 결과값 출력
결과비교

Mutex를 사용하지 않았을 때

Mutex를 사용했을 때
Mutex수행 때문에 조금 느려졌었던 이전과는 다르게 Mutex를 사용하지 않았을 때와 사용했을 때의 수행 시간이 차이가 나지 않는 것을 확인할 수 있다.

저작자표시

'LINUX 실습' 카테고리의 다른 글

Signal 시스템 콜 실습 (0)	2018.11.29
[함수 만들기]프로세스 시스템 콜 (0)	2018.11.26
[시스템 콜 파일(2)]myls, mycp2 함수 만들기 (0)	2018.11.18
[File I/O]시스템 콜을 이용하여 함수 만들기 (0)	2018.11.12
[System call 실습]System call의 기본 개념 (0)	2018.11.02

PREV 1 NEXT

Study & Project repository

lpthread

POSIX Threads - create 부터 Mutex까지

'LINUX 실습' 카테고리의 다른 글

+ Recent posts

티스토리툴바