#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <sys/types.h>

// Number of worker threads started by main(). The lock below is strictly a
// two-party protocol (try[] has two slots and the peer is addressed as 1-i),
// so this value must stay 2.
#define NTHREADS 2
// Number of times each thread enters the critical section per test round.
#define NITERS 100000
// Number of busy-wait iterations between two "spinning" debug messages.
#define SPIN_DEBUG_THRESHOLD 2000000

// Memory fence used by thread_fun(). The active definition issues the x86
// "mfence" instruction; the "memory" clobber additionally stops the compiler
// from moving memory accesses across the statement.
// The commented-out alternatives (lfence, sfence, no fence at all) can be
// swapped in to observe how weaker or missing fences affect the lock.
// NOTE: the plain `asm` spelling is a GNU extension; a strict -std=c11 build
// would need `__asm__` instead.
#define FENCE asm volatile("mfence":::"memory")
//#define FENCE asm volatile("lfence":::"memory")
//#define FENCE asm volatile("sfence":::"memory")
//#define FENCE

// Lock state shared by the two threads:
//   try[i] != 0  -> thread i wants to be (or is) in the critical section
//   turn         -> index of the thread that most recently announced itself;
//                   that thread is the one that waits while both are trying
volatile int turn = 0;
volatile int try[2] = {0, 0};

// Shared counter incremented inside the critical section; main() compares
// it with NTHREADS * NITERS after each round.
volatile int counter;

// Debug hook used while spinning. Switch to the empty definition below to
// compile the diagnostics out entirely.
#define DEBUG(s,i,k,spin) debug(s, i, k, spin)
//#define DEBUG(s,i,k,spin)

/*
 * Print a one-line diagnostic to stderr.
 *
 *   s    - short label appended in parentheses (never modified, hence const;
 *          callers pass string literals)
 *   i    - index of the reporting thread
 *   k    - current iteration of that thread's main loop
 *   spin - number of busy-wait iterations performed so far in this attempt
 *
 * The current value of the shared counter is included as well; it is read
 * without taking the lock, so it is only a snapshot.
 */
void debug(const char *s, int i, int k, int spin) {
  fprintf(stderr, "DEBUG counter=%d\ti=%d, k=%5d, spin=%3d\t(%s)\n",
          counter, i, k, spin, s);
}

/*
 * Thread body: enter the critical section NITERS times and increment the
 * shared counter once per entry.
 *
 * Mutual exclusion is attempted with a two-thread Peterson-style lock:
 * a thread raises its try[] flag, names itself in turn, and then waits as
 * long as the other thread is also trying and turn still names the waiter.
 * Whether this actually excludes depends on the FENCE definition in effect.
 *
 *   num - pointer to an int holding this thread's index. It must be 0 or 1
 *         because the peer's flag is addressed as try[1-i]. The pointer is
 *         only read here, never freed.
 *
 * Always returns NULL.
 *
 * NOTE(review): volatile accesses plus explicit fences are not a
 * data-race-free synchronization mechanism under the C11 memory model;
 * this relies on the target's (x86) hardware behavior. Confirm that this
 * is intentional for the experiment.
 */
void *thread_fun(void *num) {
  int i = *(int*)num;
  int k, spin, countdown;

  for (k=0; k<NITERS; k++) {
    // Entry protocol: announce intent first, then name ourselves in turn.
    // The order of these two stores matters for the protocol.
    try[i] = 1;
    turn = i;
    FENCE; // Make sure everybody knows we try to enter the critical section

    spin = 0;
    countdown = SPIN_DEBUG_THRESHOLD;
    // Wait while the peer is also trying and we were the last to set turn.
    while (try[1-i] && turn==i) {
      FENCE; // Make sure we observe updates to shared variables

      // Print debug message when spinning for too long:
      // one message every SPIN_DEBUG_THRESHOLD iterations of this loop.
      spin++;
      if (countdown == 1) {
        countdown = SPIN_DEBUG_THRESHOLD;
        DEBUG("spinning", i, k, spin);
      } else
        countdown--;
    }

    /* BEGIN critical section */

    // Non-atomic read-modify-write; correct only if the lock really excludes.
    counter++;

    /* END critical section */

    FENCE; // Finalize memory accesses before leaving the critical section
    try[i] = 0; // Exit protocol: withdraw our request
    FENCE; // Make sure everybody knows we left the critical section
  }

  return NULL;
}

// Handles of the worker threads; filled by pthread_create() and consumed by
// pthread_join() in main().
pthread_t threads[NTHREADS];

int main(int argc, char **argv){
  pthread_attr_t attr;
  int i, error;

  while (1) {
    counter = 0;

    for (i=0; i<NTHREADS; i++) {
      int *ii = malloc(sizeof(int));
      *ii = i;
      pthread_attr_init(&attr);
      error = pthread_create(&threads[i], &attr, &thread_fun, ii);
      if (error != 0) {
	fprintf(stderr, "pthread_create: %s \n", strerror(error));
	exit(2);
      }
    }
    
    for (i=0; i<NTHREADS; i++) {
      void *ptr;
      int error = pthread_join(threads[i], &ptr);
      if (error != 0){
	fprintf(stderr, "pthread_join: %s \n", strerror(error));
	exit(2);
      }
    }
    if (counter != NTHREADS * NITERS)
      fprintf(stderr, "counter=[1m%d[0m\n", counter);
    else
      fprintf(stderr, "counter=%d\r", counter);
  }

  exit(0);
}

