/* 

This program has several large inefficiencies, and does not do much 
error checking for memory/files etc.  Otherwise it's OK!
Note that when a generated clause has a duplicate, that duplicate variable
is randomly regenerated.  If the clusters are not fully connected via the 
links (in terms of variable occurences) then the entire set of links
is regenerated.  Regeneration can be dangerous with silly parameters as 
the program is also not very intelligent - it will not perform
a check to see whether the requested problem is actually possible.
For example a problem with 7 clusters and twenty variables and
10% links leaves only 2 variables for a cluster, and thus generating 
a single clause for this cluster without duplicating variables is impossible. 

Choose your parameters wisely.

The code is pretty messy - I made this up from varios other bits of code
I had and filled in the blanks.  It should be easy to understand though
(I hope), and thus easy to modify too.

Andrew Slater 
andrews @ csl . anu . edu . au

*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

//default formula size params - defunct assignments really
/*
static int numVars = 200;
static int numClauses = 840;//4.2*numVars

static int numVars = 100;
static int numClauses = 420;//4.2*numVars
*/
static int numVars = 250;
int numClauses = 1061;//4.25*numVars

//cluster range params
static int minClusters = 1;
static int maxClusters = 9;

//percent of clauses that link clusters
static int percentLinks = 0;

static int numProblems = 1000;

//globals!
int *problem; //space for problem
char *adjMatr; //an adjacency Matric (upper diag) to check for connectedness
int adjMatrMemSize;//total number of slots
int adjMatrSize;//width/height of equivalent square matrix

unsigned short int seed[3] = {42753,59120,9834};

void InitRandom()
{
  FILE *f;
  int i;

  if ((f = fopen("clusgen.seed","r")) ==NULL) {
    //fprintf(stderr,"New Seed\n");
   for (i=0;i<100;i++)
     erand48(seed);
 }
 else {
   //fprintf(stderr,"Old Seed\n");
   fscanf(f,"%hd %hd %hd\n",&(seed[0]),&(seed[1]),&(seed[2]));
   fclose(f);
 }
}
void SaveRandom()
{
  FILE *f;

  if ((f = fopen("clusgen.seed","w")) != NULL) {
   fprintf(f,"%d %d %d\n",(seed[0]),(seed[1]),(seed[2]));
   fclose(f);
   //fprintf(stderr,"Saving Seed\n");
 }
 else {
   fprintf(stderr,"Can't save to seed file.\n");
 }

}

int RandomVariable(int minVal,int maxVal)
{
  int x;
  int range;
  range = maxVal - minVal + 1;
  x = (int)floor((erand48(seed) * (double)(range)));
  x = x + minVal;//put it back in range
  if (erand48(seed) > 0.50000) x = -x; //choose sign

  //  printf("x = %d %d %d \n",x+minVal,minVal,maxVal);

  return (x);
}

void RandomClause(int *clause, int minVal,int maxVal)
{
  clause[0] = RandomVariable(minVal,maxVal);
  
  clause[1] = RandomVariable(minVal,maxVal);
  while (abs(clause[1]) == abs(clause[0]))
    clause[1] = RandomVariable(minVal,maxVal);
  
  clause[2]   = RandomVariable(minVal,maxVal);
  while ((abs(clause[2]) == abs(clause[0])) 
	 ||(abs(clause[2]) == abs(clause[1])) )
    clause[2]   = RandomVariable(minVal,maxVal);
}

//compute index into array representing a
//square upper diagonal matrix
int SUDMIndex(int size, int row, int column){
  return((row*(2*size - row - 1))/2 + column -row - 1);
  //(( (size*(size-1))-((size-row)*(size-row-1)))/2 +column-row-1);
}
//compute the number of elements in a
//square upper diagonal matrix
int SUDMSize(int size){
  return((size*(size-1))/2);
}

char Connected(int v1, int v2)
{
  if (v1 < v2) {
    return adjMatr[SUDMIndex(adjMatrSize,v1,v2)];
  }
  else if (v1 > v2) {
    return adjMatr[SUDMIndex(adjMatrSize,v2,v1)];
  }
  else {
    return (char)1;
  }
}

void Connect(int v1, int v2)
{

  if (v1 < v2) {
    adjMatr[SUDMIndex(adjMatrSize,v1,v2)] = (char)1;
  }
  else {
    adjMatr[SUDMIndex(adjMatrSize,v2,v1)] = (char)1;
  }
}

//this is a quick and naive way to check....
int NotFullyConnected(int *problem, int numVars, int numClauses)
{
  int retval = 0;
  int i;
  int clauseIndex;
  int v1,v2,v3;
  char *visited;
  int *visitList,*nextVisitList, *tmpList;
  int visitListPos, nextVisitListPos;
  int currentVisitIndex;

  //build graph
  for (i=0;i<adjMatrSize;i++){
    adjMatr[i] = (char)0;
  }

  clauseIndex=0;
  //printf("start build\n");

  for (i=0;i<numClauses;i++) {
    //note node labels start at zero - vars start at 1
    v1 = abs(problem[clauseIndex]) -1;
    v2 = abs(problem[clauseIndex+1]) -1;
    v3 = abs(problem[clauseIndex+2]) - 1;
    //printf(" C %d \n", clauseIndex);
    Connect(v1,v2);
    Connect(v1,v3);
    Connect(v2,v3);
    clauseIndex+=3;
  }
  
  /*
  for (i=0;i<numVars;i++){
    for (j=0;j<numVars;j++){
      if (j > i) {
	if (adjMatr[SUDMIndex(adjMatrSize,i,j)]) printf("1");
	else printf("0");
      }
      else {
	if (j<i) {
	  if (adjMatr[SUDMIndex(adjMatrSize,j,i)]) printf("1");
	  else printf("0");
	}
	else printf("0");
      }
    }
    printf("\n");
  }
  */
  //printf("done connect\n");

  //do DFS quick and dirty
  visited = malloc(sizeof(char)*numVars);
  for (i=0;i<numVars;i++) visited[i] = 0;

  visitList     = malloc(sizeof(int)*numVars);
  nextVisitList = malloc(sizeof(int)*numVars);
  visitListPos = nextVisitListPos = 0;

  //start from node zero
  visitList[visitListPos] = 0;
  visitListPos++;
  visited[0] = 1;

  while (visitListPos != 0) {
    currentVisitIndex = 0;    
    //foreach member of the visit list
    while (visitListPos != currentVisitIndex) {
      //scan through edges from node+1 to N
      //printf(" [%d:",visitList[currentVisitIndex]);
      for (i=0;i<numVars;i++) {
	//add unvisited nodes to the next visit list
	if (Connected(visitList[currentVisitIndex],i)) {
	  if (visited[i] == 0) {
	    //printf(" %d",i);
	    visited[i] = 1;
	    nextVisitList[nextVisitListPos] = i;
	    nextVisitListPos++;
	  }
	}
      }
      currentVisitIndex++;
    }
    //printf("\n");
    //swap lists and index
    visitListPos = nextVisitListPos;
    tmpList = visitList;
    visitList = nextVisitList;
    nextVisitList = tmpList;
    nextVisitListPos = 0;
  }
  //now check
  //printf("Fail:");
  for (i=0;i<numVars;i++)
    if (visited[i] == 0) {
      //printf(" %d",i);
      retval = 1;
    }
  //printf("\n");

  //if (retval) exit(0);

  //printf("Returning connect = %d\n",retval);
  free(visitList);free(nextVisitList);
  return retval;
}

void RandomProblem(int *problem, int numVars, int numClauses,int numClusters, 
		  int percentLinks, int problemNum)
{
  int clusterSize;
  int numLinks;
  int i,j;
  int minClusterVal,maxClusterVal;
  int clausePos;

  //spreading remaining vars over clusters when not even
  int varsPerCluster, extraVars, varOffset;

  //numbers for clusters for generating links correctly
  int c1,c2,c3;
  int connectFails;

  clusterSize =  ((100 - percentLinks)*numClauses)
		  / (numClusters*100);
  numLinks = numClauses - (numClusters * clusterSize);
  varsPerCluster = numVars/numClusters;
  extraVars = numVars % numClusters;

  //  printf("clusters %d of size %d with %d vars - links %d\n",numClusters, clusterSize, varsPerCluster, numLinks);
  
  clausePos = 0;
  maxClusterVal = 0;//start position
  for (i=0;i<numClusters;i++) {//each cluster has a fixed range of variables
    (i<extraVars)?(varOffset = 1):(varOffset = 0);//spread out remainders
    minClusterVal = maxClusterVal + 1;
    maxClusterVal = minClusterVal + varsPerCluster + varOffset - 1;
    //printf("Range %d %d\n", minClusterVal,maxClusterVal);
    //printf("ev vo %d %d\n", extraVars, varOffset);    

    for (j=0;j<clusterSize;j++) {//fill in cluster
      RandomClause(&(problem[clausePos]), minClusterVal, maxClusterVal);
      clausePos +=3;
    }
  }

  //  printf("clusters done\n");

  //this is the link generation process:
  if (numClusters > 1){
    connectFails = 0;
    do {//we repeat the link generation until we get a connected problem
      
      if (connectFails) fprintf(stderr,"clusgen: (Warning) Cluster Connection Failure - redoing.\n");
      
      for (i=0;i<numLinks;i++) { //fill in links
	RandomClause(&(problem[clausePos]), 1, numVars);
	if (numClusters > 1) { //make sure links join at least two clusters
	  c1 = abs(problem[clausePos])/numClusters; 
	  c2 = abs(problem[clausePos+1])/numClusters; 
	  c3 = abs(problem[clausePos+2])/numClusters; 
	  while ((c1 == c2) && (c2 == c3)) {
	    RandomClause(&(problem[clausePos]), 1, numVars);
	    c1 = abs(problem[clausePos])/numClusters; 
	    c2 = abs(problem[clausePos+1])/numClusters; 
	    c3 = abs(problem[clausePos+2])/numClusters; 
	  }
	}
	clausePos+=3;
      }
      
      //printf("links done\n");    
      
      connectFails++;
      
    }while(NotFullyConnected(problem,numVars,numClauses));
  }
}

void PrintProblem(int *problem, int numVars, int numClauses,int numClusters, 
		  int percentLinks, int problemNum)
{
  char fname[256];
  int i,j;
  FILE *fp;

  sprintf(fname,"clus-%d-%d-%.2d-%.2d-%.3d.cnf",numVars,numClauses,numClusters,
	  percentLinks, problemNum);
  fp = fopen(fname,"w");
  
  fprintf(fp,"c Cluster Problem\n");
  fprintf(fp,"c %d clusters with %d percent clauses as links\n", 
	  numClusters, percentLinks);
  fprintf(fp,"c %d vars %d clauses\n", numVars,numClauses);
  fprintf(fp,"c Problem number %d\n", problemNum);
  fprintf(fp,"p cnf %d %d\n",numVars,numClauses);
  
  for (i=0;i<numClauses;i++) {
    j=3*i;
    fprintf(fp,"%d %d %d 0\n", problem[j], problem[j+1], problem[j+2]);
  }

  fclose(fp);
}

int main(int argc, char **argv)
{
  //int numVars, numClauses, numClusters, percentLinks, numProbs;
  int i,j;
  float ratio;
  int doStandardRandom3SAT = 0;
  int fixedNumClusters = 0;

  if (argc == 4) {
    doStandardRandom3SAT = 1;

    percentLinks= 0;
    numProblems = atoi(argv[3]);
    ratio       = atof(argv[2]);
    numVars     = atoi(argv[1]);
    numClauses = (int)((float)numVars * ratio);
  }
  else if (argc == 5) {
    doStandardRandom3SAT = 0;

    percentLinks= atoi(argv[4]);
    numProblems = atoi(argv[3]);
    ratio       = atof(argv[2]);
    numVars     = atoi(argv[1]);
    numClauses = (int)((float)numVars * ratio);

    //printf("clusgen - %d vars %d clauses at %f with %d links\n",numVars, numClauses,
    //ratio, percentLinks);
  }
  else if (argc == 6) {
    doStandardRandom3SAT = 0;

    fixedNumClusters = atoi(argv[5]);
    percentLinks= atoi(argv[4]);
    numProblems = atoi(argv[3]);
    ratio       = atof(argv[2]);
    numVars     = atoi(argv[1]);
    numClauses = (int)((float)numVars * ratio);
  }
  else {
    printf("Usage: clusgen num_vars clause/var_ratio num_problems\n");
    printf("      generates random 3-SAT problems as per usual\n");

    printf("Usage: clusgen num_vars clause/var_ratio num_problems percent_links\n");
    printf("      generates a set of random 3-SAT problems for each of 1 to 9 \nclusters using the \n");
    printf("      percent_links clauses to join clusters randomly\n");
    printf("Usage: clusgen num_vars clause/var_ratio num_problems percent_links numclusters\n");
    printf("      generates random 3-SAT problems with numclusters clusters using the \n");
    printf("      percent_links clauses to join clusters randomly\n");
    return 1;
  }


  problem = malloc(sizeof(int)*3*numClauses);
  adjMatrSize = numVars;
  adjMatrMemSize = SUDMSize(adjMatrSize);  
  adjMatr = malloc(sizeof(char)*adjMatrMemSize);  

  InitRandom();

  if (doStandardRandom3SAT) {
    for (i=0;i<numProblems;i++) {  
      RandomProblem(problem,numVars,numClauses,1,0, i);
      PrintProblem(problem,numVars,numClauses,1,0, i);
    }
  }
  else {
    if (fixedNumClusters) {
      minClusters = maxClusters = fixedNumClusters;
    }
    for (j=minClusters;j<=maxClusters;j++){
      for (i=0;i<numProblems;i++) {
      
	RandomProblem(problem,numVars,numClauses,j,
		      percentLinks, i);
	PrintProblem(problem,numVars,numClauses,j,
		     percentLinks, i);
	
      }
      printf("Done clusters %d - %d probs\n",j,i);
    }
  }

  SaveRandom();

  return 1;
}


