Saturday, May 29, 2010

Shell Sort

Shell sort

Basically the idea of shell sort is to move numbers in a large step first, and slow reduce this step down to 1. The idea summarizes as simulated annealing. The whole reason for the additionally outer loop is to reduce the overall randomness in inital sort the data by quickly and roughly grouping low value items to the left and high value items to the right.

The inner core of it is insertion sort which will correct the sorting of the data irrespective of what your choice of outer steps are so long as the last step is 1.

How it Works;
1) choose a step by which to do your insertion sort
2) preform insertion sort as normal but using the step to determine which data is side by side.

The understandable version of the algorithm works as follows:
1) break the list of data into rows and columns
2) preform insertion sort on each column
3) repeat with more less columns until we have a single column.

In my code example I have have created an easy to understand implementation of shell sort it explicitly names out the columns and rows


Properties;
Worst case speed is O(n^2) or O(nlog^2n) if you use the special sequence of steps.
Memory is 1
It is Unstable (due the interleaving of the data several neigbour items are skipped)

//compile with g++
#include <iostream>
#include <iomanip>
using namespace std;

void print(int data[], int size);

void shellSort(int data[], int size)
{
  int tmp;
  int i; 
   // int step = 1; //comment the for and uncomment this to see that shell is just insertation sort!
   for(int step = size/2; step > 0; step = step/2)
      for(int j = step; j < size; j++)
        {
          tmp = data[j];
          i = j;
          while(
                (i >= step) &&
                (data[i-step] > tmp)
                )
            {
              data[i] = data[i-step];
              i -=step;
            }

          if(i != j)
            data[i] = tmp;
        }
}

void shellSortUnderstandable(int data[], int size)
{
  int tmp;
  int srow;
  for(int step = size/2; step > 0; step = step/2)
    {
      for(int column= 0; column < step; column++)
        {
          int rows = (int)((size-column)/step) + 1;
          for(int row = 1; row < rows-1; row++)
            {
              tmp = data[row*step + column];

              for(srow = row-1; srow >= 0; srow--)
                if(data[srow*step + column] > tmp)
                  data[(srow+1)*step + column] = data[(srow*step) + column];
                else
                  break;
              
              if(srow+1 != row)
                data[(srow+1)*step + column] = tmp;
            }
        }
    }
}

//test
#define SCRAMBLE(x, y) ((0xa57b & ~y) + ((0x3829 & x) << 1))

bool check(int data[], int size)
{
  for(int i = 1; i < size; i++)
    if(data[i] < data[i-1])
      {
        cout << "FAIL!" << endl;
        return false;
      }
  cout << "PASS" << endl;
  return true;
}

void print(int data[], int size)
{
  for(int i = 0; i < size; i++)
    cout << setw(5) << data[i] << " ";
  cout << endl;
}

bool test(int data[], int size)
{
  print(data, size);
  shellSort(data,size);
  print(data, size);
  return check(data, size);
}

#define SIZE 300
int main()
{
  int data[SIZE];
  bool pass = true;

  //easy data
  data[0] = 1;
  for(int i = 0; i < SIZE; i++)
    data[i] = SIZE - i;
  pass &= test(data, SIZE);

  //semi repeated data
  data[0] = 1;
  for(int i = 1; i < SIZE; i++)
    data[i] = SCRAMBLE(i, data[i-1]);
  pass &= test(data, SIZE);

  //the sort killer!
  for(int i = 0; i < SIZE; i++)
    data[i] = 5;
  pass &= test(data, SIZE);

  //and some randoms to catch anything i missed
  srand ( time(NULL) );

  for(int j = 1; j < 100; j++)
    {
      for(int i = 0; i < 3*j; i++)
        data[i] = (int)((float)(j+1)*((float)rand()/(float)RAND_MAX));
      pass &= test(data, 3*j);
    }

  if(pass)
    cout << "ALL PASSED" << endl;
  else
    cout << "FAILED" << endl;
}

No comments:

Post a Comment