5 files changed, 369 insertions, 258 deletions
diff --git a/.gitignore b/.gitignore
index 56d2382..360a8c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-*.o fft
+*.o
+fft
diff --git a/Makefile b/Makefile
index 8a88b0c..deedfd3 100644
--- a/Makefile
+++ b/Makefile
@@ -9,9 +9,15 @@ CXXFLAGS=-stdlib=libc++ -Wall -O2 -std=c++17
 
 all: fft
 
-fft: fft.cpp
+fft: fft.o main.o
 	$(CXX) $(CXXFLAGS) -o $@ $^
 
+fft.o: fft.cpp fft.h
+	$(CXX) $(CXXFLAGS) -c -o $@ $<
+
+main.o: main.cpp fft.h
+	$(CXX) $(CXXFLAGS) -c -o $@ $<
+
 install:
 
 clean:
diff --git a/fft.cpp b/fft.cpp
index e7d5de6..29fd405 100644
--- a/fft.cpp
+++ b/fft.cpp
@@ -1,6 +1,8 @@
 // FFT
 
-#include <complex>
+#include "fft.h"
+
+#include <algorithm>
 #include <chrono>
 #include <cmath>
 #include <ctime>
@@ -10,291 +12,113 @@
 #include <memory>
 #include <numeric>
 #include <string>
-#include <vector>
-
-const double PI = std::acos(-1);
-const std::complex<double> i(0, 1);
-
-const double tolerance = 0.000001;
-
-using namespace std::complex_literals;
-
-/// CPU time clock
-class Timer {
-public:
-	Timer(): mStart(std::clock()){}
-	void start(){mStart = std::clock();}
-	double elapsed() { return (double(std::clock()) - mStart) / CLOCKS_PER_SEC; }
-	static double precision() {
-		static double result{0};
-		
-		if (result != 0)
-			return result;
-		
-		std::clock_t start = std::clock();
-		while (start == std::clock()) {}
-		start = std::clock();
 
-		std::clock_t end = start;
-		while (end == std::clock()) {}
-		end = std::clock();
 
-		result = (double(end) - start) / CLOCKS_PER_SEC;
-		return result;
+namespace { // Helper functions
+	bool is_power_of_two(unsigned int n) {
+		return n != 0 && (n & (n - 1)) == 0;
 	}
-private:
-	std::clock_t mStart;
-};
 
-// (Slow) DFT
-std::vector<std::complex<double>> dft(const std::vector<std::complex<double>> &v) {
-	std::vector<std::complex<double>> result(v.size(), 0);
-
-	const double N = v.size();
-
-	for (int k = 0; k < v.size(); k++) {
-		for (int j = 0; j < result.size(); j++) {
-			result[k] += v[j] * std::exp(-i * 2. * PI * double(k * j) / N);
-		}
-	}
+}
 
+std::vector<double> RIT::magnitudes(std::vector<std::complex<double>>& v) {
+	std::vector<double> result(v.size());
+	std::transform(std::begin(v), std::end(v), std::begin(result), std::abs<double>);
 	return result;
 }
 
-std::vector<double> freqs{ 2, 5, 11, 17, 29 }; // known freqs for testing
+RIT::FFT::FFT(int size, bool halfOnly): mSize(size), order(size), expLUT(size/2), mFlagHalfOnly(halfOnly) {
 
-void generate(std::vector<std::complex<double>>& v) {
-    for (int i = 0; i < v.size(); i++) {
-        v[i] = 0.;
-        // sum several known sinusoids into v[]
-        for(int j = 0; j < freqs.size(); j++)
-            v[i] += sin(2 * M_PI * freqs[j] * i / v.size() );
-    }
-}
+	if (!is_power_of_two(size))
+		throw std::invalid_argument("Size must be a power of two");
 
-namespace CooleyTukey {
+	// reorder LUT
+	for (int i = 0; i < size; ++i) {
+		order[i] = bitreverse(i);
+	}
 
-// separate even/odd elements to lower/upper halves of array respectively.
-// Due to Butterfly combinations, this turns out to be the simplest way 
-// to get the job done without clobbering the wrong elements.
-void separate(std::vector<std::complex<double>>::iterator a, int n) {
-	std::vector<std::complex<double>> b(n/2);
-	for(int i=0; i<n/2; i++)    // copy all odd elements to heap storage
-		b[i] = a[i*2+1];
-	for(int i=0; i<n/2; i++)    // copy all even elements to lower-half of a[]
-		a[i] = a[i*2];
-	for(int i=0; i<n/2; i++)    // copy all odd (from heap) to upper-half of a[]
-		a[i+n/2] = b[i];
+	// exp LUT
+	for (int i = 0; i < size / 2; ++i) {
+		expLUT[i] = exp(std::complex<double>(0,-2.*M_PI*i/size));
+	}
 }
 
-// N must be a power-of-2, or bad things will happen.
-// Currently no check for this condition.
-//
-// N input samples in X[] are FFT'd and results left in X[].
-// Because of Nyquist theorem, N samples means 
-// only first N/2 FFT results in X[] are the answer.
-// (upper half of X[] is a reflection with no new information).
-void fft_recursive(std::vector<std::complex<double>>::iterator X, int N) {
-    if(N < 2) {
-        // bottom of recursion.
-        // Do nothing here, because already X[0] = x[0]
-    } else {
-        separate(X,N);      // all evens to lower half, all odds to upper half
-        fft_recursive(X,     N/2);   // recurse even items
-        fft_recursive(X+N/2, N/2);   // recurse odd  items
-        // combine results of two half recursions
-        for(int k=0; k<N/2; k++) {
-            std::complex<double> e = X[k    ];   // even
-            std::complex<double> o = X[k+N/2];   // odd
-                         // w is the "twiddle-factor"
-            std::complex<double> w = exp( std::complex<double>(0,-2.*M_PI*k/N) );
-            X[k    ] = e + w * o;
-            X[k+N/2] = e - w * o;
-        }
-    }
-}
 
-// Cooley-Tukey
-std::vector<std::complex<double>> fft(const std::vector<std::complex<double>> &v) {
-	std::vector<std::complex<double>> result(v);
+std::vector<std::complex<double>> RIT::FFT::operator()(const std::vector<std::complex<double>> &v) {
+	if (v.size() != mSize)
+		throw std::length_error("Bad input size");
+
+	std::vector<std::complex<double>> result;
 
-	fft_recursive(std::begin(result), result.size());
+	reorder(v, result);
+	if (mFlagHalfOnly) {
+		fft_half(std::begin(result), mSize);
+		result.resize(mSize/2);
+	} else
+		fft_recursive(std::begin(result), mSize);
 	
 	return result;
 }
-}; // namespace CooleyTukey
-
-// FFT Roland Reichwein
-class RR {
-private:
-	int mSize;
-	std::vector<int> order;
-	std::vector<std::complex<double>> expLUT;
-
-public:
-	RR(int size): mSize(size), order(size), expLUT(size/2) {
-		// reorder LUT
-		for (int i = 0; i < size; ++i) {
-			order[i] = bitreverse(i);
-		}
-
-		// exp LUT
-		for (int i = 0; i < size / 2; ++i) {
-			expLUT[i] = exp(std::complex<double>(0,-2.*M_PI*i/size));
-		}
-	}
 
-	std::vector<std::complex<double>> fft(const std::vector<std::complex<double>> &v) {
-		if (v.size() != mSize)
-			throw std::length_error("Bad input size");
+RIT::FFT& RIT::FFT::SetHalfOnly(bool enable) {
+	mFlagHalfOnly = enable;
+	return *this;
+}
 
-		std::vector<std::complex<double>> result;
+int RIT::FFT::bitreverse(int i) {
+	int size{mSize};
+	int result{0};
 
-		reorder(v, result);
-		fft_recursive(std::begin(result), mSize);
-		
-		return result;
+	while (size > 1) {
+		result <<= 1;
+		result |= i & 1;
+		i >>= 1;
+		size >>= 1;
 	}
 
-// TODO: option for only half FFT due to real redundancy
-
-private:
-	int bitreverse(int i) {
-		int size{mSize};
-		int result{0};
-
-		while (size > 1) {
-			result <<= 1;
-			result |= i & 1;
-			i >>= 1;
-			size >>= 1;
-		}
-
-		return result;
-	}
+	return result;
+}
 
-	void reorder(const std::vector<std::complex<double>>& src, std::vector<std::complex<double>>& dst) {
-		dst.resize(src.size());
-		for (int i = 0; i < mSize; ++i) {
-			dst[order[i]] = src[i];
-		}
-	}
+void RIT::FFT::reorder(const std::vector<std::complex<double>>& src, std::vector<std::complex<double>>& dst) {
+	int size = src.size();
 
-	// N must be a power-of-2, or bad things will happen.
-	// Currently no check for this condition.
-	//
-	// N input samples in X[] are FFT'd and results left in X[].
-	// Because of Nyquist theorem, N samples means 
-	// only first N/2 FFT results in X[] are the answer.
-	// (upper half of X[] is a reflection with no new information).
-	void fft_recursive(std::vector<std::complex<double>>::iterator X, int N) {
-		if(N > 2) {
-			fft_recursive(X,     N/2);   // recurse even items
-			fft_recursive(X+N/2, N/2);   // recurse odd  items
-		}
-		// combine results of two half recursions
-		for(int k=0; k<N/2; k++) {
-			std::complex<double> e = X[k    ];   // even
-			std::complex<double> o = X[k+N/2];   // odd
-			// w is the "twiddle-factor"
-			std::complex<double> w = expLUT[k * mSize / N];
-			X[k    ] = e + w * o;
-			X[k+N/2] = e - w * o;
-		}
+	dst.resize(size);
+	for (int i = 0; i < size; ++i) {
+		dst[order[i]] = src[i];
 	}
+}
 
-}; // class RR
-
-class Measure {
-	Timer mTimer;
-
-public:
-	using Data = std::vector<std::complex<double>>;
-	
-protected:
-	Measure* mReference; // Reference measurement: we should calculate similar to this one and be faster than this one
-	
-	const Data& mIn;
-	Data mResult;
-
-	std::string mName;
-	double mElapsed;
-
-public:
-	Measure(const Data& in): mReference(nullptr), mIn(in), mResult(in.size()) {}
-
-	virtual void run_impl() = 0; // implemented by subclasses
-
-	void run() {
-		mTimer.start();
-		run_impl();
-		mElapsed = mTimer.elapsed();
-		std::cout << mName << ": " << mElapsed << "s\n";
-
-		if (mReference) {
-			if (mResult != mReference->mResult) {
-				double diff = std::transform_reduce(std::begin(mReference->mResult), std::end(mReference->mResult), std::begin(mResult), double(0.0),
-								    [](const double& x, const double& y) -> double
-								    { return x + y;},
-								    [](const std::complex<double>& x, const std::complex<double>&y) -> double
-								    { return abs(y - x);}
-								    );
-				if (diff > tolerance)
-					std::cout << "Error: Results diff: " << diff << "\n";
-			}
-
-			if (mElapsed > mReference->mElapsed) {
-				std::cout << "Error: " << mName << " too slow!\n";
-			}
-		}
+// N must be a power-of-2, or bad things will happen.
+// Currently no check for this condition.
+//
+// N input samples in X[] are FFT'd and results left in X[].
+// Because of Nyquist theorem, N samples means 
+// only first N/2 FFT results in X[] are the answer.
+// (upper half of X[] is a reflection with no new information).
+void RIT::FFT::fft_recursive(std::vector<std::complex<double>>::iterator X, int N) {
+	if(N > 2) {
+		fft_recursive(X,     N/2);   // recurse even items
+		fft_recursive(X+N/2, N/2);   // recurse odd  items
 	}
-
-	double elapsed() { return mElapsed; }
-
-	Data& result() { return mResult; }
-};
-
-class MeasureDFT: public Measure {
-public:
-	MeasureDFT(const Data& in): Measure(in){ mName = "DFT";}
-	void run_impl() override {
-		mResult = dft(mIn);
+	// combine results of two half recursions
+	for(int k=0; k<N/2; k++) {
+		std::complex<double> e = X[k    ];   // even
+		std::complex<double> o = X[k+N/2];   // odd
+		// w is the "twiddle-factor"
+		std::complex<double> w = expLUT[k * mSize / N];
+		X[k    ] = e + w * o;
+		X[k+N/2] = e - w * o;
 	}
-};
+}
 
-class MeasureFFT: public Measure {
-public:
-	MeasureFFT(const Data& in, Measure& reference): Measure(in) { mName = "FFT Cooley-Tukey"; mReference = &reference;}
-	void run_impl() override {
-		mResult = CooleyTukey::fft(mIn);
+// Same as fft_recursive, but results in only half the result due to symmetry in real input
+void RIT::FFT::fft_half(std::vector<std::complex<double>>::iterator X, int N) {
+	if(N > 2) {
+		fft_recursive(X,     N/2);   // recurse even items
+		fft_recursive(X+N/2, N/2);   // recurse odd  items
 	}
-};
-
-class MeasureFFT_RR: public Measure {
-	RR mRR;
-public:
-	MeasureFFT_RR(const Data& in, Measure& reference): Measure(in), mRR(in.size()){ mName = "FFT RR"; mReference = &reference;}
-	void run_impl() override {
-		mResult = mRR.fft(mIn);
+	// combine results of two half recursions
+	for(int k=0; k<N/2; k++) {
+		X[k] += expLUT[k * mSize / N] * X[k+N/2];
 	}
-};
-
-int main(int argc, char* argv[]) {
-	std::vector<std::complex<double>> v(4096, 0);
-
-	generate(v);
-
-	std::cout << "Timer precision: " << Timer::precision() << "s\n";
-
-	MeasureDFT measureDFT(v);
-	measureDFT.run();
-
-	MeasureFFT measureFFT(v, measureDFT);
-	measureFFT.run();
-
-	MeasureFFT_RR measureFFT_RR(v, measureFFT);
-	measureFFT_RR.run();
-
-	return 0;
 }
-
diff --git a/fft.h b/fft.h
new file mode 100644
index 0000000..f2b7038
--- /dev/null
+++ b/fft.h
@@ -0,0 +1,34 @@
+// FFT
+
+#pragma once
+
+#include <complex>
+#include <vector>
+
+namespace RIT {
+
+class FFT {
+private:
+	int mSize;
+	std::vector<int> order;
+	std::vector<std::complex<double>> expLUT;
+
+	bool mFlagHalfOnly;
+
+public:
+	FFT(int size, bool halfOnly = false);
+	std::vector<std::complex<double>> operator()(const std::vector<std::complex<double>> &v);
+
+	FFT& SetHalfOnly(bool enable);
+
+private:
+	int bitreverse(int i);
+
+	void reorder(const std::vector<std::complex<double>>& src, std::vector<std::complex<double>>& dst);
+	void fft_recursive(std::vector<std::complex<double>>::iterator X, int N);
+	void fft_half(std::vector<std::complex<double>>::iterator X, int N);
+}; // class FFT
+
+std::vector<double> magnitudes(std::vector<std::complex<double>>& v);
+
+} // namespace RIT
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..42cd466
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,246 @@
+// FFT Test
+
+#include "fft.h"
+
+#include <complex>
+#include <chrono>
+#include <cmath>
+#include <ctime>
+#include <exception>
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <vector>
+
+const std::complex<double> i(0, 1);
+
+const double tolerance = 0.000001;
+
+using namespace std::complex_literals;
+
+/// CPU time clock
+class Timer {
+public:
+	Timer(): mStart(std::clock()){}
+	void start(){mStart = std::clock();}
+	double elapsed() { return (double(std::clock()) - mStart) / CLOCKS_PER_SEC; }
+	static double precision() {
+		static double result{0};
+		
+		if (result != 0)
+			return result;
+		
+		std::clock_t start = std::clock();
+		while (start == std::clock()) {}
+		start = std::clock();
+
+		std::clock_t end = start;
+		while (end == std::clock()) {}
+		end = std::clock();
+
+		result = (double(end) - start) / CLOCKS_PER_SEC;
+		return result;
+	}
+private:
+	std::clock_t mStart;
+};
+
+// (Slow) DFT
+std::vector<std::complex<double>> dft(const std::vector<std::complex<double>> &v) {
+	std::vector<std::complex<double>> result(v.size(), 0);
+
+	const double N = v.size();
+
+	for (int k = 0; k < v.size(); k++) {
+		for (int j = 0; j < result.size(); j++) {
+			result[k] += v[j] * std::exp(-i * 2. * M_PI * double(k * j) / N);
+		}
+	}
+
+	return result;
+}
+
+std::vector<double> freqs{ 2, 5, 11, 17, 29 }; // known freqs for testing
+
+void generate(std::vector<std::complex<double>>& v) {
+    for (int i = 0; i < v.size(); i++) {
+        v[i] = 0.;
+        // sum several known sinusoids into v[]
+        for(int j = 0; j < freqs.size(); j++)
+            v[i] += sin(2 * M_PI * freqs[j] * i / v.size() );
+    }
+}
+
+namespace CooleyTukey {
+
+// separate even/odd elements to lower/upper halves of array respectively.
+// Due to Butterfly combinations, this turns out to be the simplest way 
+// to get the job done without clobbering the wrong elements.
+void separate(std::vector<std::complex<double>>::iterator a, int n) {
+	std::vector<std::complex<double>> b(n/2);
+	for(int i=0; i<n/2; i++)    // copy all odd elements to heap storage
+		b[i] = a[i*2+1];
+	for(int i=0; i<n/2; i++)    // copy all even elements to lower-half of a[]
+		a[i] = a[i*2];
+	for(int i=0; i<n/2; i++)    // copy all odd (from heap) to upper-half of a[]
+		a[i+n/2] = b[i];
+}
+
+// N must be a power-of-2, or bad things will happen.
+// Currently no check for this condition.
+//
+// N input samples in X[] are FFT'd and results left in X[].
+// Because of Nyquist theorem, N samples means 
+// only first N/2 FFT results in X[] are the answer.
+// (upper half of X[] is a reflection with no new information).
+void fft_recursive(std::vector<std::complex<double>>::iterator X, int N) {
+    if(N < 2) {
+        // bottom of recursion.
+        // Do nothing here, because already X[0] = x[0]
+    } else {
+        separate(X,N);      // all evens to lower half, all odds to upper half
+        fft_recursive(X,     N/2);   // recurse even items
+        fft_recursive(X+N/2, N/2);   // recurse odd  items
+        // combine results of two half recursions
+        for(int k=0; k<N/2; k++) {
+            std::complex<double> e = X[k    ];   // even
+            std::complex<double> o = X[k+N/2];   // odd
+                         // w is the "twiddle-factor"
+            std::complex<double> w = exp( std::complex<double>(0,-2.*M_PI*k/N) );
+            X[k    ] = e + w * o;
+            X[k+N/2] = e - w * o;
+        }
+    }
+}
+
+// Cooley-Tukey
+std::vector<std::complex<double>> fft(const std::vector<std::complex<double>> &v) {
+	std::vector<std::complex<double>> result(v);
+
+	fft_recursive(std::begin(result), result.size());
+	
+	return result;
+}
+}; // namespace CooleyTukey
+
+class Measure {
+	Timer mTimer;
+
+public:
+	using Data = std::vector<std::complex<double>>;
+	
+protected:
+	Measure* mReference; // Reference measurement: we should calculate similar to this one and be faster than this one
+	
+	const Data& mIn;
+	Data mResult;
+
+	std::string mName;
+	double mElapsed;
+
+public:
+	Measure(const Data& in): mReference(nullptr), mIn(in), mResult(in.size()) {}
+
+	virtual void run_impl() = 0; // implemented by subclasses
+
+	void run() {
+		mTimer.start();
+		run_impl();
+		mElapsed = mTimer.elapsed();
+		std::cout << mName << ": " << mElapsed << "s\n";
+
+		if (mReference) {
+			if (mResult != mReference->mResult) {
+				double diff = std::transform_reduce(std::begin(mResult), std::end(mResult), std::begin(mReference->mResult), double(0.0),
+								    [](const double& x, const double& y) -> double
+								    { return x + y;},
+								    [](const std::complex<double>& x, const std::complex<double>&y) -> double
+								    { return abs(y - x);}
+								    );
+				if (diff > tolerance)
+					std::cout << "Error: Results diff: " << diff << "\n";
+			}
+
+			if (mElapsed > mReference->mElapsed) {
+				std::cout << "Error: " << mName << " too slow!\n";
+			}
+		}
+	}
+
+	double elapsed() { return mElapsed; }
+
+	Data& result() { return mResult; }
+};
+
+class MeasureDFT: public Measure {
+public:
+	MeasureDFT(const Data& in): Measure(in){ mName = "DFT";}
+	void run_impl() override {
+		mResult = dft(mIn);
+	}
+};
+
+class MeasureFFT: public Measure {
+public:
+	MeasureFFT(const Data& in, Measure& reference): Measure(in) { mName = "FFT Cooley-Tukey"; mReference = &reference;}
+	void run_impl() override {
+		mResult = CooleyTukey::fft(mIn);
+	}
+};
+
+class MeasureFFT_RR: public Measure {
+	RIT::FFT mRR;
+public:
+	MeasureFFT_RR(const Data& in, Measure& reference): Measure(in), mRR(in.size()){ mName = "FFT RR"; mReference = &reference;}
+	void run_impl() override {
+		mResult = mRR(mIn);
+	}
+};
+
+class MeasureFFT_RR_half: public Measure {
+	RIT::FFT mRR;
+public:
+	MeasureFFT_RR_half(const Data& in, Measure& reference): Measure(in), mRR(in.size(), true){ mName = "FFT RR half"; mReference = &reference; }
+	void run_impl() override {
+		mResult = mRR(mIn);
+	}
+};
+
+class MeasureFFT_RR_half_magnitudes: public Measure {
+	RIT::FFT mRR;
+public:
+	MeasureFFT_RR_half_magnitudes(const Data& in, Measure& reference): Measure(in), mRR(in.size(), true){ mName = "FFT RR half magnitudes"; mReference = &reference; }
+	void run_impl() override {
+		mResult = mRR(mIn);
+		RIT::magnitudes(mResult);
+	}
+};
+
+int main(int argc, char* argv[]) {
+	std::vector<std::complex<double>> v(4096, 0);
+
+	generate(v);
+
+	std::cout << "Timer precision: " << Timer::precision() << "s\n";
+
+	MeasureDFT measureDFT(v);
+	measureDFT.run();
+
+	MeasureFFT measureFFT(v, measureDFT);
+	measureFFT.run();
+
+	MeasureFFT_RR measureFFT_RR(v, measureFFT);
+	measureFFT_RR.run();
+
+	MeasureFFT_RR_half measureFFT_RR_half(v, measureFFT_RR);
+	measureFFT_RR_half.run();
+	
+	MeasureFFT_RR_half_magnitudes measureFFT_RR_half_magnitudes(v, measureDFT);
+	measureFFT_RR_half_magnitudes.run();
+	
+	return 0;
+}
+
+