Skip to content

Quickstart: Your First Cython Module

This guide walks you through creating, compiling, and using your first Cython module with progressively advanced examples.

Example 1: Simple Function Optimization

Pure Python Version

Create fibonacci_py.py:

def fib(n):
    """Return the n-th Fibonacci number via naive double recursion.

    Any ``n`` less than or equal to 1 (negatives included) is returned
    unchanged. The running time grows exponentially with ``n``.
    """
    return n if n <= 1 else fib(n - 1) + fib(n - 2)

def fib_iterative(n):
    """Return the n-th Fibonacci number by stepping a (current, next) pair.

    Runs ``n`` loop iterations; a non-positive ``n`` performs none and
    yields 0.
    """
    pair = (0, 1)
    for _ in range(n):
        current, following = pair
        pair = (following, current + following)
    return pair[0]

Benchmark:

import time
from fibonacci_py import fib, fib_iterative

# Time a single call. perf_counter() is a monotonic, high-resolution clock,
# so the measurement is not disturbed by wall-clock adjustments (time.time()
# can jump), and it matches the clock used in benchmark.py.
start = time.perf_counter()
result = fib_iterative(1000000)
print(f"Time: {time.perf_counter() - start:.4f}s")

Cython Version (Basic)

Create fibonacci.pyx:

def fib(int n):
    """Recursive Fibonacci with a C-typed argument - modest speedup.

    The recursion still goes through Python-level calls; only ``n`` is
    typed. Any ``n`` of 1 or less is returned unchanged.
    """
    if n > 1:
        return fib(n - 1) + fib(n - 2)
    return n

def fib_iterative(int n):
    """Typed iterative Fibonacci - significant speedup.

    The accumulators are fixed-width C integers, so unlike the pure Python
    version the result cannot grow without bound. ``unsigned long long``
    (at least 64 bits) holds every value up to F(93).

    Args:
        n: Index of the Fibonacci number; non-positive values return 0.

    Returns:
        The n-th Fibonacci number.

    Raises:
        OverflowError: If ``n`` is greater than 93, because F(94) no longer
            fits in 64 bits and the result would silently wrap.
    """
    cdef unsigned long long a = 0
    cdef unsigned long long b = 1
    cdef int i
    if n > 93:
        raise OverflowError("fib_iterative(n) only fits in 64 bits for n <= 93")
    for i in range(n):
        # On the last step for n == 93, b wraps (well-defined for unsigned
        # types) but is never returned; a still holds the exact value.
        a, b = b, a + b
    return a

Advanced Cython Version

# cython: boundscheck=False, wraparound=False, cdivision=True

cdef long _fib_internal(long n) nogil:
    """C-level Fibonacci kernel that runs without the GIL.

    Performs ``n`` additions on C ``long`` values; a non-positive ``n``
    skips the loop and returns 0. Arithmetic is plain C ``long`` arithmetic,
    so large ``n`` exceeds the type's range.
    """
    cdef long prev = 0
    cdef long curr = 1
    cdef long step, following

    for step in range(n):
        following = prev + curr
        prev = curr
        curr = following
    return prev

cpdef long fib_fast(long n):
    """Validate ``n`` and delegate to the C-level Fibonacci kernel.

    Callable from both Python and Cython. Raises ValueError for a negative
    ``n``; the result is returned as a C ``long``.
    """
    if n >= 0:
        return _fib_internal(n)
    raise ValueError("n must be non-negative")

def fib_array(long n):
    """Generate the first ``n`` Fibonacci numbers as a Python list.

    The sequence is built in a single pass, appending the running value at
    each step, instead of recomputing every term from scratch (which made
    the original quadratic in ``n``). The arithmetic is the same C ``long``
    recurrence used by the scalar kernel, so the values are unchanged.

    Args:
        n: Number of terms to produce; non-positive values give ``[]``.

    Returns:
        A list ``[F(0), F(1), ..., F(n-1)]``.
    """
    cdef long i, tmp
    cdef long a = 0
    cdef long b = 1
    cdef list result = []
    for i in range(n):
        result.append(a)
        tmp = a + b
        a = b
        b = tmp
    return result

Compilation Methods

Method 1: Using setup.py (Standard)

Create setup.py:

from setuptools import setup, Extension
from Cython.Build import cythonize
import numpy as np

# Describe the single extension module built from fibonacci.pyx.
fibonacci_ext = Extension(
    name="fibonacci",
    sources=["fibonacci.pyx"],
    include_dirs=[np.get_include()],
    extra_compile_args=["-O3", "-march=native"],
    extra_link_args=[],
)
extensions = [fibonacci_ext]

# Compiler directives applied to every module passed to cythonize().
directives = {
    'language_level': "3",
    'boundscheck': False,
    'wraparound': False,
    'cdivision': True,
}

setup(
    name="fibonacci",
    # annotate=True also writes an HTML annotation report.
    ext_modules=cythonize(
        extensions,
        compiler_directives=directives,
        annotate=True,
    ),
)

Build:

# Run the build_ext command from setup.py; --inplace asks for the built
# module to be placed alongside the sources.
python setup.py build_ext --inplace

Method 2: Using pyximport (Quick Prototyping)

import numpy as np
import pyximport

# Install the import hook. numpy must be imported here because
# np.get_include() supplies the header directory handed to the compiler.
pyximport.install(
    language_level=3,
    setup_args={'include_dirs': [np.get_include()]}
)

# Now you can import .pyx files directly
import fibonacci
# fib_fast returns a C long: F(90) still fits in 64 bits, whereas F(100)
# does not and would print a wrapped, incorrect value.
print(fibonacci.fib_fast(90))

Note: pyximport is convenient but doesn't support all features (e.g., OpenMP, custom compiler flags).

Method 3: Manual Compilation

# Step 1: Generate C code
# (no --embed: that flag adds a main() for building a standalone executable,
# which is not what an importable extension module needs)
cython fibonacci.pyx

# Step 2: Compile to shared library
# The include directory is queried from the interpreter that will import the
# module, instead of hard-coding one Python version's header path.
gcc -shared -pthread -fPIC -fwrapv -O3 -Wall -fno-strict-aliasing \
    -I"$(python -c "import sysconfig; print(sysconfig.get_path('include'))")" \
    -o fibonacci.so fibonacci.c

# Step 3: Import
# F(90) fits in a 64-bit C long; F(100) would overflow the return type.
python -c "import fibonacci; print(fibonacci.fib_fast(90))"

Method 4: Jupyter Notebook

%load_ext cython
%%cython --annotate

def fib_notebook(int n):
    """Iterative Fibonacci for use inside a ``%%cython`` notebook cell.

    Uses ``unsigned long long`` accumulators (at least 64 bits) because a
    plain C ``int`` overflows silently once the value exceeds its range.

    Args:
        n: Index of the Fibonacci number; non-positive values return 0.

    Returns:
        The n-th Fibonacci number.

    Raises:
        OverflowError: If ``n`` is greater than 93, the largest index whose
            value fits in 64 bits.
    """
    cdef unsigned long long a = 0, b = 1
    cdef int i
    if n > 93:
        raise OverflowError("fib_notebook(n) only fits in 64 bits for n <= 93")
    for i in range(n):
        a, b = b, a + b
    return a

Method 5: Using pyproject.toml (Modern)

Create pyproject.toml:

# Packages that must be available in the build environment, and the backend
# that performs the build.
# NOTE(review): nothing in this file declares an extension module; presumably
# the setup.py from Method 1 sits next to it and supplies ext_modules - confirm.
[build-system]
requires = ["setuptools>=60", "wheel", "cython>=3.0", "numpy"]
build-backend = "setuptools.build_meta"

# Distribution metadata and runtime dependencies.
[project]
name = "fibonacci-cython"
version = "0.1.0"
requires-python = ">=3.8"
dependencies = ["numpy>=1.20"]

# NOTE(review): neither Cython nor setuptools appears to read a [tool.cython]
# table - verify against the Cython documentation. The language level is
# otherwise set through cythonize(compiler_directives=...) in setup.py or a
# "# cython: language_level=3" header comment in the .pyx file.
[tool.cython]
language_level = 3

Build:

# Install the "build" front end, build the distributions, then install the
# wheel(s) found in dist/.
pip install build
python -m build
pip install dist/*.whl

Example 2: Working with NumPy Arrays

Create array_ops.pyx:

# cython: boundscheck=False, wraparound=False
import numpy as np
cimport numpy as cnp

def sum_array(cnp.ndarray[cnp.float64_t, ndim=1] arr):
    """Add up a 1-D float64 NumPy array using the typed-buffer syntax.

    Walks the array by index with C-typed loop variables and returns the
    accumulated total.
    """
    cdef Py_ssize_t idx
    cdef Py_ssize_t length = arr.shape[0]
    cdef cnp.float64_t acc = 0.0
    for idx in range(length):
        acc += arr[idx]
    return acc

def sum_memoryview(double[:] arr):
    """Add up a 1-D buffer of doubles through a typed memoryview (faster).

    Walks the view by index with C-typed loop variables and returns the
    accumulated total.
    """
    cdef Py_ssize_t idx
    cdef Py_ssize_t length = arr.shape[0]
    cdef double acc = 0.0
    for idx in range(length):
        acc += arr[idx]
    return acc

def matrix_multiply(double[:, :] A, double[:, :] B):
    """Manual matrix multiplication of two 2-D double buffers.

    Args:
        A: Left operand with shape (m, n).
        B: Right operand with shape (n, p).

    Returns:
        A new float64 NumPy array of shape (m, p) holding ``A @ B``.

    Raises:
        ValueError: If the column count of ``A`` differs from the row count
            of ``B``. This file disables bounds checking, so without the
            check a mismatch would index past the end of a buffer.
    """
    cdef Py_ssize_t i, j, k
    cdef Py_ssize_t m = A.shape[0]
    cdef Py_ssize_t n = A.shape[1]
    cdef Py_ssize_t p = B.shape[1]
    cdef double acc
    cdef double[:, :] C

    if B.shape[0] != n:
        raise ValueError(
            f"shape mismatch: A has {n} columns but B has {B.shape[0]} rows"
        )

    C = np.zeros((m, p), dtype=np.float64)

    for i in range(m):
        for j in range(p):
            # Accumulate in a C local and store once per output cell.
            acc = 0.0
            for k in range(n):
                acc += A[i, k] * B[k, j]
            C[i, j] = acc

    return np.asarray(C)

Usage:

import numpy as np
import array_ops

# Sum one million random doubles through the typed-memoryview function.
arr = np.random.rand(1000000)
result = array_ops.sum_memoryview(arr)
print(f"Sum: {result}")

# Multiply two random 100x100 matrices with the hand-written kernel.
A = np.random.rand(100, 100)
B = np.random.rand(100, 100)
C = array_ops.matrix_multiply(A, B)

Example 3: C Data Structures

Create structures.pyx:

from libc.stdlib cimport malloc, free

# Plain C struct holding three double components; used below for both the
# position and the velocity of a Particle.
cdef struct Point:
    double x
    double y
    double z

# C struct that embeds two Point values (position and velocity) by value,
# plus a scalar mass.
cdef struct Particle:
    Point position
    Point velocity
    double mass

def create_particle(double px, double py, double pz,
                    double vx, double vy, double vz,
                    double mass):
    """Create a heap-allocated C Particle, fill it in, and report on it.

    Args:
        px, py, pz: Position components.
        vx, vy, vz: Velocity components.
        mass: Particle mass.

    Returns:
        A dict with keys ``'position'`` and ``'velocity'`` (3-tuples of
        floats), ``'mass'`` and ``'kinetic_energy'`` (``0.5 * mass * |v|^2``).

    Raises:
        MemoryError: If the allocation fails.
    """
    # Cython only accepts cdef declarations at function level, not inside a
    # try/if/for body, so every C local is declared up front.
    cdef double v_squared
    cdef double kinetic_energy
    cdef Particle* p = <Particle*>malloc(sizeof(Particle))

    if p is NULL:
        raise MemoryError("Unable to allocate particle")

    try:
        p.position.x = px
        p.position.y = py
        p.position.z = pz
        p.velocity.x = vx
        p.velocity.y = vy
        p.velocity.z = vz
        p.mass = mass

        # Calculate kinetic energy
        v_squared = (p.velocity.x * p.velocity.x +
                     p.velocity.y * p.velocity.y +
                     p.velocity.z * p.velocity.z)
        kinetic_energy = 0.5 * p.mass * v_squared

        return {
            'position': (p.position.x, p.position.y, p.position.z),
            'velocity': (p.velocity.x, p.velocity.y, p.velocity.z),
            'mass': p.mass,
            'kinetic_energy': kinetic_energy
        }
    finally:
        # Runs on both the return path and any exception, so the struct is
        # always released.
        free(p)

Example 4: Extension Types (cdef classes)

Create point_class.pyx:

cdef class Point:
    """High-performance 3-D point with C-typed coordinates.

    ``x``, ``y`` and ``z`` are public and can be reassigned from Python at
    any time, so the magnitude is computed on demand. A cached value would
    go stale after such an assignment, and the computation is only three
    multiplications and a square root.
    """
    cdef public double x, y, z

    def __init__(self, double x=0, double y=0, double z=0):
        """Store the three coordinates (each defaults to 0)."""
        self.x = x
        self.y = y
        self.z = z

    cpdef double magnitude(self):
        """Return the Euclidean length of the point's coordinate vector."""
        return (self.x*self.x + self.y*self.y + self.z*self.z) ** 0.5

    cpdef Point add(self, Point other):
        """Return a new Point that is the component-wise sum with ``other``."""
        return Point(self.x + other.x, self.y + other.y, self.z + other.z)

    cdef void _normalize(self):
        """Scale the point to unit length in place (C-only method).

        A point of zero magnitude is left unchanged to avoid dividing by
        zero.
        """
        cdef double mag = self.magnitude()
        if mag > 0:
            self.x /= mag
            self.y /= mag
            self.z /= mag

    def normalize(self):
        """Public normalize method; scales the point to unit length in place."""
        self._normalize()

    def __repr__(self):
        return f"Point({self.x}, {self.y}, {self.z})"

Testing Your Module

Create test_fibonacci.py:

import fibonacci
import pytest

def test_fib_fast():
    """fib_fast returns the known values for n = 0, 1 and 10."""
    known_values = ((0, 0), (1, 1), (10, 55))
    for index, expected in known_values:
        assert fibonacci.fib_fast(index) == expected

def test_fib_negative():
    """fib_fast rejects a negative index with ValueError."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        fibonacci.fib_fast(-1)

def test_fib_array():
    """fib_array(10) yields the first ten Fibonacci numbers in order."""
    first_ten = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
    assert fibonacci.fib_array(len(first_ten)) == first_ten

Run tests:

# Run the test file with verbose (-v) per-test output.
pytest test_fibonacci.py -v

Benchmarking

Create benchmark.py:

import time
import numpy as np
import fibonacci
import array_ops

def benchmark(func, *args, iterations=100):
    """Return the mean wall-clock time, in seconds, of one ``func(*args)`` call.

    Args:
        func: Callable to time.
        *args: Positional arguments passed to ``func`` on every call.
        iterations: Number of timed calls (keyword-only); must be at least 1.

    Returns:
        Total elapsed time divided by ``iterations``.

    Raises:
        ValueError: If ``iterations`` is less than 1, which would otherwise
            divide by zero or report a negative time.
    """
    if iterations < 1:
        raise ValueError("iterations must be a positive integer")
    start = time.perf_counter()
    for _ in range(iterations):
        # The return value is irrelevant to the timing, so it is discarded.
        func(*args)
    elapsed = time.perf_counter() - start
    return elapsed / iterations

# Fibonacci benchmark: mean time per fib_fast call over the default
# 100 iterations.
# NOTE(review): fib_fast returns a C long, so at n = 100000 the value has
# long since overflowed; only the timing is meaningful here - confirm this
# is the intent.
n = 100000
time_cython = benchmark(fibonacci.fib_fast, n)
print(f"Cython fib: {time_cython*1000:.3f} ms")

# Array operations benchmark: the Cython memoryview sum against np.sum on
# the same one-million-element array, 1000 iterations each.
arr = np.random.rand(1000000)
time_memview = benchmark(array_ops.sum_memoryview, arr, iterations=1000)
time_numpy = benchmark(np.sum, arr, iterations=1000)
print(f"Cython sum: {time_memview*1000:.3f} ms")
print(f"NumPy sum: {time_numpy*1000:.3f} ms")
# A ratio above 1 means the Cython version took less time than NumPy.
print(f"Speedup: {time_numpy/time_memview:.2f}x")

Common Pitfalls

  1. Forgetting to rebuild: Changes to .pyx require recompilation
  2. Import errors: Module not in path or wrong Python version
  3. Type mismatches: Passing wrong types to typed functions
  4. Memory leaks: Not freeing allocated memory in C code