A neuro-symbolic ML framework inspired by the Kolmogorov-Arnold representation theorem, designed to make models both accurate and interpretable.
This is an experimental research project (v0.0.3). For educational and research purposes only.
High-level Architecture:
UML Diagram:
pip install -qU oikan
git clone https://github.com/silvermete0r/OIKAN.git
cd OIKAN
pip install -e .
Combines neural network learning with symbolic mathematics
Generates human-readable mathematical expressions
Familiar .fit() and .predict() interface
Extract and save symbolic formulas for analysis
from oikan import OIKANRegressor
from sklearn.metrics import r2_score

# Location used for both saving and re-loading the model below.
model_path = "outputs/model.json"

# Configure the regressor.
model = OIKANRegressor(
    hidden_sizes=[32, 32],    # Sizes of the hidden layers
    activation='relu',        # Activation function; alternatives: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu'
    augmentation_factor=5,    # Augmentation factor used for data generation
    alpha=1.0,                # Strength of the ElasticNet regularization (symbolic regression)
    l1_ratio=0.5,             # ElasticNet mixing parameter in [0, 1]: 0 is Ridge, 1 is Lasso (symbolic regression)
    sigma=5,                  # Standard deviation of the Gaussian noise used for data augmentation
    top_k=5,                  # How many top features to select (symbolic regression)
    epochs=100,               # Number of training epochs
    lr=0.001,                 # Learning rate
    batch_size=32,            # Training batch size
    verbose=True,             # Print progress during training
    evaluate_nn=True,         # Validate the neural network's performance before the full process
    random_state=42,          # Random seed for reproducibility
)

# Train on the training split (X_train / y_train must be prepared beforehand).
model.fit(X_train, y_train)

# Predict on the held-out split.
y_pred = model.predict(X_test)

# Score the predictions against the true targets.
r2_value = r2_score(y_test, y_pred)
print("R2 Score:", r2_value)

# Retrieve the symbolic formula.
# The default, type='original', returns the full formula without pruning;
# 'sympy' returns a formula simplified with sympy and 'latex' returns LaTeX.
formula = model.get_formula()
print("Symbolic Formula:", formula)

# Inspect the feature importances.
importances = model.feature_importances()
print("Feature Importances:", importances)

# Optionally persist the model ...
model.save(model_path)

# ... and restore it into a fresh estimator.
loaded_model = OIKANRegressor()
loaded_model.load(model_path)
from oikan import OIKANClassifier
from sklearn.metrics import accuracy_score

# Initialize model
model = OIKANClassifier(
    hidden_sizes=[32, 32],     # Hidden layer sizes
    activation='relu',         # Activation function (other options: 'tanh', 'leaky_relu', 'elu', 'swish', 'gelu')
    augmentation_factor=10,    # Augmentation factor for data generation
    alpha=1.0,                 # ElasticNet regularization strength (symbolic regression)
    l1_ratio=0.5,              # ElasticNet mixing parameter (0 <= l1_ratio <= 1): 0 is Ridge, 1 is Lasso (symbolic regression)
    sigma=5,                   # Standard deviation of Gaussian noise for data augmentation
    top_k=5,                   # Number of top features to select (symbolic regression)
    epochs=100,                # Number of training epochs
    lr=0.001,                  # Learning rate
    batch_size=32,             # Batch size for training
    verbose=True,              # Verbose output during training
    evaluate_nn=True,          # Validate neural network performance before full process
    random_state=42            # Random seed for reproducibility
)

# Fit the model (X_train / y_train must be prepared beforehand)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate performance.
# accuracy_score takes (y_true, y_pred): compare the true labels with the
# predictions, not the input features with the labels.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Get symbolic formulas for each class.
# The default, type='original', returns the full formulas without pruning;
# 'sympy' returns formulas simplified with sympy and 'latex' returns LaTeX.
formulas = model.get_formula()
for i, formula in enumerate(formulas):
    print(f"Class {i} Formula:", formula)

# Get feature importances
importances = model.feature_importances()
print("Feature Importances:", importances)

# Save the model (optional)
model.save("outputs/model.json")

# Load the model (optional)
loaded_model = OIKANClassifier()
loaded_model.load("outputs/model.json")
OIKAN provides symbolic model compilers to convert generated formulas into different programming languages, making them suitable for low-resource systems and computationally efficient solutions.
Currently supported languages: Python, C++, C, JavaScript, Rust, and Go.
import numpy as np
import json
def predict(X, symbolic_model):
    """Evaluate an exported symbolic regression model on ``X``.

    Args:
        X: Array-like input samples; converted with ``np.asarray``.
        symbolic_model: Mapping loaded from the exported JSON model. The keys
            ``'basis_functions'``, ``'n_features'`` and ``'coefficients'``
            are read.

    Returns:
        The dot product of the evaluated basis functions with the stored
        coefficients.
    """
    X = np.asarray(X)
    # NOTE: evaluate_basis_functions is not defined in this snippet; it must
    # be available in the same module for predict() to run.
    X_transformed = evaluate_basis_functions(
        X,
        symbolic_model['basis_functions'],
        symbolic_model['n_features'],
    )
    return np.dot(X_transformed, symbolic_model['coefficients'])
if __name__ == "__main__":
    # Load the exported symbolic model description from JSON.
    with open('outputs/california_housing_model.json', 'r') as f:
        symbolic_model = json.load(f)

    # Ten random samples with as many columns as the model declares features.
    X = np.random.rand(10, symbolic_model['n_features'])
    y_pred = predict(X, symbolic_model)
    print(y_pred)
import numpy as np
import json
def predict(X, symbolic_model):
    """Predict class indices with an exported symbolic classification model.

    Args:
        X: Array-like input samples; converted with ``np.asarray``.
        symbolic_model: Mapping loaded from the exported JSON model. The keys
            ``'basis_functions'``, ``'n_features'`` and ``'coefficients_list'``
            are read. ``'coefficients_list'`` presumably holds one coefficient
            vector per class -- verify against the exporter.

    Returns:
        For each row of the logits, the index of the highest-probability
        column (``np.argmax`` along axis 1).
    """
    X = np.asarray(X)
    # NOTE: evaluate_basis_functions is not defined in this snippet; it must
    # be available in the same module for predict() to run.
    X_transformed = evaluate_basis_functions(
        X,
        symbolic_model['basis_functions'],
        symbolic_model['n_features'],
    )
    logits = np.dot(X_transformed, np.array(symbolic_model['coefficients_list']).T)

    # Softmax is invariant to subtracting a per-row constant. Shifting by the
    # row maximum keeps np.exp from overflowing to inf, which would otherwise
    # produce inf / inf = nan and make argmax return a meaningless index.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exp_shifted = np.exp(shifted)
    probabilities = exp_shifted / np.sum(exp_shifted, axis=1, keepdims=True)
    return np.argmax(probabilities, axis=1)
if __name__ == "__main__":
    # Load the exported symbolic model description from JSON.
    with open('outputs/iris_model.json', 'r') as f:
        symbolic_model = json.load(f)

    # Three hand-written samples with four feature values each.
    X = np.array([
        [5.1, 3.5, 1.4, 0.2],
        [7.0, 3.2, 4.7, 1.4],
        [6.3, 3.3, 6.0, 2.5],
    ])
    y_pred = predict(X, symbolic_model)
    print(y_pred)
Explore OIKAN's potential by contributing new use cases and experiments. Visit our Featured Projects page to see examples or share your own research findings through our GitHub repository.