import cv2
import numpy as np
import pytesseract
import torch
import torch.nn as nn


class NeuralNetworkDesigner:
    def __init__(self):
        self.layer_maps = {}

    def process_image(self, image_path):
        # Read the image and convert it to grayscale
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Threshold the image so dark boxes and text become white on black
        _, binary = cv2.threshold(gray, 225, 255, cv2.THRESH_BINARY_INV)
        # Find the outer contour of each layer box
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # Sort contours from top to bottom so layers follow the diagram order
        contours = sorted(contours, key=lambda c: cv2.boundingRect(c)[1])
        for i, contour in enumerate(contours):
            x, y, w, h = cv2.boundingRect(contour)
            roi = gray[y:y + h, x:x + w]
            # Perform OCR on the ROI and parse the layer description
            text = pytesseract.image_to_string(roi).strip()
            self.parse_layer_info(i, text)
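
    # Each detected box is expected to hold a short multi-line label, e.g.
    # "Conv\n64\n3" for a Conv2d layer with 64 output channels and a 3x3
    # kernel, or "Dropout\n0.3" for Dropout(p=0.3). These strings are
    # hypothetical OCR output; the exact text depends on the diagram.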
    def parse_layer_info(self, layer_index, text):
        lines = text.split('\n')
        layer_info = {'type': 'Unknown', 'text': text}
        try:
            if 'Input' in lines[0]:
                layer_info['type'] = 'Input'
                layer_info['channels'] = int(lines[1]) if len(lines) > 1 else None
            elif 'Conv' in lines[0]:
                layer_info['type'] = 'Conv2d'
                layer_info['out_channels'] = int(lines[1]) if len(lines) > 1 else None
                layer_info['kernel_size'] = int(lines[2]) if len(lines) > 2 else None
            elif any(x in lines[0] for x in ['MaxPool', 'AvgPool']):
                layer_info['type'] = 'MaxPool2d' if 'Max' in lines[0] else 'AvgPool2d'
                layer_info['kernel_size'] = int(lines[1]) if len(lines) > 1 else None
            elif 'Linear' in lines[0]:
                layer_info['type'] = 'Linear'
                if len(lines) > 1 and '*' in lines[1]:
                    layer_info['in_features'] = lines[1]
                layer_info['out_features'] = int(lines[-1]) if lines[-1].isdigit() else None
            elif 'BatchNorm' in lines[0]:
                layer_info['type'] = 'BatchNorm2d'
                layer_info['num_features'] = int(lines[1]) if len(lines) > 1 else None
            elif any(x in lines[0] for x in ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh']):
                layer_info['type'] = lines[0]
            elif 'Dropout' in lines[0]:
                layer_info['type'] = 'Dropout'
                layer_info['p'] = float(lines[1]) if len(lines) > 1 else 0.5
            elif 'Transformer' in lines[0]:
                layer_info['type'] = 'Transformer'
                layer_info['d_model'] = int(lines[1]) if len(lines) > 1 else 512
                layer_info['nhead'] = int(lines[2]) if len(lines) > 2 else 8
            elif 'Attention' in lines[0]:
                layer_info['type'] = 'MultiheadAttention'
                layer_info['embed_dim'] = int(lines[1]) if len(lines) > 1 else 512
                layer_info['num_heads'] = int(lines[2]) if len(lines) > 2 else 8
            elif 'LSTM' in lines[0] or 'GRU' in lines[0]:
                layer_info['type'] = lines[0]
                layer_info['hidden_size'] = int(lines[1]) if len(lines) > 1 else 256
                layer_info['num_layers'] = int(lines[2]) if len(lines) > 2 else 1
        except ValueError as e:
            print(f"Error parsing layer {layer_index}: {e}")
        self.layer_maps[layer_index] = layer_info
        print(f"Parsed layer {layer_index}: {layer_info}")  # Debug print

    def generate_pytorch_code(self):
        code = "import torch\nimport torch.nn as nn\n\n"
        code += "class CustomNN(nn.Module):\n"
        code += "    def __init__(self):\n"
        code += "        super(CustomNN, self).__init__()\n"
        forward_code = "    def forward(self, x):\n"
        in_channels = None
        for i, layer_info in sorted(self.layer_maps.items()):
            if layer_info['type'] == 'Input':
                in_channels = layer_info.get('channels', 3)
                continue
            if layer_info['type'] == 'Conv2d':
                out_channels = layer_info.get('out_channels', 64)
                kernel_size = layer_info.get('kernel_size', 3)
                code += f"        self.conv{i} = nn.Conv2d({in_channels}, {out_channels}, kernel_size={kernel_size}, padding=1)\n"
                forward_code += f"        x = self.conv{i}(x)\n"
                in_channels = out_channels
            elif layer_info['type'] in ['MaxPool2d', 'AvgPool2d']:
                kernel_size = layer_info.get('kernel_size', 2)
                code += f"        self.pool{i} = nn.{layer_info['type']}(kernel_size={kernel_size})\n"
                forward_code += f"        x = self.pool{i}(x)\n"
            elif layer_info['type'] == 'Linear':
                out_features = layer_info.get('out_features')
                # Insert a Flatten before a Linear layer that does not follow
                # another Linear; otherwise chain in_features from the
                # previous Linear layer.
                if i - 1 not in self.layer_maps or self.layer_maps[i - 1]['type'] not in ['Linear', 'Flatten']:
                    code += "        self.flatten = nn.Flatten()\n"
                    forward_code += "        x = self.flatten(x)\n"
                    # Falls back to a symbolic placeholder if the diagram did
                    # not specify in_features; the generated code then needs a
                    # manual edit.
                    in_features = layer_info.get('in_features', 'x.shape[1]')
                else:
                    in_features = self.layer_maps[i - 1].get('out_features', 64)
                code += f"        self.fc{i} = nn.Linear({in_features}, {out_features})\n"
                forward_code += f"        x = self.fc{i}(x)\n"
            elif layer_info['type'] == 'BatchNorm2d':
                num_features = layer_info.get('num_features', in_channels)
                code += f"        self.bn{i} = nn.BatchNorm2d({num_features})\n"
                forward_code += f"        x = self.bn{i}(x)\n"
            elif layer_info['type'] in ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh']:
                code += f"        self.act{i} = nn.{layer_info['type']}()\n"
                forward_code += f"        x = self.act{i}(x)\n"
            elif layer_info['type'] == 'Dropout':
                p = layer_info.get('p', 0.5)
                code += f"        self.dropout{i} = nn.Dropout(p={p})\n"
                forward_code += f"        x = self.dropout{i}(x)\n"
            elif layer_info['type'] == 'Transformer':
                d_model = layer_info.get('d_model', 512)
                nhead = layer_info.get('nhead', 8)
                code += f"        self.transformer{i} = nn.Transformer(d_model={d_model}, nhead={nhead})\n"
                # nn.Transformer expects both src and tgt, so reuse x for both
                forward_code += f"        x = self.transformer{i}(x, x)\n"
            elif layer_info['type'] == 'MultiheadAttention':
                embed_dim = layer_info.get('embed_dim', 512)
                num_heads = layer_info.get('num_heads', 8)
                code += f"        self.attention{i} = nn.MultiheadAttention(embed_dim={embed_dim}, num_heads={num_heads})\n"
                forward_code += f"        x, _ = self.attention{i}(x, x, x)\n"
            elif layer_info['type'] in ['LSTM', 'GRU']:
                hidden_size = layer_info.get('hidden_size', 256)
                num_layers = layer_info.get('num_layers', 1)
                code += f"        self.rnn{i} = nn.{layer_info['type']}(input_size={in_channels}, hidden_size={hidden_size}, num_layers={num_layers}, batch_first=True)\n"
                forward_code += f"        x, _ = self.rnn{i}(x)\n"
            elif layer_info['type'] == 'Unknown':
                print(f"Warning: Unknown layer type at index {i}. Layer info: {layer_info}")
        code += "\n" + forward_code
        code += "        return x\n"
        return code
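
    # For a diagram parsed as Input(3) -> Conv(16, 3) -> ReLU at indices 0-2,
    # generate_pytorch_code() would emit roughly:
    #     self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
    #     self.act2 = nn.ReLU()
    # plus a forward() that applies them in order (illustrative only; the
    # actual output depends on what the OCR recovers).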

    def write_to_file(self, code, filename):
        with open(filename, 'w') as f:
            f.write(code)

    def design_network(self, image_path, output_file):
        self.process_image(image_path)
        pytorch_code = self.generate_pytorch_code()
        self.write_to_file(pytorch_code, output_file)
        # print(f"Neural network code has been generated and saved to '{output_file}'")
        # print("\nGenerated PyTorch Code:")
        # print(pytorch_code)
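

# Minimal usage sketch: "architecture.png" and "generated_model.py" are
# hypothetical placeholder paths for the input diagram and the generated module.
if __name__ == "__main__":
    designer = NeuralNetworkDesigner()
    designer.design_network("architecture.png", "generated_model.py")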