pythonflea
Coder
This is concerning the source code I am developing:
I encountered the following set of error messages during the run:
Any idea what needs to be done?
Python:
# Import modules
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Define the file path and name.
# NOTE: this is a raw string, so the doubled backslashes are part of the path
# exactly as written; the literal is intentionally left unchanged.
file_path = r"C:\\Users\\PC-1\\Desktop\\stringpred.txt"

# Read all the lines from the file into a list of strings
with open(file_path, "r") as f:
    strings = f.readlines()

# Convert each line to its own numerical array: one array per line, built
# from that line's whitespace-separated integer tokens.
# The earlier version used sum([s.split() for s in strings], []) inside the
# comprehension, which concatenated the tokens of *every* line for each line,
# so each array contained the whole file. That made the later reshape to
# (-1, 5) fail (or silently regroup unrelated values).
# Blank lines are skipped so they do not produce empty arrays.
arrays = [np.array(list(map(int, s.split()))) for s in strings if s.strip()]

# Split the arrays into input and target sequences
# The input sequence is every token of a line except the last one
# The target sequence is the same line shifted by one position
# (every token except the first one)
input_sequences = [a[:-1] for a in arrays]
target_sequences = [a[1:] for a in arrays]
# Define some constants
vocab_size = 10  # number of possible tokens (digits from 0 to 9)
embed_size = 32  # size of the embedding vectors
rnn_units = 32  # size of the LSTM output vectors
batch_size = 20  # number of sequences to process in each batch

# Split the data into training and testing sets with a ratio of 0.8:0.2
X_train, X_test, y_train, y_test = train_test_split(
    input_sequences, target_sequences, test_size=0.2, random_state=42
)

# Reshape the data into 2-D arrays with five tokens per row.
# NOTE: reshape only checks the total element count, so sequences that are
# not exactly five tokens long either raise here or get regrouped into rows
# that mix values from different sequences.
X_train = np.reshape(X_train, (-1, 5))
X_test = np.reshape(X_test, (-1, 5))
y_train = np.reshape(y_train, (-1, 5))
y_test = np.reshape(y_test, (-1, 5))

# Split the X_test array into len(X_test) // 5 chunks (about five rows each).
# np.array_split rejects a section count of 0, so fall back to a single
# chunk when there are fewer than five test rows.
subarrays = np.array_split(X_test, max(1, len(X_test) // 5))
# Assemble the network one layer at a time
model = keras.Sequential()
# Token ids -> dense vectors of length embed_size
model.add(layers.Embedding(input_dim=vocab_size, output_dim=embed_size))
# Recurrent layer; return_sequences=True keeps one output per time step
model.add(layers.LSTM(units=rnn_units, return_sequences=True))
# Per-step softmax over the vocab_size possible tokens
model.add(layers.Dense(units=vocab_size, activation="softmax"))

# Configure the optimizer and the loss used for training
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")

# Fit on the training data for 10 epochs
model.fit(X_train, y_train, epochs=10, batch_size=batch_size)
# Define a function to generate a new string given a seed string
def generate_string(seed, length=6):
    """Generate a string of sampled digits that continues ``seed``.

    Uses the module-level ``model`` and ``vocab_size``. At every step the
    model is run on the current window, one token is sampled from the
    distribution predicted for the last position, and the window is slid
    forward by dropping its first token and appending the sampled one.

    Args:
        seed: String of digit characters used as the initial window.
        length: Number of tokens to generate (defaults to 6).

    Returns:
        The generated tokens joined into a single string.

    Raises:
        ValueError: If ``seed`` is empty or contains a non-digit character.
    """
    # Convert the seed string to an array of tokens
    seed_array = np.array(list(map(int, seed)))
    if seed_array.size == 0:
        raise ValueError("seed must contain at least one digit")

    # Collect the generated tokens here
    output_array = []
    for _ in range(length):
        # Predict the probabilities for the next token using the model;
        # verbose=0 avoids printing a progress bar for every single step
        probs = model.predict(seed_array[np.newaxis, :], verbose=0)

        # np.random.choice checks that p sums to 1 within a tight tolerance,
        # which a float32 softmax row can miss; cast to float64 and
        # renormalise before sampling.
        next_probs = np.asarray(probs[0, -1], dtype=np.float64)
        next_probs /= next_probs.sum()

        # Sample the next token (sampling rather than argmax, for diversity)
        next_token = np.random.choice(vocab_size, p=next_probs)
        output_array.append(next_token)

        # Slide the window: drop the oldest token, append the new one
        seed_array = np.append(seed_array[1:], next_token)

    # Convert the output list to a string and return it
    return "".join(map(str, output_array))
# Try the generator on a few example seed strings
for demo_seed in ("55420", "13120", "25050"):
    print(generate_string(demo_seed))
I encountered the following set of error messages during the run:
Code:
"Traceback (most recent call last):
File "D:/Python/stringpred ver01.py", line 37, in <module>
X_test = np.reshape(X_test, (-1, 5))
File "<__array_function__ internals>", line 200, in reshape
File "C:\Users\PC-1\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\core\fromnumeric.py", line 298, in reshape
return _wrapfunc(a, 'reshape', newshape, order=order)
File "C:\Users\PC-1\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\core\fromnumeric.py", line 54, in _wrapfunc
return _wrapit(obj, method, *args, **kwds)
File "C:\Users\PC-1\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\core\fromnumeric.py", line 43, in _wrapit
result = getattr(asarray(obj), method)(*args, **kwds)
ValueError: cannot reshape array of size 14102 into shape (5)"
Any idea what needs to be done?