Welcome!

By registering with us, you'll be able to discuss, share and private message with other members of our community.

SignUp Now!
  • Guest, before posting your code please take these rules into consideration:
    • It is required to use our BBCode feature to display your code. While within the editor click < / > or >_ and place your code within the BB Code prompt. This helps others with finding a solution by making it easier to read and easier to copy.
    • You can also use markdown to share your code. When using markdown your code will be automatically converted to BBCode. For help with markdown check out the markdown guide.
    • Don't share a wall of code. All we want is the problem area, the code related to your issue.


    To learn more about how to use our BBCode feature, please click here.

    Thank you, Code Forum.

Python RecursionError: maximum recursion depth exceeded: main.exe file

Jean Yomin

New Coder
Python:
import os
import re
import difflib
import pytesseract
import fitz  # PyMuPDF
from PIL import Image
from kivy.app import App
from kivy.uix.button import Button
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.textinput import TextInput
from plyer import filechooser
import sqlite3

# Set the TESSDATA_PREFIX environment variable
[CODE=python]import os
import re
import difflib
import pytesseract
import fitz  # PyMuPDF
from PIL import Image
from kivy.app import App
from kivy.uix.button import Button
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.textinput import TextInput
from plyer import filechooser
import sqlite3

# Point Tesseract at its language data through the TESSDATA_PREFIX environment variable.
# A raw string already keeps backslashes literal, so they must not be doubled.
os.environ['TESSDATA_PREFIX'] = r'C:\Program Files\Tesseract-OCR\tessdata'

# Path of the Tesseract executable that pytesseract should invoke.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def read_words_from_db(db_file):
    """Return the first column of every row of the ``mots`` table.

    Args:
        db_file: Path of the SQLite database file to read.

    Returns:
        A list with one entry per row of ``mots`` (the row's first column).

    Raises:
        sqlite3.Error: If the query fails, for example when the ``mots``
            table does not exist.
    """
    conn = sqlite3.connect(db_file)
    try:
        # Each fetched row is a tuple; only its first column is kept.
        return [row[0] for row in conn.execute('SELECT * FROM mots')]
    finally:
        # Close the connection even when the query raises.
        conn.close()

# List of correct words, loaded from the SQLite database when the module is imported.
correct_words_list = read_words_from_db(r"C:\Users\bideg\Desktop\py_fichiers\ma_base_de_donnees.db")

def read_segments_from_db(db_file):
    """Return the ``segments`` table as a ``{segment: replacement}`` dict.

    Args:
        db_file: Path of the SQLite database file to read.

    Returns:
        A dict mapping the first column of each row of ``segments`` to the
        second column. Every row must have exactly two columns.

    Raises:
        sqlite3.Error: If the query fails, for example when the
            ``segments`` table does not exist.
    """
    conn = sqlite3.connect(db_file)
    try:
        rows = conn.execute('SELECT * FROM segments').fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    return {segment: replacement for segment, replacement in rows}

# Dictionary of segment replacements, loaded from the SQLite database when the module is imported.
segment_replacements = read_segments_from_db(r"C:\Users\bideg\Desktop\uncle bernard\ma_base_de_donnees_segments.db")

# Rest of your code...
def pdf_to_img(pdf_file, page_num=0):
    """Render one page of a PDF to a PNG file and return the PNG's path.

    Args:
        pdf_file: Path of the PDF document to open.
        page_num: Page index passed to ``load_page`` (default 0).

    Returns:
        The relative path ``page_<page_num>.png`` the image was saved to.
    """
    img_path = f"page_{page_num}.png"
    # Open the document as a context manager so it is closed after rendering.
    with fitz.open(pdf_file) as doc:
        pix = doc.load_page(page_num).get_pixmap()  # render page to an image
        pix.save(img_path)
    return img_path

def ocr_core(file):
    """Return the text pytesseract extracts from ``file`` using ``lang='fra'``."""
    return pytesseract.image_to_string(file, lang='fra')

def replace_segments(text):
    """Apply every entry of ``segment_replacements`` to ``text`` and return the result.

    Each key is used as a regular-expression pattern for ``re.sub`` and its
    value as the replacement; entries are applied in the dict's iteration order.
    """
    result = text
    for pattern in segment_replacements:
        result = re.sub(pattern, segment_replacements[pattern], result)
    return result

def correct_words(text):
    """Replace each word of ``text`` by its closest match in ``correct_words_list``.

    The text is split on whitespace. A word is replaced only when
    ``difflib.get_close_matches`` finds a candidate at cutoff 0.7; otherwise
    it is kept unchanged. The words are joined back with single spaces.
    """
    corrected = []
    for token in text.split():
        # Ask for the single most similar word in the reference list.
        candidates = difflib.get_close_matches(token, correct_words_list, n=1, cutoff=0.7)
        corrected.append(candidates[0] if candidates else token)
    return ' '.join(corrected)

class OCRApp(App):
    """Kivy application that runs OCR on a chosen image or PDF page.

    The window holds three buttons (upload image, extract text, upload PDF)
    and a text field for the PDF page number. Results are printed to stdout.
    """

    def build(self):
        """Create the widgets, wire up the button handlers and return the root layout."""
        self.title = 'MinyogogƁàsàaOcr'
        self.icon = "C:\\Users\\bideg\\Desktop\\fin_de_ocrapp\\icone_app.jpeg"

        layout = BoxLayout(orientation='vertical')

        upload_button = Button(text='Upload Image',
                               background_color=[0, 1, 0, 1])  # green
        upload_button.bind(on_press=self.upload_image)

        extract_button = Button(text='Extract Text',
                                background_color=[1, 0, 0, 1])  # red
        extract_button.bind(on_press=self.extract_text)

        upload_pdf_button = Button(text='Upload PDF',
                                   background_color=[1, 1, 0, 1])  # yellow
        upload_pdf_button.bind(on_press=self.upload_pdf)

        self.page_num_input = TextInput(hint_text='Enter page number')

        layout.add_widget(upload_button)
        layout.add_widget(extract_button)
        layout.add_widget(upload_pdf_button)
        layout.add_widget(self.page_num_input)

        return layout

    @staticmethod
    def _ocr_pipeline(image):
        """OCR ``image``, then apply segment replacement and word correction."""
        return correct_words(replace_segments(ocr_core(image)))

    def upload_image(self, instance):
        """Let the user pick an image file and remember its path in ``self.image_path``."""
        filepath = filechooser.open_file(title="Choisissez une image", filters=[("Images", "*.jpg;*.png;*.jpeg")])
        if filepath:
            self.image_path = filepath[0]
            print(f"Image uploaded from {self.image_path}")

    def extract_text(self, instance):
        """Run the OCR pipeline on the previously chosen image and print the text."""
        if not hasattr(self, 'image_path'):
            print("No image found to extract text.")
            return

        # Open the image in a ``with`` block so its file handle is released.
        with Image.open(self.image_path) as image:
            result = self._ocr_pipeline(image)
        print(f"Text extracted: {result}")

    def upload_pdf(self, instance):
        """Let the user pick a PDF, render the requested page and print its OCR text.

        The page number is read from the text field; anything that is not a
        plain decimal number falls back to page index 0.
        """
        filepath = filechooser.open_file(title="Choose a file", filters=[("PDF", "*.pdf")])
        if not filepath:
            return

        self.pdf_path = filepath[0]
        print(f"PDF uploaded from {self.pdf_path}")

        page_num_str = self.page_num_input.text.strip()
        # isdecimal() only accepts characters int() can parse; isdigit() also
        # accepts characters such as superscripts, which make int() raise.
        page_num = int(page_num_str) if page_num_str.isdecimal() else 0

        img = pdf_to_img(self.pdf_path, page_num)
        print(f"Text extracted from page {page_num+1}:")
        with Image.open(img) as result_img:
            print(self._ocr_pipeline(result_img))

# Start the application only when this file is executed as a script.
if __name__ == '__main__':
    OCRApp().run()
os.environ['TESSDATA_PREFIX'] = r'path_to_tessdata'

# Specify the path to the Tesseract executable here
pytesseract.pytesseract.tesseract_cmd = r'path_to_tesseract.exe'

# Function to read words from the database (anonymized)
def read_words_from_db(db_file):
# [Code to read words from the database]

# List of correct words (anonymized)
correct_words_list = read_words_from_db('path_to_my_database.db')

# Function to read segments from the database (anonymized)
def read_segments_from_db(db_file):
# [Code to read segments from the database]

# Dictionary of segment replacements (anonymized)
segment_replacements = read_segments_from_db('path_to_my_database_segments.db')

# Rest of your code...
# [Insert here the relevant anonymized functions and classes]

# Part 2: Function definitions for the OCR application (anonymized)
def upload_image(self, instance):
# [Code to upload an image from the user's system]

def extract_text(self, instance):
# [Code to extract text from the uploaded image using OCR]

def upload_pdf(self, instance):
# [Code to upload a PDF file from the user's system]

# [Insert the anonymized relevant code snippet here]

if __name__ == '__main__':
# [Code to run the OCR application]
[/CODE]
 
Python:
import os
import re
import difflib
import pytesseract
import fitz  # PyMuPDF
from PIL import Image
from kivy.app import App
from kivy.uix.button import Button
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.textinput import TextInput
from plyer import filechooser
import sqlite3

# Set the TESSDATA_PREFIX environment variable
[CODE=python]import os
import re
import difflib
import pytesseract
import fitz  # PyMuPDF
from PIL import Image
from kivy.app import App
from kivy.uix.button import Button
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.textinput import TextInput
from plyer import filechooser
import sqlite3

# Point Tesseract at its language data through the TESSDATA_PREFIX environment variable.
# A raw string already keeps backslashes literal, so they must not be doubled.
os.environ['TESSDATA_PREFIX'] = r'C:\Program Files\Tesseract-OCR\tessdata'

# Path of the Tesseract executable that pytesseract should invoke.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

def read_words_from_db(db_file):
    """Return the first column of every row of the ``mots`` table.

    Args:
        db_file: Path of the SQLite database file to read.

    Returns:
        A list with one entry per row of ``mots`` (the row's first column).

    Raises:
        sqlite3.Error: If the query fails, for example when the ``mots``
            table does not exist.
    """
    conn = sqlite3.connect(db_file)
    try:
        # Each fetched row is a tuple; only its first column is kept.
        return [row[0] for row in conn.execute('SELECT * FROM mots')]
    finally:
        # Close the connection even when the query raises.
        conn.close()

# List of correct words, loaded from the SQLite database when the module is imported.
correct_words_list = read_words_from_db(r"C:\Users\bideg\Desktop\py_fichiers\ma_base_de_donnees.db")

def read_segments_from_db(db_file):
    """Return the ``segments`` table as a ``{segment: replacement}`` dict.

    Args:
        db_file: Path of the SQLite database file to read.

    Returns:
        A dict mapping the first column of each row of ``segments`` to the
        second column. Every row must have exactly two columns.

    Raises:
        sqlite3.Error: If the query fails, for example when the
            ``segments`` table does not exist.
    """
    conn = sqlite3.connect(db_file)
    try:
        rows = conn.execute('SELECT * FROM segments').fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    return {segment: replacement for segment, replacement in rows}

# Dictionary of segment replacements, loaded from the SQLite database when the module is imported.
segment_replacements = read_segments_from_db(r"C:\Users\bideg\Desktop\uncle bernard\ma_base_de_donnees_segments.db")

# Rest of your code...
def pdf_to_img(pdf_file, page_num=0):
    """Render one page of a PDF to a PNG file and return the PNG's path.

    Args:
        pdf_file: Path of the PDF document to open.
        page_num: Page index passed to ``load_page`` (default 0).

    Returns:
        The relative path ``page_<page_num>.png`` the image was saved to.
    """
    img_path = f"page_{page_num}.png"
    # Open the document as a context manager so it is closed after rendering.
    with fitz.open(pdf_file) as doc:
        pix = doc.load_page(page_num).get_pixmap()  # render page to an image
        pix.save(img_path)
    return img_path

def ocr_core(file):
    """Return the text pytesseract extracts from ``file`` using ``lang='fra'``."""
    return pytesseract.image_to_string(file, lang='fra')

def replace_segments(text):
    """Apply every entry of ``segment_replacements`` to ``text`` and return the result.

    Each key is used as a regular-expression pattern for ``re.sub`` and its
    value as the replacement; entries are applied in the dict's iteration order.
    """
    result = text
    for pattern in segment_replacements:
        result = re.sub(pattern, segment_replacements[pattern], result)
    return result

def correct_words(text):
    """Replace each word of ``text`` by its closest match in ``correct_words_list``.

    The text is split on whitespace. A word is replaced only when
    ``difflib.get_close_matches`` finds a candidate at cutoff 0.7; otherwise
    it is kept unchanged. The words are joined back with single spaces.
    """
    corrected = []
    for token in text.split():
        # Ask for the single most similar word in the reference list.
        candidates = difflib.get_close_matches(token, correct_words_list, n=1, cutoff=0.7)
        corrected.append(candidates[0] if candidates else token)
    return ' '.join(corrected)

class OCRApp(App):
    """Kivy application that runs OCR on a chosen image or PDF page.

    The window holds three buttons (upload image, extract text, upload PDF)
    and a text field for the PDF page number. Results are printed to stdout.
    """

    def build(self):
        """Create the widgets, wire up the button handlers and return the root layout."""
        self.title = 'MinyogogƁàsàaOcr'
        self.icon = "C:\\Users\\bideg\\Desktop\\fin_de_ocrapp\\icone_app.jpeg"

        layout = BoxLayout(orientation='vertical')

        upload_button = Button(text='Upload Image',
                               background_color=[0, 1, 0, 1])  # green
        upload_button.bind(on_press=self.upload_image)

        extract_button = Button(text='Extract Text',
                                background_color=[1, 0, 0, 1])  # red
        extract_button.bind(on_press=self.extract_text)

        upload_pdf_button = Button(text='Upload PDF',
                                   background_color=[1, 1, 0, 1])  # yellow
        upload_pdf_button.bind(on_press=self.upload_pdf)

        self.page_num_input = TextInput(hint_text='Enter page number')

        layout.add_widget(upload_button)
        layout.add_widget(extract_button)
        layout.add_widget(upload_pdf_button)
        layout.add_widget(self.page_num_input)

        return layout

    @staticmethod
    def _ocr_pipeline(image):
        """OCR ``image``, then apply segment replacement and word correction."""
        return correct_words(replace_segments(ocr_core(image)))

    def upload_image(self, instance):
        """Let the user pick an image file and remember its path in ``self.image_path``."""
        filepath = filechooser.open_file(title="Choisissez une image", filters=[("Images", "*.jpg;*.png;*.jpeg")])
        if filepath:
            self.image_path = filepath[0]
            print(f"Image uploaded from {self.image_path}")

    def extract_text(self, instance):
        """Run the OCR pipeline on the previously chosen image and print the text."""
        if not hasattr(self, 'image_path'):
            print("No image found to extract text.")
            return

        # Open the image in a ``with`` block so its file handle is released.
        with Image.open(self.image_path) as image:
            result = self._ocr_pipeline(image)
        print(f"Text extracted: {result}")

    def upload_pdf(self, instance):
        """Let the user pick a PDF, render the requested page and print its OCR text.

        The page number is read from the text field; anything that is not a
        plain decimal number falls back to page index 0.
        """
        filepath = filechooser.open_file(title="Choose a file", filters=[("PDF", "*.pdf")])
        if not filepath:
            return

        self.pdf_path = filepath[0]
        print(f"PDF uploaded from {self.pdf_path}")

        page_num_str = self.page_num_input.text.strip()
        # isdecimal() only accepts characters int() can parse; isdigit() also
        # accepts characters such as superscripts, which make int() raise.
        page_num = int(page_num_str) if page_num_str.isdecimal() else 0

        img = pdf_to_img(self.pdf_path, page_num)
        print(f"Text extracted from page {page_num+1}:")
        with Image.open(img) as result_img:
            print(self._ocr_pipeline(result_img))

# Start the application only when this file is executed as a script.
if __name__ == '__main__':
    OCRApp().run()
os.environ['TESSDATA_PREFIX'] = r'path_to_tessdata'

# Specify the path to the Tesseract executable here
pytesseract.pytesseract.tesseract_cmd = r'path_to_tesseract.exe'

# Function to read words from the database (anonymized)
def read_words_from_db(db_file):
# [Code to read words from the database]

# List of correct words (anonymized)
correct_words_list = read_words_from_db('path_to_my_database.db')

# Function to read segments from the database (anonymized)
def read_segments_from_db(db_file):
# [Code to read segments from the database]

# Dictionary of segment replacements (anonymized)
segment_replacements = read_segments_from_db('path_to_my_database_segments.db')

# Rest of your code...
# [Insert here the relevant anonymized functions and classes]

# Part 2: Function definitions for the OCR application (anonymized)
def upload_image(self, instance):
# [Code to upload an image from the user's system]

def extract_text(self, instance):
# [Code to extract text from the uploaded image using OCR]

def upload_pdf(self, instance):
# [Code to upload a PDF file from the user's system]

# [Insert the anonymized relevant code snippet here]

if __name__ == '__main__':
# [Code to run the OCR application]
[/CODE]
Firstly, there is a missing import statement for fitz at the beginning of your code. There is also a missing import for the filechooser module from the plyer package. Additionally, the last block of code is outside the `if __name__ == '__main__':` block, and there is a missing '==' in the condition. Please see the corrected code below:

Python:
import os
import re
import difflib
import pytesseract
import fitz  # PyMuPDF
from PIL import Image
from kivy.app import App
from kivy.uix.button import Button
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.textinput import TextInput
from plyer import filechooser
import sqlite3

# Set the TESSDATA_PREFIX environment variable.
# A raw string already keeps backslashes literal, so they must not be doubled.
os.environ['TESSDATA_PREFIX'] = r'C:\Program Files\Tesseract-OCR\tessdata'

# Specify the path to the Tesseract executable here
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Function to read words from the database
def read_words_from_db(db_file):
    """Placeholder for reading the word list from the database ``db_file``."""
    # [Code to read words from the database]
    pass

# List of correct words
correct_words_list = read_words_from_db(r"C:\Users\bideg\Desktop\py_fichiers\ma_base_de_donnees.db")

# Function to read segments from the database
def read_segments_from_db(db_file):
    """Placeholder for reading the segment replacements from the database ``db_file``."""
    # [Code to read segments from the database]
    pass

# Dictionary of segment replacements
segment_replacements = read_segments_from_db(r"C:\Users\bideg\Desktop\uncle bernard\ma_base_de_donnees_segments.db")

# Rest of your code...
def pdf_to_img(pdf_file, page_num=0):
    """Placeholder for converting page ``page_num`` of ``pdf_file`` to an image."""
    # [Code to convert PDF to image]
    pass

def ocr_core(file):
    """Placeholder for running OCR on ``file``."""
    # [Code for OCR processing]
    pass

def replace_segments(text):
    """Placeholder for replacing segments in ``text``."""
    # [Code to replace segments in text]
    pass

def correct_words(text):
    """Placeholder for correcting the words of ``text``."""
    # [Code to correct words in text]
    pass

class OCRApp(App):
    """Placeholder for the Kivy application class."""
    # [Code for Kivy application]
    pass

# Check if the script is being run directly
if __name__ == '__main__':
    OCRApp().run()


Please note that I've added placeholder comments for the functions that are not provided in your code. You need to fill in the actual code for those functions. Also, ensure that you have the necessary dependencies installed, such as Kivy, PyMuPDF, and Plyer.
 

New Threads

Buy us a coffee!

Back
Top Bottom