python PyPDF2 인식 안됨
-
게시물 수정, 삭제는 로그인 필요
PDF 파일의 정보를 추출하려고 다음과 같은 코드를 작성했습니다.
# Import necessary libraries
import os
import PyPDF2
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
# Define function to extract text from PDF files
def extract_text_from_pdf(pdf_file_path):
    """Return the text of every page in a PDF file as one string.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        The text extracted from each page, concatenated in page order.
    """
    # The context manager closes the file even if parsing raises part-way
    # through, so the handle is never leaked.
    with open(pdf_file_path, 'rb') as pdf_file:
        # PdfFileReader / numPages / getPage / extractText were removed in
        # PyPDF2 3.0; PdfReader, .pages and extract_text() replace them.
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extraction must happen while the file is still open.
        return ''.join(page.extract_text() for page in pdf_reader.pages)
# Define function to preprocess text
def preprocess_text(text):
    """Tokenize ``text`` and drop English stop words.

    Args:
        text: The string to clean.

    Returns:
        The remaining tokens joined by single spaces. Tokens are lowercased
        only for the stop-word comparison; kept tokens retain their case.
    """
    words = word_tokenize(text)
    english_stop_words = set(stopwords.words('english'))
    kept = []
    for word in words:
        if word.lower() in english_stop_words:
            continue
        kept.append(word)
    return ' '.join(kept)
# Define main function
def main(directory='example'):
    """Print the preprocessed text of every PDF file in a directory.

    Args:
        directory: Directory to scan for PDF files (not recursive).
            Defaults to ``'example'``.
    """
    for filename in os.listdir(directory):
        # Compare case-insensitively so files named e.g. "REPORT.PDF" are
        # not silently skipped.
        if not filename.lower().endswith('.pdf'):
            continue
        pdf_file_path = os.path.join(directory, filename)
        text = extract_text_from_pdf(pdf_file_path)
        print(preprocess_text(text))
# Call main function
if __name__ == '__main__':
    # Run the extraction only when executed as a script, not on import.
    main()
그런데 실행하면 다음과 같은 오류가 나오며 실행이 되지 않습니다.
Traceback (most recent call last):
  File "/workspace/pdf_extraction.py", line 3, in <module>
    import PyPDF2
ModuleNotFoundError: No module named 'PyPDF2'
pip install PyPDF2
pip install nltk
pip3 install PyPDF2
pip3 install nltk
위 명령을 모두 실행해 봐도 똑같습니다.
Windows 11입니다. 도와주세요 ㅠㅠㅠ
#python pypdf2 #python pypdf2 install #python pypdf2 pdfreader #python pypdf2 extract_text #python pypdf2 pdfreader example #python pypdf2 pdfmerger #python pypdf2 pdffilereader #python pypdf2 cropbox #python pypdf2 read all pages #python pypdf2 merge pdf