i have an ebook site, where the ebook will be converted to audio, im running into error during the parsing of the ebook into json so it can be made into ssml, so that text to speech can read it, below is the code that needs to be fixed. code works somewhat, it just isnt getting past 3rd and 4th chapters.
1. i need it to be able to be able to replace the words "Project Gutenberg" with "[login to view URL]"
2. it needs to be able to distinguish chapters, 1. Title, Author, Forward, Preface, Acknowledgments, Introduction, Prologue, Sub-chapters, Sections, Epilogue, Conclusion, Summary, Appendix, Appendices, Endnotes, Footnotes, Glossary) correctly, with correct breaks
3. needs to be able tp parse entire book
4. open to suggestions from anyone that has worked on ebooks and audio books before
5. also i am using Mimic 3, text to speech engine on my godaddy dedicated server, just in case one needs to know
<?php
require '/vendor/[login to view URL]';
use Smalot\PdfParser\Parser;
// Add hooks to check and parse downloadable files when a product is accessed
add_action('template_redirect', 'check_and_parse_downloadable_file');
add_action('admin_init', 'check_and_parse_downloadable_file');
function check_and_parse_downloadable_file() {
global $post;
// Exit if it's not a product or book_json meta is already set
if (!$post || get_post_type($post) !== 'product' || get_post_meta($post->ID, 'book_json', true)) {
return;
}
$product = wc_get_product($post->ID);
if ($product && $product->is_virtual() && $product->is_downloadable()) {
$downloads = $product->get_downloads();
foreach ($downloads as $download) {
$file_path = ABSPATH . ltrim($download->get_file(), '/');
$parsed_content = '';
if (pathinfo($file_path, PATHINFO_EXTENSION) === 'txt') {
$file_content = file_get_contents($file_path);
$parsed_content = parse_txt_content($file_content);
} elseif (pathinfo($file_path, PATHINFO_EXTENSION) === 'pdf') {
$file_content = parse_pdf_content($file_path);
$parsed_content = parse_txt_content($file_content);
}
if ($parsed_content) {
$upload_dir = wp_upload_dir();
$json_dir = $upload_dir['basedir'] . '/json/';
if (!file_exists($json_dir)) {
mkdir($json_dir, 0755, true);
}
$json_file_path = $json_dir . sanitize_title($post->post_title) . '.json';
file_put_contents($json_file_path, $parsed_content);
update_post_meta($post->ID, 'book_json', $json_file_path);
}
}
}
}
function parse_txt_content($content) {
$elements = [
'Title', 'Author', 'Forward', 'Preface', 'Acknowledgments', 'Introduction',
'Prologue', 'Chapter', 'Sub-chapters', 'Sections', 'Epilogue', 'Conclusion', 'Summary',
'Appendix', 'Appendices', 'Endnotes', 'Footnotes', 'Glossary'
];
$parsed_elements = [];
foreach ($elements as $element) {
// Adjusted regular expression pattern
preg_match("/\b$element\b[:\s]*(.*?)(?=\b" . implode('\b|\b', $elements) . "\b|$)/si", $content, $matches);
if (isset($matches[1]) && trim($matches[1])) {
$parsed_elements[$element] = trim($matches[1]);
}
}
$parsed_content_with_ssml = apply_ssml_tags(implode("\n", $parsed_elements));
return json_encode($parsed_content_with_ssml);
}
function apply_ssml_tags($content) {
$content = preg_replace('/"(.*?)"/', '<speak><s>"$1"</s></speak>', $content);
$content = preg_replace('/!/', '!<break time="1s" />', $content);
$content = preg_replace('/,/', ',<break time="500ms" />', $content);
$content = preg_replace('/[.!?]\s/', '$0<break time="1s" />', $content);
// Add sound effects
$sound_effects = [
'door opens' => '<audio src="[login to view URL]" />',
'glass breaks' => '<audio src="[login to view URL]" />',
'car crash' => '<audio src="[login to view URL]" />'
];
foreach ($sound_effects as $phrase => $effect) {
$content = str_replace($phrase, $effect, $content);
}
return $content;
}
function parse_pdf_content($file_path) {
$parser = new Parser();
$pdf = $parser->parseFile($file_path);
return $pdf->getText();
}
// Adding a meta box to show the JSON in WP admin
function book_json_meta_box() {
add_meta_box('book_json_box', 'Book JSON', 'book_json_meta_box_callback', 'product', 'normal', 'default');
}
add_action('add_meta_boxes', 'book_json_meta_box');
function book_json_meta_box_callback($post) {
$json_file = get_post_meta($post->ID, 'book_json', true);
if ($json_file && file_exists($json_file)) {
$json_content = file_get_contents($json_file);
echo '<textarea style="width:100%; height:300px;">' . esc_textarea($json_content) . '</textarea>';
} else {
echo 'No JSON file has been created for this product.';
}
}