# - Implement pdf_to_markdown.py script with pypdf for text extraction
# - Extract metadata (title, author, creation date) from PDFs
# - Generate clean Markdown files with YAML front matter
# - Add comprehensive error handling and logging
# - Create mise.toml with 10+ convenient tasks for conversion
# - Provide detailed documentation (4 guides + quick reference)
# - Successfully convert all 18 PDF files in artikel/ folder to Markdown
# - Include .gitignore for Python cache and local config
# 65 lines, 1.8 KiB, TOML
# Environment variables set for the tasks defined in this file.
[env]
# A non-empty value makes Python write stdout/stderr unbuffered, so task
# output shows up as it is produced.
PYTHONUNBUFFERED = "1"

# Installs the packages listed in requirements.txt.
[tasks.install]
description = "Install project dependencies"
# Invoke pip through python3 so the packages are installed for the same
# interpreter that the python3-based tasks in this file run under.
run = "python3 -m pip install -r requirements.txt"

# Default conversion: runs the script with no arguments.
[tasks.convert]
description = "Convert all PDFs in artikel folder to Markdown"
run = "python3 pdf_to_markdown.py"
# Dependencies are installed before the script runs.
depends = ["install"]

# Same as `convert`, but passes -v to the script.
[tasks.convert-verbose]
description = "Convert PDFs with verbose logging"
run = "python3 pdf_to_markdown.py -v"
# Dependencies are installed before the script runs.
depends = ["install"]

# Same as `convert`, but passes -q to the script.
[tasks.convert-quiet]
description = "Convert PDFs quietly (errors only)"
run = "python3 pdf_to_markdown.py -q"
# Dependencies are installed before the script runs.
depends = ["install"]

# Runs the script with --dry-run.
[tasks.dry-run]
description = "Preview conversion without writing files"
run = "python3 pdf_to_markdown.py --dry-run"
# Dependencies are installed before the script runs.
depends = ["install"]

# Passes an input and an output directory to the script as positional
# arguments. Both come from the environment: INPUT_DIR falls back to
# ./artikel and OUTPUT_DIR to ./artikel/converted when unset or empty.
[tasks.convert-custom]
description = "Convert PDFs from custom input folder"
# The expansions are double-quoted for the shell so a directory containing
# spaces stays a single argument; a TOML literal string avoids escaping them.
run = 'python3 pdf_to_markdown.py "${INPUT_DIR:-./artikel}" "${OUTPUT_DIR:-./artikel/converted}"'
# Dependencies are installed before the script runs.
depends = ["install"]

# Deletes the *.md entries directly inside artikel/converted; the directory
# itself is kept.
[tasks.clean]
description = "Remove converted markdown files"
run = "rm -rf artikel/converted/*.md && echo 'Cleaned converted markdown files'"

# Deletes the whole artikel/converted directory plus the __pycache__
# directory and *.pyc files in the directory the task runs from.
[tasks.clean-all]
description = "Remove all converted files and cache"
run = "rm -rf artikel/converted && rm -rf __pycache__ && rm -rf *.pyc && echo 'Cleaned all build artifacts'"

# Compares the number of *.pdf files directly in artikel/ with the number of
# *.md files directly in artikel/converted/. Only the counts are compared;
# file names are not matched against each other.
[tasks.status]
description = "Show conversion status (count PDFs and converted files)"
# Multi-line literal string: the shell script is passed through verbatim with
# no TOML escape processing.
# - A missing artikel/converted directory yields a count of 0, because find's
#   error output is discarded and wc still prints 0; no extra fallback needed.
# - tr strips the padding some wc implementations put around the number.
run = '''
echo "=== PDF Conversion Status ==="
PDF_COUNT=$(find artikel -maxdepth 1 -name "*.pdf" | wc -l | tr -d "[:space:]")
MD_COUNT=$(find artikel/converted -maxdepth 1 -name "*.md" 2>/dev/null | wc -l | tr -d "[:space:]")
echo "PDF files in artikel/: $PDF_COUNT"
echo "Markdown files in artikel/converted/: $MD_COUNT"
if [ "$PDF_COUNT" -eq "$MD_COUNT" ]; then
echo "✓ All PDFs converted!"
else
echo "⚠ Unconverted PDFs: $((PDF_COUNT - MD_COUNT))"
fi
'''

# Prints a heading, then the output of `mise tasks`.
[tasks.help]
description = "Show available tasks"
run = "echo 'Available tasks:' && mise tasks"

# Python version requested for this project.
[tools.python]
version = "3.11"

# NOTE(review): no task visible in this file invokes pipenv (install uses pip
# with requirements.txt) — confirm this tool entry is still needed.
[tools.pipenv]
version = "2023"