maturaarbeit/mise.toml
MM4go c7ff6a8a29 Add PDF to Markdown converter with mise task runner
- Implement pdf_to_markdown.py script with pypdf for text extraction
- Extract metadata (title, author, creation date) from PDFs
- Generate clean Markdown files with YAML front matter
- Add comprehensive error handling and logging
- Create mise.toml with 10+ convenient tasks for conversion
- Provide detailed documentation (4 guides + quick reference)
- Successfully convert all 18 PDF files in artikel/ folder to Markdown
- Include .gitignore for Python cache and local config
2026-02-23 14:58:58 +01:00

65 lines
1.8 KiB
TOML

# Environment variables set for this project's tasks.
[env]
# Python standard setting: any non-empty value makes stdout/stderr unbuffered,
# so converter output appears as it is produced.
PYTHONUNBUFFERED = "1"
# Installs the Python packages listed in requirements.txt.
[tasks.install]
description = "Install project dependencies"
# Invoke pip through python3 so the packages land in the same interpreter the
# conversion tasks run; a bare `pip` on PATH may belong to a different Python.
run = "python3 -m pip install -r requirements.txt"
# Default conversion: runs the converter script with no arguments, so the
# script's own defaults decide the input and output locations.
[tasks.convert]
description = "Convert all PDFs in artikel folder to Markdown"
run = "python3 pdf_to_markdown.py"
# Dependencies are installed before the script runs.
depends = ["install"]
# Runs the converter script with its -v flag, after the install task.
# Bare key: hyphens are allowed without quotes, as in [tasks.clean-all].
[tasks.convert-verbose]
description = "Convert PDFs with verbose logging"
depends = ["install"]
run = "python3 pdf_to_markdown.py -v"
# Runs the converter script with its -q flag, after the install task.
# Bare key: hyphens are allowed without quotes, as in [tasks.clean-all].
[tasks.convert-quiet]
description = "Convert PDFs quietly (errors only)"
depends = ["install"]
run = "python3 pdf_to_markdown.py -q"
# Runs the converter script with --dry-run, after the install task.
# Bare key: hyphens are allowed without quotes, as in [tasks.clean-all].
[tasks.dry-run]
description = "Preview conversion without writing files"
depends = ["install"]
run = "python3 pdf_to_markdown.py --dry-run"
# Converts PDFs from a caller-chosen location. INPUT_DIR and OUTPUT_DIR are
# read from the environment; when unset or empty they fall back to ./artikel
# and ./artikel/converted respectively.
[tasks.convert-custom]
description = "Convert PDFs from custom input folder"
# Literal (single-quoted) TOML string so the shell's double quotes need no
# escaping. Quoting the expansions keeps a path containing spaces as a single
# argument instead of letting the shell split it.
run = 'python3 pdf_to_markdown.py "${INPUT_DIR:-./artikel}" "${OUTPUT_DIR:-./artikel/converted}"'
depends = ["install"]
# Deletes the generated *.md files directly inside artikel/converted and
# leaves the directory itself in place.
[tasks.clean]
description = "Remove converted markdown files"
# -f keeps rm silent when no file matches; the confirmation is echoed only if
# rm exits successfully (&&).
run = "rm -rf artikel/converted/*.md && echo 'Cleaned converted markdown files'"
# Removes the whole artikel/converted directory plus Python bytecode caches.
[tasks.clean-all]
description = "Remove all converted files and cache"
# Only the __pycache__ directory and *.pyc files in the current directory are
# removed; nested caches are not searched for. Each step runs only if the
# previous one succeeded (&&).
run = "rm -rf artikel/converted && rm -rf __pycache__ && rm -rf *.pyc && echo 'Cleaned all build artifacts'"
# Compares the number of PDFs directly inside artikel/ with the number of
# Markdown files directly inside artikel/converted/ and prints a summary.
# Only counts are compared, not file names.
[tasks.status]
description = "Show conversion status (count PDFs and converted files)"
# Literal multi-line string: the script reaches the shell verbatim, with no
# TOML escape processing.
# - Wrapping each count in an arithmetic expansion strips the leading padding
#   that BSD/macOS `wc -l` prints, so the messages stay clean.
# - `wc -l` already prints 0 when find yields nothing (for example when
#   artikel/converted does not exist yet), so no fallback echo is needed.
# - The test operands are quoted, and the case of more Markdown files than
#   PDFs is reported on its own line instead of as a negative count.
run = '''
echo "=== PDF Conversion Status ==="
PDF_COUNT=$(( $(find artikel -maxdepth 1 -name "*.pdf" | wc -l) ))
MD_COUNT=$(( $(find artikel/converted -maxdepth 1 -name "*.md" 2>/dev/null | wc -l) ))
echo "PDF files in artikel/: $PDF_COUNT"
echo "Markdown files in artikel/converted/: $MD_COUNT"
if [ "$PDF_COUNT" -eq "$MD_COUNT" ]; then
  echo " All PDFs converted!"
elif [ "$PDF_COUNT" -gt "$MD_COUNT" ]; then
  echo " Unconverted PDFs: $((PDF_COUNT - MD_COUNT))"
else
  echo " Extra Markdown files: $((MD_COUNT - PDF_COUNT))"
fi
'''
# Prints a heading and then runs `mise tasks`.
[tasks.help]
description = "Show available tasks"
run = "echo 'Available tasks:' && mise tasks"
# Python toolchain requested for this project; the tasks in this file invoke
# it as `python3`.
[tools.python]
version = "3.11"
# NOTE(review): no task visible in this file invokes pipenv (the install task
# uses pip with requirements.txt) — confirm this tool is still needed.
[tools.pipenv]
# Presumably treated as a version prefix (pipenv releases are date-based) — TODO confirm.
version = "2023"