better grammarly prep
This commit is contained in:
48
thesis/filters/math-omit.lua
Normal file
48
thesis/filters/math-omit.lua
Normal file
@@ -0,0 +1,48 @@
|
||||
-- math-omit.lua
|
||||
-- Replace any math with a placeholder and ensure a space before it when appropriate.
|
||||
-- Placeholder text substituted for every math element. The spacing pass
-- identifies substituted elements by comparing a Str's text against this value.
local PH = "[math omitted]"
|
||||
|
||||
--- Pandoc filter hook: swap a math element for the placeholder text.
-- The element's own content is discarded. Whitespace around the placeholder
-- is not handled here; the Para/Plain hooks take care of it.
-- @param el the Math element being visited (unused)
-- @return a Str inline carrying the placeholder text
function Math(el)
  local placeholder = pandoc.Str(PH)
  return placeholder
end
|
||||
|
||||
--- Rebuild an inline list so that every math placeholder is preceded by
-- whitespace where prose would expect it.
--
-- A Space is inserted in front of a placeholder Str unless:
--   * the placeholder is the first inline in the list;
--   * the previous inline is already whitespace (Space, SoftBreak, LineBreak);
--   * the previous inline is a Str ending in an opening bracket/paren/brace,
--     a slash, a hyphen, ASCII whitespace, or a non-breaking space.
--
-- @param inlines sequence of pandoc inline elements (the content of a block)
-- @return a new plain Lua table holding the adjusted sequence; the input
--         list itself is not modified
local function ensure_space_before_ph(inlines)
  -- UTF-8 encoding of U+00A0. A Str can end in this two-byte sequence (for
  -- example from a LaTeX `~` tie), so a last-byte test alone would miss it.
  local NBSP = "\194\160"

  local out = {}
  for i = 1, #inlines do
    local cur = inlines[i]
    if cur.t == "Str" and cur.text == PH then
      -- Look at what has already been emitted, not at inlines[i - 1], so that
      -- a Space inserted for an earlier placeholder is taken into account.
      local prev = out[#out]
      local need_space = true

      if not prev then
        -- First token in the block: nothing to separate from.
        need_space = false
      elseif prev.t == "Space" or prev.t == "SoftBreak" or prev.t == "LineBreak" then
        -- Any whitespace inline already separates the placeholder.
        need_space = false
      elseif prev.t == "Str" then
        local text = prev.text
        local last = text:sub(-1)
        if last:match("[%(%[%{%/%-%s]") or text:sub(-2) == NBSP then
          need_space = false
        end
      end

      if need_space then
        out[#out + 1] = pandoc.Space()
      end
    end
    -- Every inline, placeholder or not, is carried over unchanged.
    out[#out + 1] = cur
  end
  return out
end
|
||||
|
||||
--- Pandoc filter hook: fix spacing before math placeholders in a paragraph.
-- @param el the Para block being visited
-- @return the same block, with its content replaced by the adjusted inlines
function Para(el)
  local spaced = ensure_space_before_ph(el.content)
  el.content = spaced
  return el
end
|
||||
|
||||
--- Pandoc filter hook: fix spacing before math placeholders in a Plain block.
-- @param el the Plain block being visited
-- @return the same block, with its content replaced by the adjusted inlines
function Plain(el)
  local spaced = ensure_space_before_ph(el.content)
  el.content = spaced
  return el
end
|
||||
@@ -18,18 +18,24 @@ PART1_TXT="${OUT_BASE}_part1.txt"
|
||||
PART2_TXT="${OUT_BASE}_part2.txt"
|
||||
MARKER="Data and Preprocessing"
|
||||
|
||||
echo "[1/4] Flattening with latexpand -> ${FLAT_TEX}"
|
||||
echo "[1/5] Flattening with latexpand -> ${FLAT_TEX}"
|
||||
latexpand "${INPUT_TEX}" > "${FLAT_TEX}"
|
||||
|
||||
echo "[2/4] Removing tabular/tabularx environments -> ${NO_TABLES_TEX}"
|
||||
echo "[2/5] Removing tabular/tabularx environments -> ${NO_TABLES_TEX}"
|
||||
# Replace entire tabular / tabularx environments with a placeholder
|
||||
perl -0777 -pe 's/\\begin\{(tabularx?)\}.*?\\end\{\1\}/[table omitted]/gs' \
|
||||
"${FLAT_TEX}" > "${NO_TABLES_TEX}"
|
||||
|
||||
echo "[3/4] Converting to plain text with pandoc -> ${PLAIN_TXT}"
|
||||
pandoc -f latex -t plain --wrap=none "${NO_TABLES_TEX}" -o "${PLAIN_TXT}"
|
||||
echo "[3/5] Converting to plain text with pandoc -> ${PLAIN_TXT}"
|
||||
pandoc -f latex -t plain --wrap=none \
|
||||
--lua-filter=filters/keep-citations.lua \
|
||||
--lua-filter=filters/math-omit.lua \
|
||||
"${NO_TABLES_TEX}" -o "${PLAIN_TXT}"
|
||||
|
||||
echo "[4/4] Splitting ${PLAIN_TXT} before the marker line: \"${MARKER}\""
|
||||
echo "[4/5] Replacing [] placeholders with [figure]"
|
||||
sed -i 's/\[\]/[figure]/g' "${PLAIN_TXT}"
|
||||
|
||||
echo "[5/5] Splitting ${PLAIN_TXT} before the marker line: \"${MARKER}\""
|
||||
|
||||
# Ensure the marker exists exactly on its own line
|
||||
if ! grep -xq "${MARKER}" "${PLAIN_TXT}"; then
|
||||
|
||||
Reference in New Issue
Block a user