@lars.peyer
We have investigated further and found that you can achieve your requirements using the following steps. Please check the sample code below for reference and let us know whether it suits your scenario.
- Convert PDF to Word format
- Read the footnotes from the Word document
- Remove the footnotes from the Word document
- Convert Word to MD format
Unfortunately, the only problem we see right now is that the API does not detect footnotes in a PDF that doesn’t have a horizontal divider above the footnotes. This is the case with the sample PDF you shared, so we have logged another ticket, PDF2WORD-961, to fix the issue. Hopefully, this problem will be fixed in the upcoming release, 21.12.
# Import required modules
import asposewordscloud
import asposewordscloud.models.requests
from shutil import copyfile
# Cloud endpoint and credentials (placeholders: substitute your own values)
base_url = 'https://api.aspose.cloud'
client_id = 'xxxxxx-xxxx-xxxx-xxxx-xxxxxxxx'
client_secret = 'xxxxxxxxxxxxxxxxxxxxxxxxxx'

# Build the API client; the host is also set explicitly on its configuration
words_api = asposewordscloud.WordsApi(client_id, client_secret, base_url)
words_api.api_client.configuration.host = base_url

# Local and cloud locations, all derived from a single base document name
documentName = 'document_with_footnotes'
tempCloudFolder = "pdf2md"
inputFilePath = f'c:/tmp/{documentName}.pdf'
outputFilePath = f'c:/tmp/{documentName}.md'
# Step 1. Upload PDF file to Cloud
# The context manager closes the local file handle as soon as the upload call
# returns (or raises), instead of leaving it open for the rest of the script.
with open(inputFilePath, 'rb') as input_document:
    upload_request = asposewordscloud.models.requests.UploadFileRequest(
        input_document, f'{tempCloudFolder}/{documentName}.pdf'
    )
    upload_response = words_api.upload_file(upload_request)
print(f'Step 1: uploaded {upload_response.uploaded}')
# Step 2. Convert the uploaded PDF to DOCX in Cloud
docx_name = f'{documentName}.docx'
save_request = asposewordscloud.models.requests.SaveAsRequest(
    name=f'{documentName}.pdf',
    save_options_data=asposewordscloud.SaveOptionsData(save_format='docx', file_name=docx_name),
    folder=tempCloudFolder,
)
save_response = words_api.save_as(save_request)
docx_href = save_response.save_result.dest_document.href
print(f'Step 2: converted to {docx_href}')
# Step 3. Get number of footnotes
# GetFootnotesRequest addresses a document stored in the cloud by name/folder,
# so it is paired with get_footnotes; the *_online call is documented with its
# own GetFootnotesOnlineRequest type.
get_footnotes_request = asposewordscloud.models.requests.GetFootnotesRequest(
    name=f'{documentName}.docx', folder=tempCloudFolder
)
get_footnotes_response = words_api.get_footnotes(get_footnotes_request)
# Treat a missing list as "no footnotes" rather than failing in len().
# NOTE(review): whether the service returns None or [] here is not shown - confirm.
footnotes_count = len(get_footnotes_response.footnotes.list or [])
print(f'Step 3: found {footnotes_count} footnotes')
# Step 4. Delete footnotes
# Always delete index 0: after each deletion the remaining footnotes are
# presumably re-indexed from 0, so repeating this footnotes_count times removes
# them all. NOTE(review): confirm the re-indexing against the API docs.
for number in range(footnotes_count):
    delete_footnote_request = asposewordscloud.models.requests.DeleteFootnoteRequest(
        name=f'{documentName}.docx', index=0, folder=tempCloudFolder
    )
    words_api.delete_footnote(delete_footnote_request)
    print(f'Step 4: deleted footnote n.{number}')
# Step 5. Convert the footnote-free DOCX to MD in Cloud
md_name = f'{documentName}.md'
save_request = asposewordscloud.models.requests.SaveAsRequest(
    name=f'{documentName}.docx',
    save_options_data=asposewordscloud.SaveOptionsData(save_format='md', file_name=md_name),
    folder=tempCloudFolder,
)
save_response = words_api.save_as(save_request)
md_href = save_response.save_result.dest_document.href
print(f'Step 5: converted to {md_href}')
# Step 6. Download the MD file from Cloud and copy it to the local output path
remote_md_path = f'{tempCloudFolder}/{documentName}.md'
download_response = words_api.download_file(
    asposewordscloud.models.requests.DownloadFileRequest(path=remote_md_path)
)
# The download result is handed to copyfile as the source to copy from
copyfile(download_response, outputFilePath)
print(f'Step 6: downloaded {outputFilePath}')