Aspose.Slides Cloud SDK for Python Returns the Wrong Number of Shapes

Hello!
I want to extract all the text on a slide. To do this, I am going through each shape on the slide and checking for text. However, some shapes are groups shapes. For these, I need to use the sub_shape parameter. But what if a group shape is nested within a group shape? For such scenarios, I made a recursive function that either grabs the text or dives deeper into the inner group shape.

The function seems to mostly work, however there are weird edge cases where Aspose returns the wrong number of nested shapes. Specifically, if I call aspose_slides_api.get_shapes() on a triply nested group_shape, an incorrect, partial set of shape_links is returned.

Below is my code and attached is the file you can use if you would like to replicate this bug.

from asposeslidescloud.apis.slides_api import SlidesApi

ASPOSE_APP_SID = "side"
ASPOSE_APP_KEY = "key"
STORAGE = "Dev Internal Storage"


slides_api = SlidesApi(app_sid=ASPOSE_APP_SID, app_key=ASPOSE_APP_KEY)


def aspose_slides_upload_file(slides_api, aspose_file_name: str, file_bytes: bytes):
    try:
        slides_api.upload_file(
            path=aspose_file_name, file=file_bytes, storage_name=STORAGE
        )
    except Exception as e:
        print("Exception in aspose_slides_upload_file: " + str(e))
        raise e


def aspose_slides_get_slides(slides_api, aspose_file_name: str, aspose_folder: str):
    try:
        return slides_api.get_slides(
            name=aspose_file_name, folder=aspose_folder, storage=STORAGE
        )
    except Exception as e:
        print("Exception in aspose_slides_get_slides: " + str(e))
        raise e


def aspose_slides_get_shapes(
    slides_api,
    aspose_file_name: str,
    aspose_folder: str,
    slide_index: int,
    sub_shape: str = None,
):
    try:
        return slides_api.get_shapes(
            name=aspose_file_name,
            folder=aspose_folder,
            storage=STORAGE,
            slide_index=slide_index,
            sub_shape=sub_shape,
        )
    except Exception as e:
        print("Exception in aspose_slides_get_shapes: " + str(e))
        raise e


def aspose_slides_get_shape_info(
    slides_api,
    aspose_file_name: str,
    aspose_folder: str,
    slide_index: int,
    shape_index: int,
    sub_shape: str = None,
):
    try:
        return slides_api.get_shape(
            name=aspose_file_name,
            folder=aspose_folder,
            storage=STORAGE,
            slide_index=slide_index,
            shape_index=shape_index,
            sub_shape=sub_shape,
        )
    except Exception as e:
        print("Exception in aspose_slides_get_shape_info: " + str(e))
        raise e


def aspose_slides_get_text_from_group_shapes(
    slides_api,
    aspose_file_name: str,
    slide_index: int,
    path_for_get_shapes: str,
    path_for_get_details: str,
    outermost_group_shape_idx: int,
) -> list:
    print()
    shapes = aspose_slides_get_shapes(
        slides_api=slides_api,
        aspose_file_name=aspose_file_name,
        aspose_folder="",
        slide_index=slide_index,
        sub_shape=path_for_get_shapes,
    )
    # This is where things are going wrong.
    # The wrong number of shapes gets returned for shapes nested within group shapes
    print(f"number of shapes: {len(shapes.shapes_links)}")

    text_list = []
    for shape in shapes.shapes_links:
        print(f"shape_index: {shape.shape_index}")

        if not path_for_get_details:
            details_sub_path = shape.shape_index
        else:
            details_sub_path = f"{path_for_get_details}/shapes/{shape.shape_index}"

        shape_details = aspose_slides_get_shape_info(
            slides_api=slides_api,
            aspose_file_name=aspose_file_name,
            aspose_folder="",
            shape_index=outermost_group_shape_idx,
            slide_index=slide_index,
            sub_shape=details_sub_path,
        )
        print(f"shape_name: {shape_details.name}")

        if hasattr(shape_details, "text"):
            text_list.append(shape_details.text)

        if shape_details.type == "GroupShape" and shape_details.shapes:
            print(f"group shape found: {shape_details.name}. Diving deeper...")
            new_words = aspose_slides_get_text_from_group_shapes(
                slides_api=slides_api,
                aspose_file_name=aspose_file_name,
                slide_index=slide_index,
                path_for_get_shapes=f"{outermost_group_shape_idx}/shapes/{shape.shape_index}",
                path_for_get_details=details_sub_path,
                outermost_group_shape_idx=outermost_group_shape_idx,
            )
            text_list.extend(new_words)

    return text_list


def main():
    aspose_file_name = "nested_test.pptx"
    with open("input/nested_test.pptx", "rb") as f:
        file_bytes = f.read()

    aspose_slides_upload_file(
        slides_api=slides_api, aspose_file_name=aspose_file_name, file_bytes=file_bytes
    )

    slides = aspose_slides_get_slides(
        slides_api=slides_api, aspose_file_name=aspose_file_name, aspose_folder=""
    )

    text_list = []
    for slide_index, slide in enumerate(slides.slide_list, start=1):
        print()
        print(f"new slide: {slide_index}")
        # Get all shapes on the slide
        shapes = aspose_slides_get_shapes(
            slides_api=slides_api,
            aspose_file_name=aspose_file_name,
            aspose_folder="",
            slide_index=slide_index,
        )
        for shape in shapes.shapes_links:
            shape_details = aspose_slides_get_shape_info(
                slides_api=slides_api,
                aspose_file_name=aspose_file_name,
                aspose_folder="",
                shape_index=shape.shape_index,
                slide_index=slide_index,
            )
            print(f"number of shapes: {len(shapes.shapes_links)}")
            print(f"shape_index: {shape.shape_index}")
            print(f"shape_name: {shape_details.name}")

            if hasattr(shape_details, "text"):
                text_list.append(shape_details.text)

            if shape_details.type == "GroupShape" and shape_details.shapes:
                # Group shapes must be recursively checked for nested text
                print(f"group shape found: {shape_details.name}. Diving deeper...")

                nested_text = aspose_slides_get_text_from_group_shapes(
                    slides_api=slides_api,
                    aspose_file_name=aspose_file_name,
                    slide_index=slide_index,
                    path_for_get_shapes=shape.shape_index,
                    path_for_get_details="",
                    outermost_group_shape_idx=shape.shape_index,
                )


if __name__ == "__main__":
    main()

Here is the output I get when running this code:

new slide: 1
number of shapes: 1
shape_index: 1
shape_name: Group 5
group shape found: Group 5. Diving deeper...

number of shapes: 2
shape_index: 1
shape_name: TextBox 6
shape_index: 2
shape_name: Group 7
group shape found: Group 7. Diving deeper...

number of shapes: 2
shape_index: 1
shape_name: Smiley Face 8
shape_index: 2
shape_name: Group 9
group shape found: Group 9. Diving deeper...

number of shapes: 2 -> This is wrong. There are 3 shapes inside this group.
shape_index: 1
shape_name: Picture 10
shape_index: 2
shape_name: TextBox 11

nested_test.pptx.zip (439.1 KB)

@jamieMacabacus

It seems you are encountering an issue with the Aspose API where it returns an incorrect number of shapes when dealing with nested group shapes. This can happen in scenarios where you have multiple levels of group shapes, and the API may not accurately reflect the total count of shapes within these nested groups.

To address this, you can consider the following steps:

  1. Recursive Function: Ensure that your recursive function correctly traverses all levels of nested group shapes. You should call get_shapes() for each group shape and check if it contains any sub-shapes. If it does, you should recursively call your function on those sub-shapes.

  2. Debugging: Add additional logging to your function to track the shape indices and the number of shapes returned at each level. This can help you identify where the discrepancy occurs.

  3. API Limitations: Be aware that there may be limitations or bugs in the API that could affect the shape count. If you consistently receive incorrect counts for specific presentations, it may be worth reporting this to Aspose support with a sample file for further investigation.

  4. Testing with Different Files: Test your function with various PowerPoint files to see if the issue is consistent across different presentations or specific to certain files.

Here’s a simplified version of how your recursive function might look:

def extract_text_from_shapes(slides_api, file_name, slide_index, group_shape_index):
    shapes = slides_api.get_shapes(file_name, slide_index, sub_shape=group_shape_index)
    text_list = []

    for shape in shapes.shapes_links:
        shape_details = slides_api.get_shape(file_name, slide_index, shape.shape_index, sub_shape=group_shape_index)
        if hasattr(shape_details, "text"):
            text_list.append(shape_details.text)

        if shape_details.type == "GroupShape":
            nested_text = extract_text_from_shapes(slides_api, file_name, slide_index, shape.shape_index)
            text_list.extend(nested_text)

    return text_list

This function retrieves shapes from a specified group shape and checks for nested group shapes recursively. Make sure to adjust the parameters as needed based on your implementation.

If the problem persists, consider reaching out to Aspose support with your findings and the specific presentation file you are using, as they may provide insights or fixes for the API behavior.

If you have any further questions or need additional assistance, feel free to ask!

The code you posted does not work for shapes within group shapes nested within group shapes.

@jamieMacabacus,
I’ve reproduced the problem with the number of shapes when running your code example. We apologize for any inconvenience caused.

We have opened the following new ticket(s) in our internal issue tracking system and will deliver their fixes according to the terms mentioned in Free Support Policies.

Issue ID(s): SLIDESCLOUD-2019

You can obtain Paid Support Services if you need support on a priority basis, along with the direct access to our Paid Support management team.

Please note that you can also use the get_slide_text_items method to extract all the text from a slide.

storage_name = "Dev Internal Storage"
file_name = "nested_test.pptx"
slide_index = 1

text_items = slides_api.get_slide_text_items(file_name, slide_index, None, None, None, storage_name)

for item in text_items.items:
    print(item.text)

https://docs.aspose.cloud/slides/read-text-items/