Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3010,6 +3010,64 @@ void get_assetsCountsWithNoParent(TestNamespace ns) {
assertNotNull(counts);
}

@Test
void get_assetsCountsPaginationSlicesAndPreservesTotal(TestNamespace ns) throws Exception {
OpenMetadataClient client = SdkClients.adminClient();

CreateGlossary createGlossary =
new CreateGlossary()
.withName(ns.prefix("asset_count_pagination_glossary"))
.withDescription("Glossary for asset count pagination test");
Glossary glossary = client.glossaries().create(createGlossary);

int termCount = 5;
java.util.List<String> createdTermFqns = new java.util.ArrayList<>();
for (int i = 0; i < termCount; i++) {
CreateGlossaryTerm req =
new CreateGlossaryTerm()
.withName(ns.prefix("count_pagination_term_" + i))
.withGlossary(glossary.getFullyQualifiedName())
.withDescription("Pagination term " + i);
createdTermFqns.add(createEntity(req).getFullyQualifiedName());
}

String fullCounts = getAssetCounts(client, glossary.getFullyQualifiedName());
ObjectMapper mapper = new ObjectMapper();
JsonNode unpaged = mapper.readTree(fullCounts);
for (String fqn : createdTermFqns) {
assertTrue(unpaged.has(fqn), "Unpaged response should contain term " + fqn);
}

String firstPageBody = getAssetCountsWithPaging(client, glossary.getFullyQualifiedName(), 2, 0);
JsonNode firstPage = mapper.readTree(firstPageBody);
assertEquals(
2,
firstPage.size(),
"First page should contain at most `limit` glossary terms when limit=2");

String secondPageBody =
getAssetCountsWithPaging(client, glossary.getFullyQualifiedName(), 2, 2);
JsonNode secondPage = mapper.readTree(secondPageBody);
assertTrue(
secondPage.size() <= 2,
"Second page should contain at most `limit` glossary terms when limit=2");

java.util.Set<String> firstPageKeys = new java.util.HashSet<>();
firstPage.fieldNames().forEachRemaining(firstPageKeys::add);
secondPage
.fieldNames()
.forEachRemaining(
key ->
assertFalse(
firstPageKeys.contains(key),
"Pages should not overlap: " + key + " appeared on both pages"));

String beyondEndBody =
getAssetCountsWithPaging(client, glossary.getFullyQualifiedName(), 5, 10000);
JsonNode beyondEnd = mapper.readTree(beyondEndBody);
assertEquals(0, beyondEnd.size(), "Offset past the end should return an empty asset-count map");
}
Comment on lines +3013 to +3069

@Test
void get_termAssetsById(TestNamespace ns) {
OpenMetadataClient client = SdkClients.adminClient();
Expand Down Expand Up @@ -3176,6 +3234,21 @@ private String getAssetCounts(OpenMetadataClient client, String parent) {
HttpMethod.GET, "/v1/glossaryTerms/assets/counts", null, optionsBuilder.build());
}

private String getAssetCountsWithPaging(
OpenMetadataClient client, String parent, int limit, int offset) {
RequestOptions.Builder optionsBuilder =
RequestOptions.builder()
.queryParam("limit", String.valueOf(limit))
.queryParam("offset", String.valueOf(offset));
if (parent != null) {
optionsBuilder.queryParam("parent", parent);
}
return client
.getHttpClient()
.executeForString(
HttpMethod.GET, "/v1/glossaryTerms/assets/counts", null, optionsBuilder.build());
}

private String getTermAssetsById(OpenMetadataClient client, String id) {
RequestOptions.Builder optionsBuilder = RequestOptions.builder();
optionsBuilder.queryParam("limit", "10");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -116,10 +117,12 @@
import org.openmetadata.service.resources.glossary.GlossaryTermResource;
import org.openmetadata.service.resources.settings.SettingsCache;
import org.openmetadata.service.search.DefaultInheritedFieldEntitySearch;
import org.openmetadata.service.search.EntityBuilderConstant;
import org.openmetadata.service.search.InheritedFieldEntitySearch;
import org.openmetadata.service.search.InheritedFieldEntitySearch.InheritedFieldQuery;
import org.openmetadata.service.search.InheritedFieldEntitySearch.InheritedFieldResult;
import org.openmetadata.service.search.PropagationDescriptor;
import org.openmetadata.service.search.QueryFilterBuilder;
import org.openmetadata.service.security.AuthorizationException;
import org.openmetadata.service.security.policyevaluator.PolicyConditionUpdater;
import org.openmetadata.service.util.EntityUtil;
Expand Down Expand Up @@ -191,43 +194,66 @@ public ResultList<EntityReference> getGlossaryTermAssetsByName(
}

public Map<String, Integer> getAllGlossaryTermsWithAssetsCount(String parent) {
return getGlossaryTermsAssetsCount(parent, null, null).counts();
}

public GlossaryTermAssetCountResult getGlossaryTermsAssetsCount(
String parent, Integer limit, Integer offset) {
if (inheritedFieldEntitySearch == null) {
LOG.warn("Search unavailable for glossary term asset counts");
return new HashMap<>();
return new GlossaryTermAssetCountResult(new LinkedHashMap<>(), 0);
}

List<String> fqns;
if (parent != null && !parent.isEmpty()) {
fqns =
daoCollection.glossaryTermDAO().getNestedTerms(parent).stream()
.map(json -> JsonUtils.readTree(json).path("fullyQualifiedName").asText())
.collect(Collectors.toList());
} else {
fqns =
listAll(getFields("fullyQualifiedName"), new ListFilter(null)).stream()
.map(GlossaryTerm::getFullyQualifiedName)
.collect(Collectors.toList());
}
List<String> orderedFqns = listGlossaryTermFqns(parent);
int total = orderedFqns.size();
List<String> pageFqns = sliceFqns(orderedFqns, limit, offset);

Map<String, Integer> glossaryTermAssetCounts = new HashMap<>();
if (pageFqns.isEmpty()) {
return new GlossaryTermAssetCountResult(new LinkedHashMap<>(), total);
}

for (String fqn : fqns) {
InheritedFieldQuery query = InheritedFieldQuery.forGlossaryTerm(fqn, 0, 0);
Map<String, Integer> aggregatedCounts = fetchAssetCountsByGlossaryTermFqn();
LinkedHashMap<String, Integer> pageCounts = new LinkedHashMap<>();
for (String fqn : pageFqns) {
pageCounts.put(fqn, aggregatedCounts.getOrDefault(fqn, 0));
}
return new GlossaryTermAssetCountResult(pageCounts, total);
}

Integer count =
inheritedFieldEntitySearch.getCountForField(
query,
() -> {
LOG.warn("Search fallback for glossary term {} asset count. Returning 0.", fqn);
return 0;
});
private List<String> listGlossaryTermFqns(String parent) {
if (parent != null && !parent.isEmpty()) {
return daoCollection.glossaryTermDAO().getNestedTerms(parent).stream()
.map(json -> JsonUtils.readTree(json).path("fullyQualifiedName").asText())
.sorted()
.collect(Collectors.toList());
}
return listAll(getFields("fullyQualifiedName"), new ListFilter(null)).stream()
.map(GlossaryTerm::getFullyQualifiedName)
.sorted()
.collect(Collectors.toList());
Comment on lines +230 to +233
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Quality: listGlossaryTermFqns loads all entities into memory on every call

When parent is null, listGlossaryTermFqns calls listAll(...) which loads every GlossaryTerm entity in the system into memory just to extract FQNs. For large deployments with thousands of glossary terms, this creates GC pressure on every paginated request. Consider using a lighter DAO query that returns only FQNs (a projection), or caching the sorted FQN list with a short TTL.

Was this helpful? React with 👍 / 👎 | Reply gitar fix to apply this suggestion

}

glossaryTermAssetCounts.put(fqn, count);
private static List<String> sliceFqns(List<String> fqns, Integer limit, Integer offset) {
if (limit == null && offset == null) {
return fqns;
}
int total = fqns.size();
int from = offset == null ? 0 : Math.max(0, offset);
if (from >= total) {
return Collections.emptyList();
}
int to = limit == null ? total : Math.min(total, from + Math.max(0, limit));
return fqns.subList(from, to);
}

return glossaryTermAssetCounts;
private Map<String, Integer> fetchAssetCountsByGlossaryTermFqn() {
String queryFilter = QueryFilterBuilder.buildGenericAssetsCountFilter(TAGS_FQN, true);
return inheritedFieldEntitySearch.getAggregatedCountsByField(
TAGS_FQN, queryFilter, EntityBuilderConstant.MAX_AGGREGATE_SIZE);
}
Comment on lines +249 to 253
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance: Global aggregation fetched on every paginated call ignores parent filter

Every call to getGlossaryTermsAssetsCount invokes fetchAssetCountsByGlossaryTermFqn() which runs an unfiltered aggregation on tags.tagFQN across ALL indexed entities, bounded only by MAX_AGGREGATE_SIZE (10,000 buckets). This has two implications:

  1. Pagination doesn't reduce ES work — requesting limit=2 still triggers the full aggregation. The perf win comes from replacing N searches with 1, but the single aggregation is still global.
  2. Silent truncation at 10k buckets — the aggregation includes ALL tag FQNs (classification tags + glossary terms). On large deployments with >10k distinct tags, some glossary term FQNs may be missing from the aggregation result, causing them to silently report 0 assets.

Consider scoping the aggregation to only the FQNs in pageFqns (e.g., via a terms filter on the tag FQN field) or at least filtering to the parent prefix. This would both reduce the bucket count and make paginated calls genuinely lighter.

Suggested fix:

private Map<String, Integer> fetchAssetCountsByGlossaryTermFqn(List<String> fqns) {
  // Scope the aggregation to only the requested FQNs
  String queryFilter = QueryFilterBuilder.buildFilteredAssetsCountFilter(
      TAGS_FQN, fqns, true);
  return inheritedFieldEntitySearch.getAggregatedCountsByField(
      TAGS_FQN, queryFilter, fqns.size());
}

Was this helpful? React with 👍 / 👎 | Reply gitar fix to apply this suggestion

Comment on lines +249 to 253

public record GlossaryTermAssetCountResult(LinkedHashMap<String, Integer> counts, int total) {}

public Map<String, Integer> getRelationTypeUsageCounts() {
Map<String, Integer> usageCounts = new HashMap<>();
List<CollectionDAO.RelationTypeUsageCount> counts =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ public class GlossaryTermResource extends EntityResource<GlossaryTerm, GlossaryT
public static final String COLLECTION_PATH = "/v1/glossaryTerms/";
static final String FIELDS =
"children,relatedTerms,reviewers,owners,tags,usageCount,domains,extension,childrenCount";
private static final String TOTAL_COUNT_HEADER = "X-Total-Count";

@Override
public GlossaryTerm addHref(UriInfo uriInfo, GlossaryTerm term) {
Expand Down Expand Up @@ -425,7 +426,10 @@ public Response getRelationTypeUsageCounts(
operationId = "getAllGlossaryTermsWithAssetsCount",
summary = "Get all glossary terms with their asset counts",
description =
"Get a map of glossary term fully qualified names to their asset counts using search aggregation.",
"Get a map of glossary term fully qualified names to their asset counts using search aggregation. "
+ "Supports pagination via `limit` and `offset` query parameters. When pagination is used, "
+ "the total number of glossary terms (before slicing) is returned in the `X-Total-Count` "
+ "response header so callers can drive paged loading.",
responses = {
@ApiResponse(
responseCode = "200",
Expand All @@ -440,12 +444,24 @@ public Response getAllGlossaryTermsWithAssetsCount(
"Filter by parent glossary or glossary term FQN. "
+ "When provided, only returns asset counts for children whose FQN starts with this value.")
@QueryParam("parent")
String parent) {
String parent,
@Parameter(
description =
"Maximum number of glossary terms to return. When omitted, all matching terms are returned.")
@QueryParam("limit")
@Min(value = 0, message = "limit must be >= 0")
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Edge Case: limit=0 returns empty result — may confuse callers expecting metadata only

The @Min(0) annotation on the limit parameter allows limit=0. In sliceFqns, this produces an empty sublist, and because pageFqns.isEmpty() is true, the method returns early with an empty counts map (but a valid total). While technically correct, limit=0 is an unusual API contract — some callers might use it expecting to fetch only the total count. If that's intentional, it's fine as-is. If not, consider changing @Min to 1.

Was this helpful? React with 👍 / 👎 | Reply gitar fix to apply this suggestion

@Max(value = 10000, message = "limit must be <= 10000")
Integer limit,
@Parameter(description = "Offset into the ordered list of glossary terms to start from.")
@QueryParam("offset")
@Min(value = 0, message = "offset must be >= 0")
Integer offset) {
OperationContext operationContext =
new OperationContext(entityType, MetadataOperation.VIEW_ALL);
authorizer.authorize(securityContext, operationContext, getResourceContext());
java.util.Map<String, Integer> result = repository.getAllGlossaryTermsWithAssetsCount(parent);
return Response.ok(result).build();
GlossaryTermRepository.GlossaryTermAssetCountResult result =
repository.getGlossaryTermsAssetsCount(parent, limit, offset);
return Response.ok(result.counts()).header(TOTAL_COUNT_HEADER, result.total()).build();
Comment on lines 428 to +464
}

@GET
Expand Down
36 changes: 34 additions & 2 deletions openmetadata-ui/src/main/resources/ui/src/rest/glossaryAPI.ts
Original file line number Diff line number Diff line change
Expand Up @@ -322,17 +322,49 @@ export const getGlossaryTermAssets = async (
return response.data;
};

export type GlossaryTermAssetCountsParams = {
parent?: string;
limit?: number;
offset?: number;
};

export type GlossaryTermAssetCountsResponse = {
data: Record<string, number>;
total: number;
};

export const getGlossaryTermsAssetCounts = async (
parent?: string
parentOrParams?: string | GlossaryTermAssetCountsParams
): Promise<Record<string, number>> => {
const params: GlossaryTermAssetCountsParams =
typeof parentOrParams === 'string'
? { parent: parentOrParams }
: parentOrParams ?? {};
const response = await APIClient.get<Record<string, number>>(
'/glossaryTerms/assets/counts',
{ params: parent ? { parent } : undefined }
{ params }
);

return response.data;
};

export const getGlossaryTermsAssetCountsPage = async (
params: GlossaryTermAssetCountsParams
): Promise<GlossaryTermAssetCountsResponse> => {
const response = await APIClient.get<Record<string, number>>(
'/glossaryTerms/assets/counts',
{ params }
);
const totalHeader =
response.headers?.['x-total-count'] ?? response.headers?.['X-Total-Count'];
const parsedTotal = totalHeader ? Number(totalHeader) : NaN;
const total = Number.isFinite(parsedTotal)
? parsedTotal
: Object.keys(response.data ?? {}).length;

return { data: response.data ?? {}, total };
};

export const searchGlossaryTerms = async (search: string, page = 1) => {
const apiUrl = `/search/query?q=${search ?? ''}`;

Expand Down
Loading