Search for an XML Node using libxml2 in C

XML is a widely used format for storing or transmitting data over the internet. Here we’ll see how to search for an XML node in a file. We’ll use this XML file as an example.

<?xml version="1.0"?>
<catalog>
   <book id="bk101">
      <author>Gambardella, Matthew</author>
      <title>XML Developer's Guide</title>
      <genre>Computer</genre>
      <price>44.95</price>
      <publish_date>2000-10-01</publish_date>
      <description>An in-depth look at creating applications
      with XML.</description>
   </book>
   <book id="bk102">
      <author>Ralls, Kim</author>
      <title>Midnight Rain</title>
      <genre>Fantasy</genre>
      <price>5.95</price>
      <publish_date>2000-12-16</publish_date>
      <description>A former architect battles corporate zombies,
      an evil sorceress, and her own childhood to become queen
      of the world.</description>
   </book>
   <book id="bk103">
      <author>Corets, Eva</author>
      <title>Maeve Ascendant</title>
      <genre>Fantasy</genre>
      <price>5.95</price>
      <publish_date>2000-11-17</publish_date>
      <description>After the collapse of a nanotechnology
      society in England, the young survivors lay the
      foundation for a new society.</description>
   </book>
</catalog>

Search XML Node by its Name

First we’ll see how to search for a node based on its name. In the above example XML file, the node names are ‘catalog‘, ‘book‘, ‘author‘ etc.

#include <stdio.h>
#include <libxml/parser.h>

/*gcc test.c `xml2-config --cflags --libs`*/

/*
 * Depth-first search for the first element whose name is node_name.
 *
 * node      - first node of the sibling list to search (may be NULL)
 * node_name - element name to look for
 *
 * Each node is tested first, then its children, then its next sibling.
 * Returns the first matching element node, or NULL when nothing matches.
 */
xmlNode * find_node(xmlNode * node, const char * node_name) {

  for (; node != NULL; node = node->next) {
    /* node->name is a const xmlChar *; xmlStrEqual() compares xmlChar
       strings directly, tolerates NULL and needs no <string.h>. */
    if ((node->type == XML_ELEMENT_NODE)
        && xmlStrEqual(node->name, (const xmlChar *) node_name)) {
      return node;
    }

    /* Search the subtree below this node before moving to its sibling. */
    xmlNode * result = find_node(node->children, node_name);
    if (result != NULL) return result;
  }

  return NULL;
}

/*
 * Parses input.xml, reads a node name from standard input and reports
 * whether an element with that name exists in the document.
 *
 * Returns 0 when the search was performed (found or not), and 1 when the
 * file could not be parsed or no node name could be read.
 */
int main(void) {
  xmlDoc *doc = NULL;
  xmlNode *root_element = NULL;
  xmlNode *result;

  char node_name[256];

  doc = xmlReadFile("input.xml", NULL, 0);

  if (doc == NULL) {
    printf("Could not parse the XML file.\n");
    return 1;
  }

  printf("Node to search: ");
  /* Limit the read to 255 characters so the terminating NUL always fits
     in node_name; an unbounded %s could overflow the buffer. */
  if (scanf("%255s", node_name) != 1) {
    printf("Could not read the node name.\n");
    xmlFreeDoc(doc);
    xmlCleanupParser();
    return 1;
  }

  root_element = xmlDocGetRootElement(doc);

  result = find_node(root_element, node_name);

  if (result) {
    printf ("Node '%s' is found in the XML.\n", node_name);
  } else {
    printf ("Node '%s' is not found in the XML.\n", node_name);
  }

  xmlFreeDoc(doc);
  xmlCleanupParser();

  return 0;
}

This program takes a node name as input. It then calls the find_node() function to check whether a node with that name exists. If find_node() finds a match, it returns a pointer to that node; otherwise, it returns NULL. To compile the code, the libxml2 development library is required. If you don’t have it installed, install it first.

To compile the program, run this command.

gcc test.c `xml2-config --cflags --libs` -o test

Here is the output of the program.

search xml node by its name

From the output, we can see that searching with node names ‘catalog‘ and ‘book‘ succeeded but failed for node name ‘pen‘.

Search XML Node by its Property Value

In our example XML file, there are three ‘book‘ nodes. If we want to search for a specific ‘book‘ entry, we can search for the ‘id‘ property. The find_node() function will search based on the ‘id‘ property.

#include <stdio.h>
#include <libxml/parser.h>

/*gcc test.c `xml2-config --cflags --libs`*/

/*
 * Depth-first search for the first element whose "id" property equals
 * prop_val.
 *
 * node     - first node of the sibling list to search (may be NULL)
 * prop_val - value the "id" property must have
 *
 * Each node is tested first, then its children, then its next sibling.
 * Returns the first matching element node, or NULL when nothing matches.
 */
xmlNode * find_node(xmlNode * node, const char * prop_val) {

  for (; node != NULL; node = node->next) {
    if (node->type == XML_ELEMENT_NODE) {
      /* xmlGetProp() returns a newly allocated copy (or NULL when the
         property is absent); fetch it once and release it afterwards. */
      xmlChar * id = xmlGetProp(node, (const xmlChar *) "id");
      int matched = 0;

      if (id != NULL) {
        matched = xmlStrEqual(id, (const xmlChar *) prop_val);
        xmlFree(id);
      }

      if (matched) return node;
    }

    /* Search the subtree below this node before moving to its sibling. */
    xmlNode * result = find_node(node->children, prop_val);
    if (result != NULL) return result;
  }

  return NULL;
}

/*
 * Parses input.xml, reads an id value from standard input and reports
 * whether an element with that "id" property exists in the document.
 *
 * Returns 0 when the search was performed (found or not), and 1 when the
 * file could not be parsed or no id value could be read.
 */
int main(void) {
  xmlDoc *doc = NULL;
  xmlNode *root_element = NULL;
  xmlNode *result;

  char prop_val[256];

  doc = xmlReadFile("input.xml", NULL, 0);

  if (doc == NULL) {
    printf("Could not parse the XML file.\n");
    return 1;
  }

  printf("Enter id property: ");
  /* Limit the read to 255 characters so the terminating NUL always fits
     in prop_val; an unbounded %s could overflow the buffer. */
  if (scanf("%255s", prop_val) != 1) {
    printf("Could not read the id property.\n");
    xmlFreeDoc(doc);
    xmlCleanupParser();
    return 1;
  }

  root_element = xmlDocGetRootElement(doc);

  result = find_node(root_element, prop_val);

  if (result) {
    printf ("Node with id '%s' is found in the XML.\n", prop_val);
  } else {
    printf ("Node with id '%s' is not found in the XML.\n", prop_val);
  }

  xmlFreeDoc(doc);
  xmlCleanupParser();

  return 0;
}

Here is the output of the program.

Search XML node by property value

From the output, we can see that the search for book id ‘bk102‘ was successful, but the search for ‘bk100‘ failed.

The property name here is ‘id‘, but it could be different in other XML files. We can slightly modify our find_node() function to also take the property name as input.

/*
 * Depth-first search for the first element whose property prop_name
 * has the value prop_val.
 *
 * node      - first node of the sibling list to search (may be NULL)
 * prop_name - name of the property to inspect
 * prop_val  - value that property must have
 *
 * Each node is tested first, then its children, then its next sibling.
 * Returns the first matching element node, or NULL when nothing matches.
 */
xmlNode * find_node(xmlNode * node, const char * prop_name, const char * prop_val) {

  for (; node != NULL; node = node->next) {
    if (node->type == XML_ELEMENT_NODE) {
      /* xmlGetProp() returns a newly allocated copy (or NULL when the
         property is absent); fetch it once and release it afterwards. */
      xmlChar * value = xmlGetProp(node, (const xmlChar *) prop_name);
      int matched = 0;

      if (value != NULL) {
        matched = xmlStrEqual(value, (const xmlChar *) prop_val);
        xmlFree(value);
      }

      if (matched) return node;
    }

    /* Search the subtree below this node before moving to its sibling. */
    xmlNode * result = find_node(node->children, prop_name, prop_val);
    if (result != NULL) return result;
  }

  return NULL;
}

Search XML Node by Content

We can also search by content, that is, the text that lies between an element’s start tag and end tag.

#include <stdio.h>
#include <libxml/parser.h>

/*gcc test.c `xml2-config --cflags --libs`*/

/*
 * Depth-first search for the first element whose text content equals
 * content.
 *
 * node    - first node of the sibling list to search (may be NULL)
 * content - text the element's content must match exactly
 *
 * Each node is tested first, then its children, then its next sibling.
 * Returns the first matching element node, or NULL when nothing matches.
 */
xmlNode * find_node(xmlNode * node, const char * content) {

  for (; node != NULL; node = node->next) {
    if (node->type == XML_ELEMENT_NODE) {
      /* xmlNodeGetContent() returns a newly allocated string, or NULL;
         guard against NULL and release the copy once compared. */
      xmlChar * text = xmlNodeGetContent(node);
      int matched = 0;

      if (text != NULL) {
        matched = xmlStrEqual(text, (const xmlChar *) content);
        xmlFree(text);
      }

      if (matched) return node;
    }

    /* Search the subtree below this node before moving to its sibling. */
    xmlNode * result = find_node(node->children, content);
    if (result != NULL) return result;
  }

  return NULL;
}

/*
 * Parses input.xml, reads one line of text from standard input and
 * reports whether an element with exactly that content exists.
 *
 * Returns 0 when the search was performed (found or not), and 1 when the
 * file could not be parsed or no input line could be read.
 */
int main(void) {
  xmlDoc *doc = NULL;
  xmlNode *root_element = NULL;
  xmlNode *result;

  char content[256];
  char *cursor;

  doc = xmlReadFile("input.xml", NULL, 0);

  if (doc == NULL) {
    printf("Could not parse the XML file.\n");
    return 1;
  }

  printf("Enter content: ");
  /* fgets() never writes past the buffer, unlike gets(), which has no
     length limit and was removed from the language in C11. */
  if (fgets(content, sizeof content, stdin) == NULL) {
    printf("Could not read the content.\n");
    xmlFreeDoc(doc);
    xmlCleanupParser();
    return 1;
  }

  /* fgets() keeps the trailing newline; cut the string there so the
     comparison sees only the text that was typed. */
  for (cursor = content; *cursor != '\0'; cursor++) {
    if (*cursor == '\n') {
      *cursor = '\0';
      break;
    }
  }

  root_element = xmlDocGetRootElement(doc);

  result = find_node(root_element, content);

  if (result) {
    printf ("Node with content '%s' is found in the XML.\n", content);
  } else {
    printf ("Node with content '%s' is not found in the XML.\n", content);
  }

  xmlFreeDoc(doc);
  xmlCleanupParser();

  return 0;
}

Here is the output.

Search xml node by content

It could find a node with content ‘Gambardella, Matthew‘ but failed for ‘Mark Twain‘.

Author: Srikanta

I write here to help the readers learn and understand computer programming, algorithms, networking, OS concepts etc. in a simple way. I have 20 years of working experience in computer networking and industrial automation.


If you also want to contribute, click here.

Leave a Reply

Your email address will not be published. Required fields are marked *

4
0
1
4
0
0