HTTP request with Glib

author Emmanuel Rodriguez <emmanuel.rodriguez@gmail.com>

Mon, 30 Mar 2009 05:27:32 +0000 (07:27 +0200)

committer Pierre-Luc Beaudoin <pierre-luc@pierlux.com>

Tue, 31 Mar 2009 09:04:20 +0000 (12:04 +0300)
author Emmanuel Rodriguez <emmanuel.rodriguez@gmail.com>
Mon, 30 Mar 2009 05:27:32 +0000 (07:27 +0200)
committer Pierre-Luc Beaudoin <pierre-luc@pierlux.com>
Tue, 31 Mar 2009 09:04:20 +0000 (12:04 +0300)
diff --git a/bindings/perl/Champlain/examples/capitals.pl b/bindings/perl/Champlain/examples/capitals.pl

new file mode 100755 (executable)

index 0000000..e06771a
--- /dev/null
+++ b/bindings/perl/Champlain/examples/capitals.pl
@@ -0,0 +1,256 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use open ':std', ':utf8';
+ 
+use Net::HTTP::NB;
+use IO::Select;
+use Glib qw(TRUE FALSE);
+use URI;
+use Carp;
+use XML::LibXML;
+use Data::Dumper;
+
+exit main();
+
+
+sub main {
+
+       my $loop = Glib::MainLoop->new();
+       
+       my $capitals_url = "http://en.wikipedia.org/wiki/List_of_national_capitals";
+       my $soup = My::Soup->new($capitals_url);
+       
+       my $parser = XML::LibXML->new();
+       $parser->recover_silently(1);
+
+       $soup->do_get(
+               $capitals_url,
+               \&capitals_main_callback,
+               {
+                       loop => $loop,
+                       parser => $parser,
+               }
+       );
+       
+       Glib::Idle->add(sub {
+               $soup->do_get($capitals_url, sub {
+                       my ($soup, $uri, $response, $data) = @_;
+                       printf "Downloaded $uri %s - %s\n", $response->code, $response->message;
+                       
+                       $soup->do_get("/wiki/Madrid", sub {
+                               my ($soup, $uri, $response, $data) = @_;
+                               printf "Downloaded $uri %s - %s\n", $response->code, $response->message;
+                               
+                               my $document = $parser->parse_html_string($response->content);
+                               my $name = $document->getElementById('firstHeading')->textContent;
+                               my ($latitude, $longitude) = (0, 0);
+                               
+                               my ($geo) = $document->findnodes('id("coordinates")//span[@class="geo"]');
+                               if ($geo) {
+#                                      my @nodes = $coord_node->findnodes('//span[@class="latitude"]');
+#                                      print "nodes = @nodes\n";
+#                                      print $coord_node->toString(2);
+#                                      my $latitude = $coord_node->findvalue('//span[@class="latitude"]');
+#                                      my $longitude = $coord_node->findvalue('//span[@class="longitude"]');
+                                       
+#                                      my ($geo) = $coord_node->findnodes('//span[@class="geo"]');
+                                       
+                                       ($latitude, $longitude) = split /\s*;\s*/, $geo->textContent;
+                                       
+#                                      printf "latitude = --$latitude-- = %.4f\n", dms_to_decimal($latitude);
+#                                      printf "longitude = $longitude = %.4f\n", dms_to_decimal($longitude);
+                               }
+                               
+                               printf "$name %.4f, %.4f\n", $latitude, $longitude;
+                               
+                               $loop->quit();
+                       });
+                       
+               });
+               return FALSE;
+       }) if 0;
+       
+       $loop->run();
+       
+       return 0;
+}
+
+
+#
+# Called when the main page with all the capitals is downloaded
+#
+sub capitals_main_callback {
+       my ($soup, $uri, $response, $data) = @_;
+       
+       printf "Downloaded $uri %s - %s\n", $response->code, $response->message;
+       
+       my $document = $data->{parser}->parse_html_string($response->content);
+       my @nodes = $document->findnodes('//table[@class="wikitable sortable"]/tr/td[1]/a');
+       
+       $data->{nodes} = \@nodes;
+       print "Download the first capital\n";
+       download_capital($soup, $data);
+       
+#      foreach my $node (@nodes) {
+#              my $uri = $node->getAttribute('href');
+#              my $name = $node->getAttribute('title');
+#              print "$name -> $uri\n";
+#      }
+#      printf "Found %d\n", scalar @nodes;
+       
+#      $data->{loop}->quit();
+}
+
+
+#
+# Called when the page of a capital is downloaded
+#
+sub capital_callback {
+       my ($soup, $uri, $response, $data) = @_;
+       
+       printf "Downloaded $uri %s - %s\n", $response->code, $response->message;
+       
+       my $document = $data->{parser}->parse_html_string($response->content);
+       my $heading = $document->getElementById('firstHeading');
+       if ($heading) {
+               my $name = $document->getElementById('firstHeading')->textContent;
+               my ($latitude, $longitude) = (0, 0);
+                               
+               my ($geo) = $document->findnodes('id("coordinates")//span[@class="geo"]');
+               if ($geo) {
+                       ($latitude, $longitude) = split /\s*;\s*/, $geo->textContent;
+               }
+               printf "$name %.4f, %.4f\n", $latitude, $longitude;
+       }
+       
+       download_capital($soup, $data);
+}
+
+
+sub download_capital {
+       my ($soup, $data) = @_;
+
+       print "download_capital nodes data = $data $data->{nodes}\n";
+       
+       my $node = shift @{ $data->{nodes} };
+       if (! defined $node) {
+               print "No more capitals to download\n";
+               $data->{loop}->quit();
+               return;
+       }
+       
+       my $uri = $node->getAttribute('href');
+       my $name = $node->getAttribute('title');
+       print "$name -> $uri\n";
+       $soup->do_get($uri, \&capital_callback, $data);
+}
+
+
+sub dms_to_decimal {
+       my ($dms) = @_;
+       
+       my $regexp = qr/
+               (\d+)\x{b0}        # Degrees
+               (\d+)\x{2032}      # Minutes
+               (?:(\d+)\x{2032})? # Seconds
+               ([NSEW])           # Direction
+       /x;
+       my (@fields) = ($dms =~ /$regexp/);
+       
+       return 0 unless @fields;
+       
+       my ($degrees, $minutes, $seconds, $direction) = map { $_ || 0 } @fields;
+       my $decimal = $degrees + ($minutes * 60 + $seconds)/3600;
+       return ($direction eq 'N' or $direction eq 'E') ? $decimal : -$decimal;
+}
+
+
+package My::Soup;
+
+use Glib qw(TRUE FALSE);
+use Data::Dumper;
+use HTTP::Response;
+
+
+sub new {
+       my $class = shift;
+       my ($uri) = @_;
+       
+       my $self = bless {}, ref $class || $class;
+
+       $uri = to_uri($uri);
+       my $http = Net::HTTP::NB->new(
+               Host      => $uri->host,
+               PeerPort  => $uri->port,
+               KeepAlive => 1,
+       );
+       $self->http($http);
+       
+       return $self;
+}
+
+
+sub http {
+       my $self = shift;
+       if (@_) {
+               $self->{http} = $_[0];
+       }
+       return $self->{http};
+}
+
+
+sub to_uri {
+       my ($uri) = @_;
+       return $uri if ref($uri) && $uri->isa('URI');
+       return URI->new($uri);
+}
+
+
+#
+# Performs an HTTP GET request asynchronously.
+#
+sub do_get {
+       my $self = shift;
+       my ($uri, $callback, $data) = @_;
+       $uri = to_uri($uri);
+       
+       # Note this is not asynchronous
+       $self->http->write_request(GET => $uri->path_query);
+       
+       
+       my ($code, $message, %headers);
+       my $content = "";
+       Glib::IO->add_watch($self->http->fileno, 'in', sub {
+               
+               # Read the headers
+               if (!$code) {
+                       ($code, $message, %headers) = $self->http->read_response_headers();
+                       return TRUE;
+               }
+               
+               # Read the content              
+               my $line;
+               my $n = $self->http->read_entity_body($line, 1024);
+               $content .= $line;
+               
+               if ($self->http->keep_alive) {
+                       # In the case where the HTTP request has keep-alive we need to see if the
+                       # content has all arrived as read_entity_body() will not tell when the end
+                       # of the content has been reached.
+                       return TRUE unless length($content) == $headers{'Content-Length'};
+               }
+               elsif ($n) {
+                       return TRUE;
+               }
+               
+               # End of the document
+               my $response = HTTP::Response->new($code, $message, [%headers], $content);
+               $callback->($self, $uri, $response, $data);
+               return FALSE;
+       });
+}
+
+# A true value
+1;
author	Emmanuel Rodriguez <emmanuel.rodriguez@gmail.com>
	Mon, 30 Mar 2009 05:27:32 +0000 (07:27 +0200)
committer	Pierre-Luc Beaudoin <pierre-luc@pierlux.com>
	Tue, 31 Mar 2009 09:04:20 +0000 (12:04 +0300)