Unicode

From AdaCommons

Jump to: navigation, search

By default, Ada 95 doesn't offer any special support for Unicode. In theory, you can use the 16-bit Wide_Character to hold a subset of Unicode characters, but for actual support you need third-party libraries. For Ada 2005 the situation is slightly better, since you can use the 32-bit Wide_Wide_Character, but even then you need to do most of the Unicode string manipulation yourself.

Currently, there are five open-source Unicode libraries for Ada 95/2005:

  • Dmitry Kazakov's Simple Components
  • Unicode package from XMLAda
  • NGeadal (unmaintained)
  • Encodings package from Gela
  • Matreshka, provides full case conversion, case folding, collation, normalization; text codecs; XML processor; also G11N, I18N and L10N utilities

There is also AI05-0137, which proposes standard UTF-8 and UTF-16 encoding/decoding interfaces for Ada.

XMLAda Unicode Example

 -- Read (raw) UTF-8 data from file, convert it to Latin-1
 -- and print to screen
 with Ada.Text_IO;
 with Ada.Strings.Unbounded;
 with Ada.IO_Exceptions;
 with Unicode.CES.Utf8;
 with Unicode.CES.Utf32;
 with Unicode.CES.Basic_8bit;
 with Unicode.CCS.Iso_8859_1;
 
 use Ada.Strings.Unbounded;
 
 procedure Read_File is
    --  Takes UTF-8 data held in an Unbounded_String, passes it through
    --  Utf8.To_Utf32, Basic_8bit.From_Utf32 and Basic_8bit.To_Cs (with the
    --  ISO 8859-1 character set) and prints the result between two marker
    --  lines.
    package IO renames Ada.Text_IO;
    Data : Unbounded_String;
 begin
    Data := ...; -- UTF-8 data from file.
    declare
       use Unicode.CES;
       --  Step 1: UTF-8 input re-encoded through Utf8.To_Utf32.
       Wide_Text : constant Utf32.Utf32_LE_String :=
         Utf8.To_Utf32 (To_String (Data));
       --  Step 2: UTF-32 text passed through Basic_8bit.From_Utf32.
       Narrow_Text : constant Basic_8bit.Basic_8bit_String :=
         Basic_8bit.From_Utf32 (Wide_Text);
       --  Step 3: result mapped with the ISO 8859-1 character set.
       Latin_Text : constant Basic_8bit.Basic_8bit_String :=
         Basic_8bit.To_Cs
           (Narrow_Text, Unicode.CCS.Iso_8859_1.Iso_8859_1_Character_Set);
    begin
       IO.Put_Line ("Data:");
       IO.Put (Latin_Text);
       IO.Put_Line ("END");
    end;
 exception
    --  Both handlers only report the problem; nothing is re-raised.
    when Unicode.CES.Invalid_encoding =>
       IO.Put_Line ("encoding error");
    when Constraint_Error =>
       IO.Put_Line ("input data had invalid latin1 characters");
 end Read_File;

Gela Encodings Example

 -- Read (raw) UTF-8 data from file, convert it to Latin-9 (ISO 8859-15)
 -- and print to screen
 with Encodings;
 with Ada.Text_IO;
 with Ada.Strings.Unbounded;
 
 use Ada.Strings.Unbounded;
 
 procedure Gela_Encodings_Example is
    --  Decodes UTF-8 data held in an Unbounded_String into a Wide_String,
    --  re-encodes it with the ISO_8859_15 encoding and prints the result
    --  between two marker lines.
    package IO renames Ada.Text_IO;
    Data : Unbounded_String;
 begin
    Data := ...; -- UTF-8 data from file
    declare
       use Encodings;
       --  Decode first, then encode: the second value depends on the first.
       Decoded : constant Wide_String := Decode (To_String (Data), UTF_8);
       Encoded : constant String      := Encode (Decoded, ISO_8859_15);
    begin
       IO.Put_Line ("Data:");
       IO.Put (Encoded);
       IO.Put_Line ("END");
    end;
 exception
    --  Only report the problem; nothing is re-raised.
    when Encodings.Invalid_Encoding =>
       IO.Put_Line ("encoding error");
 end Gela_Encodings_Example;

Matreshka Text Decoder Example

--  Read raw data from the file named by the first command-line argument and
--  decode it using the encoding named by the second command-line argument.
--  Print the decoded text to the screen.
with Ada.Command_Line;
with Ada.Characters.Conversions;
with Ada.Streams.Stream_IO;
with Ada.Wide_Wide_Text_IO;
 
with League.Strings;
with League.Text_Codecs;
 
procedure Text_Codecs is
   --  Decodes the contents of a file using a named text encoding and prints
   --  the decoded text.
   --
   --  Command line:
   --    argument 1 - name of the file to read;
   --    argument 2 - encoding name handed to League.Text_Codecs.Codec.
   --
   --  The file is read with a single Read call into Data, so at most
   --  Data'Length (16384) stream elements are decoded.

   File : Ada.Streams.Stream_IO.File_Type;
   Data : Ada.Streams.Stream_Element_Array (0 .. 16383);
   Last : Ada.Streams.Stream_Element_Offset;

begin
   --  Ada.Command_Line.Argument raises Constraint_Error when the requested
   --  argument is absent. Check the count up front and explain the expected
   --  usage instead of terminating with an unhandled exception.

   if Ada.Command_Line.Argument_Count < 2 then
      Ada.Wide_Wide_Text_IO.Put_Line
       (Ada.Wide_Wide_Text_IO.Standard_Error,
        "Usage: text_codecs <file> <encoding>");
      Ada.Command_Line.Set_Exit_Status (Ada.Command_Line.Failure);

      return;
   end if;

   --  Read data from the file.

   Ada.Streams.Stream_IO.Open
    (File, Ada.Streams.Stream_IO.In_File, Ada.Command_Line.Argument (1));
   Ada.Streams.Stream_IO.Read (File, Data, Last);
   Ada.Streams.Stream_IO.Close (File);

   --  Construct decoder, decode data and output text to screen.

   declare
      --  NOTE: an exception raised while initializing Decoder occurs in the
      --  declarative part, so it is not covered by the handler of this
      --  block and propagates out of the procedure.

      Decoder : League.Text_Codecs.Text_Codec
        := League.Text_Codecs.Codec
            (League.Strings.To_Universal_String
              (Ada.Characters.Conversions.To_Wide_Wide_String
                (Ada.Command_Line.Argument (2))));
      Str     : League.Strings.Universal_String;

   begin
      --  Decode only the elements actually filled by Read.

      Str := Decoder.Decode (Data (Data'First .. Last));
      Ada.Wide_Wide_Text_IO.Put_Line (Str.To_Wide_Wide_String);

   exception
      --  Presumably raised by Decode for undecodable data - confirm against
      --  the Matreshka documentation. The error is reported and re-raised.

      when Constraint_Error =>
         Ada.Wide_Wide_Text_IO.Put_Line ("Decoding error");

         raise;
   end;
end Text_Codecs;
Personal tools