utf8_detail Module


Uses


Contents


Interfaces

public interface assignment(=)

  • public subroutine utf8_assign_from_char(utf8, str)

    Arguments

    Type | Intent | Optional | Attributes | Name
    type(utf8_string), intent(out) :: utf8
    character(kind=c_char,len=*), intent(in) :: str
  • public subroutine char_assign_from_utf8(str, utf8)

    Arguments

    Type | Intent | Optional | Attributes | Name
    character(kind=c_char,len=:), intent(out), allocatable:: str
    type(utf8_string), intent(in) :: utf8

public interface construct_utf8_string

  • public subroutine utf8_construct_from_char(utf8, str, escape)

    Arguments

    Type | Intent | Optional | Attributes | Name
    type(utf8_string), intent(out) :: utf8
    character(kind=c_char,len=*), intent(in) :: str
    logical, intent(in), optional :: escape

public interface utf8_is_valid

  • public pure function utf8_is_valid_char(str) result(r)

    Check whether the whole string is valid UTF-8.

    Arguments

    Type | Intent | Optional | Attributes | Name
    character(kind=c_char,len=*), intent(in) :: str

    Return Value logical

  • public pure function utf8_is_valid_string(str) result(r)

    Arguments

    Type | Intent | Optional | Attributes | Name
    type(utf8_string), intent(in) :: str

    Return Value logical


Derived Types

type, public :: utf8_string

Components

Type | Visibility | Attributes | Name | Initial
character(kind=c_char,len=:), public, allocatable:: str

Type-Bound Procedures

procedure, public :: iterator

type, public :: utf8_string_iterator

Components

Type | Visibility | Attributes | Name | Initial
integer, public :: cur =1
character(kind=c_char,len=:), public, pointer:: ptr=> null()

Type-Bound Procedures

procedure, public :: get_next => iterator_get_next
procedure, public :: has_next => iterator_has_next

Functions

public pure function cast_byte(char) result(byte)

Private helper function: casts a char to a byte (an 8-bit integer in Fortran). (display: private)

Arguments

Type | Intent | Optional | Attributes | Name
character(kind=c_char,len=1), intent(in) :: char

Return Value integer(kind=c_int8_t)

public pure function codepoint_num_bytes(byte) result(n)

Get the number of bytes of a code point based on its first byte. (display: private)

Arguments

Type | Intent | Optional | Attributes | Name
integer(kind=c_int8_t), intent(in) :: byte

Return Value integer

public function iterator(this) result(itr)

return an iterator of utf8_string

Arguments

Type | Intent | Optional | Attributes | Name
class(utf8_string), intent(in), target:: this

Return Value type(utf8_string_iterator)

public function iterator_get_next(this) result(cp)

Arguments

Type | Intent | Optional | Attributes | Name
class(utf8_string_iterator), intent(inout) :: this

Return Value character(kind=c_char,len=:),allocatable

public pure function iterator_has_next(this) result(r)

iterator methods

Arguments

Type | Intent | Optional | Attributes | Name
class(utf8_string_iterator), intent(in) :: this

Return Value logical

public pure function utf8_at(utf8, idx) result(s)

Return the code point at the specified position.

Arguments

Type | Intent | Optional | Attributes | Name
class(utf8_string), intent(in) :: utf8
integer, intent(in) :: idx

Return Value character(kind=c_char,len=:),allocatable

public pure function utf8_count(utf8, substring) result(count)

Count the occurrences of substring in utf8_string. Overlapping occurrences are not counted, e.g. utf8_count("AUAUAUAUAUAUAU", "AUA") returns 3.

Arguments

Type | Intent | Optional | Attributes | Name
class(utf8_string), intent(in) :: utf8
character(kind=c_char,len=*), intent(in) :: substring

Return Value integer

public pure function utf8_index(utf8, substring) result(idx)

return the position where substring occurs in utf8_string for the first time

Arguments

Type | Intent | Optional | Attributes | Name
class(utf8_string), intent(in) :: utf8
character(kind=c_char,len=*), intent(in) :: substring

Return Value integer

public pure function utf8_is_valid_char(str) result(r)

Check whether the whole string is valid UTF-8.

Arguments

Type | Intent | Optional | Attributes | Name
character(kind=c_char,len=*), intent(in) :: str

Return Value logical

public pure function utf8_is_valid_string(str) result(r)

Arguments

Type | Intent | Optional | Attributes | Name
type(utf8_string), intent(in) :: str

Return Value logical

public pure function utf8_len(utf8) result(l)

return the number of UTF-8 code points

Arguments

Type | Intent | Optional | Attributes | Name
class(utf8_string), intent(in) :: utf8

Return Value integer

public pure function utf8_slice(utf8, begin, end) result(slice)

return a substring of utf8_string

Arguments

Type | Intent | Optional | Attributes | Name
class(utf8_string), intent(in) :: utf8
integer, intent(in) :: begin
integer, intent(in) :: end

Return Value character(kind=c_char,len=:),allocatable


Subroutines

public subroutine char_assign_from_utf8(str, utf8)

Arguments

Type | Intent | Optional | Attributes | Name
character(kind=c_char,len=:), intent(out), allocatable:: str
type(utf8_string), intent(in) :: utf8

public subroutine utf8_assign_from_char(utf8, str)

Arguments

Type | Intent | Optional | Attributes | Name
type(utf8_string), intent(out) :: utf8
character(kind=c_char,len=*), intent(in) :: str

public subroutine utf8_construct_from_char(utf8, str, escape)

Arguments

Type | Intent | Optional | Attributes | Name
type(utf8_string), intent(out) :: utf8
character(kind=c_char,len=*), intent(in) :: str
logical, intent(in), optional :: escape

public subroutine utf8_reverse(utf8)

reverse the order of code points in place

Arguments

Type | Intent | Optional | Attributes | Name
class(utf8_string), intent(inout) :: utf8

public subroutine utf8_split(utf8, sep, list)

split utf8_string based on the separation string

Arguments

Type | Intent | Optional | Attributes | Name
class(utf8_string), intent(in), target:: utf8
character(kind=c_char,len=*), intent(in) :: sep
type(utf8_string), intent(out), dimension(:), allocatable:: list