A語の単語自動生成プログラムを公開：C++での挑戦

2024年10月9日 12:21

こんにちは、皆さん！再びこのブログにお越しいただき、ありがとうございます。今回は、A語の単語自動生成プログラムをブラッシュアップしたので、それを公開することにしました。

単語自動生成プログラムの公開

練習を兼ねて、A語の単語自動生成プログラムをC++で書いてみました。ソースコードが乱雑で汚いので恥ずかしいですが、今回思い切って公開します。以下にプログラムのソースコードを掲載しますので、ぜひ試してみてください。

// A言語の音節自動生成プログラム
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <iterator>
#include <numeric>
#include <random>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

// A value (e.g. a phoneme string) paired with its relative selection weight.
// Remains an aggregate, so `{ value }` and `{ value, weight }` both work;
// when the weight is omitted it defaults to 10.
template <typename T>
struct weight_t
{
  T value;            // the item that may be selected
  int weight{ 10 };   // relative weight of this item within its table
};

// Onset (syllable-initial consonant) candidates and their relative weights.
// Entries that give no weight use the weight_t default.
// The order of entries is significant: the cumulative table is built in this order.
const weight_t<std::string> onset_weights[] = {
  // no onset
  { "", 50 },

  // single consonants
  { "m" }, { "n" },
  { "p" }, { "b", 9 },
  { "t" }, { "d", 9 },
  { "k" }, { "g", 9 },

  // consonant + r clusters
  { "pr", 7 }, { "br", 6 },
  { "tr", 7 }, { "dr", 6 },
  { "kr", 7 }, { "gr", 6 },

  { "ts", 6 }, { "dz", 5 },
  { "s" }, { "z", 7 }, { "h" },
  { "sr", 7 }, { "zr", 3 },

  { "l" },

  // semivowels
  { "j" }, { "w" },
};

// Coda (syllable-final consonant) candidates and their relative weights.
// Entries that give no weight use the weight_t default.
// The order of entries is significant: the cumulative table is built in this order.
const weight_t<std::string> coda_weights[] = {
  // no coda
  { "", 400 },

  { "m", 100 }, { "n", 70 }, { "ng", 50 },

  { "ts" }, { "dz" },
  // codas ending in j: main() strips the j unless the vowel ends in i or j
  { "tsj", 7 }, { "dzj", 3 },

  { "s" }, { "z", 5 }, { "h", 2 },
  { "sj", 6 }, { "zj", 2 }, { "hj", 5 },

  { "r", 15 },
};

// Vowel (syllable nucleus) candidates and their relative weights.
//
// weight_t::weight is an int, so the fractional weights that were written as
// 2.4 for "ew" and "ow" cannot be list-initialized (narrowing conversion).
// Weights only matter relative to the table total, so every weight here is
// the originally intended value multiplied by 5 (e.g. 50 -> 250, 2.4 -> 12),
// which keeps the intended ratios exactly while staying integral.
// The order of entries is significant: the cumulative table is built in this order.
const weight_t<std::string> vowel_weights[] = {
  // plain vowels
  { "a", 250 }, { "e", 100 }, { "i", 150 }, { "o", 100 }, { "u", 150 },
  // j + vowel
  { "ja", 75 }, { "je", 15 }, { "ji", 45 }, { "jo", 15 }, { "ju", 45 },
  // w + vowel
  { "wa", 50 }, { "we", 10 }, { "wi", 30 }, { "wo", 10 },
  // vowel + j
  { "aj", 100 }, { "ej", 20 }, { "oj", 20 },
  // vowel + w
  { "aw", 60 }, { "ew", 12 }, { "ow", 12 },
};

// Returns a pseudo-random double drawn uniformly from [0.0, 1.0).
//
// The 64-bit Mersenne Twister engine is created once, on first call, and is
// seeded from std::random_device; every later call reuses the same engine.
//
// NOTE(review): POSIX declares `long random(void)` in <stdlib.h>; on
// toolchains whose <cstdlib> exposes it, this declaration may conflict with
// it. Confirm on the target platform before renaming (main() calls this name).
static double random()
{
  static std::random_device entropy_source;
  static std::mt19937_64 engine(entropy_source());

  std::uniform_real_distribution<double> unit_interval(0.0, 1.0);
  const double sample = unit_interval(engine);
  return sample;
}

// Builds a cumulative-share lookup table from an array of weighted values.
//
// Parameters:
//   w  pointer to the first of `n` consecutive weight_t<T> entries
//   n  number of entries
//
// Returns one (value, cumulative share) pair per input entry, in input
// order. The share of an entry is its weight divided by the sum of all
// weights; `.second` holds the running sum of shares up to and including
// that entry. The final entry's `.second` is set to exactly 1.0 rather than
// the accumulated sum, so floating-point rounding cannot leave a gap at the
// top of the range. An empty input (n == 0) yields an empty table.
template <typename T>
std::vector<std::pair<T, double>> make_weight_table(const weight_t<T>* w, std::size_t n)
{
  std::vector<std::pair<T, double>> table;
  if (n == 0)
    return table;

  const double total = std::accumulate(w, w + n, 0.0,
    [](double sum, const weight_t<T>& e) { return sum + e.weight; });

  table.reserve(n);

  // All entries except the last get their accumulated share.
  double cumulative = 0.0;
  for (std::size_t i = 0; i + 1 < n; ++i)
  {
    cumulative += w[i].weight / total;
    table.emplace_back(w[i].value, cumulative);
  }

  // The last entry (index n - 1) closes the range at exactly 1.0.
  table.emplace_back(w[n - 1].value, 1.0);

  return table;
}

// Looks up the value whose cumulative-share interval contains `k`.
//
// Parameters:
//   k      selector in the half-open range [0.0, 1.0)
//   table  (value, cumulative upper bound) pairs in ascending order of bound
//
// Entry i covers the half-open interval [previous bound, its own bound), so
// the comparison is strict: an entry whose bound equals the previous bound
// (zero weight) is never selected, not even for k == 0.
//
// Returns a copy of the selected value.
// Throws std::out_of_range if `k` lies outside [0.0, 1.0) (including NaN) or
// if no entry has a bound greater than `k` (for example, an empty table).
template <typename T>
T get_from_table(double k, const std::vector<std::pair<T, double>>& table)
{
  // Written positively so that a NaN selector is rejected here as well.
  if (!(0.0 <= k && k < 1.0))
    throw std::out_of_range(__func__);

  const auto it = std::find_if(table.cbegin(), table.cend(),
    [k](const std::pair<T, double>& e) { return k < e.second; });

  if (it == table.cend())
    throw std::out_of_range(__func__);
  return it->first;
}

// Collapses repeated occurrences of the character `c` in `str`.
//
// Two passes are made over a copy of the input:
//   1. every adjacent pair "cc" loses its first character, repeated until no
//      such pair remains;
//   2. every sequence "c-c" (the same character on both sides of a '-')
//      loses its first character, repeated until none remains.
//
// Parameters:
//   str  the input text; it is not modified
//   c    the character to collapse
//
// Returns the collapsed copy.
static std::string make_one_continuous(const std::string& str, char c)
{
  std::string r = str;

  // Erase the first character of `key` wherever it occurs, rescanning from
  // the start each time because an erase can expose a new match.
  const auto drop_leading_char_of = [&r](const std::string& key) {
    for (auto pos = r.find(key); pos != std::string::npos; pos = r.find(key))
      r.erase(pos, 1);
  };

  // std::string keys carry their own length, so no manual NUL terminator is
  // needed (a malformed terminator would make the search read out of bounds).
  drop_leading_char_of(std::string(2, c));
  drop_leading_char_of(std::string{ c, '-', c });

  return r;
}

// Generates one word and prints "<syllable count> <word>" to standard output.
//
// If a command-line argument is given, it is parsed with std::atoi and
// clamped to 1..16 to give the syllable count; otherwise the count is drawn
// from a weighted table of 1..5. Each syllable is built from a randomly
// chosen onset, vowel and coda, adjusted by the rules commented below, and
// appended to the word with '-' between syllables.
// Uses std::string::ends_with, which is a C++20 library feature.
int main(int argc, char* argv[])
{
  // Decide the number of syllables.
  int n = 1;
  if (argc > 1)
  {
    n = std::atoi(argv[1]);
    n = std::clamp(n, 1, 16);
  }
  else
  {
    const weight_t<int> n_of_syllables_weight[] = {
      { 1, 20 }, { 2, 50 }, { 3, 15 }, { 4, 7 }, { 5, 3 },
    };
    auto table = make_weight_table(n_of_syllables_weight, std::size(n_of_syllables_weight));
    auto k = random();
    n = get_from_table(k, table);
  }

  // Build the cumulative lookup tables once, before the syllable loop.
  auto onset_table = make_weight_table(onset_weights, std::size(onset_weights));
  auto coda_table = make_weight_table(coda_weights, std::size(coda_weights));
  auto vowel_table = make_weight_table(vowel_weights, std::size(vowel_weights));

  std::string word;

  for (int i = 0; i < n; i++)
  {
    // Draw the three parts of this syllable (drawn in the order onset, coda, vowel).
    std::string onset = get_from_table(random(), onset_table);
    std::string coda = get_from_table(random(), coda_table);
    std::string vowel = get_from_table(random(), vowel_table);

    // In every syllable after the first, an onset "l" is replaced by "r".
    if (i > 0)
    {
      if (onset == "l")
        onset = "r";
    }

    if (!word.empty())
    {
      // Word-medially, a coda h is dropped when a consonant follows.
      if (word.back() == 'h')
      {
        if (!onset.empty())
        {
          word.pop_back();
        }
      }

      if (!onset.empty())
      {
        // Adjust the preceding nasal according to the following syllable's first consonant.
        auto c = onset.front();
        if (word.ends_with('n'))
        {
          switch (c)
          {
            // n becomes m before m, p, b.
            case 'm':
            case 'p':
            case 'b':
              word.back() = 'm';
              break;
            // n becomes ng before k, g.
            case 'k':
            case 'g':
              word.back() = 'n';
              word += 'g';
              break;
          }
        }
        else if (word.ends_with('m'))
        {
          switch (c)
          {
            // m becomes ng before k, g.
            case 'k':
            case 'g':
              word.back() = 'n';
              word += 'g';
              break;
            // m becomes n before t, d.
            case 't':
            case 'd':
              word.back() = 'n';
              break;
          }
        }
        else if (word.ends_with("ng"))
        {
          switch (c)
          {
            // ng becomes m before m, p, b.
            case 'm':
            case 'p':
            case 'b':
              word.pop_back();
              word.back() = 'm';
              break;
            // ng becomes n before t, d.
            case 't':
            case 'd':
              word.pop_back();
              break;
          }
        }
      }
    }

    if (!onset.empty())
    {
      // When the onset ends in r, a semivowel at the start of the vowel is dropped.
      if (onset.back() == 'r')
      {
        switch (vowel.front())
        {
          case 'j':
          case 'w':
            vowel = vowel.substr(1);
            break;
        }
      }

      // When semivowels would be adjacent, the later one (in the vowel) is removed.
      switch (onset.back())
      {
        case 'j':
        case 'w':
          switch (vowel.front())
          {
            case 'j':
            case 'w':
              vowel = vowel.substr(1);
              break;
          }
          break;
      }
    }

    // Unless the onset ends in s, z or h (affricates and fricatives), "ji" becomes plain "i".
    // With an empty onset, "ji" is left unchanged.
    if (vowel.front() == 'j')
    {
      if (!onset.empty())
      {
        if (vowel == "ji")
        {
          switch (onset.back())
          {
            case 's':
            case 'z':
            case 'h':
              break;
            default:
              vowel = "i";
              break;
          }
        }
      }
    }
    if (!coda.empty())
    {
      // A coda keeps its final j (palatal form) only when the vowel ends in i or j;
      // otherwise the j is stripped from the coda.
      if (coda.back() == 'j')
      {
        auto c = vowel.back();
        switch (c)
        {
          case 'i':
          case 'j':
            break;
          default:
            coda.pop_back();
            break;
        }
      }
    }

    // Insert '-' as the syllable separator.
    if (!word.empty())
      word += "-";

    // Append the syllable to the word.
    word += onset;
    word += vowel;
    word += coda;
  }

  // Where j or w occurs twice in a row, the first occurrence is omitted.
  word = make_one_continuous(word, 'j');
  word = make_one_continuous(word, 'w');

  std::cout << n << " " << word << std::endl;
}

このプログラムは、コマンドライン引数で指定された音節数（1〜16の範囲に丸められます）の単語を自動生成します。音節数の指定を省略すると、重み付きの乱数で音節数（1〜5）を自動的に決定します。母音と子音を配置し、音韻のルールに従って単語を生成します。ぜひ試してみて、フィードバックをいただけると嬉しいです。

今後も、A語の音韻設定については随時微調整を行っていく可能性があります。単語自動生成プログラムを使って試行錯誤しながら、より自然で使いやすい言語を目指していきます。

以上が、A語の単語自動生成プログラムのブラッシュアップについての報告です。これからも皆さんの意見やアイデアを参考にしながら、架空世界を発展させていきたいと思います。どうぞよろしくお願いします！

この記事が気に入ったらサポートをしてみませんか？