バヌアツの法則をコードインタープリターで確かめてみる コード編

こちらでは、バヌアツの法則を確かめるためにPythonが生成してくれたコードを示します。

ちなみに、本編はこちら。
バヌアツの法則をコードインタープリター で確かめてみる|usamix (note.com)

ChatGPTとのやりとりは、以下から参照できます。

国名の分類処理
https://chat.openai.com/share/e7b9c990-db92-45ea-9542-2ec67a812bd6

バヌアツの法則検証
https://chat.openai.com/share/f75edf77-618d-4d85-a26e-0ea03b6f8fa0

データはUSGSのサイトからダウンロードしました。
Search Earthquake Catalog (usgs.gov)

以下は、コードの主要部分を抜粋したものです。

前処理

# Convert the time column to datetime format
df['time'] = pd.to_datetime(df['time'])

# Extract only earthquakes in Vanuatu and Japan
df_vanuatu = df[df['countries'] == 'Vanuatu']
df_japan = df[df['countries'] == 'Japan']

# Compute the number of days between the first and last earthquake
total_days = (df['time'].max() - df['time'].min()).days

df_vanuatu.head(), df_japan.head(), total_days

頻度計算

import numpy as np
from datetime import timedelta

# Define a function to count the number of earthquakes in Japan within 2 weeks after each earthquake in Vanuatu
def count_earthquakes_in_japan_within_2_weeks(row):
    start_time = row['time']
    end_time = start_time + timedelta(weeks=2)
    return ((df_japan['time'] >= start_time) & (df_japan['time'] <= end_time)).sum()

# Apply the function to each row in the Vanuatu earthquake data
df_vanuatu['num_japan_earthquakes_within_2_weeks'] = df_vanuatu.apply(count_earthquakes_in_japan_within_2_weeks, axis=1)

# Calculate the average number of earthquakes per day in Japan within 2 weeks after each earthquake in Vanuatu
avg_num_japan_earthquakes_per_day_after_vanuatu = df_vanuatu['num_japan_earthquakes_within_2_weeks'].mean() / 14

# Calculate the average number of earthquakes per day in Japan over the total period
avg_num_japan_earthquakes_per_day_total = df_japan.shape[0] / total_days

avg_num_japan_earthquakes_per_day_after_vanuatu, avg_num_japan_earthquakes_per_day_total

グラフ表示

import matplotlib.pyplot as plt

# Create the figure and the axes
fig, ax = plt.subplots()

# The labels for the bars
labels = ['After Vanuatu Earthquake', 'Total Period']

# The values for the bars
values = [avg_num_japan_earthquakes_per_day_after_vanuatu, avg_num_japan_earthquakes_per_day_total]

# Create the bar chart
bars = ax.bar(labels, values, color=['blue', 'orange'])

# Add the value of each bar on top
for bar in bars:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, height,
            round(height, 4), ha='center', va='bottom')

# Set the title and the labels
ax.set_title('Average Number of Earthquakes per Day in Japan')
ax.set_ylabel('Average Number of Earthquakes per Day')

# Show the plot
plt.show()

2011年のデータの削除

# Remove rows with earthquakes that happened in 2011
df_no_2011 = df[df['time'].dt.year != 2011]

df_no_2011.head(), df_no_2011.shape

頻度の比較(2011年を除く)

# Extract only earthquakes in Vanuatu and Japan from the data without 2011
df_no_2011_vanuatu = df_no_2011[df_no_2011['countries'] == 'Vanuatu']
df_no_2011_japan = df_no_2011[df_no_2011['countries'] == 'Japan']

# Compute the number of days between the first and last earthquake excluding 2011
total_days_no_2011 = total_days - 365  # subtract the number of days in 2011

# Apply the function to each row in the Vanuatu earthquake data excluding 2011
df_no_2011_vanuatu['num_japan_earthquakes_within_2_weeks'] = df_no_2011_vanuatu.apply(count_earthquakes_in_japan_within_2_weeks, axis=1)

# Calculate the average number of earthquakes per day in Japan within 2 weeks after each earthquake in Vanuatu excluding 2011
avg_num_japan_earthquakes_per_day_after_vanuatu_no_2011 = df_no_2011_vanuatu['num_japan_earthquakes_within_2_weeks'].mean() / 14

# Calculate the average number of earthquakes per day in Japan over the total period excluding 2011
avg_num_japan_earthquakes_per_day_total_no_2011 = df_no_2011_japan.shape[0] / total_days_no_2011

avg_num_japan_earthquakes_per_day_after_vanuatu_no_2011, avg_num_japan_earthquakes_per_day_total_no_2011

逆計算(日本→バヌアツ)

# Define a function to count the number of earthquakes in Vanuatu within 2 weeks after each earthquake in Japan
def count_earthquakes_in_vanuatu_within_2_weeks(row):
    start_time = row['time']
    end_time = start_time + timedelta(weeks=2)
    return ((df_no_2011_vanuatu['time'] >= start_time) & (df_no_2011_vanuatu['time'] <= end_time)).sum()

# Apply the function to each row in the Japan earthquake data excluding 2011
df_no_2011_japan['num_vanuatu_earthquakes_within_2_weeks'] = df_no_2011_japan.apply(count_earthquakes_in_vanuatu_within_2_weeks, axis=1)

# Calculate the average number of earthquakes per day in Vanuatu within 2 weeks after each earthquake in Japan excluding 2011
avg_num_vanuatu_earthquakes_per_day_after_japan_no_2011 = df_no_2011_japan['num_vanuatu_earthquakes_within_2_weeks'].mean() / 14

# Calculate the average number of earthquakes per day in Vanuatu over the total period excluding 2011
avg_num_vanuatu_earthquakes_per_day_total_no_2011 = df_no_2011_vanuatu.shape[0] / total_days_no_2011

avg_num_vanuatu_earthquakes_per_day_after_japan_no_2011, avg_num_vanuatu_earthquakes_per_day_total_no_2011

国別の頻度計算

# Define a function to count the number of earthquakes in Japan within 2 weeks after each earthquake in a specific country
def count_earthquakes_in_japan_within_2_weeks_for_country(df_country):
    df_country['num_japan_earthquakes_within_2_weeks'] = df_country.apply(count_earthquakes_in_japan_within_2_weeks, axis=1)
    avg_num_japan_earthquakes_per_day_after_country = df_country['num_japan_earthquakes_within_2_weeks'].mean() / 14
    return avg_num_japan_earthquakes_per_day_after_country

# List of countries
countries = df_no_2011['countries'].unique()

# Dictionary to store the average number of earthquakes per day in Japan within 2 weeks after each earthquake in each country
avg_num_japan_earthquakes_per_day_after_each_country = {}

# For each country, calculate the average number of earthquakes per day in Japan within 2 weeks after each earthquake in that country
for country in countries:
    df_country = df_no_2011[df_no_2011['countries'] == country]
    if df_country.shape[0] >= 50:  # only consider countries with at least 50 earthquakes
        avg_num_japan_earthquakes_per_day_after_each_country[country] = count_earthquakes_in_japan_within_2_weeks_for_country(df_country)

# Sort the dictionary by the average number of earthquakes
sorted_avg_num_japan_earthquakes_per_day_after_each_country = dict(sorted(avg_num_japan_earthquakes_per_day_after_each_country.items(), key=lambda item: item[1], reverse=True))

sorted_avg_num_japan_earthquakes_per_day_after_each_country

国別グラフ表示

# Create the figure and the axes
fig, ax = plt.subplots(figsize=(8, 6))

# Reverse the labels and the values for the bars to have the highest values on top
labels_no_2011_countries.reverse()
values_no_2011_countries.reverse()

# Create the horizontal bar chart
bars_no_2011_countries = ax.barh(labels_no_2011_countries, values_no_2011_countries, color='blue')

# Add the value of each bar next to it
for bar in bars_no_2011_countries:
    width = bar.get_width()
    ax.text(width, bar.get_y() + bar.get_height() / 2,
            round(width, 4), ha='left', va='center')

# Set the title and the labels
ax.set_title('Average Number of Earthquakes per Day in Japan After Each Country\'s Earthquake (Excluding 2011)')
ax.set_xlabel('Average Number of Earthquakes per Day')

# Show the plot
plt.show()

ランダムな2週間をとった場合のシミュレーション

# Define a function to generate random 2-week intervals excluding 2011 and calculate the frequency of earthquakes in Japan during these intervals
def simulate_random_intervals_and_calculate_frequency(df_japan, num_intervals, num_simulations):
    # Define the start and end dates of the period excluding 2011
    start_date = pd.to_datetime('2003-07-29')
    end_date_2010 = pd.to_datetime('2010-12-31')
    start_date_2012 = pd.to_datetime('2012-01-01')
    end_date = pd.to_datetime('2023-07-30')

    # Calculate the number of days in these periods
    num_days_2003_2010 = (end_date_2010 - start_date).days
    num_days_2012_2023 = (end_date - start_date_2012).days

    # Initialize a list to store the average frequency of earthquakes in Japan during the random intervals in each simulation
    avg_frequencies = []

    # Perform the simulations
    for _ in range(num_simulations):
        # Initialize a list to store the number of earthquakes in Japan during each random interval
        num_earthquakes = []

        # Generate the random intervals and count the number of earthquakes in Japan during these intervals
        for _ in range(num_intervals):
            # Randomly choose a start day from 2003 to 2010 or from 2012 to 2023
            if np.random.rand() < num_days_2003_2010 / (num_days_2003_2010 + num_days_2012_2023):
                start_day = np.random.randint(num_days_2003_2010)
                start_time = start_date + timedelta(days=start_day)
            else:
                start_day = np.random.randint(num_days_2012_2023)
                start_time = start_date_2012 + timedelta(days=start_day)

            # The end time is 2 weeks after the start time
            end_time = start_time + timedelta(weeks=2)

            # Count the number of earthquakes in Japan during this interval and add it to the list
            num_earthquakes.append(((df_japan['time'] >= start_time) & (df_japan['time'] <= end_time)).sum())

        # Calculate the average frequency of earthquakes in Japan during the random intervals and add it to the list
        avg_frequencies.append(np.mean(num_earthquakes) / 14)

    return avg_frequencies

# Perform the simulation and calculate the average frequency of earthquakes in Japan during random 2-week intervals excluding 2011
avg_frequencies = simulate_random_intervals_and_calculate_frequency(df_no_2011_japan, num_earthquakes_vanuatu_no_2011, 1000)

# Calculate the percentiles
percentiles = np.percentile(avg_frequencies, [5, 25, 75, 95])

avg_frequencies, percentiles

修正パート

# Remove timezone information from the 'time' column in df_no_2011_japan
df_no_2011_japan['time'] = df_no_2011_japan['time'].dt.tz_convert(None)

# Perform the simulation again and calculate the average frequency of earthquakes in Japan during random 2-week intervals excluding 2011
avg_frequencies = simulate_random_intervals_and_calculate_frequency(df_no_2011_japan, num_earthquakes_vanuatu_no_2011, 1000)

# Calculate the percentiles
percentiles = np.percentile(avg_frequencies, [5, 25, 75, 95])

avg_frequencies, percentiles

グラフ表示

# Plot histogram of average frequencies
plt.figure(figsize=(10, 6))
plt.hist(avg_frequencies, bins=20, edgecolor='black')
plt.axvline(percentiles[0], color='r', linestyle='dashed', linewidth=1, label='5% percentile')
plt.axvline(percentiles[1], color='orange', linestyle='dashed', linewidth=1, label='25% percentile')
plt.axvline(percentiles[2], color='blue', linestyle='dashed', linewidth=1, label='75% percentile')
plt.axvline(percentiles[3], color='purple', linestyle='dashed', linewidth=1, label='95% percentile')
plt.title('Distribution of Average Earthquake Frequencies in Japan (Excluding 2011)')
plt.xlabel('Average Frequency')
plt.ylabel('Count')
plt.legend()
plt.show()

この記事が気に入ったらサポートをしてみませんか?