def generate_text(seed_text, num_words):
    # Repeatedly predict the next word and append it to the running text
    for _ in range(num_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = tf.keras.preprocessing.sequence.pad_sequences([token_list], maxlen=max_length)
        predicted_index = model.predict(token_list)[0].argmax()
        predicted_word = tokenizer.index_word[predicted_index]
        seed_text += " " + predicted_word
    return seed_text

# Generate a story
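# A minimal usage sketch (not part of the original snippet): assumes the
# tokenizer, model, and max_length defined elsewhere in this file are in scope;
# the seed phrase and word count are purely illustrative.
story = generate_text("once upon a time", num_words=50)
print(story)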
from sklearn.model_selection import train_test_split  # needed for the split below

# Tokenize text and create word-to-index mapping
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(data["preprocessed_text"])
sequences = tokenizer.texts_to_sequences(data["preprocessed_text"])
# max_length (maximum sequence length) is assumed to be defined earlier
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=max_length)

# Create training data
X_train, X_val, y_train, y_val = train_test_split(padded_sequences, padded_sequences, test_size=0.2, random_state=42)

# Train the model
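# Hedged sketch of the training call the comment above introduces; the epoch and
# batch-size values are illustrative assumptions, not taken from the original.
model.fit(X_train, y_train,
          validation_data=(X_val, y_val),
          epochs=10,
          batch_size=64)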
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

# Define model architecture
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    LSTM(units=256, return_sequences=True),
    Dropout(0.2),
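    # The original snippet is cut off above; the lines below are a hedged
    # completion (the output layer and compile settings are assumptions, not
    # taken from the source).
    Dense(vocab_size, activation="softmax"),
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])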
from nltk.corpus import stopwords        # assumes the NLTK stopwords corpus is downloaded
from nltk.tokenize import word_tokenize  # assumes the NLTK punkt tokenizer is downloaded

# Preprocess text
def preprocess_text(text):
    # Remove stop words
    stop_words = set(stopwords.words("english"))
    words = word_tokenize(text.lower())
    filtered_words = [word for word in words if word not in stop_words]
    return " ".join(filtered_words)

data["preprocessed_text"] = data["text"].apply(preprocess_text)
# Load data (assuming a CSV file)
data = pd.read_csv("stories.csv")
import pandas as pd
import nltk
import tensorflow as tf
from scipy.stats import jarque_bera
import numpy as np

# generate two sample datasets
data1 = np.random.normal(0, 1, size=100)
data2 = np.random.uniform(size=100)

# perform Jarque-Bera test on each dataset
jb_stat1, jb_p1 = jarque_bera(data1)
jb_stat2, jb_p2 = jarque_bera(data2)
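# Illustrative reporting of the two results (not part of the original snippet):
# the normal sample should yield a large p-value, the uniform sample a small one.
print(f"data1 (normal):  JB = {jb_stat1:.3f}, p = {jb_p1:.3f}")
print(f"data2 (uniform): JB = {jb_stat2:.3f}, p = {jb_p2:.3f}")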
from scipy.stats import jarque_bera
import numpy as np

# generate a sample dataset
data = np.random.normal(0, 1, 1000)

# calculate the test statistic and p-value
jb_stat, jb_p = jarque_bera(data)

# print the results
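# Hedged completion of the truncated snippet above: one plausible way to print
# the statistic and p-value.
print("Jarque-Bera statistic:", jb_stat)
print("p-value:", jb_p)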
from sklearn.datasets import load_diabetes
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np

# Load the diabetes dataset
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
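# Hedged sketch (not from the original, which is cut off here): the imports above
# suggest mean imputation followed by a linear-regression baseline. The diabetes
# data ships without missing values, so some are introduced artificially first.
rng = np.random.default_rng(42)
X_nan = X.copy()
X_nan[rng.random(X.shape) < 0.1] = np.nan
X_imp = SimpleImputer(strategy="mean").fit_transform(X_nan)
X_tr, X_te, y_tr, y_te = train_test_split(X_imp, y, test_size=0.2, random_state=42)
lr = LinearRegression().fit(X_tr, y_tr)
print("MSE with mean imputation:", mean_squared_error(y_te, lr.predict(X_te)))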
import numpy as np  # needed for the random missing-value mask below
from sklearn.experimental import enable_iterative_imputer  # noqa: F401 -- enables IterativeImputer
from sklearn.impute import IterativeImputer
from sklearn.datasets import load_diabetes

diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Introduce some missing values in X
missing_mask = np.random.rand(*X.shape) < 0.1
X_missing = X.copy()
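# Hedged completion of the truncated snippet above: apply the mask and fit the
# iterative imputer (the settings are illustrative assumptions).
X_missing[missing_mask] = np.nan
imputer = IterativeImputer(max_iter=10, random_state=42)
X_imputed = imputer.fit_transform(X_missing)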