Skip to content

Instantly share code, notes, and snippets.

View saimadhu-polamuri's full-sized avatar
💭
For the love of data.

saimadhu saimadhu-polamuri

💭
For the love of data.
View GitHub Profile
def generate_text(seed_text, num_words):
    """Extend ``seed_text`` with up to ``num_words`` greedily predicted words.

    Each step tokenizes the text produced so far, pads it to ``max_length``,
    asks ``model`` for a prediction and appends the word whose index has the
    highest score. Relies on the module-level names ``tokenizer``, ``model``,
    ``max_length`` and ``tf``.

    Args:
        seed_text: Starting text that is fed to the tokenizer.
        num_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the predicted words, separated by single
        spaces. Fewer than ``num_words`` words are appended when the model
        predicts an index that has no entry in ``tokenizer.index_word``.
    """
    for _ in range(num_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = tf.keras.preprocessing.sequence.pad_sequences(
            [token_list], maxlen=max_length
        )
        predicted_index = model.predict(token_list)[0].argmax()
        # Not every index maps to a word (the Keras tokenizer reserves 0,
        # which is also the default padding value), so look it up defensively
        # instead of indexing directly.
        predicted_word = tokenizer.index_word.get(predicted_index)
        if predicted_word is None:
            # Stop instead of raising KeyError: the text did not change, so
            # another step would feed the model exactly the same input.
            break
        seed_text += " " + predicted_word
    return seed_text
# Generate a story
# Tokenize text and create word-to-index mapping
# The tokenizer is fitted on the "preprocessed_text" column of `data`.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(data["preprocessed_text"])
# Convert every text into its sequence of integer word indices.
sequences = tokenizer.texts_to_sequences(data["preprocessed_text"])
# Bring all sequences to a common length of `max_length`.
# NOTE(review): `max_length` is not defined in the lines visible here -- confirm where it is set.
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=max_length)
# Create training data
# 80/20 train/validation split with a fixed seed (random_state=42).
# NOTE(review): the same array is passed as both features and targets, so
# y_train/y_val are identical to X_train/X_val -- confirm this is intended.
X_train, X_val, y_train, y_val = train_test_split(padded_sequences, padded_sequences, test_size=0.2, random_state=42)
# Train the model
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
# Define model architecture
model = Sequential([
Embedding(input_dim=vocab_size, output_dim=embedding_dim),
LSTM(units=256, return_sequences=True),
Dropout(0.2),
# Preprocess text
def preprocess_text(text):
    """Lower-case ``text``, tokenize it and drop English stop words.

    Args:
        text: Raw input string.

    Returns:
        The remaining tokens joined by single spaces.
    """
    # Imported locally so the function does not rely on module-level NLTK
    # imports. Both calls need the corresponding NLTK data packages
    # (stop-word corpus and tokenizer models) to be installed.
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize

    # Remove stop words
    stop_words = set(stopwords.words("english"))
    words = word_tokenize(text.lower())
    filtered_words = [word for word in words if word not in stop_words]
    return " ".join(filtered_words)
# Run preprocess_text on every entry of the "text" column and store the
# result in a new "preprocessed_text" column.
# NOTE(review): in this listing `data` is used here before the read_csv call
# below assigns it -- confirm the intended execution order.
data["preprocessed_text"] = data["text"].apply(preprocess_text)
# Load data (assuming a CSV file)
# The file is read from the current working directory; the statement above
# expects it to provide a "text" column.
data = pd.read_csv("stories.csv")
import pandas as pd
import nltk
import tensorflow as tf
from scipy.stats import jarque_bera
import numpy as np
# Draw two samples of 100 points each: one from a standard normal
# distribution, one from a uniform distribution on [0, 1).
data1 = np.random.normal(loc=0, scale=1, size=100)
data2 = np.random.uniform(low=0.0, high=1.0, size=100)
# Run the Jarque-Bera test on both samples, keeping the test statistic
# and the p-value of each.
jb_stat1, jb_p1 = jarque_bera(data1)
jb_stat2, jb_p2 = jarque_bera(data2)
from scipy.stats import jarque_bera
import numpy as np
# Sample 1000 points from a standard normal distribution.
data = np.random.normal(loc=0, scale=1, size=1000)
# Jarque-Bera test on the sample: test statistic and p-value.
jb_stat, jb_p = jarque_bera(data)
# print the results
from sklearn.datasets import load_diabetes
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np
# Fetch the diabetes dataset via sklearn.datasets.
diabetes = load_diabetes()
# Keep the feature matrix and the target values under separate names.
X = diabetes.data
y = diabetes.target
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.datasets import load_diabetes
# Load the diabetes dataset and split it into features X and targets y.
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
# Introduce some missing values in X
# Boolean mask with the same shape as X; each entry is True with
# probability of about 0.1 (uniform draw compared against 0.1).
missing_mask = np.random.rand(*X.shape) < 0.1
# Work on a copy so the original X is left unchanged.
# NOTE(review): the mask is not applied within the lines visible here --
# presumably a following statement blanks the masked entries of X_missing; confirm.
X_missing = X.copy()