felixmon/list.py

## list.py
import pandas as pd
import numpy as np

# Specify the path to your excel file
file_path = 'list.xlsx'

# Read the Excel file
df = pd.read_excel(file_path)

# Drop rows with missing values
# df = df.dropna()

# Define the word you're searching for
search_word = "金元证券"

# Find the column and row where the word appears
location = np.where(df.applymap(lambda x: search_word in str(x)))

# Create a list of (row, column) tuples
locations = list(zip(*location))

# For each location where the word was found...
for location in locations:
    row, col = location

    # Iterate over the columns to the right until you find a non-NaN value
    for next_col in range(col + 1, df.shape[1]):
        value = df.iloc[row, next_col]
        if pd.notna(value):
            print(f"First non-NaN value to the right of '{search_word}' at row {row + 1}, column {col + 1} is: {value}")
            # Start from the row of the non-NaN value, and search upwards in the same column to find the first non-numeric value
            for prev_row in range(row - 1, -1, -1):
                value_up = df.iloc[prev_row, next_col]
                if pd.notna(value_up) and not isinstance(value_up, (int, float)):
                    print(f"First non-numeric value above '{value}' at row {prev_row + 1}, column {next_col + 1} is: {value_up}")
                    break
            break

## list_left.py
import pandas as pd
import numpy as np

# Specify the path to your excel file
file_path = 'list.xlsx'

# Read the Excel file
df = pd.read_excel(file_path)

# Drop rows with missing values
# df = df.dropna()

# Define the word you're searching for
search_word = "金元证券"

# Find the column and row where the word appears
location = np.where(df.applymap(lambda x: search_word in str(x)))

# Create a list of (row, column) tuples
locations = list(zip(*location))

# For each location where the word was found...
for location in locations:
    row, col = location

    # Iterate over the columns to the left until you find a non-NaN value
    for prev_col in range(col - 1, -1, -1):
        value = df.iloc[row, prev_col]
        if pd.notna(value):
            print(f"First non-NaN value to the left of '{search_word}' at row {row + 1}, column {col + 1} is: {value}")
            # Start from the row of the non-NaN value, and search upwards in the same column to find the second non-numeric value
            non_numeric_count = 0
            for prev_row in range(row - 1, -1, -1):
                value_up = df.iloc[prev_row, prev_col]
                if pd.notna(value_up) and not isinstance(value_up, (int, float)):
                    non_numeric_count += 1
                    if non_numeric_count == 2:
                        print(f"Second non-numeric value above '{value}' at row {prev_row + 1}, column {prev_col + 1} is: {value_up}")
                        break
            break

## list_ranking_of_company.py
import pandas as pd
import numpy as np
import re

# Specify the path to your excel file
file_path = 'list.xlsx'

# Read the Excel file
df = pd.read_excel(file_path)

# Drop rows with missing values
# df = df.dropna()

# Define the word you're searching for
search_word = "金元证券"

# Find the column and row where the word appears
location = np.where(df.applymap(lambda x: search_word in str(x)))

# Create a list of (row, column) tuples
locations = list(zip(*location))

results = []

# For each location where the word was found...
for location in locations:
    row, col = location

    # Iterate over the columns to the right until you find a non-NaN value
    for next_col in range(col + 1, df.shape[1]):
        value = df.iloc[row, next_col]
        if pd.notna(value):
            # Remove commas in the string representation of the value
            results.append(str(value).replace(',', ''))
            break

    # Iterate over the columns to the left until you find a non-NaN value
    for prev_col in range(col - 1, -1, -1):
        value = df.iloc[row, prev_col]
        if pd.notna(value):
            results.append(value)
            # Start from the row of the non-NaN value, and search upwards in the same column to find the second cell that contains a Chinese character
            chinese_char_count = 0
            for prev_row in range(row - 1, -1, -1):
                value_up = df.iloc[prev_row, prev_col]
                if pd.notna(value_up) and re.search('[\u4e00-\u9fff]', str(value_up)):
                    chinese_char_count += 1
                    if chinese_char_count == 2:
                        value_up = value_up.replace("\n", "").replace(" ", "")
                        part1 = re.search('度(.*)名', value_up)
                        part2 = re.search('：(.*)元', value_up)
                        part1 = '无' if not part1 else part1.group(1) + '名'
                        part2 = '无' if not part2 else part2.group(1) + '元'
                        results.append(part1)
                        results.append(part2)
                        break
            break

# Convert the results list into a dataframe
# four lines as a group
try:
    result_df = pd.DataFrame(np.array(results).reshape(-1, 4), columns = ["金额", "排名","类别","单位"])
except ValueError as e:
    print(f"Error: {e}")
    print(f"Length of results: {len(results)}")
    print("Results:")
    for i, result in enumerate(results):
        print(f"{i}: {result}")

print(result_df)

# Write the DataFrame back to the Excel spreadsheet
result_df["金额"] = pd.to_numeric(result_df["金额"], errors='coerce')
result_df.to_excel("results.xlsx", index=False)
	import pandas as pd
	import numpy as np

	# Specify the path to your excel file
	file_path = 'list.xlsx'

	# Read the Excel file
	df = pd.read_excel(file_path)

	# Drop rows with missing values
	# df = df.dropna()

	# Define the word you're searching for
	search_word = "金元证券"

	# Find the column and row where the word appears
	location = np.where(df.applymap(lambda x: search_word in str(x)))

	# Create a list of (row, column) tuples
	locations = list(zip(*location))

	# For each location where the word was found...
	for location in locations:
	row, col = location

	# Iterate over the columns to the right until you find a non-NaN value
	for next_col in range(col + 1, df.shape[1]):
	value = df.iloc[row, next_col]
	if pd.notna(value):
	print(f"First non-NaN value to the right of '{search_word}' at row {row + 1}, column {col + 1} is: {value}")
	# Start from the row of the non-NaN value, and search upwards in the same column to find the first non-numeric value
	for prev_row in range(row - 1, -1, -1):
	value_up = df.iloc[prev_row, next_col]
	if pd.notna(value_up) and not isinstance(value_up, (int, float)):
	print(f"First non-numeric value above '{value}' at row {prev_row + 1}, column {next_col + 1} is: {value_up}")
	break
	break
	import pandas as pd
	import numpy as np
	import re

	# Specify the path to your excel file
	file_path = 'list.xlsx'

	# Read the Excel file
	df = pd.read_excel(file_path)

	# Drop rows with missing values
	# df = df.dropna()

	# Define the word you're searching for
	search_word = "金元证券"

	# Find the column and row where the word appears
	location = np.where(df.applymap(lambda x: search_word in str(x)))

	# Create a list of (row, column) tuples
	locations = list(zip(*location))

	results = []

	# For each location where the word was found...
	for location in locations:
	row, col = location

	# Iterate over the columns to the right until you find a non-NaN value
	for next_col in range(col + 1, df.shape[1]):
	value = df.iloc[row, next_col]
	if pd.notna(value):
	# Remove commas in the string representation of the value
	results.append(str(value).replace(',', ''))
	break

	# Iterate over the columns to the left until you find a non-NaN value
	for prev_col in range(col - 1, -1, -1):
	value = df.iloc[row, prev_col]
	if pd.notna(value):
	results.append(value)
	# Start from the row of the non-NaN value, and search upwards in the same column to find the second cell that contains a Chinese character
	chinese_char_count = 0
	for prev_row in range(row - 1, -1, -1):
	value_up = df.iloc[prev_row, prev_col]
	if pd.notna(value_up) and re.search('[\u4e00-\u9fff]', str(value_up)):
	chinese_char_count += 1
	if chinese_char_count == 2:
	value_up = value_up.replace("\n", "").replace(" ", "")
	part1 = re.search('度(.*)名', value_up)
	part2 = re.search('：(.*)元', value_up)
	part1 = '无' if not part1 else part1.group(1) + '名'
	part2 = '无' if not part2 else part2.group(1) + '元'
	results.append(part1)
	results.append(part2)
	break
	break

	# Convert the results list into a dataframe
	# four lines as a group
	try:
	result_df = pd.DataFrame(np.array(results).reshape(-1, 4), columns = ["金额", "排名","类别","单位"])
	except ValueError as e:
	print(f"Error: {e}")
	print(f"Length of results: {len(results)}")
	print("Results:")
	for i, result in enumerate(results):
	print(f"{i}: {result}")

	print(result_df)

	# Write the DataFrame back to the Excel spreadsheet
	result_df["金额"] = pd.to_numeric(result_df["金额"], errors='coerce')
	result_df.to_excel("results.xlsx", index=False)