Last active
May 26, 2023 16:02
-
-
Save felixmon/db34e0d0ed71a1ca9a2adef63c3fffcd to your computer and use it in GitHub Desktop.
找出证券业协会证券公司排名中特定证券公司的所有排名
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np

# Path to the spreadsheet that holds the ranking tables.
file_path = 'list.xlsx'

# Load the workbook into a DataFrame.
df = pd.read_excel(file_path)

# Drop rows with missing values
# df = df.dropna()

# The securities company we are looking for.
search_word = "金元证券"

# Row/column indices of every cell whose text contains the search word.
hit_rows, hit_cols = np.where(df.applymap(lambda cell: search_word in str(cell)))

# Process each hit in turn.
for row, col in zip(hit_rows, hit_cols):
    # Scan rightwards for the first populated cell (the ranking value).
    for next_col in range(col + 1, df.shape[1]):
        value = df.iloc[row, next_col]
        if pd.isna(value):
            continue
        print(f"First non-NaN value to the right of '{search_word}' at row {row + 1}, column {col + 1} is: {value}")
        # From that column, climb upwards to the first non-numeric cell —
        # presumably the header describing which ranking table this is.
        for prev_row in range(row - 1, -1, -1):
            value_up = df.iloc[prev_row, next_col]
            if pd.notna(value_up) and not isinstance(value_up, (int, float)):
                print(f"First non-numeric value above '{value}' at row {prev_row + 1}, column {next_col + 1} is: {value_up}")
                break
        break
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np

# Path to the spreadsheet that holds the ranking tables.
file_path = 'list.xlsx'

# Load the workbook into a DataFrame.
df = pd.read_excel(file_path)

# Drop rows with missing values
# df = df.dropna()

# The securities company we are looking for.
search_word = "金元证券"

# Boolean mask of every cell whose text contains the search word.
mask = df.applymap(lambda cell: search_word in str(cell))

# Process each hit in turn.
for row, col in zip(*np.where(mask)):
    # Scan leftwards for the first populated cell.
    for prev_col in range(col - 1, -1, -1):
        value = df.iloc[row, prev_col]
        if pd.isna(value):
            continue
        print(f"First non-NaN value to the left of '{search_word}' at row {row + 1}, column {col + 1} is: {value}")
        # Climb the column counting non-numeric cells; report the second one —
        # presumably the table header two levels above the data.
        seen = 0
        for prev_row in range(row - 1, -1, -1):
            value_up = df.iloc[prev_row, prev_col]
            if pd.notna(value_up) and not isinstance(value_up, (int, float)):
                seen += 1
                if seen == 2:
                    print(f"Second non-numeric value above '{value}' at row {prev_row + 1}, column {prev_col + 1} is: {value_up}")
                    break
        break
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np
import re

# Path to the spreadsheet that holds the ranking tables.
file_path = 'list.xlsx'

# Read the Excel file
df = pd.read_excel(file_path)

# Drop rows with missing values
# df = df.dropna()

# The securities company we are looking for.
search_word = "金元证券"

# Row/column coordinates of every cell whose text contains the search word.
location = np.where(df.applymap(lambda x: search_word in str(x)))
locations = list(zip(*location))

# Flat list of extracted fields; filled four entries per hit:
# amount, rank, category, unit.
results = []
for row, col in locations:
    # Rightward scan: the first populated cell is the amount.
    # Strip thousands separators so it can be parsed as a number later.
    for next_col in range(col + 1, df.shape[1]):
        value = df.iloc[row, next_col]
        if pd.notna(value):
            results.append(str(value).replace(',', ''))
            break
    # Leftward scan: the first populated cell is the rank.
    for prev_col in range(col - 1, -1, -1):
        value = df.iloc[row, prev_col]
        if pd.notna(value):
            results.append(value)
            # Upward scan: the second cell containing a Chinese character is
            # taken to be the table header; parse the category ("…名") and the
            # unit ("…元") out of it, falling back to '无' when absent.
            chinese_char_count = 0
            for prev_row in range(row - 1, -1, -1):
                value_up = df.iloc[prev_row, prev_col]
                if pd.notna(value_up) and re.search('[\u4e00-\u9fff]', str(value_up)):
                    chinese_char_count += 1
                    if chinese_char_count == 2:
                        value_up = str(value_up).replace("\n", "").replace(" ", "")
                        part1 = re.search('度(.*)名', value_up)
                        part2 = re.search(':(.*)元', value_up)
                        part1 = '无' if not part1 else part1.group(1) + '名'
                        part2 = '无' if not part2 else part2.group(1) + '元'
                        results.append(part1)
                        results.append(part2)
                        break
            break

# Convert the results list into a dataframe — four entries form one row.
try:
    result_df = pd.DataFrame(np.array(results).reshape(-1, 4),
                             columns=["金额", "排名", "类别", "单位"])
except ValueError as e:
    # Reshape fails when a hit did not yield all four fields. Print the
    # diagnostics and stop here.
    # BUG FIX: the original fell through after this handler and crashed with
    # a NameError on `result_df`; the success path now lives in `else`.
    print(f"Error: {e}")
    print(f"Length of results: {len(results)}")
    print("Results:")
    for i, result in enumerate(results):
        print(f"{i}: {result}")
else:
    print(result_df)
    # Coerce the amount column to numeric and persist the table.
    result_df["金额"] = pd.to_numeric(result_df["金额"], errors='coerce')
    result_df.to_excel("results.xlsx", index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment