Skip to content

Instantly share code, notes, and snippets.

@felixmon
Last active May 26, 2023 16:02
Show Gist options
  • Save felixmon/db34e0d0ed71a1ca9a2adef63c3fffcd to your computer and use it in GitHub Desktop.
Save felixmon/db34e0d0ed71a1ca9a2adef63c3fffcd to your computer and use it in GitHub Desktop.
找出证券业协会证券公司排名中特定证券公司的所有排名
import pandas as pd
import numpy as np
# Specify the path to your excel file
file_path = 'list.xlsx'
# Read the Excel file
df = pd.read_excel(file_path)
# Drop rows with missing values
# df = df.dropna()
# Define the word you're searching for
search_word = "金元证券"
# Find the column and row where the word appears
location = np.where(df.applymap(lambda x: search_word in str(x)))
# Create a list of (row, column) tuples
locations = list(zip(*location))
# For each location where the word was found...
for location in locations:
row, col = location
# Iterate over the columns to the right until you find a non-NaN value
for next_col in range(col + 1, df.shape[1]):
value = df.iloc[row, next_col]
if pd.notna(value):
print(f"First non-NaN value to the right of '{search_word}' at row {row + 1}, column {col + 1} is: {value}")
# Start from the row of the non-NaN value, and search upwards in the same column to find the first non-numeric value
for prev_row in range(row - 1, -1, -1):
value_up = df.iloc[prev_row, next_col]
if pd.notna(value_up) and not isinstance(value_up, (int, float)):
print(f"First non-numeric value above '{value}' at row {prev_row + 1}, column {next_col + 1} is: {value_up}")
break
break
import pandas as pd
import numpy as np
# Specify the path to your excel file
file_path = 'list.xlsx'
# Read the Excel file
df = pd.read_excel(file_path)
# Drop rows with missing values
# df = df.dropna()
# Define the word you're searching for
search_word = "金元证券"
# Find the column and row where the word appears
location = np.where(df.applymap(lambda x: search_word in str(x)))
# Create a list of (row, column) tuples
locations = list(zip(*location))
# For each location where the word was found...
for location in locations:
row, col = location
# Iterate over the columns to the left until you find a non-NaN value
for prev_col in range(col - 1, -1, -1):
value = df.iloc[row, prev_col]
if pd.notna(value):
print(f"First non-NaN value to the left of '{search_word}' at row {row + 1}, column {col + 1} is: {value}")
# Start from the row of the non-NaN value, and search upwards in the same column to find the second non-numeric value
non_numeric_count = 0
for prev_row in range(row - 1, -1, -1):
value_up = df.iloc[prev_row, prev_col]
if pd.notna(value_up) and not isinstance(value_up, (int, float)):
non_numeric_count += 1
if non_numeric_count == 2:
print(f"Second non-numeric value above '{value}' at row {prev_row + 1}, column {prev_col + 1} is: {value_up}")
break
break
import pandas as pd
import numpy as np
import re
# Specify the path to your excel file
file_path = 'list.xlsx'
# Read the Excel file
df = pd.read_excel(file_path)
# Drop rows with missing values
# df = df.dropna()
# Define the word you're searching for
search_word = "金元证券"
# Find the column and row where the word appears
location = np.where(df.applymap(lambda x: search_word in str(x)))
# Create a list of (row, column) tuples
locations = list(zip(*location))
results = []
# For each location where the word was found...
for location in locations:
row, col = location
# Iterate over the columns to the right until you find a non-NaN value
for next_col in range(col + 1, df.shape[1]):
value = df.iloc[row, next_col]
if pd.notna(value):
# Remove commas in the string representation of the value
results.append(str(value).replace(',', ''))
break
# Iterate over the columns to the left until you find a non-NaN value
for prev_col in range(col - 1, -1, -1):
value = df.iloc[row, prev_col]
if pd.notna(value):
results.append(value)
# Start from the row of the non-NaN value, and search upwards in the same column to find the second cell that contains a Chinese character
chinese_char_count = 0
for prev_row in range(row - 1, -1, -1):
value_up = df.iloc[prev_row, prev_col]
if pd.notna(value_up) and re.search('[\u4e00-\u9fff]', str(value_up)):
chinese_char_count += 1
if chinese_char_count == 2:
value_up = value_up.replace("\n", "").replace(" ", "")
part1 = re.search('度(.*)名', value_up)
part2 = re.search(':(.*)元', value_up)
part1 = '无' if not part1 else part1.group(1) + '名'
part2 = '无' if not part2 else part2.group(1) + '元'
results.append(part1)
results.append(part2)
break
break
# Convert the results list into a dataframe
# four lines as a group
try:
result_df = pd.DataFrame(np.array(results).reshape(-1, 4), columns = ["金额", "排名","类别","单位"])
except ValueError as e:
print(f"Error: {e}")
print(f"Length of results: {len(results)}")
print("Results:")
for i, result in enumerate(results):
print(f"{i}: {result}")
print(result_df)
# Write the DataFrame back to the Excel spreadsheet
result_df["金额"] = pd.to_numeric(result_df["金额"], errors='coerce')
result_df.to_excel("results.xlsx", index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment