Skip to content

Instantly share code, notes, and snippets.

@schorba
Last active September 14, 2020 19:40
Show Gist options
  • Save schorba/e49fa52f8143fa794fa503dc885baa84 to your computer and use it in GitHub Desktop.
Save schorba/e49fa52f8143fa794fa503dc885baa84 to your computer and use it in GitHub Desktop.
def _leading_int(text):
    """Return the integer in ``text`` ('12', ' 12 ', '12a'), or None if there is none."""
    core = text.strip()
    if core[-1:].isalpha():
        # A range such as '1a-5c' spans the plain numbers; the unit letter is dropped.
        core = core[:-1]
    return int(core) if core.isdecimal() else None


def _expand_range(token):
    """Return every number in a 'low-high' token as strings, or None if it is not a range."""
    bounds = token.split('-')
    if len(bounds) != 2:
        return None
    low, high = _leading_int(bounds[0]), _leading_int(bounds[1])
    if low is None or high is None or low > high:
        return None
    return [str(value) for value in range(low, high + 1)]


def _split_house_numbers(raw):
    """Split a raw house-number string into the individual numbers it stands for.

    The string is cut on ',', ';' and '/', each piece is stripped of surrounding
    whitespace, and empty pieces are skipped. A piece of the form 'low-high' is
    replaced by every integer from low to high inclusive; a piece containing '-'
    that cannot be read as an ascending range is kept verbatim.
    """
    pieces = [raw]
    for separator in (',', ';', '/'):
        pieces = [part for piece in pieces for part in piece.split(separator)]

    numbers = []
    for piece in pieces:
        piece = piece.strip()
        if not piece:
            continue
        span = _expand_range(piece) if '-' in piece else None
        numbers.extend(span if span is not None else [piece])
    return numbers


def expand_osm_addresses(df):
    """Expand rows whose 'number' holds several house numbers into one row each.

    A 'number' value may be a list separated by ',', ';' or '/' ("10a, 10b"),
    an inclusive range ("1-3", "20e-22"), or a mix of both ("1-3;7"). Each such
    row is replaced by one copy per house number; the copies keep all other
    column values, get that single number in 'number' and the marker 'expand'
    in 'id'. Rows that need no expansion are returned unchanged. This includes
    non-string values and ranges that cannot be parsed or are descending.

    Args:
        df: DataFrame with at least the columns 'id', 'number' and 'zipcode'.
            It is not modified.

    Returns:
        A new DataFrame with the untouched rows first (in their original order)
        followed by the expanded rows. 'zipcode' is cast to int. The index is
        reset, and the previous index labels (the original label for untouched
        rows, 0..k-1 within each expansion) are kept as an extra leading column,
        as produced by ``reset_index()``.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if a 'zipcode' value cannot be converted to int.
    """
    kept_positions = []
    expansions = []
    # Work by position so duplicate index labels or duplicate ids cannot cause
    # the wrong rows to be copied or dropped.
    for position, raw in enumerate(df['number']):
        numbers = _split_house_numbers(raw) if isinstance(raw, str) else []
        if not numbers or numbers == [raw.strip()]:
            # Nothing to expand: keep the source row exactly as it is.
            kept_positions.append(position)
            continue
        copies = df.iloc[[position] * len(numbers)].reset_index(drop=True)
        expansions.append(copies.assign(id='expand', number=numbers))

    result = pd.concat([df.iloc[kept_positions]] + expansions)
    result['zipcode'] = result['zipcode'].astype('int')
    return result.reset_index()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment