아미(아름다운미소)

암호화엑셀을 CSV로 변경

유키공 — Wed, 29 Apr 2026 11:57:15 +0900

import win32com.client as win32
import os

# Drive a hidden Excel instance through COM. Alerts are disabled so that
# overwrite/format prompts cannot block the unattended run.
excel = win32.Dispatch("Excel.Application")
excel.Visible = False
excel.DisplayAlerts = False

folder = r"C:\excel_folder"

try:
    for file in os.listdir(folder):
        # Skip Excel's "~$name.xlsx" owner files (they appear while a workbook
        # is open and are not workbooks), and accept the extension in any case.
        if file.startswith("~$") or not file.lower().endswith(".xlsx"):
            continue

        xlsx = os.path.join(folder, file)
        # splitext drops only the trailing extension; str.replace would also
        # remove ".xlsx" if it appeared in the middle of the file name.
        stem = os.path.splitext(file)[0]

        wb = excel.Workbooks.Open(xlsx)
        try:
            # One CSV per worksheet, named "<workbook>_<sheet>.csv".
            for sheet in wb.Worksheets:
                csv = os.path.join(folder, f"{stem}_{sheet.Name}.csv")
                sheet.SaveAs(csv, FileFormat=62)  # 62 = xlCSVUTF8
        finally:
            # Close without saving even if a sheet failed to export.
            wb.Close(False)

finally:
    excel.Quit()   # always shut Excel down, even when an error occurred

한국 & 영국 상담 시간 판별 로직

유키공 — Sat, 11 Apr 2026 21:58:36 +0900

한국 & 영국 상담 시간 판별 로직
보통 상담 시간을 09:00 ~ 18:00라고 가정했을 때의 코드입니다.

import pandas as pd

# 1. Parse the raw `time` column into timezone-aware UTC timestamps;
#    values that cannot be parsed are coerced to NaT.
df['time_utc'] = pd.to_datetime(df['time'], utc=True, errors='coerce')

# 2./3. For Korea and then the UK: derive the local time from the UTC column
#       and flag rows whose local hour is within 9..17 inclusive
#       (09:00 through 17:59). Daylight saving is handled by the timezone
#       conversion itself.
for _local_col, _flag_col, _zone in (
    ('time_kr', 'is_kr_biz_hours', 'Asia/Seoul'),
    ('time_uk', 'is_uk_biz_hours', 'Europe/London'),
):
    df[_local_col] = df['time_utc'].dt.tz_convert(_zone)
    df[_flag_col] = df[_local_col].dt.hour.between(9, 17)

JSON

유키공 — Mon, 29 Sep 2025 10:10:28 +0900

import requests
import pandas as pd

urls = [
    "https://jsonplaceholder.typicode.com/todos/1",
    "https://jsonplaceholder.typicode.com/todos/2",
    "https://jsonplaceholder.typicode.com/todos/3"
]

results = []

for url in urls:
    try:
        # A timeout keeps one stalled server from hanging the whole run.
        resp = requests.get(url, timeout=10)
    except requests.exceptions.RequestException as exc:
        # Best-effort collection: report the failure and move on, in the same
        # spirit as skipping non-200 responses below.
        print(f"request failed: {url} ({exc})")
        continue

    if resp.status_code == 200:
        data = resp.json()       # JSON -> dict
        results.append(data)     # accumulate one dict per URL

# list of dicts -> DataFrame (one row per successful response)
df = pd.DataFrame(results)

print(df.head())

시간차이

유키공 — Tue, 26 Aug 2025 16:29:22 +0900

from datetime import datetime

# Timestamps given as text
time_str1 = "2023-10-15 14:30:00"
time_str2 = "2023-10-15 16:45:30"

# Parse both strings into datetime objects with one shared format
time1, time2 = (
    datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")
    for stamp in (time_str1, time_str2)
)

# Subtracting two datetimes yields a timedelta
difference = time2 - time1
print("시간 차이: {}".format(difference))

2주운동 식단플랜

유키공 — Thu, 14 Aug 2025 14:18:46 +0900

from openpyxl import Workbook
from openpyxl.styles import Alignment, Font

# Content of the workout sheet: header row followed by the exercise rows.
headers = ["구분", "운동", "횟수/시간", "세트", "비고"]
workouts = [
    ("유산소", "빠른 걷기 (트레드밀)", "20분(1주차) → 30분(2주차)", "-", "속도 6.0~6.5km/h"),
    ("근력", "레그프레스", "10~12회", "3세트", "무릎·허리 부담 ↓"),
    ("근력", "체스트 프레스", "8~10회", "3세트", "가슴·팔 근육"),
    ("근력", "시티드 로우 머신", "10~12회", "3세트", "등 근육"),
    ("근력", "플랭크", "10~15초 유지", "3세트", "복부·코어 강화"),
]

# Content of the diet sheet: one line of text per row in column A.
diet = [
    "아침: 삶은 달걀 2개 + 채소 + 현미밥 소량",
    "점심: 일반식(밥은 평소보다 70%) + 단백질 반찬 위주",
    "간식: 플레인 요거트, 견과류 한 줌, 프로틴 쉐이크 중 택1",
    "저녁: 샐러드 + 단백질(닭가슴살/두부/생선), 밥은 소량 또는 생략",
    "음료: 물 2L, 단 음료·술·과자 최소화"
]

# New workbook; its default sheet becomes the workout plan.
wb = Workbook()
ws = wb.active
ws.title = "운동 플랜"

# Give the five data columns the same width.
columns = tuple("ABCDE")
for col in columns:
    ws.column_dimensions[col].width = 25

# Write the header first, then every workout row beneath it.
for record in (headers, *workouts):
    ws.append(record)

# Centre and embolden the header cells (row 1).
for cell in ws[1]:
    cell.alignment = Alignment(horizontal='center', vertical='center')
    cell.font = Font(bold=True)

# Diet sheet: item i goes into cell A<i>.
ws_diet = wb.create_sheet(title="식단")
for row_no, item in enumerate(diet, start=1):
    ws_diet.cell(row=row_no, column=1, value=item)

# Save the file
file_path = "2주_운동_식단_플랜.xlsx"
wb.save(file_path)
print(f"엑셀 파일 생성 완료: {file_path}")

재무제표 최종

유키공 — Thu, 7 Aug 2025 08:44:49 +0900

import pandas as pd
import requests
from typing import Optional, Dict, Any, Tuple
import warnings
warnings.filterwarnings('ignore')

class EnhancedFinancialAnalyzer:
    """네이버 금융 재무제표 분석기 - 확장된 재무비율 포함"""

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

    def get_financial_statement(self, ticker: str) -> Optional[pd.DataFrame]:
        """Download the Naver Finance main page for ``ticker`` and return its income table.

        The page is fetched once with ``self.headers`` and a 10 second timeout,
        and the HTML tables are parsed from that same response body.

        Args:
            ticker: Stock code inserted into the page URL.

        Returns:
            The first table that has at least 3 columns, more than 5 rows and
            a first-column cell containing '매출'; ``None`` when the request
            fails, the page cannot be parsed, or no such table exists.
        """
        from io import BytesIO  # local import: only used to hand the body to pandas

        url = f'https://finance.naver.com/item/main.naver?code={ticker}'

        try:
            response = requests.get(url, headers=self.headers, timeout=10)
            if response.status_code != 200:
                print(f"⚠️ 페이지 접근 실패: HTTP {response.status_code}")
                return None

            # Parse the body that was just downloaded. Passing the URL to
            # read_html would fetch the page a second time, without the
            # headers and timeout used above.
            tables = pd.read_html(BytesIO(response.content), encoding='euc-kr', header=0)

        except requests.exceptions.RequestException as e:
            print(f"⚠️ 네트워크 오류: {e}")
            return None
        except Exception as e:
            # read_html raises when the page holds no parsable table.
            print(f"⚠️ 데이터 파싱 오류: {e}")
            return None

        # Locate the income-statement table by shape and by a '매출' label.
        for i, table in enumerate(tables):
            if table.shape[1] >= 3 and len(table) > 5:
                first_col = table.iloc[:, 0].astype(str).str.strip()
                if any('매출' in cell for cell in first_col):
                    print(f"✅ 재무제표 발견 (테이블 #{i+1})")
                    return table

        print("⚠️ 손익계산서 테이블을 찾을 수 없습니다")
        return None

    def get_balance_sheet(self, ticker: str) -> Optional[pd.DataFrame]:
        """Download the Naver Finance main page for ``ticker`` and return a balance-sheet-like table.

        The page is fetched once with ``self.headers`` and a 10 second timeout,
        and the HTML tables are parsed from that same response body.

        Args:
            ticker: Stock code inserted into the page URL.

        Returns:
            The first table that has at least 3 columns, more than 5 rows and
            a first-column cell containing '자산', '부채' or '자본'; ``None``
            on a non-200 response, on any error, or when nothing matches.
        """
        from io import BytesIO  # local import: only used to hand the body to pandas

        url = f'https://finance.naver.com/item/main.naver?code={ticker}'

        try:
            response = requests.get(url, headers=self.headers, timeout=10)
            if response.status_code != 200:
                return None

            # Parse the downloaded body instead of letting read_html fetch the
            # URL again without the headers and timeout used above.
            tables = pd.read_html(BytesIO(response.content), encoding='euc-kr', header=0)

            # Locate a table whose first column mentions assets, liabilities or equity.
            keywords = ('자산', '부채', '자본')
            for i, table in enumerate(tables):
                if table.shape[1] >= 3 and len(table) > 5:
                    first_col = table.iloc[:, 0].astype(str).str.strip()
                    if any(keyword in cell for cell in first_col for keyword in keywords):
                        print(f"✅ 재무상태표 발견 (테이블 #{i+1})")
                        return table

        except Exception as e:
            print(f"⚠️ 재무상태표 데이터 오류: {e}")

        return None

    def get_company_info(self, ticker: str) -> Dict[str, Any]:
        """Collect market-cap and price text for ``ticker`` from the Naver Finance main page.

        The page is fetched once with ``self.headers`` and a 10 second timeout,
        and the HTML tables are parsed from that same response body.

        Args:
            ticker: Stock code inserted into the page URL.

        Returns:
            A dict that may contain '시가총액' and/or '현재가', each holding the
            second cell of the matching row as a string. Empty when the
            request fails, parsing fails, or nothing matches.
        """
        from io import BytesIO  # local import: only used to hand the body to pandas

        try:
            url = f'https://finance.naver.com/item/main.naver?code={ticker}'
            response = requests.get(url, headers=self.headers, timeout=10)

            if response.status_code != 200:
                return {}

            # Parse the downloaded body instead of letting read_html fetch the
            # URL again without the headers and timeout used above.
            tables = pd.read_html(BytesIO(response.content), encoding='euc-kr')

            company_info = {}

            for table in tables:
                if len(table.columns) < 2:
                    continue

                # Only scan tables that mention market cap, price or volume anywhere.
                table_str = table.astype(str)
                if not table_str.apply(lambda x: x.str.contains('시가총액|주가|거래량', na=False)).any().any():
                    continue

                try:
                    for idx, row in table.iterrows():
                        label = str(row.iloc[0])
                        if '시가총액' in label:
                            company_info['시가총액'] = str(row.iloc[1])
                        elif '현재가' in label or '주가' in label:
                            company_info['현재가'] = str(row.iloc[1])
                except Exception:
                    # Best effort: a malformed table is skipped, but
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    continue

            return company_info

        except Exception as e:
            print(f"⚠️ 기업정보 수집 오류: {e}")
            return {}

    def clean_financial_df(self, df: pd.DataFrame) -> pd.DataFrame:
        """데이터프레임 전처리"""
        df_copy = df.copy()
        df_copy.set_index(df_copy.columns[0], inplace=True)
        df_copy = df_copy.replace(['-', '/', 'N/A', '', ' '], '0')

        def convert_to_number(x):
            if pd.isna(x) or x == '':
                return 0
            try:
                if isinstance(x, str):
                    cleaned = x.replace(',', '').replace('(', '-').replace(')', '').strip()
                    return float(cleaned) if cleaned else 0
                return float(x)
            except (ValueError, TypeError):
                return 0

        for col in df_copy.columns:
            df_copy[col] = df_copy[col].apply(convert_to_number)

        return df_copy

    def find_row_name(self, df: pd.DataFrame, candidates: list) -> str:
        """유연한 행 이름 매칭"""
        index_str = df.index.astype(str).str.strip()

        for candidate in candidates:
            if candidate in index_str.values:
                return candidate

            matches = index_str[index_str.str.contains(candidate, na=False)]
            if len(matches) > 0:
                return matches.iloc[0]

        raise KeyError(f"다음 항목을 찾을 수 없습니다: {candidates}")

    def calculate_growth_rate(self, current: float, previous: float) -> Tuple[str, float]:
        """성장률 계산 (문자열과 숫자값 모두 반환)"""
        try:
            if previous == 0:
                return "N/A (이전값 0)", 0

            growth_rate = ((current - previous) / abs(previous)) * 100
            return f"{growth_rate:.2f}%", growth_rate

        except (ZeroDivisionError, TypeError):
            return "N/A", 0

    def calculate_extended_ratios(self, income_df: pd.DataFrame, balance_df: Optional[pd.DataFrame] = None) -> Dict[str, Any]:
        """확장된 재무비율 계산"""
        ratios = {}

        try:
            income_cleaned = self.clean_financial_df(income_df)

            # 최신 두 기간 데이터
            if len(income_cleaned.columns) < 2:
                return {'오류': '비교할 데이터가 충분하지 않습니다'}

            latest = income_cleaned.columns[-1]
            prev = income_cleaned.columns[-2]

            # 손익계산서 주요 항목
            revenue_current = income_cleaned.loc[self.find_row_name(income_cleaned, ['매출액', '수익(매출액)', '총매출액']), latest]
            revenue_previous = income_cleaned.loc[self.find_row_name(income_cleaned, ['매출액', '수익(매출액)', '총매출액']), prev]

            operating_current = income_cleaned.loc[self.find_row_name(income_cleaned, ['영업이익', '영업이익(손실)', '영업손익']), latest]
            operating_previous = income_cleaned.loc[self.find_row_name(income_cleaned, ['영업이익', '영업이익(손실)', '영업손익']), prev]

            net_current = income_cleaned.loc[self.find_row_name(income_cleaned, ['당기순이익', '당기순이익(손실)', '순이익', '당기순손익']), latest]
            net_previous = income_cleaned.loc[self.find_row_name(income_cleaned, ['당기순이익', '당기순이익(손실)', '순이익', '당기순손익']), prev]

            # 1. 수익성 비율 (Profitability Ratios)
            if revenue_current != 0:
                ratios['매출총이익률'] = f"{((revenue_current - 0) / revenue_current * 100):.2f}%"  # 매출원가 데이터 필요시 수정
                ratios['영업이익률'] = f"{(operating_current / revenue_current * 100):.2f}%"
                ratios['순이익률'] = f"{(net_current / revenue_current * 100):.2f}%"

                # EBITDA 추정 (감가상각비 데이터가 있다면 더 정확)
                try:
                    # 감가상각비 찾기 시도
                    depreciation = 0
                    try:
                        depreciation_row = self.find_row_name(income_cleaned, ['감가상각비', '상각비'])
                        depreciation = income_cleaned.loc[depreciation_row, latest]
                    except KeyError:
                        # 감가상각비를 찾을 수 없으면 영업이익의 10%로 추정
                        depreciation = operating_current * 0.1

                    ebitda = operating_current + depreciation
                    ratios['EBITDA'] = f"{ebitda:,.0f}백만원"
                    ratios['EBITDA마진'] = f"{(ebitda / revenue_current * 100):.2f}%" if revenue_current != 0 else "N/A"
                except:
                    ratios['EBITDA'] = "계산불가"
                    ratios['EBITDA마진'] = "계산불가"

            # 2. 성장성 비율 (Growth Ratios)
            revenue_growth_str, revenue_growth_val = self.calculate_growth_rate(revenue_current, revenue_previous)
            operating_growth_str, operating_growth_val = self.calculate_growth_rate(operating_current, operating_previous)
            net_growth_str, net_growth_val = self.calculate_growth_rate(net_current, net_previous)

            ratios['매출액증가율'] = revenue_growth_str
            ratios['영업이익증가율'] = operating_growth_str
            ratios['순이익증가율'] = net_growth_str

            # 재무상태표 기반 비율 (데이터가 있는 경우)
            if balance_df is not None:
                try:
                    balance_cleaned = self.clean_financial_df(balance_df)

                    # 자산 관련
                    total_assets = balance_cleaned.loc[self.find_row_name(balance_cleaned, ['자산총계', '총자산', '자산합계']), latest]

                    # 부채 관련
                    total_liabilities = balance_cleaned.loc[self.find_row_name(balance_cleaned, ['부채총계', '총부채', '부채합계']), latest]

                    # 자본 관련
                    total_equity = balance_cleaned.loc[self.find_row_name(balance_cleaned, ['자본총계', '총자본', '자본합계', '자기자본']), latest]

                    # 3. 안전성 비율 (Stability Ratios)
                    if total_assets != 0:
                        ratios['부채비율'] = f"{(total_liabilities / total_equity * 100):.2f}%" if total_equity != 0 else "N/A"
                        ratios['자기자본비율'] = f"{(total_equity / total_assets * 100):.2f}%"
                        ratios['부채자산비율'] = f"{(total_liabilities / total_assets * 100):.2f}%"

                    # 4. 활동성 비율 (Activity Ratios)
                    if total_assets != 0:
                        ratios['총자산회전율'] = f"{(revenue_current / total_assets):.2f}회"

                    # 5. 수익성 심화 분석
                    if total_assets != 0:
                        ratios['ROA(총자산수익률)'] = f"{(net_current / total_assets * 100):.2f}%"
                    if total_equity != 0:
                        ratios['ROE(자기자본수익률)'] = f"{(net_current / total_equity * 100):.2f}%"

                except KeyError as e:
                    ratios['재무상태표_오류'] = f"재무상태표 항목 부족: {str(e)}"
                except Exception as e:
                    ratios['재무상태표_계산오류'] = str(e)

            # 6. 종합 평가 점수 시스템
            score = 0
            max_score = 0

            # 수익성 점수 (40점)
            max_score += 40
            if revenue_current > 0:
                operating_margin = operating_current / revenue_current * 100
                if operating_margin >= 20: score += 15
                elif operating_margin >= 15: score += 12
                elif operating_margin >= 10: score += 8
                elif operating_margin >= 5: score += 4
                elif operating_margin >= 0: score += 1

                net_margin = net_current / revenue_current * 100
                if net_margin >= 15: score += 15
                elif net_margin >= 10: score += 12
                elif net_margin >= 5: score += 8
                elif net_margin >= 2: score += 4
                elif net_margin >= 0: score += 1

                # EBITDA 마진 평가
                try:
                    ebitda_margin = float(ratios.get('EBITDA마진', '0%').replace('%', ''))
                    if ebitda_margin >= 25: score += 10
                    elif ebitda_margin >= 20: score += 8
                    elif ebitda_margin >= 15: score += 6
                    elif ebitda_margin >= 10: score += 3
                    elif ebitda_margin >= 5: score += 1
                except:
                    pass

            # 성장성 점수 (30점)
            max_score += 30
            if revenue_growth_val >= 20: score += 10
            elif revenue_growth_val >= 10: score += 8
            elif revenue_growth_val >= 5: score += 6
            elif revenue_growth_val >= 0: score += 3

            if operating_growth_val >= 30: score += 10
            elif operating_growth_val >= 15: score += 8
            elif operating_growth_val >= 5: score += 6
            elif operating_growth_val >= 0: score += 3

            if net_growth_val >= 30: score += 10
            elif net_growth_val >= 15: score += 8
            elif net_growth_val >= 5: score += 6
            elif net_growth_val >= 0: score += 3

            # 안정성 점수 (30점) - 재무상태표 데이터가 있는 경우만
            if balance_df is not None and '부채비율' in ratios:
                max_score += 30
                try:
                    debt_ratio = float(ratios['부채비율'].replace('%', ''))
                    if debt_ratio <= 30: score += 15
                    elif debt_ratio <= 50: score += 12
                    elif debt_ratio <= 100: score += 8
                    elif debt_ratio <= 200: score += 4
                    elif debt_ratio <= 300: score += 1

                    equity_ratio = float(ratios['자기자본비율'].replace('%', ''))
                    if equity_ratio >= 70: score += 15
                    elif equity_ratio >= 50: score += 12
                    elif equity_ratio >= 30: score += 8
                    elif equity_ratio >= 20: score += 4
                    elif equity_ratio >= 10: score += 1
                except:
                    max_score -= 30

            # 최종 점수 계산
            if max_score > 0:
                final_score = (score / max_score) * 100
                ratios['종합점수'] = f"{final_score:.1f}점 ({score}/{max_score})"

                if final_score >= 80:
                    ratios['투자등급'] = "  우수 (A급)"
                elif final_score >= 65:
                    ratios['투자등급'] = "  양호 (B급)"
                elif final_score >= 50:
                    ratios['투자등급'] = "  보통 (C급)"
                elif final_score >= 35:
                    ratios['투자등급'] = "  주의 (D급)"
                else:
                    ratios['투자등급'] = "  위험 (E급)"
            else:
                ratios['종합점수'] = "계산불가"
                ratios['투자등급'] = "평가불가"

        except Exception as e:
            ratios['계산오류'] = str(e)

        return ratios

    def analyze_financials_extended(self, ticker: str) -> Dict[str, Any]:
        """확장된 재무 분석"""
        print(f"\n{'='*60}")
        print(f"  [{ticker}] 심화 재무제표 분석 보고서")
        print(f"{'='*60}")

        # 손익계산서 데이터 수집
        income_df = self.get_financial_statement(ticker)
        if income_df is None:
            return {'오류': '손익계산서 데이터를 가져올 수 없습니다'}

        # 재무상태표 데이터 수집 (선택적)
        balance_df = self.get_balance_sheet(ticker)
        if balance_df is not None:
            print("✅ 재무상태표 데이터도 확보됨 - 더 정확한 분석 가능")
        else:
            print("⚠️ 재무상태표 데이터 없음 - 손익계산서 중심 분석")

        # 기업 정보 수집
        company_info = self.get_company_info(ticker)

        # 확장된 재무비율 계산
        ratios = self.calculate_extended_ratios(income_df, balance_df)

        # 기본 정보 출력
        print(f"\n  기업 기본정보:")
        print("-" * 30)
        for key, value in company_info.items():
            print(f"{key}: {value}")

        # 재무비율 출력
        print(f"\n  재무비율 분석 결과:")
        print("-" * 30)

        # 카테고리별로 구분하여 출력
        categories = {
            '  수익성 지표': ['매출총이익률', '영업이익률', '순이익률', 'EBITDA', 'EBITDA마진', 'ROA(총자산수익률)', 'ROE(자기자본수익률)'],
            '  성장성 지표': ['매출액증가율', '영업이익증가율', '순이익증가율'],
            ' ️ 안정성 지표': ['부채비율', '자기자본비율', '부채자산비율'],
            '  활동성 지표': ['총자산회전율'],
            '⭐ 종합평가': ['종합점수', '투자등급']
        }

        for category, metrics in categories.items():
            category_ratios = {k: v for k, v in ratios.items() if k in metrics}
            if category_ratios:
                print(f"\n{category}:")
                for metric, value in category_ratios.items():
                    print(f"  • {metric}: {value}")

        # 기타 정보 출력
        other_ratios = {k: v for k, v in ratios.items()
                       if k not in sum(categories.values(), [])
                       and not k.endswith('오류')}

        if other_ratios:
            print(f"\n  추가 정보:")
            for key, value in other_ratios.items():
                print(f"  • {key}: {value}")

        # 오류 정보 출력
        error_ratios = {k: v for k, v in ratios.items() if k.endswith('오류')}
        if error_ratios:
            print(f"\n⚠️ 분석 제한사항:")
            for key, value in error_ratios.items():
                print(f"  • {key}: {value}")

        print(f"\n{'='*60}")
        return ratios

    def compare_companies(self, tickers: list) -> pd.DataFrame:
        """여러 기업 재무비율 비교"""
        print(f"\n{'='*70}")
        print(f"  기업 비교 분석 ({len(tickers)}개 기업)")
        print(f"{'='*70}")

        comparison_data = []

        for ticker in tickers:
            print(f"\n  {ticker} 분석 중...")

            income_df = self.get_financial_statement(ticker)
            if income_df is None:
                continue

            balance_df = self.get_balance_sheet(ticker)
            ratios = self.calculate_extended_ratios(income_df, balance_df)

            # 비교용 데이터 추출
            company_data = {'종목코드': ticker}

            # 주요 지표만 선택
            key_metrics = ['영업이익률', '순이익률', 'ROE(자기자본수익률)', 'ROA(총자산수익률)',
                          '매출액증가율', '영업이익증가율', '부채비율', '자기자본비율', '종합점수', '투자등급']

            for metric in key_metrics:
                company_data[metric] = ratios.get(metric, 'N/A')

            comparison_data.append(company_data)

        # 데이터프레임 생성
        if comparison_data:
            comparison_df = pd.DataFrame(comparison_data)
            print(f"\n  비교 결과:")
            print("-" * 70)
            print(comparison_df.to_string(index=False))
            return comparison_df
        else:
            print("❌ 비교할 데이터가 없습니다")
            return pd.DataFrame()


def main():
    """Run the demo: one in-depth report, then a three-company comparison ranked by ROE."""
    analyzer = EnhancedFinancialAnalyzer()

    # 1. In-depth analysis of a single company
    print("  심화 분석 예시")
    analyzer.analyze_financials_extended("005930")  # Samsung Electronics

    # 2. Comparison across several companies
    print("\n" + "="*80)
    print("  기업 비교 분석 예시")

    tech_companies = ["005930", "000660", "035420"]  # Samsung Electronics, SK hynix, NAVER
    comparison_result = analyzer.compare_companies(tech_companies)

    if not comparison_result.empty:
        # Ranking by one metric (ROE)
        try:
            print(f"\n  ROE 기준 순위:")
            roe_ranking = comparison_result[comparison_result['ROE(자기자본수익률)'] != 'N/A'].copy()
            if not roe_ranking.empty:
                roe_ranking['ROE_숫자'] = roe_ranking['ROE(자기자본수익률)'].str.replace('%', '', regex=False).astype(float)
                roe_ranking = roe_ranking.sort_values('ROE_숫자', ascending=False)
                # Number the rows by their position after sorting. The
                # DataFrame index still holds each row's position in the
                # unsorted comparison table, so it cannot serve as the rank.
                for rank, (_, row) in enumerate(roe_ranking.iterrows(), start=1):
                    print(f"  {rank}위: {row['종목코드']} - {row['ROE(자기자본수익률)']}")
        except Exception as e:
            print(f"랭킹 계산 오류: {e}")


if __name__ == "__main__":
    main()

재무제표 심화

유키공 — Wed, 6 Aug 2025 22:43:49 +0900

import pandas as pd
import requests
from typing import Optional, Dict, Any, Tuple
import warnings
warnings.filterwarnings('ignore')

class EnhancedFinancialAnalyzer:
    """네이버 금융 재무제표 분석기 - 확장된 재무비율 포함"""

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

    def get_financial_statement(self, ticker: str) -> Optional[pd.DataFrame]:
        """네이버 금융에서 손익계산서 데이터를 가져오는 함수"""
        url = f'https://finance.naver.com/item/main.naver?code={ticker}'

        try:
            response = requests.get(url, headers=self.headers, timeout=10)
            if response.status_code != 200:
                print(f"⚠️ 페이지 접근 실패: HTTP {response.status_code}")
                return None

            tables = pd.read_html(url, encoding='euc-kr', header=0)

        except requests.exceptions.RequestException as e:
            print(f"⚠️ 네트워크 오류: {e}")
            return None
        except Exception as e:
            print(f"⚠️ 데이터 파싱 오류: {e}")
            return None

        # 손익계산서 테이블 찾기
        for i, table in enumerate(tables):
            if table.shape[1] >= 3 and len(table) > 5:
                first_col = table.iloc[:, 0].astype(str).str.strip()
                if any('매출' in cell for cell in first_col):
                    print(f"✅ 재무제표 발견 (테이블 #{i+1})")
                    return table

        print("⚠️ 손익계산서 테이블을 찾을 수 없습니다")
        return None

    def get_balance_sheet(self, ticker: str) -> Optional[pd.DataFrame]:
        """네이버 금융에서 재무상태표 데이터를 가져오는 함수"""
        url = f'https://finance.naver.com/item/main.naver?code={ticker}'

        try:
            response = requests.get(url, headers=self.headers, timeout=10)
            if response.status_code != 200:
                return None

            tables = pd.read_html(url, encoding='euc-kr', header=0)

            # 재무상태표 테이블 찾기 (자산, 부채 등이 포함된 테이블)
            for i, table in enumerate(tables):
                if table.shape[1] >= 3 and len(table) > 5:
                    first_col = table.iloc[:, 0].astype(str).str.strip()
                    if any(keyword in cell for keyword in ['자산', '부채', '자본'] for cell in first_col):
                        print(f"✅ 재무상태표 발견 (테이블 #{i+1})")
                        return table

        except Exception as e:
            print(f"⚠️ 재무상태표 데이터 오류: {e}")

        return None

    def get_company_info(self, ticker: str) -> Dict[str, Any]:
        """기업 기본정보 및 주가 정보 수집"""
        try:
            # 주식 정보 페이지에서 시가총액, 주가 등 정보 수집
            url = f'https://finance.naver.com/item/main.naver?code={ticker}'
            response = requests.get(url, headers=self.headers, timeout=10)
            
            if response.status_code != 200:
                return {}

            tables = pd.read_html(url, encoding='euc-kr')
            
            # 시가총액, 주가 등 정보가 있는 테이블 찾기
            company_info = {}
            
            for table in tables:
                if len(table.columns) >= 2:
                    # 테이블을 문자열로 변환하여 검색
                    table_str = table.astype(str)
                    if table_str.apply(lambda x: x.str.contains('시가총액|주가|거래량', na=False)).any().any():
                        # 시가총액 정보 추출 시도
                        try:
                            for idx, row in table.iterrows():
                                if '시가총액' in str(row.iloc[0]):
                                    company_info['시가총액'] = str(row.iloc[1])
                                elif '현재가' in str(row.iloc[0]) or '주가' in str(row.iloc[0]):
                                    company_info['현재가'] = str(row.iloc[1])
                        except:
                            continue
                            
            return company_info
            
        except Exception as e:
            print(f"⚠️ 기업정보 수집 오류: {e}")
            return {}

    def clean_financial_df(self, df: pd.DataFrame) -> pd.DataFrame:
        """데이터프레임 전처리"""
        df_copy = df.copy()
        df_copy.set_index(df_copy.columns[0], inplace=True)
        df_copy = df_copy.replace(['-', '/', 'N/A', '', ' '], '0')

        def convert_to_number(x):
            if pd.isna(x) or x == '':
                return 0
            try:
                if isinstance(x, str):
                    cleaned = x.replace(',', '').replace('(', '-').replace(')', '').strip()
                    return float(cleaned) if cleaned else 0
                return float(x)
            except (ValueError, TypeError):
                return 0

        for col in df_copy.columns:
            df_copy[col] = df_copy[col].apply(convert_to_number)

        return df_copy

    def find_row_name(self, df: pd.DataFrame, candidates: list) -> str:
        """유연한 행 이름 매칭"""
        index_str = df.index.astype(str).str.strip()

        for candidate in candidates:
            if candidate in index_str.values:
                return candidate

            matches = index_str[index_str.str.contains(candidate, na=False)]
            if len(matches) > 0:
                return matches.iloc[0]

        raise KeyError(f"다음 항목을 찾을 수 없습니다: {candidates}")

    def calculate_growth_rate(self, current: float, previous: float) -> Tuple[str, float]:
        """성장률 계산 (문자열과 숫자값 모두 반환)"""
        try:
            if previous == 0:
                return "N/A (이전값 0)", 0

            growth_rate = ((current - previous) / abs(previous)) * 100
            return f"{growth_rate:.2f}%", growth_rate

        except (ZeroDivisionError, TypeError):
            return "N/A", 0

    def calculate_extended_ratios(self, income_df: pd.DataFrame, balance_df: Optional[pd.DataFrame] = None) -> Dict[str, Any]:
        """확장된 재무비율 계산"""
        ratios = {}
        
        try:
            income_cleaned = self.clean_financial_df(income_df)
            
            # 최신 두 기간 데이터
            if len(income_cleaned.columns) < 2:
                return {'오류': '비교할 데이터가 충분하지 않습니다'}
                
            latest = income_cleaned.columns[-1]
            prev = income_cleaned.columns[-2]

            # 손익계산서 주요 항목
            revenue_current = income_cleaned.loc[self.find_row_name(income_cleaned, ['매출액', '수익(매출액)', '총매출액']), latest]
            revenue_previous = income_cleaned.loc[self.find_row_name(income_cleaned, ['매출액', '수익(매출액)', '총매출액']), prev]
            
            operating_current = income_cleaned.loc[self.find_row_name(income_cleaned, ['영업이익', '영업이익(손실)', '영업손익']), latest]
            operating_previous = income_cleaned.loc[self.find_row_name(income_cleaned, ['영업이익', '영업이익(손실)', '영업손익']), prev]
            
            net_current = income_cleaned.loc[self.find_row_name(income_cleaned, ['당기순이익', '당기순이익(손실)', '순이익', '당기순손익']), latest]
            net_previous = income_cleaned.loc[self.find_row_name(income_cleaned, ['당기순이익', '당기순이익(손실)', '순이익', '당기순손익']), prev]

            # 1. 수익성 비율 (Profitability Ratios)
            if revenue_current != 0:
                ratios['매출총이익률'] = f"{((revenue_current - 0) / revenue_current * 100):.2f}%"  # 매출원가 데이터 필요시 수정
                ratios['영업이익률'] = f"{(operating_current / revenue_current * 100):.2f}%"
                ratios['순이익률'] = f"{(net_current / revenue_current * 100):.2f}%"
                
                # EBITDA 추정 (감가상각비 데이터가 있다면 더 정확)
                try:
                    # 감가상각비 찾기 시도
                    depreciation = 0
                    try:
                        depreciation_row = self.find_row_name(income_cleaned, ['감가상각비', '상각비'])
                        depreciation = income_cleaned.loc[depreciation_row, latest]
                    except KeyError:
                        # 감가상각비를 찾을 수 없으면 영업이익의 10%로 추정
                        depreciation = operating_current * 0.1
                    
                    ebitda = operating_current + depreciation
                    ratios['EBITDA'] = f"{ebitda:,.0f}백만원"
                    ratios['EBITDA마진'] = f"{(ebitda / revenue_current * 100):.2f}%" if revenue_current != 0 else "N/A"
                except:
                    ratios['EBITDA'] = "계산불가"
                    ratios['EBITDA마진'] = "계산불가"

            # 2. 성장성 비율 (Growth Ratios)
            revenue_growth_str, revenue_growth_val = self.calculate_growth_rate(revenue_current, revenue_previous)
            operating_growth_str, operating_growth_val = self.calculate_growth_rate(operating_current, operating_previous)
            net_growth_str, net_growth_val = self.calculate_growth_rate(net_current, net_previous)
            
            ratios['매출액증가율'] = revenue_growth_str
            ratios['영업이익증가율'] = operating_growth_str
            ratios['순이익증가율'] = net_growth_str

            # 재무상태표 기반 비율 (데이터가 있는 경우)
            if balance_df is not None:
                try:
                    balance_cleaned = self.clean_financial_df(balance_df)
                    
                    # 자산 관련
                    total_assets = balance_cleaned.loc[self.find_row_name(balance_cleaned, ['자산총계', '총자산', '자산합계']), latest]
                    
                    # 부채 관련
                    total_liabilities = balance_cleaned.loc[self.find_row_name(balance_cleaned, ['부채총계', '총부채', '부채합계']), latest]
                    
                    # 자본 관련
                    total_equity = balance_cleaned.loc[self.find_row_name(balance_cleaned, ['자본총계', '총자본', '자본합계', '자기자본']), latest]
                    
                    # 3. 안전성 비율 (Stability Ratios)
                    if total_assets != 0:
                        ratios['부채비율'] = f"{(total_liabilities / total_equity * 100):.2f}%" if total_equity != 0 else "N/A"
                        ratios['자기자본비율'] = f"{(total_equity / total_assets * 100):.2f}%"
                        ratios['부채자산비율'] = f"{(total_liabilities / total_assets * 100):.2f}%"
                    
                    # 4. 활동성 비율 (Activity Ratios)
                    if total_assets != 0:
                        ratios['총자산회전율'] = f"{(revenue_current / total_assets):.2f}회"
                    
                    # 5. 수익성 심화 분석
                    if total_assets != 0:
                        ratios['ROA(총자산수익률)'] = f"{(net_current / total_assets * 100):.2f}%"
                    if total_equity != 0:
                        ratios['ROE(자기자본수익률)'] = f"{(net_current / total_equity * 100):.2f}%"
                        
                except KeyError as e:
                    ratios['재무상태표_오류'] = f"재무상태표 항목 부족: {str(e)}"
                except Exception as e:
                    ratios['재무상태표_계산오류'] = str(e)

            # 6. 종합 평가 점수 시스템
            score = 0
            max_score = 0
            
            # 수익성 점수 (40점)
            max_score += 40
            if revenue_current > 0:
                operating_margin = operating_current / revenue_current * 100
                if operating_margin >= 20: score += 15
                elif operating_margin >= 15: score += 12
                elif operating_margin >= 10: score += 8
                elif operating_margin >= 5: score += 4
                elif operating_margin >= 0: score += 1
                
                net_margin = net_current / revenue_current * 100
                if net_margin >= 15: score += 15
                elif net_margin >= 10: score += 12
                elif net_margin >= 5: score += 8
                elif net_margin >= 2: score += 4
                elif net_margin >= 0: score += 1
                
                # EBITDA 마진 평가
                try:
                    ebitda_margin = float(ratios.get('EBITDA마진', '0%').replace('%', ''))
                    if ebitda_margin >= 25: score += 10
                    elif ebitda_margin >= 20: score += 8
                    elif ebitda_margin >= 15: score += 6
                    elif ebitda_margin >= 10: score += 3
                    elif ebitda_margin >= 5: score += 1
                except:
                    pass
            
            # 성장성 점수 (30점)
            max_score += 30
            if revenue_growth_val >= 20: score += 10
            elif revenue_growth_val >= 10: score += 8
            elif revenue_growth_val >= 5: score += 6
            elif revenue_growth_val >= 0: score += 3
            
            if operating_growth_val >= 30: score += 10
            elif operating_growth_val >= 15: score += 8
            elif operating_growth_val >= 5: score += 6
            elif operating_growth_val >= 0: score += 3
            
            if net_growth_val >= 30: score += 10
            elif net_growth_val >= 15: score += 8
            elif net_growth_val >= 5: score += 6
            elif net_growth_val >= 0: score += 3
            
            # 안정성 점수 (30점) - 재무상태표 데이터가 있는 경우만
            if balance_df is not None and '부채비율' in ratios:
                max_score += 30
                try:
                    debt_ratio = float(ratios['부채비율'].replace('%', ''))
                    if debt_ratio <= 30: score += 15
                    elif debt_ratio <= 50: score += 12
                    elif debt_ratio <= 100: score += 8
                    elif debt_ratio <= 200: score += 4
                    elif debt_ratio <= 300: score += 1
                    
                    equity_ratio = float(ratios['자기자본비율'].replace('%', ''))
                    if equity_ratio >= 70: score += 15
                    elif equity_ratio >= 50: score += 12
                    elif equity_ratio >= 30: score += 8
                    elif equity_ratio >= 20: score += 4
                    elif equity_ratio >= 10: score += 1
                except:
                    max_score -= 30

            # 최종 점수 계산
            if max_score > 0:
                final_score = (score / max_score) * 100
                ratios['종합점수'] = f"{final_score:.1f}점 ({score}/{max_score})"
                
                if final_score >= 80:
                    ratios['투자등급'] = "  우수 (A급)"
                elif final_score >= 65:
                    ratios['투자등급'] = "  양호 (B급)"
                elif final_score >= 50:
                    ratios['투자등급'] = "  보통 (C급)"
                elif final_score >= 35:
                    ratios['투자등급'] = "  주의 (D급)"
                else:
                    ratios['투자등급'] = "  위험 (E급)"
            else:
                ratios['종합점수'] = "계산불가"
                ratios['투자등급'] = "평가불가"

        except Exception as e:
            ratios['계산오류'] = str(e)

        return ratios

    def analyze_financials_extended(self, ticker: str) -> Dict[str, Any]:
        """Print an extended financial report for one ticker and return its ratios.

        The income statement is mandatory; the balance sheet is optional and is
        simply passed through (possibly as ``None``) to the ratio calculation.

        Args:
            ticker: Stock code forwarded to the data-loading helpers.

        Returns:
            The ratio dictionary produced by ``calculate_extended_ratios``, or a
            single-entry error dictionary when no income statement is available.
        """
        print(f"\n{'='*60}")
        print(f"  [{ticker}] 심화 재무제표 분석 보고서")
        print(f"{'='*60}")

        # Without an income statement there is nothing to analyse.
        income_df = self.get_financial_statement(ticker)
        if income_df is None:
            return {'오류': '손익계산서 데이터를 가져올 수 없습니다'}

        # The balance sheet only refines the analysis; report whether we got it.
        balance_df = self.get_balance_sheet(ticker)
        if balance_df is None:
            print("⚠️ 재무상태표 데이터 없음 - 손익계산서 중심 분석")
        else:
            print("✅ 재무상태표 데이터도 확보됨 - 더 정확한 분석 가능")

        company_info = self.get_company_info(ticker)
        ratios = self.calculate_extended_ratios(income_df, balance_df)

        # Company profile section.
        print(f"\n  기업 기본정보:")
        print("-" * 30)
        for key, value in company_info.items():
            print(f"{key}: {value}")

        # Ratio section, grouped by category heading.
        print(f"\n  재무비율 분석 결과:")
        print("-" * 30)

        categories = {
            '  수익성 지표': ['매출총이익률', '영업이익률', '순이익률', 'EBITDA', 'EBITDA마진', 'ROA(총자산수익률)', 'ROE(자기자본수익률)'],
            '  성장성 지표': ['매출액증가율', '영업이익증가율', '순이익증가율'],
            ' ️ 안정성 지표': ['부채비율', '자기자본비율', '부채자산비율'],
            '  활동성 지표': ['총자산회전율'],
            '⭐ 종합평가': ['종합점수', '투자등급']
        }

        for category, metrics in categories.items():
            # Entries are listed in the order they appear in ``ratios``.
            members = [(metric, value) for metric, value in ratios.items() if metric in metrics]
            if not members:
                continue
            print(f"\n{category}:")
            for metric, value in members:
                print(f"  • {metric}: {value}")

        # Split the remaining entries: keys ending in '오류' are limitations,
        # anything not claimed by a category is "additional information".
        categorized = {metric for metrics in categories.values() for metric in metrics}
        other_ratios = {}
        error_ratios = {}
        for key, value in ratios.items():
            if key.endswith('오류'):
                error_ratios[key] = value
            elif key not in categorized:
                other_ratios[key] = value

        if other_ratios:
            print(f"\n  추가 정보:")
            for key, value in other_ratios.items():
                print(f"  • {key}: {value}")

        if error_ratios:
            print(f"\n⚠️ 분석 제한사항:")
            for key, value in error_ratios.items():
                print(f"  • {key}: {value}")

        print(f"\n{'='*60}")
        return ratios

    def compare_companies(self, tickers: list) -> pd.DataFrame:
        """Build and print a side-by-side table of key ratios for several tickers.

        Tickers whose income statement cannot be loaded are skipped.

        Args:
            tickers: Stock codes to compare.

        Returns:
            One row per successfully analysed ticker, or an empty DataFrame when
            none could be analysed.
        """
        print(f"\n{'='*70}")
        print(f"  기업 비교 분석 ({len(tickers)}개 기업)")
        print(f"{'='*70}")

        # Only these headline metrics make it into the comparison table.
        key_metrics = ['영업이익률', '순이익률', 'ROE(자기자본수익률)', 'ROA(총자산수익률)',
                       '매출액증가율', '영업이익증가율', '부채비율', '자기자본비율', '종합점수', '투자등급']

        rows = []
        for ticker in tickers:
            print(f"\n  {ticker} 분석 중...")

            income_df = self.get_financial_statement(ticker)
            if income_df is None:
                continue

            balance_df = self.get_balance_sheet(ticker)
            ratios = self.calculate_extended_ratios(income_df, balance_df)

            # Missing metrics are shown as 'N/A' rather than dropped.
            row = {'종목코드': ticker}
            row.update((metric, ratios.get(metric, 'N/A')) for metric in key_metrics)
            rows.append(row)

        if not rows:
            print("❌ 비교할 데이터가 없습니다")
            return pd.DataFrame()

        comparison_df = pd.DataFrame(rows)
        print(f"\n  비교 결과:")
        print("-" * 70)
        print(comparison_df.to_string(index=False))
        return comparison_df


def main():
    """Demo driver: one in-depth report, then a multi-company comparison.

    After the comparison, companies with an available ROE value are listed
    from highest to lowest ROE.
    """
    analyzer = EnhancedFinancialAnalyzer()

    # 1. In-depth analysis of a single company
    print("  심화 분석 예시")
    analyzer.analyze_financials_extended("005930")  # Samsung Electronics

    # 2. Comparison across several companies
    print("\n" + "="*80)
    print("  기업 비교 분석 예시")

    tech_companies = ["005930", "000660", "035420"]  # Samsung Electronics, SK hynix, NAVER
    comparison_result = analyzer.compare_companies(tech_companies)

    if not comparison_result.empty:
        # Ranking by a single metric
        try:
            print(f"\n  ROE 기준 순위:")
            roe_ranking = comparison_result[comparison_result['ROE(자기자본수익률)'] != 'N/A'].copy()
            if not roe_ranking.empty:
                roe_ranking['ROE_숫자'] = roe_ranking['ROE(자기자본수익률)'].str.replace('%', '').astype(float)
                roe_ranking = roe_ranking.sort_values('ROE_숫자', ascending=False)
                # Number rows by their position after sorting; the DataFrame
                # index still holds the pre-sort row order and would print
                # the wrong rank.
                for rank, (_, row) in enumerate(roe_ranking.iterrows(), start=1):
                    print(f"  {rank}위: {row['종목코드']} - {row['ROE(자기자본수익률)']}")
        except Exception as e:
            # Ranking is a best-effort extra; report the problem and carry on.
            print(f"랭킹 계산 오류: {e}")


if __name__ == "__main__":
    main()

재무제표 개선

유키공 — Wed, 6 Aug 2025 22:15:08 +0900

import pandas as pd
import requests
from typing import Optional, Dict, Any
import warnings
warnings.filterwarnings('ignore')

class FinancialAnalyzer:
    """Income-statement analyzer for Naver Finance item pages."""

    def __init__(self):
        # Browser-like User-Agent sent with the page request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

    def get_financial_statement(self, ticker: str) -> Optional[pd.DataFrame]:
        """Fetch the income-statement table for a ticker from Naver Finance.

        Args:
            ticker: Stock code (e.g. '005930').

        Returns:
            The first HTML table that has at least 3 columns, more than 5 rows
            and a first-column cell containing '매출'; ``None`` when the page
            cannot be fetched or parsed, or no such table exists.
        """
        from io import StringIO  # local import: only this method needs it

        url = f'https://finance.naver.com/item/main.naver?code={ticker}'

        try:
            response = requests.get(url, headers=self.headers, timeout=10)
            if response.status_code != 200:
                print(f"⚠️ 페이지 접근 실패: HTTP {response.status_code}")
                return None

            # Parse the body that was just downloaded instead of letting pandas
            # fetch the URL a second time (that second request would carry
            # neither our headers nor the timeout).
            response.encoding = 'euc-kr'
            tables = pd.read_html(StringIO(response.text), header=0)

        except requests.exceptions.RequestException as e:
            print(f"⚠️ 네트워크 오류: {e}")
            return None
        except Exception as e:
            print(f"⚠️ 데이터 파싱 오류: {e}")
            return None

        # Pick the first table whose first column mentions revenue ('매출').
        for i, table in enumerate(tables):
            if table.shape[1] >= 3 and len(table) > 5:
                first_col = table.iloc[:, 0].astype(str).str.strip()
                if any('매출' in cell for cell in first_col):
                    print(f"✅ 재무제표 발견 (테이블 #{i+1})")
                    return table

        print("⚠️ 손익계산서 테이블을 찾을 수 없습니다")
        return None

    def clean_financial_df(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a numeric copy of a raw statement table.

        The first column becomes the index and every remaining cell is
        converted to ``float``; placeholders, blanks, NaN and unparsable text
        become 0. The input frame is not modified.

        Args:
            df: Raw statement table.

        Returns:
            Cleaned copy indexed by the first column.
        """
        df_copy = df.copy()

        # The first column carries the row labels.
        df_copy.set_index(df_copy.columns[0], inplace=True)

        # Placeholder cells count as zero.
        df_copy = df_copy.replace(['-', '/', 'N/A', '', ' '], '0')

        def convert_to_number(x):
            if pd.isna(x) or x == '':
                return 0
            try:
                if isinstance(x, str):
                    # Drop thousands separators; "(123)" is read as -123.
                    cleaned = x.replace(',', '').replace('(', '-').replace(')', '').strip()
                    return float(cleaned) if cleaned else 0
                return float(x)
            except (ValueError, TypeError):
                return 0

        for col in df_copy.columns:
            df_copy[col] = df_copy[col].apply(convert_to_number)

        return df_copy

    def find_row_name(self, df: pd.DataFrame, candidates: list) -> str:
        """Find the index label of ``df`` that matches one of ``candidates``.

        Candidates are tried in order. For each one, an exact match against the
        whitespace-stripped labels is preferred, then a plain substring match.
        The label is returned exactly as stored in ``df.index`` so it can be
        used directly with ``df.loc``.

        Args:
            df: Frame whose index holds the row names.
            candidates: Row names to look for, most preferred first.

        Returns:
            The matching index label.

        Raises:
            KeyError: If no candidate matches any label.
        """
        normalized = [str(label).strip() for label in df.index]

        for candidate in candidates:
            # Exact match on the stripped label.
            if candidate in normalized:
                return df.index[normalized.index(candidate)]

            # Literal substring match; candidates such as '수익(매출액)' contain
            # parentheses and must not be interpreted as regular expressions.
            for position, name in enumerate(normalized):
                if candidate in name:
                    return df.index[position]

        raise KeyError(f"다음 항목을 찾을 수 없습니다: {candidates}")

    def calculate_growth_rate(self, current: float, previous: float) -> str:
        """Format the percentage change from ``previous`` to ``current``.

        The change is divided by ``abs(previous)`` so a negative base still
        yields a correctly signed rate.

        Args:
            current: Value of the later period.
            previous: Value of the earlier period.

        Returns:
            The rate with two decimals and a '%' suffix, or an 'N/A' message
            when ``previous`` is 0 or the values cannot be used in arithmetic.
        """
        try:
            if previous == 0:
                return "N/A (이전값 0)"

            growth_rate = ((current - previous) / abs(previous)) * 100
            return f"{growth_rate:.2f}%"

        except (ZeroDivisionError, TypeError):
            return "N/A"

    def analyze_financials(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Compare the two most recent periods of an income statement.

        Args:
            df: Raw statement table (first column holds the row names).

        Returns:
            Dictionary of formatted figures, growth rates, margins and an
            overall assessment. On failure it holds a single error entry.
        """
        analysis = {}

        try:
            df_cleaned = self.clean_financial_df(df)

            # Two periods are needed for any comparison.
            if len(df_cleaned.columns) < 2:
                raise ValueError("비교할 데이터가 충분하지 않습니다 (최소 2개 기간 필요)")

            latest = df_cleaned.columns[-1]
            prev = df_cleaned.columns[-2]

            print(f"  비교 기간: {prev} vs {latest}")

            # Locate the three key rows.
            try:
                row_revenue = self.find_row_name(df_cleaned, ['매출액', '수익(매출액)', '총매출액'])
                row_operating = self.find_row_name(df_cleaned, ['영업이익', '영업이익(손실)', '영업손익'])
                row_net = self.find_row_name(df_cleaned, ['당기순이익', '당기순이익(손실)', '순이익', '당기순손익'])
            except KeyError as e:
                analysis['오류'] = str(e)
                return analysis

            # Figures are reported with the unit suffix '백만원'.
            revenue_current = df_cleaned.loc[row_revenue, latest]
            revenue_previous = df_cleaned.loc[row_revenue, prev]

            operating_current = df_cleaned.loc[row_operating, latest]
            operating_previous = df_cleaned.loc[row_operating, prev]

            net_current = df_cleaned.loc[row_net, latest]
            net_previous = df_cleaned.loc[row_net, prev]

            # Basic figures
            analysis['  매출액 현재'] = f"{revenue_current:,.0f}백만원"
            analysis['  매출액 이전'] = f"{revenue_previous:,.0f}백만원"
            analysis['  매출액 변화'] = '증가' if revenue_current > revenue_previous else '감소'
            analysis['  매출액 증감률'] = self.calculate_growth_rate(revenue_current, revenue_previous)

            analysis['  영업이익 현재'] = f"{operating_current:,.0f}백만원"
            analysis['  영업이익 이전'] = f"{operating_previous:,.0f}백만원"
            analysis['  영업이익 변화'] = '증가' if operating_current > operating_previous else '감소'
            analysis['  영업이익 증감률'] = self.calculate_growth_rate(operating_current, operating_previous)

            analysis['  순이익 현재'] = f"{net_current:,.0f}백만원"
            analysis['  순이익 이전'] = f"{net_previous:,.0f}백만원"
            analysis['  순이익 변화'] = '증가' if net_current > net_previous else '감소'
            analysis['  순이익 증감률'] = self.calculate_growth_rate(net_current, net_previous)

            # Profitability (skipped when revenue is zero)
            if revenue_current != 0:
                operating_margin = (operating_current / revenue_current) * 100
                net_margin = (net_current / revenue_current) * 100

                analysis['  영업이익률'] = f"{operating_margin:.2f}%"
                analysis['  순이익률'] = f"{net_margin:.2f}%"

                # Grade the operating margin
                if operating_margin >= 15:
                    analysis['⭐ 영업이익률 평가'] = '매우 우수'
                elif operating_margin >= 10:
                    analysis['⭐ 영업이익률 평가'] = '우수'
                elif operating_margin >= 5:
                    analysis['⭐ 영업이익률 평가'] = '보통'
                elif operating_margin >= 0:
                    analysis['⭐ 영업이익률 평가'] = '낮음'
                else:
                    analysis['⭐ 영업이익률 평가'] = '적자'

            # Overall assessment: how many of the three figures improved.
            positive_signals = 0
            if revenue_current > revenue_previous:
                positive_signals += 1
            if operating_current > operating_previous:
                positive_signals += 1
            if net_current > net_previous:
                positive_signals += 1

            if positive_signals == 3:
                analysis['  종합평가'] = '매우 긍정적 (3/3 지표 개선)'
            elif positive_signals == 2:
                analysis['  종합평가'] = '긍정적 (2/3 지표 개선)'
            elif positive_signals == 1:
                analysis['  종합평가'] = '혼조 (1/3 지표 개선)'
            else:
                analysis['  종합평가'] = '부정적 (모든 지표 악화)'

            # Guess annual vs quarterly from keywords in the column headers.
            column_names = ' '.join(df_cleaned.columns.astype(str))
            if any(keyword in column_names for keyword in ['년', 'Year', '연간']):
                analysis['  데이터 유형'] = '연간 실적'
            else:
                analysis['  데이터 유형'] = '분기별 실적'

        except Exception as e:
            analysis['❌ 분석 오류'] = str(e)

        return analysis

    def print_analysis(self, ticker: str):
        """Fetch, analyse and print the report for one ticker.

        Args:
            ticker: Stock code.
        """
        print(f"\n{'='*50}")
        print(f"  [{ticker}] 재무제표 분석 보고서")
        print(f"{'='*50}")

        df = self.get_financial_statement(ticker)

        if df is None:
            print("❌ 재무제표 데이터를 가져올 수 없습니다")
            print("   - 종목 코드가 올바른지 확인해주세요")
            print("   - 네트워크 연결 상태를 확인해주세요")
            return

        print(f"✅ 데이터 로드 완료 (행: {len(df)}, 열: {len(df.columns)})")

        analysis_result = self.analyze_financials(df)

        print(f"\n  분석 결과:")
        print("-" * 40)

        for key, value in analysis_result.items():
            print(f"{key}: {value}")

        print(f"\n{'='*50}")


# Usage example
def main():
    """Print the analysis report for a fixed list of tickers, one by one.

    A failure on one ticker is reported and the loop moves on; Ctrl+C stops
    the whole run.
    """
    analyzer = FinancialAnalyzer()

    tickers = [
        "005930",  # Samsung Electronics
        "000660",  # SK hynix
        "035420",  # NAVER
        "005380",  # Hyundai Motor
    ]

    for ticker in tickers:
        try:
            analyzer.print_analysis(ticker)
            print("\n" + "="*60 + "\n")
        except KeyboardInterrupt:
            print("\n사용자에 의해 중단되었습니다.")
            break
        except Exception as e:
            print(f"❌ {ticker} 분석 중 오류: {e}")


if __name__ == "__main__":
    # Single-ticker analysis
    analyzer = FinancialAnalyzer()
    analyzer.print_analysis("005930")  # Samsung Electronics

    # Alternatively, analyse the whole ticker list:
    # main()

재무제표 분석

유키공 — Wed, 6 Aug 2025 21:17:28 +0900

import pandas as pd
import requests

def get_financial_statement(ticker):
    """Fetch the income-statement table for a ticker from Naver Finance.

    Args:
        ticker: Stock code used in the page URL.

    Returns:
        The first table with at least 3 columns whose first column contains
        the exact cell '매출액', or ``None`` when the page cannot be read or no
        such table exists.
    """
    url = f'https://finance.naver.com/item/main.naver?code={ticker}'
    try:
        tables = pd.read_html(url, encoding='euc-kr')
    except Exception:
        # Best-effort loader: any fetch/parse failure means "no data".
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return None

    for table in tables:
        if table.shape[1] >= 3 and '매출액' in table.iloc[:, 0].values:
            return table
    return None

def clean_financial_df(df):
    """Return a cleaned copy of a raw statement table.

    The first column becomes the index, cells equal to '-' are treated as 0,
    and string cells have thousands separators removed before being converted
    to numbers. Non-string cells are left untouched. The caller's frame is not
    modified.

    Args:
        df: Raw statement table.

    Returns:
        New DataFrame indexed by the first column.

    Raises:
        ValueError: If a string cell is not numeric after removing commas.
    """
    def _to_number(value):
        if not isinstance(value, str):
            return value
        text = value.replace(',', '')
        try:
            return int(text)
        except ValueError:
            # Decimal strings such as '12.5' are valid numbers too.
            return float(text)

    # set_index without inplace=True leaves the caller's DataFrame intact.
    cleaned = df.set_index(df.columns[0]).replace('-', '0')
    # Column-wise Series.map avoids the deprecated DataFrame.applymap.
    return cleaned.apply(lambda column: column.map(_to_number))

def analyze_financials(df):
    """Compare the two most recent periods of an income statement.

    Args:
        df: Raw statement table as returned by ``get_financial_statement``.

    Returns:
        Dictionary with increase/decrease flags, growth rates, the operating
        margin with a grade, and a guess at the data type. If anything fails,
        the dictionary contains an '에러' entry describing the failure.
    """
    analysis = {}

    try:
        df_cleaned = clean_financial_df(df)

        # The last two columns are taken as the latest and previous periods.
        latest = df_cleaned.columns[-1]
        prev = df_cleaned.columns[-2]

        # Row names vary (e.g. '영업이익(손실)'), so accept several spellings.
        def get_row_name(frame, candidates):
            for candidate in candidates:
                if candidate in frame.index:
                    return candidate
            raise ValueError(f"다음 항목을 찾을 수 없습니다: {candidates}")

        revenue_row = get_row_name(df_cleaned, ['매출액'])
        operating_row = get_row_name(df_cleaned, ['영업이익', '영업이익(손실)'])
        net_row = get_row_name(df_cleaned, ['당기순이익', '당기순이익(손실)'])

        # Key figures for both periods
        revenue_now = df_cleaned.loc[revenue_row, latest]
        revenue_prev = df_cleaned.loc[revenue_row, prev]

        op_now = df_cleaned.loc[operating_row, latest]
        op_prev = df_cleaned.loc[operating_row, prev]

        net_now = df_cleaned.loc[net_row, latest]
        net_prev = df_cleaned.loc[net_row, prev]

        # Increase / decrease flags
        analysis['매출액 증가 여부'] = '  증가' if revenue_now > revenue_prev else '  감소'
        analysis['영업이익 증가 여부'] = '  증가' if op_now > op_prev else '  감소'
        analysis['당기순이익 증가 여부'] = '  증가' if net_now > net_prev else '  감소'

        # Growth rate in percent; abs() keeps the sign right for a negative base.
        def calc_rate(now, before):
            try:
                if before == 0:
                    return "N/A"
                return f"{((now - before) / abs(before)) * 100:.2f}%"
            except (TypeError, ValueError, ZeroDivisionError):
                # Only arithmetic problems mean "not available"; anything else
                # (e.g. KeyboardInterrupt) must not be hidden.
                return "N/A"

        analysis['매출액 증가율'] = calc_rate(revenue_now, revenue_prev)
        analysis['영업이익 증가율'] = calc_rate(op_now, op_prev)
        analysis['당기순이익 증가율'] = calc_rate(net_now, net_prev)

        # Operating margin and its grade
        if revenue_now != 0:
            op_margin = op_now / revenue_now * 100
            analysis['영업이익률'] = f"{op_margin:.2f}%"

            if op_margin >= 10:
                analysis['영업이익률 평가'] = '✅ 우수'
            elif op_margin >= 5:
                analysis['영업이익률 평가'] = '⚠️ 보통'
            else:
                analysis['영업이익률 평가'] = '❌ 낮음'
        else:
            analysis['영업이익률'] = "N/A"

        # Annual vs quarterly hint, judged from the period columns only.
        # Reading the cleaned frame keeps the row-name column out of the check
        # regardless of whether cleaning mutated ``df``.
        if any("년" in str(col) for col in df_cleaned.columns):
            analysis['데이터 유형'] = '  연간 실적'
        else:
            analysis['데이터 유형'] = '  분기 실적 (추정)'

    except Exception as e:
        analysis['에러'] = f"⚠️ 분석 실패: {str(e)}"

    return analysis

def print_analysis(ticker):
    """Load the statement for ``ticker`` and print each analysis entry."""
    print(f"[{ticker}] 재무 분석 요약")

    df = get_financial_statement(ticker)
    if df is None:
        print("❌ 재무제표 데이터 로딩 실패 또는 해당 종목 없음")
        return

    for k, v in analyze_financials(df).items():
        print(f"{k}: {v}")

# Example run: Samsung Electronics (005930)
if __name__ == "__main__":
    print_analysis("005930")  # put the ticker you want here

df a,b,c컬럼을 groupby 하고 df의 d컬럼의 최빈값을 취하고 동률일경우 min값을 가져옴

유키공 — Fri, 1 Aug 2025 14:13:54 +0900

import pandas as pd

# Sample frame (comment this out when using real data)
data = {
    'a': [1, 1, 1, 2, 2, 2],
    'b': ['x', 'x', 'y', 'y', 'y', 'y'],
    'c': [10, 10, 20, 20, 20, 30],
    'd': [100, 100, 200, 300, 300, 400]
}
df = pd.DataFrame(data)


def _smallest_mode(values):
    """Return the most frequent value; ties resolve to the smallest one."""
    modes = values.mode()
    return None if modes.empty else modes.min()


# Mode of 'd' per (a, b, c) group
grouped_d = df.groupby(['a', 'b', 'c'])['d']
result = grouped_d.agg(_smallest_mode).reset_index()

print(result)

pandas cross join

유키공 — Fri, 1 Aug 2025 14:05:55 +0900

import pandas as pd

# Sample frames
df1 = pd.DataFrame({
    'a': [1, 2, 3],
    'b': ['x', 'y', 'z'],
    'c': [0.1, 0.2, 0.3]
})

df2 = pd.DataFrame({
    'd': [10, 20]
})

# Cross join, method 1: merge on a constant helper key, then drop it
left_keyed = df1.assign(key=1)
right_keyed = df2.assign(key=1)
cross_join = left_keyed.merge(right_keyed, on='key').drop(columns='key')

# Cross join, method 2: built-in how='cross' (pandas 1.2.0+)
cross_join = df1.merge(df2, how='cross')

print(cross_join)

그룹별 최신 유효값으로 결측값 채우기

유키공 — Thu, 31 Jul 2025 22:12:23 +0900

**✅ 기능**: 주어진 데이터프레임에서 지정한 그룹 컬럼 기준으로, **유효한 값이 있는 가장 최신 주차의 데이터**로 결측값을 채웁니다.

** ️ 안전성**: 최신 주차 값이 `NaN`이어도 그 다음 최신 유효값을 자동으로 찾아 처리합니다.

import pandas as pd
import numpy as np

# ------------------------------
# 1. Build the sample data
# ------------------------------
sample_columns = {
    'a': ['x'] * 4 + ['y'] * 2,  # group column 1
    'b': ['u'] * 4 + ['v'] * 2,  # group column 2
    'c': ['p'] * 4 + ['q'] * 2,  # group column 3
    '주차': [202410, 202411, 202412, 202413, 202411, 202412],  # ordering (week) column
    'd': [10, np.nan, 30, np.nan, np.nan, 60],  # target column with gaps
}
df = pd.DataFrame(sample_columns)

print("▶ 원본 데이터:")
print(df)

# ------------------------------
# 2. 결측값 채우기 함수 정의 (안전한 버전)
# ------------------------------
def fillna_with_latest_valid(df, group_cols, week_col, value_col):
    """Fill gaps in ``value_col`` with each group's newest non-missing value.

    "Newest" means the row with the largest ``week_col`` among the rows of the
    group whose ``value_col`` is not missing. Every gap in a group receives
    that same value, regardless of the gap's own week.

    Parameters:
        df (pd.DataFrame): Input frame; it is not modified.
        group_cols (list): Columns that define a group (e.g. ['a', 'b', 'c']).
        week_col (str): Column that orders rows in time (e.g. '주차').
        value_col (str): Column whose missing values are filled (e.g. 'd').

    Returns:
        pd.DataFrame: New frame with ``value_col`` filled; groups without any
        valid value keep their gaps.
    """
    # Step 1: per group, the value belonging to the latest week that has one.
    valid_rows = df.dropna(subset=[value_col])
    by_week = valid_rows.sort_values(week_col)
    newest_per_group = by_week.groupby(group_cols, as_index=False).last()
    lookup = newest_per_group[group_cols + [value_col]].rename(
        columns={value_col: 'latest_val'}
    )

    # Step 2: attach that value to every row of the group and fill the gaps.
    merged = df.merge(lookup, on=group_cols, how='left')
    merged[value_col] = merged[value_col].fillna(merged['latest_val'])
    return merged.drop(columns='latest_val')

# ------------------------------
# 3. Run the function and show the result
# ------------------------------
df_result_safe = fillna_with_latest_valid(
    df,
    group_cols=['a', 'b', 'c'],
    week_col='주차',
    value_col='d'
)

print("\n▶ 안전한 버전 적용 결과:")
print(df_result_safe)

# ------------------------------
# 4. Previous code vs improved code
# ------------------------------
# Point of interest: the gap of group (x, u, p) at week 202411.
print("\n  비교 테이블 (기존 코드 vs 개선 코드):")
comparison = pd.DataFrame({
    # .to_numpy() is required: a Series passed together with ``index=`` is
    # re-aligned to the new week labels, which would turn the whole column
    # into NaN because the Series are indexed 0..5.
    '원본_d': df['d'].to_numpy(),
    '기존코드결과': [10, np.nan, 30, 30, 60, 60],  # hard-coded for illustration: week 202411 stays NaN
    '개선코드결과': df_result_safe['d'].to_numpy()  # week 202411 is filled with 30.0 (latest valid value of the group)
}, index=df['주차'])
print(comparison)

max

유키공 — Thu, 31 Jul 2025 10:50:31 +0900

.max(['d'])

max()는 aggregation 함수인데, 여기에 리스트 ['d']를 넘기면 의미가 없음
→ 이는 Pandas 내부적으로 무시되며, numeric_only=True인 기본 동작으로 numeric 컬럼만 집계합니다.
따라서 d가 숫자형이 아닌 경우나 f만 숫자형이라면 d는 사라집니다.

max

유키공 — Thu, 31 Jul 2025 10:50:30 +0900

.max(['d'])

max()는 aggregation 함수인데, 여기에 리스트 ['d']를 넘기면 의미가 없음
→ 이는 Pandas 내부적으로 무시되며, numeric_only=True인 기본 동작으로 numeric 컬럼만 집계합니다.
따라서 d가 숫자형이 아닌 경우나 f만 숫자형이라면 d는 사라집니다.

dataframe 타입지정

유키공 — Wed, 30 Jul 2025 17:50:50 +0900

def process_dataframe_optimized(dict_df_types, df):
    """Return a copy of ``df`` with selected columns cast to the requested types.

    Args:
        dict_df_types: Mapping of column name to one of 'int', 'float',
            'bool', 'datetime', 'string' or 'category'.
        df: Source frame; it is not modified.

    Returns:
        New DataFrame. Columns missing from ``df`` and unknown type names are
        skipped silently.
    """
    def _to_int(s):
        # Unparsable values become 0.
        return pd.to_numeric(s, errors='coerce').fillna(0).astype('int32')

    def _to_float(s):
        return pd.to_numeric(s, errors='coerce').fillna(0).astype('float32')

    def _to_bool(s):
        # Only 'true', 't' and '1' (case-insensitive) count as True.
        return s.astype(str).str.lower().isin(['true', 't', '1'])

    def _to_datetime(s):
        return pd.to_datetime(s, errors='coerce')

    def _to_string(s):
        return s.astype('string').fillna('')  # gaps become empty strings

    def _to_category(s):
        return s.fillna('').astype('category')  # gaps become empty strings

    type_handlers = {
        'int': _to_int,
        'float': _to_float,
        'bool': _to_bool,
        'datetime': _to_datetime,
        'string': _to_string,
        'category': _to_category,
    }

    # Only columns that actually exist in the frame are converted.
    present = set(df.columns)

    converted = {}
    for col, dtype in dict_df_types.items():
        if col not in present:
            continue
        handler = type_handlers.get(dtype)
        if handler is None:
            continue
        converted[col] = handler(df[col])

    # A single assign() call swaps in all converted columns at once.
    return df.assign(**converted)

flutter clean

유키공 — Sat, 26 Jul 2025 16:36:16 +0900

cd ~/StudioProjects/stock-flutter
flutter clean
flutter pub get
cd android
./gradlew --stop
./gradlew clean
cd ..
flutter run -d emulator-5554

컬럼 유무 및 NaN 검사를 고려한 안전한 최대값 추출

유키공 — Sat, 26 Jul 2025 06:36:15 +0900

# Fragment: `df` and `np` are expected to exist already.
df1 = df.groupby(by=['a'], as_index=False).max()  # or any other aggregation
# Variant A: NaN as soon as df1['a'] contains a single NaN, otherwise the maximum.
df['a'] = np.nan if df1['a'].isnull().any() else df1['a'].max()

# Variant B: maximum when column 'a' exists and has at least one non-NaN value, otherwise np.nan
df['a'] = df1['a'].max() if 'a' in df1.columns and df1['a'].notna().any() else np.nan

EX)

import pandas as pd
import numpy as np

# Three inputs for the same one-liner:
#   1. column 'a' exists and holds at least one non-NaN value
#   2. column 'a' exists but every value is NaN
#   3. column 'a' does not exist at all
cases = [
    ("예제 1 결과:\n", pd.DataFrame({'a': [3, 7, np.nan]})),
    ("\n예제 2 결과:\n", pd.DataFrame({'a': [np.nan, np.nan]})),
    ("\n예제 3 결과:\n", pd.DataFrame({'b': [1, 2, 3]})),
]

for label, df1 in cases:
    df = pd.DataFrame(index=range(3))  # empty 3-row frame that receives the result

    # The membership test runs first, so df1['a'] is never touched when the
    # column is missing.
    df['a'] = df1['a'].max() if 'a' in df1.columns and df1['a'].notna().any() else np.nan
    print(label, df)

전체에 null이 하나라도 있으면 np.nan, 아니면 최대값

유키공 — Fri, 25 Jul 2025 22:54:24 +0900

시작

# ❌ The original, broken code (raises an error)
# NOTE: `x.isnull.any()` is missing the call parentheses after `isnull`, so it
# looks up `.any` on the bound method instead of on the boolean result.
df['a'] = df1['a'].agg(lambda x: np.nan if x.isnull.any() else x.max()).reset_index(drop=True)

# Goal: "np.nan if the column contains any null at all, otherwise the maximum"
# ✅ Correct and concise version
df['a'] = np.nan if df1['a'].isnull().any() else df1['a'].max()
# Verdict: a conditional expression plus scalar-to-column assignment replaces
# the agg() chain above.

결론 : 최종선택

# Final choice: tolerate partial NaNs, fall back to np.nan otherwise.
if 'a' in df1.columns:
    # Maximum when at least one value is not NaN, otherwise np.nan
    df['a'] = df1['a'].max() if df1['a'].notna().any() else np.nan
else:
    # Column missing in df1
    df['a'] = np.nan

=========결론 최종선택까지 풀이과정==================================================

✅ "지금 구조가 실무적으로 합리적입니다."

너무 풀어서 복잡하게 쓸 필요도 없고,

너무 압축해서 읽기 어렵게 만들 필요도 없습니다.

✅ 장점

간결하고 한눈에 로직이 보임

컬럼 존재 여부 + NaN 체크까지 한 줄에 처리

⚠️ 단점

df1['a'].isnull().any()는 "하나라도 NaN이면 무조건 NaN 반환"

→ 즉, 값이 섞여 있어도 무시함

예: [1, 2, NaN]이면 결과는 np.nan (❌ 실무에서 손해날 수 있음)

# Strict rule: "np.nan if the column contains any null at all, otherwise the maximum"
if 'a' in df1.columns:
    df['a'] = np.nan if df1['a'].isnull().any() else df1['a'].max()
else:
    # Column missing in df1
    df['a'] = np.nan

✅ 버전 1 — 간결한 3항 연산자 사용

✅ 장점

훨씬 정확하고 실무 친화적

df1['a'] = [NaN, NaN, 5] 같은 경우 → 5를 반환 (정상)

df1['a'] = [NaN, NaN, NaN] → np.nan (정상)

⚠️ 단점

코드가 약간 더 길어짐 (하지만 논리 분기상 더 명확)

# Lenient rule, spelled out with nested if/else.
if 'a' in df1.columns:
    if df1['a'].notna().any():  # at least one value is not NaN
        df['a'] = df1['a'].max()
    else:  # every value is NaN
        df['a'] = np.nan
else:  # the column itself is missing
    df['a'] = np.nan

# Strict rule, spelled out with nested if/else: any NaN at all gives np.nan.
if 'a' in df1.columns:
    if df1['a'].isnull().any():
        df['a'] = np.nan
    else:
        df['a'] = df1['a'].max()
else:
    df['a'] = np.nan

# Strict rule as a one-liner (column check via df1.columns).
df['a'] = df1['a'].max() if 'a' in df1.columns and not df1['a'].isnull().any() else np.nan

# Same one-liner, testing membership on the DataFrame itself.
df['a'] = df1['a'].max() if 'a' in df1 and not df1['a'].isnull().any() else np.nan

# What the developer got confused about:
# 1. Misunderstanding how agg() is used
# Wrong assumption: agg() applies the function to each row
df1['a'].agg(lambda x: ...)  # per the author: it is applied once to the whole Series

# Intended use: per-group or multiple aggregations
df.groupby('group')['a'].agg(lambda x: ...)  # this is the proper usage
# 2. Misuse of reset_index(drop=True)
# Applying reset_index to a scalar result (pointless)
single_value.reset_index(drop=True)  # a scalar has no index

# Intended use: resetting the index of a DataFrame/Series
df.reset_index(drop=True)  # this is the proper usage
# 3. Not knowing about broadcasting
# The developer did not know the right way to assign a scalar to a DataFrame column
df['a'] = single_value  # pandas broadcasts the scalar automatically
# or
df['a'] = np.full(len(df), single_value) 
# (the explicit way)
# The confusion probably comes from experience with other languages or
# libraries, or from combining pandas methods on a "this should work" hunch
# without learning exactly how they behave.
# This kind of mistake is a fairly common pattern among pandas beginners!

길이 불일치

유키공 — Fri, 25 Jul 2025 16:33:03 +0900

# Code under discussion, kept exactly as originally written.
# NOTE: `x.isnull.any()` lacks the call parentheses after `isnull`.
df['a'] = df1['a'].agg(lambda x: np.nan if x.isnull.any() else x.max()).reset_index(drop=True)

Columns must be same length as key 에러

원래 코드 `df['a'] = df1['a'].agg(...)`에서 발생한 에러는 `agg()`가 단일 값을 반환하기 때문에 `df['a']`의 길이와 맞지 않아서 발생한 문제

수정

import pandas as pd
import numpy as np

# 1. 예제 데이터 생성
df = pd.DataFrame({'other_col': [10, 20, 30]})  # 타겟 DataFrame (3행)
df1 = pd.DataFrame({'a': [1, np.nan, 3]})      # 소스 DataFrame (NaN 포함, 3행)

# 2. 조건에 따라 채울 값 계산
fill_value = np.nan if df1['a'].isnull().any() else df1['a'].max()

# 3. 전체 행에 동일 값 할당
df['a'] = np.full(len(df), fill_value)

# 4. 결과 출력
print("=== 최종 결과 ===")
print(df)
print("\n=== 검증 ===")
print(f"df['a'] 길이: {len(df['a'])}, df 행 개수: {len(df)}")
print(f"할당된 값: {fill_value}")

이 코드의 핵심 원리는

원래 코드에서 발생한 "Columns must be same length as key" 에러의 원인은 다음과 같습니다:

```python
df['a'] = df1['a'].agg(lambda x: np.nan if x.isnull.any() else x.max()).reset_index(drop=True)
```

이 코드의 문제점들:

1. **`agg()` 결과가 스칼라**: `agg()`는 전체 시리즈에 대해 하나의 집계값(스칼라)을 반환합니다
2. **`reset_index(drop=True)`의 무의미함**: 스칼라 값에는 인덱스가 없어서 이 메서드가 의미없습니다
3. **길이 불일치**: 스칼라 값을 DataFrame의 컬럼(여러 행)에 할당하려고 해서 길이가 맞지 않습니다

**수정된 코드의 장점:**

```python
# 조건부 값 계산 (명확하고 읽기 쉬움)
fill_value = np.nan if df1['a'].isnull().any() else df1['a'].max()

# 전체 행에 동일한 값으로 채우기
df['a'] = np.full(len(df), fill_value)
```

1. **명확한 로직**: 조건 확인과 값 할당을 분리
2. **효율적**: `np.full()`로 한 번에 모든 행을 같은 값으로 채움
3. **에러 없음**: 길이가 정확히 맞춰짐

이런 식으로 집계 결과를 DataFrame 전체에 브로드캐스팅할 때는 스칼라 값을 명시적으로 처리하는 것이 가장 안전하고 명확한 방법입니다.

카테고리타입

유키공 — Fri, 25 Jul 2025 09:53:01 +0900

import pandas as pd

def add_blank_category_or_fillna(series):
    """Make the empty string usable as the "missing" marker of a Series.

    For a categorical Series, ``''`` is registered as a category when it is
    not one already; the values themselves (including missing ones) are left
    untouched.  For every other dtype, missing values are replaced by ``''``.

    Args:
        series: The pandas Series to process.

    Returns:
        A new Series with the adjustment applied, or the input object itself
        when nothing had to change or an exception was caught.
    """
    try:
        # Checking the dtype with isinstance replaces
        # pd.api.types.is_categorical_dtype, which is deprecated in recent
        # pandas releases.
        if isinstance(series.dtype, pd.CategoricalDtype):
            if '' in series.cat.categories:
                return series
            return series.cat.add_categories([''])
        return series.fillna('')
    except Exception as e:
        # Best-effort helper: report the problem and hand the input back.
        print(f"예외 발생: {e}")
        return series

# category 타입
s1 = pd.Series(['a', 'b', None], dtype='category')
s1 = add_blank_category_or_fillna(s1)
print(s1.cat.categories)  # ['a', 'b', '']

# object 타입
s2 = pd.Series(['x', None, 'y'])  # dtype = object
s2 = add_blank_category_or_fillna(s2)
print(s2)  # NaN → ''로 대체됨

Python으로 CSV → DuckDB 저장

유키공 — Wed, 16 Jul 2025 10:46:14 +0900

import duckdb

# DuckDB DB 파일에 연결 (없으면 생성됨)
con = duckdb.connect("mydata.duckdb")

# CSV 파일 읽어서 테이블로 저장
con.execute("""
CREATE TABLE my_table AS
SELECT * FROM read_csv_auto('sample.csv')
""")

import pandas as pd
import duckdb

df = pd.read_csv("sample.csv")

# DuckDB에 저장
con = duckdb.connect("mydata.duckdb")
con.register("df_view", df)

# DataFrame을 테이블로 저장
con.execute("CREATE TABLE my_table AS SELECT * FROM df_view")

error

유키공 — Tue, 15 Jul 2025 13:22:55 +0900

import traceback

try:
    # 에러 유발 코드 (예시: 0으로 나누기)
    x = 1 / 0
except Exception as e:
    print("❗ 에러 발생:", str(e))
    traceback.print_exc()  # 전체 에러 트레이스 출력

import numpy as np

# 인덱스가 다른 위치 찾기
diff_indices = np.where(df.index != df1.index)[0]

# 결과 출력
print("인덱스가 다른 위치:", diff_indices)
print("\n--- df의 인덱스 ---")
print(df.index[diff_indices])
print("\n--- df1의 인덱스 ---")
print(df1.index[diff_indices])

날짜결측치처리

유키공 — Thu, 10 Jul 2025 08:48:02 +0900

first_date = df.get('날짜열', pd.Series([pd.NaT])).iloc[0]

pandas concat

유키공 — Wed, 9 Jul 2025 10:37:39 +0900

import pandas as pd

# 예시: 둘 다 빈 DataFrame이지만 컬럼이 다름
df1 = pd.DataFrame(columns=['a', 'b'])
df2 = pd.DataFrame(columns=['b', 'c'])

#   모든 컬럼의 합집합 구하기
all_columns = sorted(set(df1.columns).union(set(df2.columns)))

#   컬럼 맞춰주기 (reindex로 없으면 NaN 채움)
df1 = df1.reindex(columns=all_columns)
df2 = df2.reindex(columns=all_columns)

#   concat
result = pd.concat([df1, df2], ignore_index=True)

print(result)

문자열 datetime

유키공 — Tue, 8 Jul 2025 14:07:42 +0900

import pandas as pd

df = pd.DataFrame({
    'a': ['2024-01-01', '2024-01-02', None]
})

# 문자열 → datetime으로 변환
df['a'] = pd.to_datetime(df['a'], errors='coerce')

# 첫 번째 값 가져오기
first_val = df['a'].iloc[0]  # 또는 .iat[0]

print(first_val)

API 요청을 통해 res를 받아오고, 그 안에 "signal" 값을 추출

유키공 — Wed, 2 Jul 2025 21:54:43 +0900

import requests

# Script: POST a stock-signal request and print the "signal" field of the reply.
url = "https://your-api-url.com/stock/signal"
params = {
    "stock_name": "극동유화",
    "max_news": 50
}
headers = {
    "Authorization": "Bearer YOUR_API_KEY"  # if required
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.post(url, json=params, headers=headers, timeout=10)

# On HTTP 200 the body is parsed as JSON.
if response.status_code == 200:
    res = response.json()
    signal = res.get("signal")  # or res["signal"]
    print("  Signal:", signal)
else:
    print("❌ API 호출 실패:", response.status_code, response.text)

출력 예시:

Signal: buy

추가 팁:

res.get("signal")을 사용하면 "signal" 키가 없을 때도 에러가 나지 않고 None을 반환합니다.

컬럼 값 비교

유키공 — Fri, 27 Jun 2025 12:12:02 +0900

import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import PatternFill

#   파일 경로
file_path = '원본.xlsx'

# 1. pandas로 데이터 읽기
df = pd.read_excel(file_path)

# 기준 데이터 (a, b)
left = df[['a', 'b']].dropna(subset=['a']).copy()
left['a_clean'] = left['a'].astype(str).str.strip().str.lower()
left['b_clean'] = left['b'].astype(str).str.strip().str.lower()

# 비교 대상 데이터 (c, d)
right = df[['c', 'd']].dropna(subset=['c']).copy()
right['c_clean'] = right['c'].astype(str).str.strip().str.lower()
right['d_clean'] = right['d'].astype(str).str.strip().str.lower()

# 2. 키 기준으로 내부 조인 (모든 조합 비교)
merged = pd.merge(left, right, left_on='a_clean', right_on='c_clean', how='inner')

# 3. 비교 결과
# key: c_clean
# 값이 일치하는 c_clean은 흰색, 불일치하는 c_clean은 노란색 대상
color_map = {}  # key = (c값, d값), value = 'white' or 'yellow'

for _, row in merged.iterrows():
    key = (row['c'], row['d'])  # 실제 표시용 키
    if row['b_clean'] == row['d_clean']:
        color_map[key] = 'white'
    else:
        color_map[key] = 'yellow'

# 4. openpyxl 로드
wb = load_workbook(file_path)
ws = wb.active

# 5. 색상 정의
fill_yellow = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")
fill_none = PatternFill(fill_type=None)  # 색 제거용

# 6. C/D 열에서 색칠
for row in range(2, ws.max_row + 1):
    c_val = ws[f"C{row}"].value
    d_val = ws[f"D{row}"].value
    key = (c_val, d_val)

    if key in color_map:
        if color_map[key] == 'yellow':
            ws[f"D{row}"].fill = fill_yellow
        else:
            ws[f"D{row}"].fill = fill_none  # 흰색 처리

# 7. 저장
wb.save(file_path)
print("✅ 완료: 중복 키 처리 포함해 D 셀 색칠/색 제거 완료")

안전하게 삭제하는 코드 예시

유키공 — Thu, 26 Jun 2025 18:38:24 +0900

import os
import time
from openpyxl import load_workbook

temp_path = "temp.xlsx"

# Script: open the temporary workbook, then always close it and delete the file.
wb = None  # stays None when load_workbook() fails, so cleanup can tell
try:
    wb = load_workbook(temp_path)
    ws = wb.active
    # ... work ...

finally:
    # Close the workbook only if it was actually opened. A failure to close is
    # reported instead of being silently swallowed, and deletion is still
    # attempted afterwards.
    if wb is not None:
        try:
            wb.close()
        except Exception as e:
            print(f"⚠️ 워크북 닫기 실패: {e}")

    # Delete the temp file, retrying while it is reported as locked.
    max_retries = 5
    retry_delay = 0.5  # seconds
    for attempt in range(1, max_retries + 1):
        try:
            os.remove(temp_path)
            print(f"✅ 임시 파일 삭제 완료: {temp_path}")
            break
        except PermissionError:
            print(f"⚠️ 삭제 실패 (시도 {attempt}/{max_retries}) - 파일이 열려 있거나 사용 중")
            time.sleep(retry_delay)
        except Exception as e:
            print(f"❗ 알 수 없는 오류: {e}")
            break
    else:
        # The loop ran out of attempts without a successful delete or a break.
        print(f"❌ 최종 실패: 파일이 계속 사용 중입니다. 직접 닫은 후 수동 삭제하세요 → {temp_path}")

방법 1: wb.close() 후 삭제 (가장 확실함)

from openpyxl import load_workbook
import os
import time
import gc

temp_path = "temp.xlsx"

# temp.xlsx를 열어 처리
wb = load_workbook(temp_path)
ws = wb.active

# ... 원하는 작업 수행 ...

# 반드시 닫기
wb.close()
del wb
gc.collect()  # 가비지 컬렉션으로 완전 해제

# 잠시 대기 후 삭제 시도 (잠금 해제 지연 방지)
time.sleep(0.5)

# 삭제 시도
try:
    os.remove(temp_path)
    print("임시 파일 삭제 완료")
except PermissionError:
    print("❗ 파일이 열려 있거나 사용 중입니다. 수동으로 닫은 후 다시 시도하세요.")

import os
import time
import gc

file_path = "경로/파일.xlsx"

# 파일 작업 이후 객체 해제
try:
    wb.save(file_path)
    wb.close()  # 명시적으로 닫기
    del wb
    gc.collect()  # 가비지 컬렉션으로 파일 잠금 해제 유도
except Exception as e:
    print("파일 저장 오류:", e)

# 삭제 시도 (엑셀이 열려 있으면 실패)
try:
    os.remove(file_path)
    print("삭제 성공")
except PermissionError:
    print("⚠️ 파일이 열려 있어서 삭제할 수 없습니다. 엑셀 창을 닫고 다시 시도하세요.")

git merge

유키공 — Thu, 26 Jun 2025 13:56:50 +0900

# 1. 현재 어떤 브랜치에 있는지 확인
git branch

# 2. main 브랜치로 이동
git checkout main

# 3. 최신 상태로 업데이트 (필요 시)
git pull origin main

# 4. a 브랜치를 main에 머지
git merge a

# 5. 원격 저장소에 반영
git push origin main

None type replace

유키공 — Mon, 23 Jun 2025 13:49:17 +0900

if text is not None:
    text = text.replace("a", "b")
else:
    text = ""  # 또는 기본값 설정

화면보호기

유키공 — Fri, 13 Jun 2025 08:12:56 +0900

from pynput import keyboard, mouse
from pynput.keyboard import Controller, Key
import threading
import time
from datetime import datetime
import ctypes
import os

# ✅ 로그 저장 기본 디렉토리 (원하는 경로로 변경하세요)
LOG_BASE_DIR = r"D:/logs"

# 설정
IDLE_TIME_LIMIT = 300  # 5분 이상 비활동 시
CHECK_INTERVAL = 10    # 활동 체크 주기
keyboard_controller = Controller()
last_activity_time = time.time()

# 로그 기록 함수
def write_log(message):
    """Print a timestamped message and append it to the current day's log file.

    The file is ``LOG_BASE_DIR/<YYYY>/<MM>/<DD>.txt``; missing directories are
    created on demand.

    Args:
        message: Text to record after the ``[timestamp]`` prefix.
    """
    stamp = datetime.now()
    entry = f"[{stamp:%Y-%m-%d %H:%M:%S}] {message}"

    # One sub-folder per year and month, one file per day.
    folder = os.path.join(LOG_BASE_DIR, f"{stamp:%Y}", f"{stamp:%m}")
    os.makedirs(folder, exist_ok=True)
    target = os.path.join(folder, f"{stamp:%d}.txt")

    print(entry)
    with open(target, "a", encoding="utf-8") as log_file:
        log_file.write(f"{entry}\n")

# 사용자 입력 감지 핸들러
def on_input_activity(event=None, *extra):
    """Record "now" as the time of the most recent user input.

    This one handler is registered for the keyboard ``on_press`` callback and
    for the mouse ``on_move`` / ``on_click`` / ``on_scroll`` callbacks, which
    are invoked with different numbers of positional arguments.  All arguments
    are accepted and ignored so the handler works for every listener.

    Args:
        event: First callback argument (unused).
        *extra: Any further callback arguments (unused).
    """
    global last_activity_time
    last_activity_time = time.time()

# 화면보호기 방지를 위한 Shift 입력
def prevent_sleep_with_key() -> None:
    """Log the event, then press and release Shift via the keyboard controller."""
    write_log("  활동 없음: Shift 입력 시뮬레이션")
    keyboard_controller.press(Key.shift)
    time.sleep(0.1)  # hold the key for 0.1 s before releasing it
    keyboard_controller.release(Key.shift)

# 화면 잠금
def lock_workstation() -> None:
    """Log the action, then call ``user32.LockWorkStation`` through ctypes."""
    write_log("  화면 잠금 실행")
    ctypes.windll.user32.LockWorkStation()

# 시스템 종료
def shutdown_system() -> None:
    """Log the action, then run the ``shutdown /s /t 0`` command via ``os.system``."""
    write_log("⏹️ 시스템 종료 명령 실행")
    os.system("shutdown /s /t 0")

# 시스템 재부팅
def reboot_system() -> None:
    """Log the action, then run the ``shutdown /r /t 0`` command via ``os.system``."""
    write_log("  시스템 재부팅 명령 실행")
    os.system("shutdown /r /t 0")

# 활동 감지 쓰레드
def monitor_idle():
    """Poll forever and simulate a Shift key press while the user is idle.

    Every ``CHECK_INTERVAL`` seconds the time elapsed since
    ``last_activity_time`` is compared against ``IDLE_TIME_LIMIT``; once the
    limit is reached, ``prevent_sleep_with_key()`` is called.  Never returns.
    """
    while True:
        if time.time() - last_activity_time >= IDLE_TIME_LIMIT:
            prevent_sleep_with_key()
        time.sleep(CHECK_INTERVAL)

# 시간 기반 이벤트 실행
def scheduled_actions():
    """Run the clock-driven actions in an endless 5-second polling loop.

    * 11:00: lock the workstation.
    * 17:00: reboot on Monday-Thursday, shut down on Friday; nothing is
      executed on other days.
    * 00:00: forget which actions already ran so they can fire again.

    Each action fires at most once until the 00:00 reset.  Never returns.
    """
    done_today = set()
    while True:
        moment = datetime.now()
        hhmm = f"{moment:%H:%M}"
        day_of_week = moment.weekday()  # 0 = Monday ... 4 = Friday

        # Lock the screen at 11 a.m.
        if hhmm == "11:00" and "lock" not in done_today:
            lock_workstation()
            done_today.add("lock")

        # Shut down or reboot at 5 p.m.
        if hhmm == "17:00" and "shutdown" not in done_today:
            if day_of_week == 4:  # Friday
                write_log("⏹️ 금요일: 시스템 종료 예정")
                shutdown_system()
            elif day_of_week < 4:  # Monday to Thursday
                write_log("  월~목: 시스템 재부팅 예정")
                reboot_system()
            done_today.add("shutdown")

        if hhmm == "00:00":
            done_today.clear()

        time.sleep(5)

# 입력 리스너 시작
keyboard.Listener(on_press=on_input_activity).start()
mouse.Listener(
    on_move=on_input_activity,
    on_click=on_input_activity,
    on_scroll=on_input_activity
).start()

# 쓰레드 실행
threading.Thread(target=monitor_idle, daemon=True).start()
threading.Thread(target=scheduled_actions, daemon=True).start()

write_log("✅ 활동 감지 + 자동 잠금/재부팅/종료 프로그램 시작됨 (Ctrl+C로 종료 가능)")

try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    write_log("  프로그램 수동 종료됨")

pyinstaller --noconsole --onefile idle_guard.py

cProfile

유키공 — Thu, 12 Jun 2025 14:38:02 +0900

python -m cProfile -o profile_results.prof your_script.py

python -m pstats profile_results.prof

numba

유키공 — Thu, 12 Jun 2025 10:47:18 +0900

import numpy as np
import pandas as pd
from numba import njit

@njit
def numba_select(condlist, choicelist, default=0):
    """Pick values element-wise from ``choicelist`` according to ``condlist``.

    Args:
        condlist: Sequence of boolean arrays; ``condlist[0].shape`` fixes the
            shape of the output.
        choicelist: Sequence of value arrays, paired by position with
            ``condlist``.
        default: Value used where no condition is true.

    Returns:
        A NumPy array holding, at each position, the choice belonging to the
        first condition that is true there, or ``default``.
    """
    # NOTE(review): whether reversed()/zip() over Python lists compiles in
    # numba's nopython mode was not verified here; confirm against the
    # installed numba version.
    # Start with every position set to the default value.
    output = np.full(condlist[0].shape, default)
    # Walk the pairs last-to-first so that earlier conditions overwrite later
    # ones, i.e. the first matching condition decides the value.
    for cond, choice in zip(reversed(condlist), reversed(choicelist)):
        output = np.where(cond, choice, output)
    return output  # NumPy array

# 예시 데이터
condlist = [np.array([True, False, False]), np.array([False, True, False])]
choicelist = [np.array([1, 1, 1]), np.array([2, 2, 2])]

# Numba 함수 실행
result_array = numba_select(condlist, choicelist, default=0)

# NumPy 배열 → Pandas DataFrame 변환
df = pd.DataFrame({"result": result_array})
print(df)

np.select 멀티프로세싱 적용

유키공 — Thu, 12 Jun 2025 08:30:19 +0900

import numpy as np
import pandas as pd
import multiprocessing as mp
from functools import partial

def process_chunk(df_chunk, conditions, choices, default):
    """Apply ``np.select`` to one chunk and return a Series on the chunk's index.

    Args:
        df_chunk: The slice of the DataFrame this call is responsible for.
        conditions: Boolean arrays restricted to the rows of ``df_chunk``.
        choices: Scalars, or arrays restricted to the rows of ``df_chunk``.
        default: Value used where no condition is true.

    Returns:
        pd.Series with one value per row of ``df_chunk``.
    """
    result = np.select(conditions, choices, default=default)
    return pd.Series(result, index=df_chunk.index)


def _rows_between(values, start, stop, n_rows):
    """Return rows ``start:stop`` of a row-aligned array-like; pass scalars through."""
    if isinstance(values, (pd.Series, pd.DataFrame)):
        return values.iloc[start:stop].to_numpy()
    if np.ndim(values) >= 1 and len(values) == n_rows:
        return np.asarray(values)[start:stop]
    return values


def parallel_select(df, conditions, choices, default='default', num_processes=None):
    """Evaluate ``np.select`` over ``df`` in parallel worker processes.

    The rows are split positionally into up to ``num_processes`` contiguous
    chunks.  Each worker receives only its own slice of every condition (and
    of every array-like choice), so the result of each chunk has exactly as
    many values as the chunk has rows.

    Args:
        df: DataFrame whose index labels the result.
        conditions: Boolean Series/arrays, one value per row of ``df``.
        choices: One entry per condition; a scalar or a per-row Series/array.
        default: Value used where no condition is true.
        num_processes: Number of worker processes; defaults to the CPU count.

    Returns:
        pd.Series aligned with ``df.index``.

    Raises:
        ValueError: If ``num_processes`` is smaller than 1.
    """
    if num_processes is None:
        num_processes = mp.cpu_count()
    if num_processes < 1:
        raise ValueError("num_processes must be at least 1")

    n_rows = len(df)
    edges = np.linspace(0, n_rows, num_processes + 1, dtype=int)

    # Build one self-contained task per non-empty chunk.
    tasks = []
    for start, stop in zip(edges[:-1], edges[1:]):
        if start == stop:
            continue
        tasks.append((
            df.iloc[start:stop],
            [_rows_between(cond, start, stop, n_rows) for cond in conditions],
            [_rows_between(choice, start, stop, n_rows) for choice in choices],
            default,
        ))

    if not tasks:
        # Empty frame: nothing to distribute, evaluate in-process.
        return process_chunk(df, conditions, choices, default)

    with mp.Pool(min(num_processes, len(tasks))) as pool:
        results = pool.starmap(process_chunk, tasks)

    # Merge the per-chunk results back into one Series.
    return pd.concat(results)

# 대용량 데이터 생성 (1000만 행)
df = pd.DataFrame({
    'col1': np.random.randint(0, 100, 10_000_000),
    'col2': np.random.choice(['A','B','C'], 10_000_000),
    'col3': np.random.randn(10_000_000)
})

# 복잡한 조건 정의
conditions = [
    (df['col1'] > 50) & (df['col2'] == 'A'),
    (df['col1'] < 20) | (df['col3'].abs() > 2),
    df['col2'].isin(['B','C'])
]

choices = ['High A', 'Low or Outlier', 'B or C']

# 멀티프로세싱 적용
df['category'] = parallel_select(df, conditions, choices, default='Other')

메모리측정

유키공 — Wed, 11 Jun 2025 12:24:57 +0900

import pandas as pd

def report_memory_usage(df: pd.DataFrame, sort: bool = True, top: int = None) -> pd.DataFrame:
    """
    Report the deep memory usage of each column of a DataFrame in GB.

    The index is excluded from the report.

    Args:
        df (pd.DataFrame): DataFrame to measure.
        sort (bool): Sort rows by memory usage, largest first (default: True).
        top (int): Keep only the first N rows of the report
            (default: None, meaning all columns).

    Returns:
        pd.DataFrame: One row per column with ``column``, ``memory_gb`` and
        ``dtype``.  The printed total is the sum over the rows that remain
        after ``top`` has been applied.
    """
    # index=False leaves the index out up front. Dropping the 'Index' label
    # afterwards would also remove a real column that happens to be named
    # 'Index'.
    usage = df.memory_usage(index=False, deep=True) / 1024**3  # GB

    usage_df = pd.DataFrame({
        'column': usage.index,
        'memory_gb': usage.values,
        # df.dtypes is in column order, the same order as `usage`, and also
        # works when column names are duplicated.
        'dtype': df.dtypes.values,
    })

    if sort:
        usage_df = usage_df.sort_values(by='memory_gb', ascending=False)

    if top:
        usage_df = usage_df.head(top)

    usage_df.reset_index(drop=True, inplace=True)
    total = usage_df['memory_gb'].sum()
    print(f"  Total memory usage (columns only): {total:.4f} GB")

    return usage_df

df = pd.DataFrame({
    'id': range(10_000_000),
    'name': ['apple'] * 10_000_000,
    'value': [3.14] * 10_000_000
})

mem_report = report_memory_usage(df)
print(mem_report)

  Total memory usage: 0.5584 GB

     column  memory_gb     dtype
0      name     0.3810    object
1        id     0.0763      int64
2     value     0.0763    float64

def report_memory_usage(df: pd.DataFrame, sort: bool = True, top: int = None, df_name: str = None) -> pd.DataFrame:
    """
    Report the deep memory usage of each column of a DataFrame in GB.

    The index is excluded from the report.

    Args:
        df (pd.DataFrame): DataFrame to measure.
        sort (bool): Sort rows by memory usage, largest first (default: True).
        top (int): Keep only the first N rows of the report
            (default: None, meaning all columns).
        df_name (str): Name shown in the printed total (default: None).

    Returns:
        pd.DataFrame: One row per column with ``column``, ``memory_gb`` and
        ``dtype``.  The printed total is the sum over the rows that remain
        after ``top`` has been applied.
    """
    # index=False leaves the index out up front. Dropping the 'Index' label
    # afterwards would also remove a real column that happens to be named
    # 'Index'.
    usage = df.memory_usage(index=False, deep=True) / 1024**3  # GB

    usage_df = pd.DataFrame({
        'column': usage.index,
        'memory_gb': usage.values,
        # df.dtypes is in column order, the same order as `usage`, and also
        # works when column names are duplicated.
        'dtype': df.dtypes.values,
    })

    if sort:
        usage_df = usage_df.sort_values(by='memory_gb', ascending=False)

    if top:
        usage_df = usage_df.head(top)

    usage_df.reset_index(drop=True, inplace=True)
    total = usage_df['memory_gb'].sum()

    if df_name:
        print(f"  Total memory usage of '{df_name}' (columns only): {total:.4f} GB")
    else:
        print(f"  Total memory usage (columns only): {total:.4f} GB")

    return usage_df

Type 변경

유키공 — Mon, 9 Jun 2025 14:52:24 +0900

cols = ['col1', 'col2', 'col3']  # category로 바꿀 컬럼 리스트
df[cols] = df[cols].apply(lambda x: x.astype('category'))

import pandas as pd
import numpy as np

# 예시 데이터 생성
df = pd.DataFrame({
    'col1': np.random.choice(['apple', 'banana', 'cherry'], 1_000_000),
    'col2': np.random.choice(['red', 'green', 'blue'], 1_000_000),
    'col3': np.random.choice(['small', 'medium', 'large'], 1_000_000)
})

#   메모리 사용량 (변환 전)
print("변환 전 메모리 사용량:")
print(df.memory_usage(deep=True))
print("총합:", df.memory_usage(deep=True).sum() / 1024**2, "MB")

#   category 변환
df = df.astype({col: 'category' for col in ['col1', 'col2', 'col3']})

#   메모리 사용량 (변환 후)
print("\n변환 후 메모리 사용량:")
print(df.memory_usage(deep=True))
print("총합:", df.memory_usage(deep=True).sum() / 1024**2, "MB")

join

유키공 — Mon, 2 Jun 2025 13:40:03 +0900

import pandas as pd

df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['x', 'y'])
df2 = pd.DataFrame({'A': [5, 6], 'C': [7, 8]}, index=['x', 'y'])

# Left join with drop=False
result = df1.join(df2.set_index('A', drop=False), 
                 how='left',  # 명시적으로 left join 지정
                 lsuffix='_left', 
                 rsuffix='_right')

print(result)

merge

유키공 — Wed, 28 May 2025 16:04:23 +0900

from concurrent.futures import ThreadPoolExecutor
import numpy as np
import pandas as pd

def safe_parallel_merge(df1, df2, left_key, right_key=None, n_partitions=4, how='left'):
    """
    Merge ``df1`` with ``df2``, running the merge on chunks of ``df1`` in threads.

    Splitting the left frame row-wise only reproduces a normal merge for
    ``how='left'`` and ``how='inner'``.  For any other ``how`` every chunk
    would repeat the unmatched rows of ``df2``, so those cases are handled by
    a single ordinary ``pd.merge`` call instead.

    Neither input frame is modified.

    Parameters:
    - df1: left DataFrame
    - df2: right DataFrame
    - left_key: join key(s) of df1 (column name or list of column names)
    - right_key: join key(s) of df2 (None means the same as left_key)
    - n_partitions: number of chunks / worker threads
    - how: merge type ('left', 'right', 'inner', 'outer')

    Returns:
        The merged DataFrame with a fresh 0..n-1 index.

    Raises:
        ValueError: If a key column is missing or ``n_partitions`` < 1.
        RuntimeError: If one of the chunk merges fails.
    """
    # 1. Normalise the key arguments to lists.
    right_key = right_key if right_key is not None else left_key

    left_keys = [left_key] if isinstance(left_key, str) else list(left_key)
    right_keys = [right_key] if isinstance(right_key, str) else list(right_key)

    # 2. Make sure every key column exists.
    missing_in_left = set(left_keys) - set(df1.columns)
    missing_in_right = set(right_keys) - set(df2.columns)

    if missing_in_left:
        raise ValueError(f"df1에 다음 키 컬럼이 없습니다: {missing_in_left}")
    if missing_in_right:
        raise ValueError(f"df2에 다음 키 컬럼이 없습니다: {missing_in_right}")
    if n_partitions < 1:
        raise ValueError("n_partitions must be at least 1")

    # 3. Work on a fresh frame so the caller's df1 keeps its index and dtypes,
    #    then align the key dtypes with df2.
    df1 = df1.reset_index(drop=True)
    for lk, rk in zip(left_keys, right_keys):
        df1[lk] = df1[lk].astype(df2[rk].dtype)

    # 4. Joins that must keep unmatched right-hand rows cannot be partitioned
    #    by the left frame; run them as one merge.
    if how not in ('left', 'inner'):
        return pd.merge(df1, df2, left_on=left_keys, right_on=right_keys, how=how)

    # 5. Split df1 into contiguous row ranges.
    bounds = np.linspace(0, len(df1), n_partitions + 1, dtype=int)
    chunks = [df1.iloc[lo:hi] for lo, hi in zip(bounds[:-1], bounds[1:])]

    # 6. Merge every chunk against df2 in a worker thread. pd.merge returns a
    #    new frame, so the inputs are passed without copying.
    with ThreadPoolExecutor(max_workers=n_partitions) as executor:
        futures = [
            executor.submit(
                pd.merge,
                chunk,
                df2,
                left_on=left_keys,
                right_on=right_keys,
                how=how,
            )
            for chunk in chunks
        ]
        try:
            results = [future.result() for future in futures]
        except Exception as e:
            # Drop work that has not started yet, then report the failure.
            for future in futures:
                future.cancel()
            raise RuntimeError(f"병렬 merge 실패: {str(e)}") from e

    # 7. Stitch the chunk results together in the original row order.
    return pd.concat(results, ignore_index=True)

# 같은 키 이름
result = safe_parallel_merge(df1, df2, left_key='id', n_partitions=4)

# 다른 키 이름
result = safe_parallel_merge(df1, df2, left_key='df1_id', right_key='df2_id')

# 여러 컬럼으로 조인
result = safe_parallel_merge(
    df1, df2, 
    left_key=['date', 'user_id'],
    right_key=['transaction_date', 'customer_id']
)

Polars type 변경

유키공 — Mon, 26 May 2025 13:49:16 +0900

import polars as pl

def process_dataframe_optimized_pl(dict_df_types: dict, df: pl.DataFrame) -> pl.DataFrame:
    """Cast columns of a polars DataFrame according to a name -> type mapping.

    Only columns that exist in ``df`` and whose requested type is one of
    'int', 'float', 'bool', 'datetime', 'string' or 'category' are touched.

    Args:
        dict_df_types: Mapping of column name to the requested type name.
        df: The DataFrame to convert.

    Returns:
        The result of ``df.with_columns`` applied to the built expressions.
    """
    # One expression builder per supported type name.
    casters = {
        'int': lambda e: e.cast(pl.Int32).fill_null(0),
        'float': lambda e: e.cast(pl.Float32).fill_null(0),
        'bool': lambda e: e.cast(pl.Utf8).str.to_lowercase().is_in(['true', 't', '1']),
        'datetime': lambda e: e.cast(pl.Utf8).str.strptime(pl.Datetime, strict=False),
        'string': lambda e: e.cast(pl.String),
        'category': lambda e: e.cast(pl.Categorical),
    }

    def handle_column(col: str, dtype: str) -> pl.Expr:
        """Build the cast expression for one column; fall back to the plain column."""
        try:
            expr = pl.col(col)
            build = casters.get(dtype)
            if build is None:
                return expr  # unknown type name: leave the column as it is
            return build(expr).alias(col)
        except Exception as e:
            print(f"컬럼 '{col}' 처리 중 오류 발생: {e}")
            return expr  # hand back the untouched column on error

    # Columns that are both present in the frame and mentioned in the mapping.
    targets = set(df.columns) & set(dict_df_types.keys())

    exprs = []
    for col in targets:
        if dict_df_types.get(col) in casters:
            exprs.append(handle_column(col, dict_df_types[col]))

    return df.with_columns(exprs)

object 타입 컬럼을 모두 문자열(str)로 변환

유키공 — Mon, 26 May 2025 12:33:00 +0900

import pandas as pd

def convert_object_columns_to_str(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` in which every object-dtype column is cast to ``str``.

    Parameters:
        df (pd.DataFrame): DataFrame to convert; it is not modified.

    Returns:
        pd.DataFrame: New DataFrame with the object columns converted.
    """
    object_columns = df.select_dtypes(include='object').columns
    # astype with a per-column mapping returns a new frame and leaves the
    # remaining columns as they are.
    return df.astype({name: str for name in object_columns})

import polars as pl

pandas_df = pd.read_csv("data.csv")
pandas_df = convert_object_columns_to_str(pandas_df)
lazy_df = pl.from_pandas(pandas_df).lazy()

df count

유키공 — Thu, 15 May 2025 15:38:54 +0900

import pandas as pd

# 샘플 DataFrame 생성
df = pd.DataFrame({'A': [1, 2, None, 4], 'B': ['x', None, 'z', 'w']})

# 1. len() 함수 사용 (가장 일반적)
row_count = len(df)
print(f"행 수 (len): {row_count}")

# 2. shape 속성 사용
row_count = df.shape[0]  # shape는 (행수, 열수) 튜플 반환
print(f"행 수 (shape): {row_count}")

# 3. index 길이 확인
row_count = df.index.size
print(f"행 수 (index): {row_count}")

# 4. 각 열별 결측값(None/NaN) 개수 확인
print("\n각 열별 결측값 개수:")
print(df.isnull().sum())  # 또는 df.isna().sum()

# 5. 전체 결측값 개수 확인
total_nulls = df.isnull().sum().sum()
print(f"\n전체 결측값 총 개수: {total_nulls}")

# 6. 결측값이 있는 행만 카운트
null_rows_count = df.isnull().any(axis=1).sum()
print(f"결측값이 하나라도 있는 행 수: {null_rows_count}")

oracle Procedure 내용조회

유키공 — Wed, 7 May 2025 16:35:29 +0900

-- 특정 프로시저 내용 확인
SELECT text 
FROM all_source 
WHERE name = '프로시저명' 
AND type = 'PROCEDURE' 
ORDER BY line;

-- 모든 프로시저 목록
SELECT object_name 
FROM all_objects 
WHERE object_type = 'PROCEDURE';

macOS 기준 Android Studio 완전 삭제 방법

유키공 — Sat, 3 May 2025 21:46:21 +0900

1. 앱 제거

sudo rm -rf /Applications/Android\ Studio.app

2. 설정 및 캐시 제거

rm -rf ~/Library/Application\ Support/Google/AndroidStudio*
rm -rf ~/Library/Preferences/AndroidStudio*
rm -rf ~/Library/Logs/AndroidStudio*
rm -rf ~/Library/Caches/AndroidStudio*
rm -rf ~/.android
rm -rf ~/.gradle
rm -rf ~/Library/Android

✅ 삭제 방법 (macOS) : (선택) SDK도 삭제할 경우

rm -rf ~/Library/Android/sdk

전체 삭제 명령어 (복사해서 터미널에 붙여 넣기)

# Android Studio 앱 삭제
sudo rm -rf /Applications/Android\ Studio.app

# 설정 및 캐시 제거
rm -rf ~/Library/Preferences/AndroidStudio*
rm -rf ~/Library/Application\ Support/Google/AndroidStudio*
rm -rf ~/Library/Caches/AndroidStudio*
rm -rf ~/Library/Logs/AndroidStudio*

# Android SDK, AVD, Gradle 등 관련 구성 제거
rm -rf ~/Library/Android
rm -rf ~/.android
rm -rf ~/.gradle

csv reader

유키공 — Wed, 30 Apr 2025 13:08:06 +0900

import json
import os
import sys
import traceback

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from PyQt5.QtCore import (
    Qt, QAbstractTableModel, QSortFilterProxyModel, 
    QThread, pyqtSignal, QObject
)
from PyQt5.QtGui import QColor
from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QTableView, QFileDialog,
    QVBoxLayout, QWidget, QPushButton, QLabel,
    QStatusBar, QMessageBox, QLineEdit, QHBoxLayout,
    QComboBox, QHeaderView, QProgressDialog
)

class LoadWorker(QThread):
    """Background thread that loads a Parquet or CSV file into a DataFrame.

    Signals:
        progress(int): Load progress as a percentage.
        finished(pd.DataFrame): The fully loaded, stringified DataFrame.
        error(str): Error message including the traceback.
    """
    progress = pyqtSignal(int)
    finished = pyqtSignal(pd.DataFrame)
    error = pyqtSignal(str)

    def __init__(self, file_path):
        """Remember the path of the file to load."""
        super().__init__()
        self.file_path = file_path

    def safe_json_dumps(self, obj):
        """Return ``obj`` as text: JSON for dict/list, ``str(obj)`` otherwise.

        Returns the placeholder "[Conversion Error]" when the conversion fails.
        """
        try:
            if isinstance(obj, (dict, list)):
                return json.dumps(obj, ensure_ascii=False)
            return str(obj)
        except Exception:
            # Best effort: a value that cannot be converted must not abort the
            # load. KeyboardInterrupt/SystemExit are no longer swallowed.
            return "[Conversion Error]"

    def convert_complex_types(self, df):
        """Convert the columns of ``df`` to strings in place and return ``df``.

        A column whose first value is a dict or list is JSON-encoded value by
        value; any other column that is not already a string dtype is cast
        with ``astype(str)``.
        """
        for col in df.columns:
            try:
                # The type check only looks at the value in the first row.
                sample = df[col].iloc[0] if len(df) > 0 else None

                if isinstance(sample, (dict, list)):
                    df[col] = df[col].apply(self.safe_json_dumps)
                elif not pd.api.types.is_string_dtype(df[col]):
                    df[col] = df[col].astype(str)
            except Exception as e:
                print(f"컬럼 {col} 처리 오류: {e}")
                df[col] = df[col].astype(str)
        return df

    def run(self):
        """Load ``self.file_path`` chunk by chunk, emitting progress on the way.

        Paths ending in ``.parquet`` are read one row group at a time; every
        other path is read as CSV in chunks of 100,000 rows.  On success the
        combined frame is emitted through ``finished``; any exception is
        reported through ``error`` instead of being raised.
        """
        try:
            self.progress.emit(5)

            if self.file_path.endswith('.parquet'):
                # Parquet: one row group per step.
                parquet_file = pq.ParquetFile(self.file_path)
                num_row_groups = parquet_file.num_row_groups
                chunks = []

                for i in range(num_row_groups):
                    self.progress.emit(10 + int((i+1)/num_row_groups*70))
                    table = parquet_file.read_row_group(i)
                    df = table.to_pandas()
                    df = self.convert_complex_types(df)  # dict/list handling
                    chunks.append(df)

                self.progress.emit(90)
                result_df = pd.concat(chunks, ignore_index=True)

            else:
                # CSV: count the lines first so progress can be estimated.
                chunksize = 100000
                chunks = []
                with open(self.file_path, 'r', encoding='utf-8') as handle:
                    total_rows = sum(1 for _ in handle) - 1
                # A header-only (or empty) file must not cause a division by zero.
                total_rows = max(total_rows, 1)
                processed_rows = 0

                for chunk in pd.read_csv(self.file_path, chunksize=chunksize):
                    progress = 10 + int(processed_rows / total_rows * 70)
                    self.progress.emit(progress)
                    chunk = self.convert_complex_types(chunk)  # dict/list handling
                    chunks.append(chunk)
                    processed_rows += len(chunk)

                self.progress.emit(90)
                result_df = pd.concat(chunks, ignore_index=True)

            self.progress.emit(95)
            result_df = result_df.fillna("")  # replace missing values
            self.progress.emit(100)
            self.finished.emit(result_df)

        except Exception as e:
            # Top-level boundary of the thread: forward everything to the GUI.
            error_msg = f"로딩 실패: {str(e)}\n\n{traceback.format_exc()}"
            self.error.emit(error_msg)

class DataFrameModel(QAbstractTableModel):
    """Read-only Qt table model backed by a pandas DataFrame.

    dict and list cell values are displayed via ``str()`` and get a distinct
    background colour.
    """
    def __init__(self, data):
        """Store the DataFrame that backs the model."""
        super().__init__()
        self._data = data

    def rowCount(self, parent=None):
        """Return the number of rows of the DataFrame."""
        return len(self._data)

    def columnCount(self, parent=None):
        """Return the number of columns of the DataFrame."""
        return len(self._data.columns)

    def data(self, index, role=Qt.DisplayRole):
        """Return the display text or background colour of one cell.

        Returns None for invalid indexes and for roles other than
        ``Qt.DisplayRole`` and ``Qt.BackgroundRole``.
        """
        if not index.isValid():
            return None
        if role not in (Qt.DisplayRole, Qt.BackgroundRole):
            return None  # skip the cell lookup for roles that are not served

        value = self._data.iloc[index.row(), index.column()]
        is_complex = isinstance(value, (dict, list))

        if role == Qt.DisplayRole:
            # pd.isna() on a list returns an array, whose truth value is
            # ambiguous, so complex values are rendered before the NA check.
            if is_complex:
                return str(value)
            return "" if pd.isna(value) else str(value)

        # Qt.BackgroundRole
        if is_complex:
            return QColor(240, 248, 255)  # background for complex types
        return QColor(255, 255, 255)

    def headerData(self, section, orientation, role=Qt.DisplayRole):
        """Return the column name (horizontal) or index label (vertical) as text."""
        if role == Qt.DisplayRole:
            if orientation == Qt.Horizontal:
                return str(self._data.columns[section])
            return str(self._data.index[section])
        return None

class DataViewer(QMainWindow):
    """Main window: opens a Parquet/CSV file via LoadWorker and shows it in a table."""
    def __init__(self):
        """Set the window title and geometry, then build the widgets."""
        super().__init__()
        self.setWindowTitle("Data Viewer with Dict Handling")
        self.setGeometry(100, 100, 1200, 800)
        self.setup_ui()
        
    def setup_ui(self):
        """Create the control panel, table view, status bar and progress dialog."""
        self.central_widget = QWidget()
        self.setCentralWidget(self.central_widget)
        layout = QVBoxLayout(self.central_widget)
        
        # Control panel
        control_panel = QWidget()
        control_layout = QHBoxLayout(control_panel)
        
        self.btn_open = QPushButton("파일 열기")
        self.btn_open.clicked.connect(self.open_file)
        control_layout.addWidget(self.btn_open)
        
        # NOTE(review): no signal of search_input or column_combo is connected
        # anywhere in this class, so they do not filter the table as written.
        self.search_input = QLineEdit()
        self.search_input.setPlaceholderText("검색어 입력")
        control_layout.addWidget(self.search_input)
        
        self.column_combo = QComboBox()
        self.column_combo.addItem("모든 컬럼")
        control_layout.addWidget(self.column_combo)
        
        layout.addWidget(control_panel)
        
        # Table view, fed through a sort/filter proxy model
        self.table_view = QTableView()
        self.table_view.setSortingEnabled(True)
        self.proxy_model = QSortFilterProxyModel()
        self.proxy_model.setFilterCaseSensitivity(Qt.CaseInsensitive)
        self.table_view.setModel(self.proxy_model)
        layout.addWidget(self.table_view)
        
        # Status bar
        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)
        
        # Loading dialog (range 0-100, cancel button wired to cancel_loading)
        self.progress_dialog = QProgressDialog("파일을 로드 중입니다...", "취소", 0, 100, self)
        self.progress_dialog.setWindowModality(Qt.WindowModal)
        self.progress_dialog.canceled.connect(self.cancel_loading)
        
    def open_file(self):
        """Ask the user for a file and start loading it if one was chosen."""
        file_path, _ = QFileDialog.getOpenFileName(
            self, "파일 열기", "",
            "데이터 파일 (*.parquet *.csv);;모든 파일 (*)")
            
        if file_path:
            self.load_data(file_path)
    
    def load_data(self, file_path):
        """Start a LoadWorker for ``file_path`` and show the progress dialog."""
        self.progress_dialog.reset()
        self.progress_dialog.show()
        self.btn_open.setEnabled(False)  # re-enabled on completion, error or cancel
        
        self.load_worker = LoadWorker(file_path)
        self.load_worker.progress.connect(self.update_progress)
        self.load_worker.finished.connect(self.data_load_complete)
        self.load_worker.error.connect(self.data_load_error)
        self.load_worker.start()
    
    def update_progress(self, value):
        """Forward the worker's progress value to the progress dialog."""
        self.progress_dialog.setValue(value)
        
    def data_load_complete(self, df):
        """Show the loaded DataFrame and update the column list and status bar."""
        self.progress_dialog.reset()
        self.btn_open.setEnabled(True)
        
        # Install the data model behind the proxy
        model = DataFrameModel(df)
        self.proxy_model.setSourceModel(model)
        
        # Refresh the column list
        self.column_combo.clear()
        self.column_combo.addItem("모든 컬럼")
        self.column_combo.addItems(df.columns.tolist())
        
        # Update the status bar
        file_size = os.path.getsize(self.load_worker.file_path) / (1024 * 1024)  # in MB
        self.status_bar.showMessage(
            f"로드 완료: {len(df):,}행 | {len(df.columns)}열 | {file_size:.2f}MB | "
            f"Dict/List 컬럼: {self.count_complex_columns(df)}개"
        )
    
    def count_complex_columns(self, df):
        """Count the columns whose first value is a dict or a list."""
        # NOTE(review): LoadWorker.convert_complex_types stringifies dict/list
        # values before the frame is emitted, so this presumably returns 0 for
        # frames coming from LoadWorker; confirm.
        count = 0
        for col in df.columns:
            sample = df[col].iloc[0] if len(df) > 0 else None
            if isinstance(sample, (dict, list)):
                count += 1
        return count
    
    def data_load_error(self, error_msg):
        """Show the error in a message box and restore the UI."""
        self.progress_dialog.reset()
        self.btn_open.setEnabled(True)
        QMessageBox.critical(self, "로드 오류", error_msg)
        self.status_bar.showMessage("로드 실패")
    
    def cancel_loading(self):
        """Terminate a running worker (if any) and restore the UI."""
        if hasattr(self, 'load_worker') and self.load_worker.isRunning():
            self.load_worker.terminate()
        self.progress_dialog.reset()
        self.btn_open.setEnabled(True)
        self.status_bar.showMessage("로딩 취소됨")

if __name__ == "__main__":
    app = QApplication(sys.argv)
    app.setStyle('Fusion')
    viewer = DataViewer()
    viewer.show()
    sys.exit(app.exec_())

Parquet 기능

유키공 — Wed, 30 Apr 2025 08:08:25 +0900

import sys
import os
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import json
import traceback
from concurrent.futures import ThreadPoolExecutor
from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QTableView, QFileDialog,
    QVBoxLayout, QWidget, QPushButton, QLabel,
    QStatusBar, QMessageBox, QLineEdit, QHBoxLayout,
    QComboBox, QHeaderView, QProgressDialog, QCheckBox
)
from PyQt5.QtCore import (
    Qt, QAbstractTableModel, QSortFilterProxyModel, 
    QThread, pyqtSignal, QObject, QRunnable, QThreadPool
)
from PyQt5.QtGui import QColor, QFont

class WorkerSignals(QObject):
    """Signals used by the worker classes to report back to the UI."""
    # Progress value (int); the workers in this file emit values up to 100
    progress = pyqtSignal(int)
    # Payload of a finished job (any Python object)
    finished = pyqtSignal(object)
    # Error description text
    error = pyqtSignal(str)
    # Status message text
    message = pyqtSignal(str)

class ExportWorker(QRunnable):
    """Runnable that writes a DataFrame to disk in the selected format.

    Progress, completion, errors and status text are reported through
    ``self.signals`` (a ``WorkerSignals`` instance). Cancellation is
    cooperative: ``stop()`` clears a flag that is checked before each
    format writer starts and between chunks.
    """

    def __init__(self, df, file_path, file_type):
        """Store the export job.

        Args:
            df: DataFrame to export.
            file_path: Destination path.
            file_type: Filter string selecting the format, one of
                ``"CSV (*.csv)"``, ``"Excel (*.xlsx)"``,
                ``"Parquet (*.parquet)"`` or ``"JSON (*.json)"``.
        """
        super().__init__()
        self.df = df
        self.file_path = file_path
        self.file_type = file_type
        self.signals = WorkerSignals()
        self._is_running = True

    def run(self):
        """Run the export and report the outcome through ``self.signals``.

        Emits ``finished`` with the file path on success and ``error`` with
        a message on any failure, including an unknown ``file_type``.
        """
        try:
            if not self._is_running:
                return

            self.signals.message.emit(f"Exporting to {self.file_type}...")

            exporters = {
                "CSV (*.csv)": self.export_csv,
                "Excel (*.xlsx)": self.export_excel,
                "Parquet (*.parquet)": self.export_parquet,
                "JSON (*.json)": self.export_json,
            }
            exporter = exporters.get(self.file_type)
            if exporter is None:
                # An unknown type must not be reported as a successful
                # export: nothing would have been written.
                raise ValueError(f"Unsupported export format: {self.file_type!r}")
            exporter()

            if self._is_running:
                self.signals.finished.emit(self.file_path)
                self.signals.message.emit("Export completed successfully")

        except Exception as e:
            self.signals.error.emit(f"Export failed: {str(e)}")
            self.signals.message.emit("Export failed")

    def _emit_chunk_progress(self, rows_done, total_rows):
        """Emit the share of rows written so far as a percentage (max 100)."""
        if total_rows <= 0:
            self.signals.progress.emit(100)
            return
        percent = int(rows_done / total_rows * 100)
        self.signals.progress.emit(min(percent, 100))

    def export_csv(self):
        """Write the DataFrame as CSV in chunks of 100,000 rows."""
        chunksize = 100000
        total_rows = len(self.df)

        # max(..., 1) guarantees one pass for an empty frame, so the file
        # and its header row are still written.
        for i in range(0, max(total_rows, 1), chunksize):
            if not self._is_running:
                return

            chunk = self.df.iloc[i:i+chunksize]
            is_first = (i == 0)

            chunk.to_csv(
                self.file_path,
                mode='w' if is_first else 'a',  # later chunks are appended
                header=is_first,                # header only once
                index=False
            )

            self._emit_chunk_progress(i + chunksize, total_rows)

    def export_excel(self):
        """Write the DataFrame as an Excel workbook."""
        if not self._is_running:
            return

        self.signals.progress.emit(20)
        try:
            self.df.to_excel(self.file_path, index=False, engine='openpyxl')
        except ImportError:
            # openpyxl is unavailable: let pandas pick its default engine
            self.df.to_excel(self.file_path, index=False)
        self.signals.progress.emit(100)

    def export_parquet(self):
        """Write the DataFrame as Parquet using the pyarrow engine."""
        if not self._is_running:
            return

        self.signals.progress.emit(30)
        self.df.to_parquet(self.file_path, engine='pyarrow')
        self.signals.progress.emit(100)

    def export_json(self):
        """Write the DataFrame as JSON Lines in chunks of 50,000 rows."""
        if not self._is_running:
            return

        self.signals.progress.emit(10)

        chunksize = 50000
        total_rows = len(self.df)

        with open(self.file_path, 'w', encoding='utf-8') as f:
            for i in range(0, total_rows, chunksize):
                if not self._is_running:
                    return

                chunk = self.df.iloc[i:i+chunksize]
                json_str = chunk.to_json(orient='records', lines=True, force_ascii=False)
                # Make sure every chunk ends with a newline; otherwise the
                # last record of one chunk and the first record of the next
                # would share a line and break the JSON Lines format.
                if json_str and not json_str.endswith('\n'):
                    json_str += '\n'
                f.write(json_str)

                self._emit_chunk_progress(i + chunksize, total_rows)

    def stop(self):
        """Request cancellation; checked between chunks and before emitting."""
        self._is_running = False

class ArrowTableConverter:
    """Helpers that convert a PyArrow table to pandas, with fallbacks."""

    @staticmethod
    def get_columns(table):
        """Return the table's column names.

        Uses ``table.column_names`` when present, otherwise reads the names
        from ``table.schema``.
        """
        if hasattr(table, 'column_names'):
            return table.column_names
        return [table.schema[i].name for i in range(table.num_columns)]

    @staticmethod
    def get_column_data(table, col):
        """Return the data of column ``col`` via ``column()`` or indexing."""
        if hasattr(table, 'column'):
            return table.column(col)
        return table[col]

    @staticmethod
    def to_dataframe(table):
        """Convert ``table`` to a DataFrame.

        Tries ``table.to_pandas()`` first and falls back to
        ``manual_conversion`` when that raises an ordinary exception.
        Interpreter-level exceptions such as ``KeyboardInterrupt`` are not
        intercepted.
        """
        try:
            return table.to_pandas()
        except Exception:
            return ArrowTableConverter.manual_conversion(table)

    @staticmethod
    def manual_conversion(table):
        """Convert ``table`` column by column (last-resort path).

        A column that cannot be converted is filled with the placeholder
        ``"[Conversion Error]"`` instead of aborting the whole conversion.

        Returns:
            pandas.DataFrame built from the converted columns.
        """
        data = {}

        for col in ArrowTableConverter.get_columns(table):
            try:
                col_data = ArrowTableConverter.get_column_data(table, col)
                if hasattr(col_data, 'to_pandas'):
                    data[col] = col_data.to_pandas()
                else:
                    data[col] = [str(x) for x in col_data]
            except Exception:
                data[col] = ["[Conversion Error]"] * len(table)

        return pd.DataFrame(data)

class ParquetLoader(QThread):
    """Background thread that reads a Parquet file row group by row group.

    Results are reported through ``self.signals``: ``progress`` (5-100),
    ``finished`` (the cleaned DataFrame) or ``error`` (message plus
    traceback). Cancellation is cooperative via ``stop()``.
    """

    def __init__(self, file_path, max_rows=None):
        """Store the load job.

        Args:
            file_path: Path of the Parquet file.
            max_rows: Maximum number of rows to load; a falsy value
                (``None`` or 0) loads everything.
        """
        super().__init__()
        self.file_path = file_path
        self.max_rows = max_rows
        self.signals = WorkerSignals()
        self._is_running = True

    def run(self):
        """Read the file, clean the data and emit ``finished`` or ``error``.

        Returns silently, without emitting a result, when ``stop()`` was
        called while row groups were still being read.
        """
        parquet_file = None
        try:
            self.signals.progress.emit(5)
            parquet_file = pq.ParquetFile(self.file_path)
            num_row_groups = parquet_file.num_row_groups
            self.signals.progress.emit(10)

            chunks = []
            loaded_rows = 0

            for i in range(num_row_groups):
                if not self._is_running:
                    return

                self.signals.progress.emit(10 + int((i+1)/num_row_groups*70))
                table = parquet_file.read_row_group(i)
                df_chunk = ArrowTableConverter.to_dataframe(table)

                if self.max_rows:
                    remaining = self.max_rows - loaded_rows
                    if remaining <= 0:
                        break
                    df_chunk = df_chunk.head(remaining)

                chunks.append(df_chunk)
                loaded_rows += len(df_chunk)

                if self.max_rows and loaded_rows >= self.max_rows:
                    break

            self.signals.progress.emit(85)
            if chunks:
                combined_df = pd.concat(chunks, ignore_index=True)
            else:
                # No row group was read; pd.concat([]) would raise, so build
                # an empty frame that still carries the file's columns.
                combined_df = ArrowTableConverter.to_dataframe(
                    parquet_file.schema_arrow.empty_table()
                )
            final_df = self.clean_data(combined_df)

            self.signals.progress.emit(100)
            self.signals.finished.emit(final_df)

        except Exception as e:
            error_trace = traceback.format_exc()
            self.signals.error.emit(f"로딩 실패:\n{str(e)}\n\n{error_trace}")
        finally:
            # Drop the reader reference as soon as the run ends
            parquet_file = None

    @staticmethod
    def _is_nested(value):
        """Return True for dict/list values and array-like containers.

        Array-likes are detected by duck typing (``tolist`` and ``__len__``)
        so numpy arrays, which PyArrow produces for list columns, count too.
        """
        if isinstance(value, (dict, list)):
            return True
        if isinstance(value, (str, bytes)):
            return False
        return hasattr(value, "tolist") and hasattr(value, "__len__")

    @staticmethod
    def _first_meaningful(series):
        """Return the first value that is not an empty string, else None."""
        for value in series:
            if isinstance(value, str) and value == "":
                continue
            return value
        return None

    def clean_data(self, df):
        """Normalise every column to display-friendly strings, in place.

        Missing values become ``""``; columns holding nested values are
        serialised with ``safe_json_dumps``; other non-string columns are
        converted with ``astype(str)``. A column that fails is prefixed
        with ``"[Error] "``.

        Returns:
            The same DataFrame object, modified in place.
        """
        for col in df.columns:
            try:
                df[col] = df[col].fillna("")
                # Nulls are "" by now, so skip them when sampling the column
                sample = self._first_meaningful(df[col])
                if self._is_nested(sample):
                    df[col] = df[col].apply(self.safe_json_dumps)
                elif not pd.api.types.is_string_dtype(df[col]):
                    df[col] = df[col].astype(str)
            except Exception as col_error:
                print(f"컬럼 {col} 정제 오류: {col_error}")
                df[col] = "[Error] " + df[col].astype(str)
        return df

    def safe_json_dumps(self, value):
        """Serialise one cell value to text without raising.

        Returns ``""`` for ``None``, JSON (truncated to 2000 characters) for
        dict/list/array-like values, ``str(value)`` for anything else and
        ``"[Conversion Error]"`` when serialisation fails.
        """
        try:
            if value is None:
                return ""
            if self._is_nested(value) and not isinstance(value, (dict, list)):
                # Array-like: convert to a plain list so json can encode it
                value = value.tolist()
            if isinstance(value, (dict, list)):
                return json.dumps(value, ensure_ascii=False, default=str)[:2000]
            return str(value)
        except Exception:
            return "[Conversion Error]"

    def stop(self):
        """Request cancellation; checked before each row group is read."""
        self._is_running = False

class DataFrameModel(QAbstractTableModel):
    """Read-only Qt table model backed by a pandas DataFrame."""

    def __init__(self, data):
        """Store the DataFrame to display.

        Args:
            data: pandas DataFrame shown by the model.
        """
        super().__init__()
        self._data = data

    def rowCount(self, parent=None):
        """Return the number of DataFrame rows."""
        return len(self._data)

    def columnCount(self, parent=None):
        """Return the number of DataFrame columns."""
        return len(self._data.columns)

    @staticmethod
    def _is_missing(value):
        """Return True only for scalar missing values (None, NaN, NaT).

        ``pd.isna`` returns an array for list/array inputs, whose truth
        value is ambiguous, so containers are never treated as missing.
        """
        if isinstance(value, (dict, list)):
            return False
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            return False

    def data(self, index, role=Qt.DisplayRole):
        """Return the cell text, background colour or alignment.

        Missing scalars are shown as an empty string; dict/list cells get a
        light blue background, all other cells a white one.
        """
        if not index.isValid():
            return None

        if role == Qt.TextAlignmentRole:
            return Qt.AlignLeft | Qt.AlignVCenter
        if role not in (Qt.DisplayRole, Qt.BackgroundRole):
            # Skip the cell lookup for roles that do not use the value
            return None

        value = self._data.iloc[index.row(), index.column()]

        if role == Qt.DisplayRole:
            return "" if self._is_missing(value) else str(value)
        if isinstance(value, (dict, list)):
            return QColor(240, 248, 255)
        return QColor(255, 255, 255)

    def headerData(self, section, orientation, role=Qt.DisplayRole):
        """Return column labels (horizontal) or index labels (vertical)."""
        if role != Qt.DisplayRole:
            return None
        if orientation == Qt.Horizontal:
            return str(self._data.columns[section])
        return str(self._data.index[section])

class ParquetViewer(QMainWindow):
    """Main window of the Parquet viewer.

    Loads a Parquet file on a background ``ParquetLoader`` thread, shows it
    in a sortable, filterable table and exports it through an
    ``ExportWorker`` started on the global thread pool.
    """

    # Save-dialog filter -> extension appended when the chosen name lacks it
    EXPORT_EXTENSIONS = {
        "CSV (*.csv)": ".csv",
        "Excel (*.xlsx)": ".xlsx",
        "Parquet (*.parquet)": ".parquet",
        "JSON (*.json)": ".json",
    }

    def __init__(self):
        """Build the window and configure the shared thread pool."""
        super().__init__()
        self.setWindowTitle("Universal Parquet Viewer")
        self.setGeometry(100, 100, 1400, 900)
        # True between a cancel request and the start of the next load
        self._load_cancelled = False
        self.setup_ui()
        self.thread_pool = QThreadPool.globalInstance()
        self.thread_pool.setMaxThreadCount(2)
        self.export_worker = None

    def setup_ui(self):
        """Create the widgets, the layout and the proxy model."""
        self.central_widget = QWidget()
        self.setCentralWidget(self.central_widget)
        layout = QVBoxLayout(self.central_widget)

        # Control panel
        panel = QWidget()
        panel_layout = QHBoxLayout(panel)

        self.btn_open = QPushButton("Open Parquet")
        self.btn_open.clicked.connect(self.open_file)
        panel_layout.addWidget(self.btn_open)

        self.preview_check = QCheckBox("Preview Mode (First 1,000 rows)")
        self.preview_check.setChecked(True)
        panel_layout.addWidget(self.preview_check)

        panel_layout.addWidget(QLabel("Search:"))
        self.search_input = QLineEdit()
        self.search_input.setPlaceholderText("Search...")
        self.search_input.textChanged.connect(self.apply_filter)
        self.search_input.setEnabled(False)
        panel_layout.addWidget(self.search_input)

        self.column_combo = QComboBox()
        self.column_combo.addItem("All Columns")
        self.column_combo.setEnabled(False)
        # Picking another column must re-run the current search
        self.column_combo.currentIndexChanged.connect(self._reapply_filter)
        panel_layout.addWidget(self.column_combo)

        self.btn_export = QPushButton("Export")
        self.btn_export.clicked.connect(self.export_data)
        self.btn_export.setEnabled(False)
        panel_layout.addWidget(self.btn_export)

        layout.addWidget(panel)

        # Table view
        self.table_view = QTableView()
        self.table_view.setSortingEnabled(True)
        self.table_view.horizontalHeader().setSectionResizeMode(QHeaderView.Interactive)
        self.table_view.setStyleSheet("""
            QTableView {
                font-size: 10pt;
                selection-background-color: #3498db;
                selection-color: white;
            }
            QHeaderView::section {
                background-color: #34495e;
                color: white;
                padding: 5px;
                font-weight: bold;
            }
        """)
        layout.addWidget(self.table_view)

        # Status bar
        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)

        # Proxy model providing sorting and filtering
        self.proxy_model = QSortFilterProxyModel()
        self.proxy_model.setFilterCaseSensitivity(Qt.CaseInsensitive)

    @staticmethod
    def _close_dialog(dialog):
        """Close a progress dialog without triggering its cancel handler.

        QProgressDialog emits ``canceled`` from its close event, which
        would otherwise run the cancel slot on every normal completion.
        """
        dialog.blockSignals(True)
        dialog.close()
        dialog.blockSignals(False)

    def _source_frame(self):
        """Return the DataFrame behind the proxy model, or None if unloaded."""
        proxy = getattr(self, 'proxy_model', None)
        model = proxy.sourceModel() if proxy is not None else None
        return None if model is None else model._data

    def _reapply_filter(self, *_args):
        """Re-run the filter with the text currently in the search box."""
        self.apply_filter(self.search_input.text())

    def open_file(self):
        """Ask for a Parquet file and start loading it."""
        options = QFileDialog.Options()
        file_path, _ = QFileDialog.getOpenFileName(
            self, "Open Parquet File", "",
            "Parquet Files (*.parquet);;All Files (*)",
            options=options)

        if file_path:
            self.load_parquet(file_path)

    def load_parquet(self, file_path):
        """Load ``file_path`` on a background thread with a progress dialog.

        In preview mode only the first 1,000 rows are requested.
        """
        previous = getattr(self, 'loader', None)
        if previous is not None and previous.isRunning():
            # Replacing a QThread object that is still running would destroy
            # it mid-run, so let a cancelled load wind down first.
            previous.stop()
            previous.wait()

        self._load_cancelled = False
        self.progress = QProgressDialog("Loading...", "Cancel", 0, 100, self)
        self.progress.setWindowModality(Qt.WindowModal)
        self.progress.canceled.connect(self.cancel_loading)

        max_rows = 1000 if self.preview_check.isChecked() else None
        self.loader = ParquetLoader(file_path, max_rows=max_rows)
        self.loader.signals.progress.connect(self.update_progress)
        self.loader.signals.finished.connect(self.on_load_complete)
        self.loader.signals.error.connect(self.on_load_error)
        self.loader.start()

        self.progress.show()

    def update_progress(self, value):
        """Show the loader's progress unless the load was cancelled."""
        if self._load_cancelled:
            # A late progress value must not bring the dialog back
            return
        self.progress.setValue(value)

    def cancel_loading(self):
        """Cancel the running load at the user's request."""
        self._load_cancelled = True
        if hasattr(self, 'loader'):
            self.loader.stop()
        self._close_dialog(self.progress)
        self.status_bar.showMessage("Loading canceled", 3000)

    def on_load_complete(self, df):
        """Show the loaded DataFrame and enable search and export.

        Args:
            df: DataFrame emitted by the loader.
        """
        if self._load_cancelled:
            # Result of a load the user already cancelled: discard it
            return
        self._close_dialog(self.progress)

        model = DataFrameModel(df)
        self.proxy_model.setSourceModel(model)
        self.table_view.setModel(self.proxy_model)
        self.table_view.resizeColumnsToContents()

        # Repopulate silently; the filter is re-applied once, further down
        self.column_combo.blockSignals(True)
        self.column_combo.clear()
        self.column_combo.addItem("All Columns")
        self.column_combo.addItems(df.columns.tolist())
        self.column_combo.blockSignals(False)
        self.column_combo.setEnabled(True)

        self.search_input.setEnabled(True)
        self.btn_export.setEnabled(True)

        # Apply any text left in the search box to the new data
        self._reapply_filter()

        file_size = os.path.getsize(self.loader.file_path) / (1024 * 1024)
        self.status_bar.showMessage(
            f"Loaded: {len(df):,} rows | {len(df.columns)} cols | {file_size:.2f} MB",
            5000
        )

    def on_load_error(self, error_msg):
        """Report a load failure unless the load was cancelled."""
        if self._load_cancelled:
            return
        self._close_dialog(self.progress)
        QMessageBox.critical(self, "Load Error", error_msg)
        self.status_bar.showMessage("Load failed", 5000)

    def apply_filter(self, text):
        """Filter the table by ``text`` in the selected column.

        Does nothing until a file has been loaded.
        """
        if self._source_frame() is None:
            return

        # Entry 0 is "All Columns"; entry i > 0 maps to DataFrame column i - 1
        combo_index = self.column_combo.currentIndex()
        self.proxy_model.setFilterKeyColumn(combo_index - 1 if combo_index > 0 else -1)
        self.proxy_model.setFilterFixedString(text)

    def export_data(self):
        """Ask for a destination and export the loaded data in the background."""
        df = self._source_frame()
        if df is None:
            QMessageBox.warning(
                self,
                "No Data",
                "Please load a Parquet file first before exporting.",
                QMessageBox.Ok
            )
            self.status_bar.showMessage("Export failed: No data loaded", 3000)
            return

        options = QFileDialog.Options()
        file_path, selected_filter = QFileDialog.getSaveFileName(
            self, "Export Data", "",
            "CSV (*.csv);;Excel (*.xlsx);;Parquet (*.parquet);;JSON (*.json)",
            options=options)

        if not file_path:
            return

        # Append the extension unless the name already ends with it
        # (compared case-insensitively, so "data.CSV" is left alone).
        extension = self.EXPORT_EXTENSIONS.get(selected_filter)
        if extension and not file_path.lower().endswith(extension):
            file_path += extension

        self.export_progress = QProgressDialog("Exporting...", "Cancel", 0, 100, self)
        self.export_progress.setWindowModality(Qt.WindowModal)
        self.export_progress.canceled.connect(self.cancel_export)

        self.export_worker = ExportWorker(df, file_path, selected_filter)
        self.export_worker.signals.progress.connect(self.export_progress.setValue)
        self.export_worker.signals.message.connect(self.status_bar.showMessage)
        self.export_worker.signals.finished.connect(self.on_export_complete)
        self.export_worker.signals.error.connect(self.on_export_error)

        self.thread_pool.start(self.export_worker)
        self.export_progress.show()

    def cancel_export(self):
        """Cancel the running export at the user's request."""
        if self.export_worker:
            self.export_worker.stop()
        self._close_dialog(self.export_progress)
        self.status_bar.showMessage("Export canceled", 3000)

    def on_export_complete(self, file_path):
        """Tell the user where the data was exported."""
        self._close_dialog(self.export_progress)
        QMessageBox.information(
            self,
            "Success",
            f"Data exported to:\n{file_path}"
        )
        self.status_bar.showMessage(f"Exported to {file_path}", 5000)

    def on_export_error(self, error_msg):
        """Report an export failure."""
        self._close_dialog(self.export_progress)
        QMessageBox.critical(
            self,
            "Export Error",
            f"Export failed:\n{error_msg}"
        )
        self.status_bar.showMessage("Export failed", 5000)

if __name__ == "__main__":
    # High-DPI attributes only take effect when they are set before the
    # application object is constructed, so set them on the class first.
    if hasattr(Qt, 'AA_EnableHighDpiScaling'):
        QApplication.setAttribute(Qt.AA_EnableHighDpiScaling, True)
    if hasattr(Qt, 'AA_UseHighDpiPixmaps'):
        QApplication.setAttribute(Qt.AA_UseHighDpiPixmaps, True)

    app = QApplication(sys.argv)
    app.setStyle('Fusion')

    viewer = ParquetViewer()
    viewer.show()
    sys.exit(app.exec_())

parquet 뷰어

유키공 — Tue, 29 Apr 2025 14:58:39 +0900

pip install pyarrow pandas PyQt5==5.15.4 PyQt5-sip==12.8.1

pyinstaller --onefile --hidden-import=fastparquet --noconsole parquet_redy.py

# PyInstaller spec fragment: declares the entry script and the modules that
# must be bundled even though PyInstaller may not detect them on its own.
# NOTE(review): block_cipher is not defined in this snippet — presumably it
# comes from earlier in the .spec file; confirm.
a = Analysis(
    ['parquet_viewer.py'],
    pathex=[],
    binaries=[],
    datas=[],
    hiddenimports=[
        'fastparquet',
        'fastparquet.speedups',  # fastparquet's C extension module
        'pandas',
        'pyarrow'
    ],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

import sys
import pandas as pd
from PyQt5.QtWidgets import (
    QApplication, QMainWindow, QTableView, QFileDialog,
    QVBoxLayout, QWidget, QPushButton, QLabel,
    QStatusBar, QMessageBox
)
from PyQt5.QtCore import Qt, QAbstractTableModel

class PandasModel(QAbstractTableModel):
    """Table model that exposes a pandas DataFrame to a QTableView."""

    def __init__(self, data):
        """Store the DataFrame to display.

        Args:
            data: pandas DataFrame shown by the model.
        """
        super().__init__()
        self._data = data

    def rowCount(self, parent=None):
        """Return the number of DataFrame rows."""
        return self._data.shape[0]

    def columnCount(self, parent=None):
        """Return the number of DataFrame columns."""
        return self._data.shape[1]

    def data(self, index, role=Qt.DisplayRole):
        """Return the cell value as text for the display role, else None."""
        if index.isValid() and role == Qt.DisplayRole:
            return str(self._data.iloc[index.row(), index.column()])
        return None

    def headerData(self, section, orientation, role=Qt.DisplayRole):
        """Return column labels (horizontal) or index labels (vertical)."""
        if role != Qt.DisplayRole:
            return None
        if orientation == Qt.Horizontal:
            # Labels are not guaranteed to be str (e.g. integer column
            # names), so convert them exactly like the vertical header.
            return str(self._data.columns[section])
        if orientation == Qt.Vertical:
            return str(self._data.index[section])
        return None

class ParquetViewer(QMainWindow):
    """Minimal Parquet viewer that loads a file with the PyArrow engine."""

    def __init__(self):
        """Build the window: open button, info label, table and status bar."""
        super().__init__()
        self.setWindowTitle("Parquet File Viewer (PyArrow Only)")
        self.setGeometry(100, 100, 1000, 800)

        # Main widget and layout
        # NOTE(review): the ``layout`` attribute shadows QWidget.layout();
        # the name is kept because other code may rely on it.
        self.main_widget = QWidget()
        self.setCentralWidget(self.main_widget)
        self.layout = QVBoxLayout(self.main_widget)

        # Open-file button
        self.open_button = QPushButton("Open Parquet File")
        self.open_button.clicked.connect(self.open_file)
        self.layout.addWidget(self.open_button)

        # Label showing file information
        self.file_info_label = QLabel("No file loaded")
        self.file_info_label.setStyleSheet("font-weight: bold; color: #333;")
        self.layout.addWidget(self.file_info_label)

        # Table view
        self.table_view = QTableView()
        self.table_view.setStyleSheet("QTableView { font-size: 10pt; }")
        self.layout.addWidget(self.table_view)

        # Status bar
        self.status_bar = QStatusBar()
        self.setStatusBar(self.status_bar)

        # Start with an empty DataFrame
        self.df = pd.DataFrame()

    def open_file(self):
        """Ask for a Parquet file, load it and display it.

        On failure an error dialog is shown and the previously loaded data
        stays in place. A missing engine (``ImportError``) is reported with
        the install hint; every other error is reported as a load failure.
        """
        options = QFileDialog.Options()
        file_name, _ = QFileDialog.getOpenFileName(
            self, "Open Parquet File", "",
            "Parquet Files (*.parquet);;All Files (*)",
            options=options)

        if not file_name:
            return

        try:
            # Explicitly use the PyArrow engine
            df = pd.read_parquet(file_name, engine='pyarrow')

            # Only object-dtype columns can hold dict/list values; convert
            # those columns to text so they can be displayed.
            for col in df.select_dtypes(include="object").columns:
                if df[col].apply(lambda x: isinstance(x, (dict, list))).any():
                    df[col] = df[col].astype(str)

            # Replace the current data only after the load succeeded
            self.df = df

            # Install the model
            model = PandasModel(self.df)
            self.table_view.setModel(model)
            self.table_view.resizeColumnsToContents()

            # Update the file information
            self.file_info_label.setText(
                f"File: {file_name.split('/')[-1]} | "
                f"Rows: {len(self.df):,} | "
                f"Columns: {len(self.df.columns)} | "
                f"Engine: PyArrow")

            self.status_bar.showMessage("File loaded successfully", 3000)

        except ImportError as e:
            # The engine itself is missing: show the install hint
            error_msg = f"Error: {str(e)}\n\nRequired: pip install pyarrow"
            QMessageBox.critical(self, "Load Error", error_msg)
            self.status_bar.showMessage("Error: Install pyarrow first", 5000)
        except Exception as e:
            # Any other failure (corrupt file, permissions, ...) is not an
            # installation problem, so do not blame the pyarrow install.
            QMessageBox.critical(self, "Load Error", f"Error: {str(e)}")
            self.status_bar.showMessage("Error: Failed to load file", 5000)

if __name__ == "__main__":
    app = QApplication(sys.argv)
    app.setStyle('Fusion')

    # Use a 10pt application-wide font
    base_font = app.font()
    base_font.setPointSize(10)
    app.setFont(base_font)

    viewer = ParquetViewer()
    viewer.show()

    # Run the event loop and hand its return code to the interpreter
    exit_code = app.exec_()
    sys.exit(exit_code)

s3디버깅

유키공 — Thu, 24 Apr 2025 14:48:59 +0900

try:
    s3.put_object(...)
except Exception as e:
    # Not every exception carries a ``response`` dict, so fall back to the
    # exception text instead of failing inside the handler.
    response = getattr(e, "response", None)
    error = response.get("Error", {}) if isinstance(response, dict) else {}
    print(f"Error: {error.get('Code', e)}")  # AccessDenied, KMS.Disabled, ...

parquet

유키공 — Wed, 23 Apr 2025 15:47:08 +0900

import pandas as pd

# Read the Parquet file
df = pd.read_parquet('example.parquet')

# Print the whole DataFrame
print("전체 데이터:")
print(df)

# Print the first 5 rows
print("\n상위 5행:")
print(df.head())

# Show the structure; info() prints by itself and returns None, so wrapping
# it in print() would add a stray "None" line.
print("\n데이터 구조:")
df.info()

# Descriptive statistics
print("\n기술 통계:")
print(df.describe())

Yml

유키공 — Wed, 23 Apr 2025 10:21:05 +0900

import yaml

# Load the YAML file; the encoding is explicit so that the locale's default
# code page is not used for a UTF-8 file.
with open('config.yml', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Access values
print(config['app']['name'])  # "My Awesome App"
print(config['database']['production']['credentials']['username'])  # "admin"

# Iterate over list items
for feature in config['app']['features']:
    print(feature)

app:
  name: "My Awesome App"
  version: 2.3.1
  features:
    - "authentication"
    - "data_export"
    - "notifications"
  settings:
    cache_enabled: true
    max_retries: 3
    allowed_file_types: [".jpg", ".png", ".pdf"]

.env yaml

유키공 — Wed, 23 Apr 2025 08:42:29 +0900

DB_HOST: localhost
DB_PORT: 3306
DB_USER: root
DB_PASSWORD: secret

import yaml

# Read the settings; the encoding is explicit so that the locale's default
# code page is not used for a UTF-8 file.
with open("env.yml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

print(config["DB_HOST"])