10 REM > WordToCSV
   20 REM Convert Whitby 1871 Census text transcripts to CSV file
   30 :
   40 verbose%=TRUE
   50 max%=19:year%=1871:month%=4
   60 :
   70 A%=0:X%=1:os%=((USR&FFF4)AND&FF00)DIV256
   80 s$=".":fmax%=255:IFos%AND32:d$="\" ELSE IFos%AND8:d$="/" ELSE s$="/":d$=".":fmax%=10
   90 DIM head$(max%),info$(max%)
  100 FOR A%=0 TO max%:READ head$(A%):NEXT
  110 DATA "HOUSEHOLD NUMBER","SURNAME","NAME","AGE","RELATIONSHIP","SEX","MARITAL STATUS","STREET ADDRESS"
  120 DATA "OCCUPATION","BIRTH YEAR","BIRTH PLACE","BIRTH COUNTY","BIRTH COUNTRY","INDIVIDUAL NUMBER"
  130 DATA "CIVIL PARISH","SUB-REGISTRATION DISTRICT","ENUMERATION DISTRICT","PIECE","PAGE","FOLIO"
  140 :
  150 ON ERROR REPORT:PROCClose_All:PRINT" at line ";ERL:IF ERR=17:QUIT ELSE END
  160 out%=OPENOUT(LEFT$("Whitby"+STR$year%+s$+"csv",fmax%))
  170 IF out%=0:PRINT"Couldn't open output file, is it already open?":END
  180 PROCcsv_wr(out%,head$())
  190 IF verbose%:CLS:PRINT':FOR A%=0 TO max%:PRINTFNd0(A%,2);": ";head$(A%);":":NEXT:OFF
  200 :
  210 RESTORE +0
  220 READ SrcFile$
  230 REPEAT:PROCScan(SrcFile$):READ SrcFile$:UNTIL SrcFile$="*"
  240 CLOSE#out%:out%=0
  250 QUIT
  260 :
  270 DATA 1871-ED_01to07
  280 DATA 1871-ED_08to13
  290 DATA 1871-ED_14to16
  300 DATA 1871-ED_17to22
  310 DATA *
  320 :
  330 DEFPROCScan(in$)
  340 LOCAL DATA
  350 in%=OPENIN(in$+s$+"doc"):IFin%=0:ENDPROC
  360 IF verbose%:VDU30:PRINTSPC4;in$;SPC4
  370 REPEAT
  380   A$=GET$#in%
  390 UNTIL INSTR(A$,"CENSUS")
  400 piece$     =FNswap(MID$(A$,15),"-","/")
  410 folio$     =""
  420 enum$      =""
  430 subdist$   =""
  440 number$    =""
  450 district$  =""
  460 parish$    =""
  470 country$   =""
  480 county$    =""
  490 birth$     =""
  500 year$      =""
  510 occupation$=""
  520 address$   =""
  530 condition$ =""
  540 sex$       =""
  550 relation$  =""
  560 age$       =""
  570 name$      =""
  580 surname$   =""
  590 house$     =""
  600 REPEAT:A$=GET$#in%:PROCline:UNTIL EOF#in%
  610 CLOSE#in%:in%=0
  620 ENDPROC
  630 :
  640 DEFPROCline
  650 IF A$="" OR ASCA$>126 OR ASCA$<32:ENDPROC
  660 IF INSTR(A$,CHR$7)=0 THEN
  670   IF INSTR(A$,"Enum"THEN
  680     FOR B%=1 TO 2:A%=INSTR(A$," "):IF A%=0:A%=INSTR(A$,CHR$9)
  690     A$=FNs(MID$(A$,A%+1)):NEXT B%
  700     A%=INSTR(A$,CHR$9):IF A%=0:A%=INSTR(A$," ")
  710     IF A%:enum$=LEFT$(A$,A%-1):parish$=MID$(A$,A%+1) ELSE enum$=A$:parish$="Whitby"
  720     A%=INSTR(parish$,"("):IF A%:parish$=FNs(LEFT$(parish$,A%-1))
  730   ENDIF
  740   A%=INSTR(A$,CHR$9)
  750   IF A% THEN
  760     REPEAT A$=LEFT$(A$,A%-1)+" "+MID$(A$,A%+1):A%=INSTR(A$,CHR$9):UNTIL A%=0
  770     A%=INSTR(A$,"Fo ")
  780     address$=FNs(LEFT$(A$,A%-1))
  790     folio$=FNswap(MID$(A$,A%+3)," - ","/")
  800     folio$=FNswap(folio$,CHR$32+CHR$150+CHR$32,"/")
  810     folio$=FNs(FNswap(folio$,CHR$32+CHR$150,"/"))
  820     A%=INSTR(folio$,"/"):page$=MID$(folio$,A%+1):folio$=LEFT$(folio$,A%-1)
  830     PROCaddress
  840     ENDPROC
  850   ELSE
  860     ENDPROC
  870   ENDIF
  880 ENDIF
  890 :
  900 entry$=A$
  910 REPEAT
  920   info$()="":n%=0
  930   REPEAT
  940     A%=INSTR(entry$+CHR$7,CHR$7)
  950     info$(n%)=FNs(LEFT$(entry$,A%-1)):entry$=FNs(MID$(entry$,A%+1))
  960     n%=n%+1
  970   UNTIL n%=7 OR entry$=""
  980   IF RIGHT$(info$(0),3)=" do":info$(0)=FNs(LEFT$(info$(0),LENinfo$(0)-3))+" "+surname$
  990   IF info$(0)<>"do":name$      =info$(0)
 1000   IF info$(1)<>"do":relation$  =info$(1)
 1010   IF info$(2)<>"do":condition$ =info$(2)
 1020   IF info$(3)<>"do":age$       =info$(3)
 1030   IF LEFT$(info$(4),3)="do ":info$(4)=occupation$+"'s "+FNs(MID$(info$(4),3))
 1040   IF info$(4)<>"do":occupation$=info$(4)
 1050   IF info$(5)<>"do":birth$     =info$(5)
 1060   PROCentry
 1070 UNTIL entry$=""
 1080 ENDPROC
 1090 :
 1100 DEFPROCaddress
 1110 A$=address$
 1120 :
 1130 REM Swap building name
 1140 IF INSTR(A$,"Queen"):A$="3 "+A$
 1150 IF RIGHT$(A$,2)=" H":A$=A$+"ouse"
 1160 A%=INSTR(A$,CHR$32+CHR$150+CHR$32)
 1170 IF A%:A$=FNs(MID$(A$,A%+3))+", "+FNs(LEFT$(A$,A%-1))
 1180 :
 1190 REM Ensure correct apostophication
 1200 RESTORE +0
 1210 READ B$:REPEAT
 1220   A$=FNswap(A$,"s "+B$,"'s "+B$)
 1230   READ B$
 1240 UNTIL B$="*"
 1250 DATA B,C,G,H,L,S,T,Y,*
 1260 REPEAT:A$=FNswap(A$,"''","'"):UNTIL INSTR(A$,"''")=0
 1270 :
 1280 REM Repair names
 1290 A$=FNswap(A$,"Sqr","Square")
 1300 A$=FNswap(A$,"Argt","Argument")
 1310 A$=FNswap(A$,"Lion Band","Lion Bank")
 1320 A$=FNswap(A$,"Severn Stars","Seven Stars")
 1330 A$=FNswap(A$,"Pan Wells","Pan Well Steps")
 1340 A$=FNswap(A$,", Staith",", St. Ann's Staith")
 1350 A$=FNswap(A$,"Flowergate Rose ? Yard","Rose & Crown Yard")
 1360 IF LEFT$(A$,11)="Flowergate "     :A$=MID$(A$,12)+", "+LEFT$(A$,11)
 1370 IF LEFT$(A$,13)="Little Angel "   :A$=LEFT$(A$,12)+", "+MID$(A$,14)
 1380 IF LEFT$(A$,16)="Skinner Street, ":A$=MID$(A$,17)+", "+LEFT$(A$,14)
 1390 :
 1400 REM Canonicalise addresses
 1410 READ street$
 1420 REPEAT
 1430   READ yard$
 1440   REPEAT
 1450     A$=FNadd(A$,yard$,street$)
 1460     READ yard$
 1470   UNTIL yard$="*"
 1480   READ street$
 1490 UNTIL street$="*"
 1500 DATA "Salt Pan Well Steps","White Row",*
 1510 DATA "123 Church Street","Monkman's",*
 1520 DATA "McLachlin's Yard","Bolton's Buildings",*
 1530 DATA "Golden Lion Bank","Golden Lion Inn",*
 1540 DATA "Downdinner Hill","Hanover Terrace",*
 1550 DATA "Spital Bridge/Dog Lane","Olive",*
 1560 DATA "Green Lane","New Gardens",*
 1570 DATA "High Well Yard, Church Street","Ainsley's",*
 1580 DATA "Waterstead Lane","Boghole",*
 1590 :
 1600 DATA "Church Street"
 1610 DATA "Blackburn's","Wilson's","Kiln","Monkmans","Borough","Ditchburn's","Blackhorse"
 1620 DATA "Argument's","New Way","Green's","Renwick's","Ainsley's","Benson's"
 1630 DATA "Smale's","Falkingbridge","Fleece Inn Yard","Elbow Yard","Taylerson's"
 1640 DATA "School Yard","Corner's","Craven's","Hospital","Dark Entry Yard"
 1650 DATA "Cappleman's","Boanson's","Gaskin's","Sayer's","Walk","Steps","Jefferson's"
 1660 DATA "Prospect Place","Hall's Yard","Ivy Yard","Brewery Yard","Old Gas House Yard","Salt Pan"
 1670 DATA "Boulby","Timber","Studley","Horner's","White Horse","Dock Co"
 1680 DATA *
 1690 REM DATA "Sandgate"
 1700 REM DATA "Nicholson's","Queen Hotel"
 1710 REM DATA *
 1720 DATA "Grape Lane"
 1730 DATA "Tin","Lamb's","Raffled Anchor","Custom House Hotel"
 1740 DATA *
 1750 DATA "Baxtergate"
 1760 DATA "Belle Hotel","Mead's","Angel Hotel","Vipond's","Goodwill's","Ward's","Plough Inn"
 1770 DATA "Brignell's","Tyreman's","Woodhouse","Mackridge's","Albion Hotel","Trueman's"
 1780 DATA "Wald's","Leng's","Coffee Bar","Bland's","Dotchson's","Brunswick Chapel"
 1790 DATA "Breckon's"
 1800 DATA *
 1810 DATA "Flowergate"
 1820 DATA "Rose & Crown","Haydock's","McLachlin's","Marwood's","Fawcett's","Easterby"
 1830 DATA "Old Abbey Inn","Gardiner's"
 1840 DATA *
 1850 DATA "Cliff Street"
 1860 DATA "Andrew's"
 1870 DATA *
 1880 REM DATA "St. Ann's Staith"
 1890 REM DATA "Wear's","Buck Hotel","Old Post Office","St Ann's Lane"
 1900 REM DATA *
 1910 DATA "Haggersgate"
 1920 DATA "Paradise","Whitby Arms","Elephant","Institute","Neptune"
 1930 DATA "Muncaster's","Miller's"
 1940 DATA *
 1950 DATA "Skinner Street"
 1960 DATA "Sandal","Harrison's","Ivy Place","Botham's","Abbey View"
 1970 DATA *
 1980 DATA "Bagdale"
 1990 DATA "Broomfield Terrace","Carr's"
 2000 DATA *
 2010 DATA "Larpool"
 2020 DATA "Cock","Crowdy"
 2030 DATA *
 2040 DATA "Spital Bridge"
 2050 DATA "Whitehall","Kelp"
 2060 DATA *
 2070 DATA *
 2080 :
 2090 address$=FNs(A$)
 2100 ENDPROC
 2110 :
 2120 DEFPROCentry
 2130 REM Seperate surname
 2140 surname$=name$:name$=""
 2150 REPEAT
 2160   A%=INSTR(surname$," ")
 2170   IF A%:name$=FNs(name$+" "+FNs(LEFT$(surname$,A%-1))):surname$=FNs(MID$(surname$,A%+1))
 2180 UNTIL A%=0
 2190 IF LENsurname$<2:ENDPROC
 2200 IF LENaddress$<3:ENDPROC
 2210 :
 2220 REM Tidy names
 2230 name$=FNswap(name$,"Wm","William")
 2240 name$=FNswap(name$,"Rbt","Robert")
 2250 IF RIGHT$(name$,3)="Geo":name$=name$+"rge"
 2260 :
 2270 REM Tidy young ages
 2280 IF RIGHT$(age$,1)="m":age$=age$+"th"
 2290 IF RIGHT$(age$,1)="w":age$=age$+"eek"
 2300 IF RIGHT$(age$,1)="d":age$=age$+"ay"
 2310 IF LENage$<>LENSTR$VALage$:IFVALage$>1:IFINSTR(age$,"?")=0:IFRIGHT$(age$,1)<>"s":age$=age$+"s"
 2320 :
 2330 REM Calculate birth year
 2340 IF LENage$<>LENSTR$VALage$ AND INSTR(age$,"?")=0 THEN
 2350   IF INSTR(age$,"m"THEN
 2360     year$=STR$(year%-(VALage$-month%+13)DIV12) :REM Age in months
 2370   ELSE
 2380     year$=STR$year%                            :REM Age in weeks or days
 2390   ENDIF
 2400 ELSE
 2410   year$="abt "+STR$(year%-VALage$-1)           :REM Age in years
 2420 ENDIF
 2430 :
 2440 REM Tidy relationship
 2450 IF relation$="Daur":relation$="Daughter"
 2460 IF RIGHT$(relation$,2)=" D":relation$=relation$+"aughter"
 2470 IF RIGHT$(relation$,2)=" S":relation$=relation$+"on"
 2480 IF LEFT$(relation$,2)="G ":relation$="Grand "+MID$(relation$,2)
 2490 IF LEFT$(relation$,2)="F ":relation$="Father"+MID$(relation$,2)
 2500 IF LEFT$(relation$,2)="M ":relation$="Mother"+MID$(relation$,2)
 2510 IF LEFT$(relation$,2)="S ":relation$="Son"+MID$(relation$,2)
 2520 IF relation$="Gen Serv":relation$="General Servant"
 2530 :
 2540 REM Tidy condition
 2550 IF condition$="M"   :condition$="Married"
 2560 IF condition$="Unm" :condition$="Unmarried"
 2570 IF condition$="Wid" :condition$="Widow":sex$="F"
 2580 IF condition$="Widr":condition$="Widower":sex$="M"
 2590 IF condition$="N K" :condition$="Not known"
 2600 :
 2610 REM Tidy sex
 2620 IF relation$="Wife":sex$="F"
 2630 IF INSTR(relation$,"Daughter"):sex$="F"
 2640 IF INSTR(relation$,"Son"):sex$="M"
 2650 IF sex$="F":sex$="Female"
 2660 IF sex$="M":sex$="Male"
 2670 :
 2680 REM Tidy birthplace
 2690 A%=INSTR(birth$,"  ")
 2700 IF A%:county$=LEFT$(birth$,A%-1):birth$=MID$(birth$,A%+2)
 2710 IF county$="Yorks":county$="Yorkshire"
 2720 IF county$="Lincs":county$="Lincolnshire"
 2730 birth$=FNswap(birth$,"R H Bay","Robin Hood's Bay")
 2740 IF country$="" THEN
 2750   IF county$="Yorkshire"   :country$="England"
 2760   IF county$="Lincolnshire":country$="England"
 2770 ENDIF
 2780 :
 2790 REM Fill output array
 2800 info$(0)=house$             :REM HOUSEHOLD NUMBER
 2810 info$(1)=surname$           :REM SURNAME
 2820 info$(2)=name$              :REM NAME
 2830 info$(3)=age$               :REM AGE
 2840 info$(4)=relation$          :REM RELATIONSHIP
 2850 info$(5)=sex$               :REM GENDER
 2860 info$(6)=condition$         :REM MARITAL STATUS
 2870 info$(7)=address$           :REM STREET ADDRESS
 2880 info$(8)=occupation$        :REM OCCUPATION
 2890 info$(9)=year$              :REM BIRTH YEAR
 2900 info$(10)=birth$            :REM BIRTH PLACE
 2910 info$(11)=county$           :REM BIRTH PLACE
 2920 info$(12)=country$          :REM BIRTH PLACE
 2930 info$(13)=""                :REM INDIVIDUAL NUMBER
 2940 info$(14)=parish$           :REM CIVIL PARISH
 2950 REM info$(15)="Whitby"      :REM REGISTRATION DISTRICT
 2960 REM info$(16)=""            :REM REGISTRATION DISTRICT NUMBER
 2970 info$(15)="Whitby"          :REM SUB-REGISTRATION DISTRICT
 2980 info$(16)=enum$             :REM ED, INSTITUTION, OR VESSEL
 2990 info$(17)=piece$            :REM PIECE
 3000 info$(18)=page$             :REM PAGE
 3010 info$(19)=folio$            :REM FOLIO
 3020 :
 3030 IF verbose%:FOR A%=0 TO max%:PRINTTAB(36,A%+2);LEFT$(info$(A%),45);SPC(45-LENLEFT$(info$(A%),45)):NEXT
 3040 PROCcsv_wr(out%,info$())
 3050 ENDPROC
 3060 :
 3070 DEFFNadd(addr$,yard$,street$)
 3080 IF INSTR(addr$,yard$+" "):IF INSTR(addr$,street$)=0:=addr$+", "+street$
 3090 IF RIGHT$(addr$,LENyard$)=yard$:IF INSTR(addr$,street$)=0:=addr$+", "+street$
 3100 =addr$
 3110 :
 3120 DEFFNswap(in$,match$,swap$)
 3130 A%=INSTR(in$,match$):IF A%:in$=LEFT$(in$,A%-1)+swap$+MID$(in$,A%+LENmatch$)
 3140 =in$
 3150 :
 3160 DEFPROCClose_All
 3170 out%=out%:IFout%:A%=out%:out%=0:CLOSE#A%
 3180 in%=in%:IFin%:A%=in%:in%=0:CLOSE#A%
 3190 ENDPROC
 3200 :
 3210 DEFFNd0(A%,N%)=RIGHT$("        "+STR$A%,N%)
 3220 DEFFNuc(A$):LOCAL B$:IFA$="":=""
 3230 REPEATB$=B$+CHR$(ASCA$AND((A$<"@")OR&DF)):A$=MID$(A$,2):UNTILA$="":=B$
 3240 DEFFNlc(A$):LOCAL B$:IFA$="":=""
 3250 REPEATB$=B$+CHR$(ASCA$OR((A$<"_")AND&20)):A$=MID$(A$,2):UNTILA$="":=B$
 3260 DEFFNs(A$):IFLEFT$(A$,1)=" ":REPEATA$=MID$(A$,2):UNTILLEFT$(A$,1)<>" "
 3270 IFRIGHT$(A$,1)=" ":REPEATA$=LEFT$(A$,LENA$-1):UNTILRIGHT$(A$,1)<>" "
 3280 =A$
 3290 :
 3300 DEFPROCcsv_wr(o%,array$())
 3310 LOCAL n%,q%:n%=DIM(array$(),1)
 3320 FOR A%=0 TO n%:A$=array$(A%)
 3330   q%=INSTR(A$,","):IFq%=0:q%=ASCA$=48:IFq%=0:q%=LENSTR$VALA$>8:IFq%=0:IFVALA$:q%=INSTR(A$,"/")
 3340   IF q%:A$=""""+A$+"""":IFINSTR(A$,",")=0:A$="="+A$
 3350   BPUT#o%,A$;:IF A%<>n%:BPUT#o%,",";
 3360 NEXT A%:BPUT#o%,""
 3370 ENDPROC